From a0f39302919fbacf7a0d407f01b1a50413ea6f70 Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Mon, 02 Mar 2026 23:15:13 +0100
Subject: [PATCH] feat: on-device speech recognition, navigation screen, session picker

---
 components/chat/VoiceButton.tsx |  179 +++++++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 125 insertions(+), 54 deletions(-)

diff --git a/components/chat/VoiceButton.tsx b/components/chat/VoiceButton.tsx
index b86a370..9ebaa82 100644
--- a/components/chat/VoiceButton.tsx
+++ b/components/chat/VoiceButton.tsx
@@ -1,121 +1,192 @@
-import React, { useCallback, useRef, useState } from "react";
+import React, { useCallback, useEffect, useRef, useState } from "react";
 import { Animated, Pressable, Text, View } from "react-native";
 import * as Haptics from "expo-haptics";
-import { startRecording, stopRecording } from "../../services/audio";
-import { Audio } from "expo-av";
+import {
+  ExpoSpeechRecognitionModule,
+  useSpeechRecognitionEvent,
+} from "expo-speech-recognition";
 
 interface VoiceButtonProps {
-  onVoiceMessage: (audioUri: string, durationMs: number) => void;
+  onTranscript: (text: string) => void;
 }
 
-const VOICE_BUTTON_SIZE = 88;
+const VOICE_BUTTON_SIZE = 72;
 
-export function VoiceButton({ onVoiceMessage }: VoiceButtonProps) {
-  const [isRecording, setIsRecording] = useState(false);
-  const recordingRef = useRef<Audio.Recording | null>(null);
-  const scaleAnim = useRef(new Animated.Value(1)).current;
+/**
+ * Tap-to-toggle voice button using on-device speech recognition.
+ * - Tap once: start listening
+ * - Tap again: stop and send transcript
+ * - Long-press while listening: cancel (discard)
+ */
+export function VoiceButton({ onTranscript }: VoiceButtonProps) {
+  const [isListening, setIsListening] = useState(false);
+  const [transcript, setTranscript] = useState("");
   const pulseAnim = useRef(new Animated.Value(1)).current;
+  const glowAnim = useRef(new Animated.Value(0)).current;
   const pulseLoop = useRef<Animated.CompositeAnimation | null>(null);
+  const cancelledRef = useRef(false);
+
+  // Speech recognition events
+  useSpeechRecognitionEvent("start", () => {
+    setIsListening(true);
+  });
+
+  useSpeechRecognitionEvent("end", () => {
+    setIsListening(false);
+    stopPulse();
+
+    // Send transcript if we have one and weren't cancelled
+    if (!cancelledRef.current && transcript.trim()) {
+      onTranscript(transcript.trim());
+    }
+    setTranscript("");
+    cancelledRef.current = false;
+  });
+
+  useSpeechRecognitionEvent("result", (event) => {
+    const text = event.results[0]?.transcript ?? "";
+    setTranscript(text);
+  });
+
+  useSpeechRecognitionEvent("error", (event) => {
+    console.error("Speech recognition error:", event.error, event.message);
+    setIsListening(false);
+    stopPulse();
+    setTranscript("");
+  });
 
   const startPulse = useCallback(() => {
     pulseLoop.current = Animated.loop(
       Animated.sequence([
         Animated.timing(pulseAnim, {
           toValue: 1.15,
-          duration: 600,
+          duration: 700,
           useNativeDriver: true,
         }),
         Animated.timing(pulseAnim, {
           toValue: 1,
-          duration: 600,
+          duration: 700,
           useNativeDriver: true,
         }),
       ])
     );
     pulseLoop.current.start();
-  }, [pulseAnim]);
+    Animated.timing(glowAnim, {
+      toValue: 1,
+      duration: 300,
+      useNativeDriver: true,
+    }).start();
+  }, [pulseAnim, glowAnim]);
 
   const stopPulse = useCallback(() => {
     pulseLoop.current?.stop();
     pulseAnim.setValue(1);
-  }, [pulseAnim]);
-
-  const handlePressIn = useCallback(async () => {
-    Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Medium);
-
-    Animated.spring(scaleAnim, {
-      toValue: 0.92,
+    Animated.timing(glowAnim, {
+      toValue: 0,
+      duration: 200,
       useNativeDriver: true,
     }).start();
+  }, [pulseAnim, glowAnim]);
 
-    const recording = await startRecording();
-    if (recording) {
-      recordingRef.current = recording;
-      setIsRecording(true);
-      startPulse();
-    }
-  }, [scaleAnim, startPulse]);
+  const startListening = useCallback(async () => {
+    const result = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
+    if (!result.granted) return;
 
-  const handlePressOut = useCallback(async () => {
-    Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Light);
+    cancelledRef.current = false;
+    setTranscript("");
+    startPulse();
 
-    Animated.spring(scaleAnim, {
-      toValue: 1,
-      useNativeDriver: true,
-    }).start();
+    ExpoSpeechRecognitionModule.start({
+      lang: "en-US",
+      interimResults: true,
+      continuous: true,
+    });
+  }, [startPulse]);
 
+  const stopAndSend = useCallback(() => {
     stopPulse();
-    setIsRecording(false);
+    cancelledRef.current = false;
+    ExpoSpeechRecognitionModule.stop();
+  }, [stopPulse]);
 
-    if (recordingRef.current) {
-      const result = await stopRecording();
-      recordingRef.current = null;
+  const cancelListening = useCallback(() => {
+    Haptics.notificationAsync(Haptics.NotificationFeedbackType.Warning);
+    stopPulse();
+    cancelledRef.current = true;
+    setTranscript("");
+    ExpoSpeechRecognitionModule.abort();
+  }, [stopPulse]);
 
-      if (result && result.durationMs > 500) {
-        onVoiceMessage(result.uri, result.durationMs);
-      }
+  const handleTap = useCallback(async () => {
+    Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Medium);
+    if (isListening) {
+      stopAndSend();
+    } else {
+      await startListening();
     }
-  }, [scaleAnim, stopPulse, onVoiceMessage]);
+  }, [isListening, stopAndSend, startListening]);
+
+  const handleLongPress = useCallback(() => {
+    if (isListening) {
+      cancelListening();
+    }
+  }, [isListening, cancelListening]);
 
   return (
-    <View className="items-center justify-center py-4">
-      {/* Pulse ring — only visible while recording */}
+    <View style={{ alignItems: "center", justifyContent: "center" }}>
+      {/* Outer pulse ring */}
       <Animated.View
         style={{
           position: "absolute",
           width: VOICE_BUTTON_SIZE + 24,
           height: VOICE_BUTTON_SIZE + 24,
           borderRadius: (VOICE_BUTTON_SIZE + 24) / 2,
-          backgroundColor: isRecording ? "rgba(255, 159, 67, 0.15)" : "transparent",
+          backgroundColor: isListening ? "rgba(255, 159, 67, 0.12)" : "transparent",
           transform: [{ scale: pulseAnim }],
+          opacity: glowAnim,
         }}
       />
 
       {/* Button */}
-      <Animated.View style={{ transform: [{ scale: scaleAnim }] }}>
-        <Pressable
-          onPressIn={handlePressIn}
-          onPressOut={handlePressOut}
+      <Pressable
+        onPress={handleTap}
+        onLongPress={handleLongPress}
+        delayLongPress={600}
+      >
+        <View
           style={{
             width: VOICE_BUTTON_SIZE,
             height: VOICE_BUTTON_SIZE,
             borderRadius: VOICE_BUTTON_SIZE / 2,
-            backgroundColor: isRecording ? "#FF9F43" : "#4A9EFF",
+            backgroundColor: isListening ? "#FF9F43" : "#4A9EFF",
             alignItems: "center",
             justifyContent: "center",
-            shadowColor: isRecording ? "#FF9F43" : "#4A9EFF",
+            shadowColor: isListening ? "#FF9F43" : "#4A9EFF",
             shadowOffset: { width: 0, height: 4 },
             shadowOpacity: 0.4,
             shadowRadius: 12,
             elevation: 8,
           }}
         >
-          <Text style={{ fontSize: 32 }}>{isRecording ? "🎙" : "🎤"}</Text>
-        </Pressable>
-      </Animated.View>
+          <Text style={{ fontSize: 28 }}>{isListening ? "⏹" : "🎤"}</Text>
+        </View>
+      </Pressable>
 
-      <Text className="text-pai-text-muted text-xs mt-3">
-        {isRecording ? "Release to send" : "Hold to talk"}
+      {/* Label / transcript preview */}
+      <Text
+        style={{
+          color: isListening ? "#FF9F43" : "#5A5A78",
+          fontSize: 11,
+          marginTop: 4,
+          fontWeight: isListening ? "600" : "400",
+          maxWidth: 200,
+          textAlign: "center",
+        }}
+        numberOfLines={2}
+      >
+        {isListening
+          ? transcript || "Listening..."
+          : "Tap to talk"}
       </Text>
     </View>
   );
 }
--
Gitblit v1.3.1