From a0f39302919fbacf7a0d407f01b1a50413ea6f70 Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Mon, 02 Mar 2026 23:15:13 +0100
Subject: [PATCH] feat: on-device speech recognition, navigation screen, session picker
---
components/chat/VoiceButton.tsx | 179 +++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 125 insertions(+), 54 deletions(-)
diff --git a/components/chat/VoiceButton.tsx b/components/chat/VoiceButton.tsx
index b86a370..9ebaa82 100644
--- a/components/chat/VoiceButton.tsx
+++ b/components/chat/VoiceButton.tsx
@@ -1,121 +1,192 @@
-import React, { useCallback, useRef, useState } from "react";
+import React, { useCallback, useEffect, useRef, useState } from "react";
import { Animated, Pressable, Text, View } from "react-native";
import * as Haptics from "expo-haptics";
-import { startRecording, stopRecording } from "../../services/audio";
-import { Audio } from "expo-av";
+import {
+ ExpoSpeechRecognitionModule,
+ useSpeechRecognitionEvent,
+} from "expo-speech-recognition";
interface VoiceButtonProps {
- onVoiceMessage: (audioUri: string, durationMs: number) => void;
+ onTranscript: (text: string) => void;
}
-const VOICE_BUTTON_SIZE = 88;
+const VOICE_BUTTON_SIZE = 72;
-export function VoiceButton({ onVoiceMessage }: VoiceButtonProps) {
- const [isRecording, setIsRecording] = useState(false);
- const recordingRef = useRef<Audio.Recording | null>(null);
- const scaleAnim = useRef(new Animated.Value(1)).current;
+/**
+ * Tap-to-toggle voice button using on-device speech recognition.
+ * - Tap once: start listening
+ * - Tap again: stop and send transcript
+ * - Long-press while listening: cancel (discard)
+ */
+export function VoiceButton({ onTranscript }: VoiceButtonProps) {
+ const [isListening, setIsListening] = useState(false);
+ const [transcript, setTranscript] = useState("");
const pulseAnim = useRef(new Animated.Value(1)).current;
+ const glowAnim = useRef(new Animated.Value(0)).current;
const pulseLoop = useRef<Animated.CompositeAnimation | null>(null);
+ const cancelledRef = useRef(false);
+
+ // Speech recognition events
+ useSpeechRecognitionEvent("start", () => {
+ setIsListening(true);
+ });
+
+ useSpeechRecognitionEvent("end", () => {
+ setIsListening(false);
+ stopPulse();
+
+ // Send transcript if we have one and weren't cancelled
+ if (!cancelledRef.current && transcript.trim()) {
+ onTranscript(transcript.trim());
+ }
+ setTranscript("");
+ cancelledRef.current = false;
+ });
+
+ useSpeechRecognitionEvent("result", (event) => {
+ const text = event.results[0]?.transcript ?? "";
+ setTranscript(text);
+ });
+
+ useSpeechRecognitionEvent("error", (event) => {
+ console.error("Speech recognition error:", event.error, event.message);
+ setIsListening(false);
+ stopPulse();
+ setTranscript("");
+ });
const startPulse = useCallback(() => {
pulseLoop.current = Animated.loop(
Animated.sequence([
Animated.timing(pulseAnim, {
toValue: 1.15,
- duration: 600,
+ duration: 700,
useNativeDriver: true,
}),
Animated.timing(pulseAnim, {
toValue: 1,
- duration: 600,
+ duration: 700,
useNativeDriver: true,
}),
])
);
pulseLoop.current.start();
- }, [pulseAnim]);
+ Animated.timing(glowAnim, {
+ toValue: 1,
+ duration: 300,
+ useNativeDriver: true,
+ }).start();
+ }, [pulseAnim, glowAnim]);
const stopPulse = useCallback(() => {
pulseLoop.current?.stop();
pulseAnim.setValue(1);
- }, [pulseAnim]);
-
- const handlePressIn = useCallback(async () => {
- Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Medium);
-
- Animated.spring(scaleAnim, {
- toValue: 0.92,
+ Animated.timing(glowAnim, {
+ toValue: 0,
+ duration: 200,
useNativeDriver: true,
}).start();
+ }, [pulseAnim, glowAnim]);
- const recording = await startRecording();
- if (recording) {
- recordingRef.current = recording;
- setIsRecording(true);
- startPulse();
- }
- }, [scaleAnim, startPulse]);
+ const startListening = useCallback(async () => {
+ const result = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
+ if (!result.granted) return;
- const handlePressOut = useCallback(async () => {
- Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Light);
+ cancelledRef.current = false;
+ setTranscript("");
+ startPulse();
- Animated.spring(scaleAnim, {
- toValue: 1,
- useNativeDriver: true,
- }).start();
+ ExpoSpeechRecognitionModule.start({
+ lang: "en-US",
+ interimResults: true,
+ continuous: true,
+ });
+ }, [startPulse]);
+ const stopAndSend = useCallback(() => {
stopPulse();
- setIsRecording(false);
+ cancelledRef.current = false;
+ ExpoSpeechRecognitionModule.stop();
+ }, [stopPulse]);
- if (recordingRef.current) {
- const result = await stopRecording();
- recordingRef.current = null;
+ const cancelListening = useCallback(() => {
+ Haptics.notificationAsync(Haptics.NotificationFeedbackType.Warning);
+ stopPulse();
+ cancelledRef.current = true;
+ setTranscript("");
+ ExpoSpeechRecognitionModule.abort();
+ }, [stopPulse]);
- if (result && result.durationMs > 500) {
- onVoiceMessage(result.uri, result.durationMs);
- }
+ const handleTap = useCallback(async () => {
+ Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Medium);
+ if (isListening) {
+ stopAndSend();
+ } else {
+ await startListening();
}
- }, [scaleAnim, stopPulse, onVoiceMessage]);
+ }, [isListening, stopAndSend, startListening]);
+
+ const handleLongPress = useCallback(() => {
+ if (isListening) {
+ cancelListening();
+ }
+ }, [isListening, cancelListening]);
return (
- <View className="items-center justify-center py-4">
- {/* Pulse ring — only visible while recording */}
+ <View style={{ alignItems: "center", justifyContent: "center" }}>
+ {/* Outer pulse ring */}
<Animated.View
style={{
position: "absolute",
width: VOICE_BUTTON_SIZE + 24,
height: VOICE_BUTTON_SIZE + 24,
borderRadius: (VOICE_BUTTON_SIZE + 24) / 2,
- backgroundColor: isRecording ? "rgba(255, 159, 67, 0.15)" : "transparent",
+ backgroundColor: isListening ? "rgba(255, 159, 67, 0.12)" : "transparent",
transform: [{ scale: pulseAnim }],
+ opacity: glowAnim,
}}
/>
{/* Button */}
- <Animated.View style={{ transform: [{ scale: scaleAnim }] }}>
- <Pressable
- onPressIn={handlePressIn}
- onPressOut={handlePressOut}
+ <Pressable
+ onPress={handleTap}
+ onLongPress={handleLongPress}
+ delayLongPress={600}
+ >
+ <View
style={{
width: VOICE_BUTTON_SIZE,
height: VOICE_BUTTON_SIZE,
borderRadius: VOICE_BUTTON_SIZE / 2,
- backgroundColor: isRecording ? "#FF9F43" : "#4A9EFF",
+ backgroundColor: isListening ? "#FF9F43" : "#4A9EFF",
alignItems: "center",
justifyContent: "center",
- shadowColor: isRecording ? "#FF9F43" : "#4A9EFF",
+ shadowColor: isListening ? "#FF9F43" : "#4A9EFF",
shadowOffset: { width: 0, height: 4 },
shadowOpacity: 0.4,
shadowRadius: 12,
elevation: 8,
}}
>
- <Text style={{ fontSize: 32 }}>{isRecording ? "🎙" : "🎤"}</Text>
- </Pressable>
- </Animated.View>
+ <Text style={{ fontSize: 28 }}>{isListening ? "⏹" : "🎤"}</Text>
+ </View>
+ </Pressable>
- <Text className="text-pai-text-muted text-xs mt-3">
- {isRecording ? "Release to send" : "Hold to talk"}
+ {/* Label / transcript preview */}
+ <Text
+ style={{
+ color: isListening ? "#FF9F43" : "#5A5A78",
+ fontSize: 11,
+ marginTop: 4,
+ fontWeight: isListening ? "600" : "400",
+ maxWidth: 200,
+ textAlign: "center",
+ }}
+ numberOfLines={2}
+ >
+ {isListening
+ ? transcript || "Listening..."
+ : "Tap to talk"}
</Text>
</View>
);
--
Gitblit v1.3.1