From a0f39302919fbacf7a0d407f01b1a50413ea6f70 Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Mon, 02 Mar 2026 23:15:13 +0100
Subject: [PATCH] feat: on-device speech recognition, navigation screen, session picker
---
contexts/ChatContext.tsx | 198 +++++++++++++++++++++++++++++++++++++++---------
1 file changed, 159 insertions(+), 39 deletions(-)
diff --git a/contexts/ChatContext.tsx b/contexts/ChatContext.tsx
index a6c1ef9..a0b62fc 100644
--- a/contexts/ChatContext.tsx
+++ b/contexts/ChatContext.tsx
@@ -6,9 +6,9 @@
useRef,
useState,
} from "react";
-import { Message, WebSocketMessage } from "../types";
+import { Message, WsIncoming, WsSession } from "../types";
import { useConnection } from "./ConnectionContext";
-import { playAudio } from "../services/audio";
+import { playAudio, encodeAudioToBase64 } from "../services/audio";
function generateId(): string {
return Date.now().toString(36) + Math.random().toString(36).slice(2);
@@ -19,13 +19,29 @@
sendTextMessage: (text: string) => void;
sendVoiceMessage: (audioUri: string, durationMs?: number) => void;
clearMessages: () => void;
+ // Session management
+ sessions: WsSession[];
+ requestSessions: () => void;
+ switchSession: (sessionId: string) => void;
+ renameSession: (sessionId: string, name: string) => void;
+ // Screenshot / navigation
+ latestScreenshot: string | null;
+ requestScreenshot: () => void;
+ sendNavKey: (key: string) => void;
}
const ChatContext = createContext<ChatContextValue | null>(null);
export function ChatProvider({ children }: { children: React.ReactNode }) {
const [messages, setMessages] = useState<Message[]>([]);
- const { sendTextMessage: wsSend, sendVoiceMessage: wsVoice, onMessageReceived } = useConnection();
+ const [sessions, setSessions] = useState<WsSession[]>([]);
+ const [latestScreenshot, setLatestScreenshot] = useState<string | null>(null);
+ const {
+ sendTextMessage: wsSend,
+ sendVoiceMessage: wsVoice,
+ sendCommand,
+ onMessageReceived,
+ } = useConnection();
const addMessage = useCallback((msg: Message) => {
setMessages((prev) => [...prev, msg]);
@@ -42,34 +58,92 @@
// Handle incoming WebSocket messages
useEffect(() => {
- onMessageReceived.current = (data: WebSocketMessage) => {
- if (data.type === "text") {
- const msg: Message = {
- id: generateId(),
- role: "assistant",
- type: "text",
- content: data.content,
- timestamp: Date.now(),
- status: "sent",
- };
- setMessages((prev) => [...prev, msg]);
- } else if (data.type === "voice") {
- const msg: Message = {
- id: generateId(),
- role: "assistant",
- type: "voice",
- content: data.content ?? "",
- audioUri: data.audioBase64
- ? `data:audio/mp4;base64,${data.audioBase64}`
- : undefined,
- timestamp: Date.now(),
- status: "sent",
- };
- setMessages((prev) => [...prev, msg]);
-
- // Auto-play incoming voice messages
- if (msg.audioUri) {
- playAudio(msg.audioUri).catch(() => {});
+ onMessageReceived.current = (data: WsIncoming) => {
+ switch (data.type) {
+ case "text": {
+ const msg: Message = {
+ id: generateId(),
+ role: "assistant",
+ type: "text",
+ content: data.content,
+ timestamp: Date.now(),
+ status: "sent",
+ };
+ setMessages((prev) => [...prev, msg]);
+ break;
+ }
+ case "voice": {
+ const msg: Message = {
+ id: generateId(),
+ role: "assistant",
+ type: "voice",
+ content: data.content ?? "",
+ audioUri: data.audioBase64
+ ? `data:audio/mp4;base64,${data.audioBase64}`
+ : undefined,
+ timestamp: Date.now(),
+ status: "sent",
+ };
+ setMessages((prev) => [...prev, msg]);
+ if (msg.audioUri) {
+ playAudio(msg.audioUri).catch(() => {});
+ }
+ break;
+ }
+ case "image": {
+ // Store as latest screenshot for navigation mode
+ setLatestScreenshot(data.imageBase64);
+ // Also add to chat as an image message
+ const msg: Message = {
+ id: generateId(),
+ role: "assistant",
+ type: "image",
+ content: data.caption ?? "Screenshot",
+ imageBase64: data.imageBase64,
+ timestamp: Date.now(),
+ status: "sent",
+ };
+ setMessages((prev) => [...prev, msg]);
+ break;
+ }
+ case "sessions": {
+ setSessions(data.sessions);
+ break;
+ }
+ case "session_switched": {
+ const msg: Message = {
+ id: generateId(),
+ role: "system",
+ type: "text",
+ content: `Switched to ${data.name}`,
+ timestamp: Date.now(),
+ };
+ setMessages((prev) => [...prev, msg]);
+ break;
+ }
+ case "session_renamed": {
+ const msg: Message = {
+ id: generateId(),
+ role: "system",
+ type: "text",
+ content: `Renamed to ${data.name}`,
+ timestamp: Date.now(),
+ };
+ setMessages((prev) => [...prev, msg]);
+ // Refresh sessions to show updated name
+ sendCommand("sessions");
+ break;
+ }
+ case "error": {
+ const msg: Message = {
+ id: generateId(),
+ role: "system",
+ type: "text",
+ content: data.message,
+ timestamp: Date.now(),
+ };
+ setMessages((prev) => [...prev, msg]);
+ break;
}
}
};
@@ -77,7 +151,7 @@
return () => {
onMessageReceived.current = null;
};
- }, [onMessageReceived]);
+ }, [onMessageReceived, sendCommand]);
const sendTextMessage = useCallback(
(text: string) => {
@@ -91,7 +165,6 @@
status: "sending",
};
addMessage(msg);
-
const sent = wsSend(text);
updateMessageStatus(id, sent ? "sent" : "error");
},
@@ -99,7 +172,7 @@
);
const sendVoiceMessage = useCallback(
- (audioUri: string, durationMs?: number) => {
+ async (audioUri: string, durationMs?: number) => {
const id = generateId();
const msg: Message = {
id,
@@ -112,10 +185,14 @@
duration: durationMs,
};
addMessage(msg);
-
- // For now, send with empty base64 since we'd need expo-file-system to encode
- const sent = wsVoice("", "Voice message");
- updateMessageStatus(id, sent ? "sent" : "error");
+ try {
+ const base64 = await encodeAudioToBase64(audioUri);
+ const sent = wsVoice(base64);
+ updateMessageStatus(id, sent ? "sent" : "error");
+ } catch (err) {
+ console.error("Failed to encode audio:", err);
+ updateMessageStatus(id, "error");
+ }
},
[wsVoice, addMessage, updateMessageStatus]
);
@@ -124,9 +201,52 @@
setMessages([]);
}, []);
+ // --- Session management ---
+ const requestSessions = useCallback(() => {
+ sendCommand("sessions");
+ }, [sendCommand]);
+
+ const switchSession = useCallback(
+ (sessionId: string) => {
+ sendCommand("switch", { sessionId });
+ },
+ [sendCommand]
+ );
+
+ const renameSession = useCallback(
+ (sessionId: string, name: string) => {
+ sendCommand("rename", { sessionId, name });
+ },
+ [sendCommand]
+ );
+
+ // --- Screenshot / navigation ---
+ const requestScreenshot = useCallback(() => {
+ sendCommand("screenshot");
+ }, [sendCommand]);
+
+ const sendNavKey = useCallback(
+ (key: string) => {
+ sendCommand("nav", { key });
+ },
+ [sendCommand]
+ );
+
return (
<ChatContext.Provider
- value={{ messages, sendTextMessage, sendVoiceMessage, clearMessages }}
+ value={{
+ messages,
+ sendTextMessage,
+ sendVoiceMessage,
+ clearMessages,
+ sessions,
+ requestSessions,
+ switchSession,
+ renameSession,
+ latestScreenshot,
+ requestScreenshot,
+ sendNavKey,
+ }}
>
{children}
</ChatContext.Provider>
--
Gitblit v1.3.1