From a0f39302919fbacf7a0d407f01b1a50413ea6f70 Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Mon, 02 Mar 2026 23:15:13 +0100
Subject: [PATCH] feat: on-device speech recognition, navigation screen, session picker

---
 contexts/ChatContext.tsx |  198 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 159 insertions(+), 39 deletions(-)

diff --git a/contexts/ChatContext.tsx b/contexts/ChatContext.tsx
index a6c1ef9..a0b62fc 100644
--- a/contexts/ChatContext.tsx
+++ b/contexts/ChatContext.tsx
@@ -6,9 +6,9 @@
   useRef,
   useState,
 } from "react";
-import { Message, WebSocketMessage } from "../types";
+import { Message, WsIncoming, WsSession } from "../types";
 import { useConnection } from "./ConnectionContext";
-import { playAudio } from "../services/audio";
+import { playAudio, encodeAudioToBase64 } from "../services/audio";
 
 function generateId(): string {
   return Date.now().toString(36) + Math.random().toString(36).slice(2);
@@ -19,13 +19,29 @@
   sendTextMessage: (text: string) => void;
   sendVoiceMessage: (audioUri: string, durationMs?: number) => void;
   clearMessages: () => void;
+  // Session management
+  sessions: WsSession[];
+  requestSessions: () => void;
+  switchSession: (sessionId: string) => void;
+  renameSession: (sessionId: string, name: string) => void;
+  // Screenshot / navigation
+  latestScreenshot: string | null;
+  requestScreenshot: () => void;
+  sendNavKey: (key: string) => void;
 }
 
 const ChatContext = createContext<ChatContextValue | null>(null);
 
 export function ChatProvider({ children }: { children: React.ReactNode }) {
   const [messages, setMessages] = useState<Message[]>([]);
-  const { sendTextMessage: wsSend, sendVoiceMessage: wsVoice, onMessageReceived } = useConnection();
+  const [sessions, setSessions] = useState<WsSession[]>([]);
+  const [latestScreenshot, setLatestScreenshot] = useState<string | null>(null);
+  const {
+    sendTextMessage: wsSend,
+    sendVoiceMessage: wsVoice,
+    sendCommand,
+    onMessageReceived,
+  } = useConnection();
 
   const addMessage = useCallback((msg: Message) => {
     setMessages((prev) => [...prev, msg]);
@@ -42,34 +58,92 @@
 
   // Handle incoming WebSocket messages
   useEffect(() => {
-    onMessageReceived.current = (data: WebSocketMessage) => {
-      if (data.type === "text") {
-        const msg: Message = {
-          id: generateId(),
-          role: "assistant",
-          type: "text",
-          content: data.content,
-          timestamp: Date.now(),
-          status: "sent",
-        };
-        setMessages((prev) => [...prev, msg]);
-      } else if (data.type === "voice") {
-        const msg: Message = {
-          id: generateId(),
-          role: "assistant",
-          type: "voice",
-          content: data.content ?? "",
-          audioUri: data.audioBase64
-            ? `data:audio/mp4;base64,${data.audioBase64}`
-            : undefined,
-          timestamp: Date.now(),
-          status: "sent",
-        };
-        setMessages((prev) => [...prev, msg]);
-
-        // Auto-play incoming voice messages
-        if (msg.audioUri) {
-          playAudio(msg.audioUri).catch(() => {});
+    onMessageReceived.current = (data: WsIncoming) => {
+      switch (data.type) {
+        case "text": {
+          const msg: Message = {
+            id: generateId(),
+            role: "assistant",
+            type: "text",
+            content: data.content,
+            timestamp: Date.now(),
+            status: "sent",
+          };
+          setMessages((prev) => [...prev, msg]);
+          break;
+        }
+        case "voice": {
+          const msg: Message = {
+            id: generateId(),
+            role: "assistant",
+            type: "voice",
+            content: data.content ?? "",
+            audioUri: data.audioBase64
+              ? `data:audio/mp4;base64,${data.audioBase64}`
+              : undefined,
+            timestamp: Date.now(),
+            status: "sent",
+          };
+          setMessages((prev) => [...prev, msg]);
+          if (msg.audioUri) {
+            playAudio(msg.audioUri).catch(() => {});
+          }
+          break;
+        }
+        case "image": {
+          // Store as latest screenshot for navigation mode
+          setLatestScreenshot(data.imageBase64);
+          // Also add to chat as an image message
+          const msg: Message = {
+            id: generateId(),
+            role: "assistant",
+            type: "image",
+            content: data.caption ?? "Screenshot",
+            imageBase64: data.imageBase64,
+            timestamp: Date.now(),
+            status: "sent",
+          };
+          setMessages((prev) => [...prev, msg]);
+          break;
+        }
+        case "sessions": {
+          setSessions(data.sessions);
+          break;
+        }
+        case "session_switched": {
+          const msg: Message = {
+            id: generateId(),
+            role: "system",
+            type: "text",
+            content: `Switched to ${data.name}`,
+            timestamp: Date.now(),
+          };
+          setMessages((prev) => [...prev, msg]);
+          break;
+        }
+        case "session_renamed": {
+          const msg: Message = {
+            id: generateId(),
+            role: "system",
+            type: "text",
+            content: `Renamed to ${data.name}`,
+            timestamp: Date.now(),
+          };
+          setMessages((prev) => [...prev, msg]);
+          // Refresh sessions to show updated name
+          sendCommand("sessions");
+          break;
+        }
+        case "error": {
+          const msg: Message = {
+            id: generateId(),
+            role: "system",
+            type: "text",
+            content: data.message,
+            timestamp: Date.now(),
+          };
+          setMessages((prev) => [...prev, msg]);
+          break;
         }
       }
     };
@@ -77,7 +151,7 @@
     return () => {
       onMessageReceived.current = null;
     };
-  }, [onMessageReceived]);
+  }, [onMessageReceived, sendCommand]);
 
   const sendTextMessage = useCallback(
     (text: string) => {
@@ -91,7 +165,6 @@
         status: "sending",
       };
       addMessage(msg);
-
       const sent = wsSend(text);
       updateMessageStatus(id, sent ? "sent" : "error");
     },
@@ -99,7 +172,7 @@
   );
 
   const sendVoiceMessage = useCallback(
-    (audioUri: string, durationMs?: number) => {
+    async (audioUri: string, durationMs?: number) => {
       const id = generateId();
       const msg: Message = {
         id,
@@ -112,10 +185,14 @@
         duration: durationMs,
       };
       addMessage(msg);
-
-      // For now, send with empty base64 since we'd need expo-file-system to encode
-      const sent = wsVoice("", "Voice message");
-      updateMessageStatus(id, sent ? "sent" : "error");
+      try {
+        const base64 = await encodeAudioToBase64(audioUri);
+        const sent = wsVoice(base64);
+        updateMessageStatus(id, sent ? "sent" : "error");
+      } catch (err) {
+        console.error("Failed to encode audio:", err);
+        updateMessageStatus(id, "error");
+      }
     },
     [wsVoice, addMessage, updateMessageStatus]
   );
@@ -124,9 +201,52 @@
     setMessages([]);
   }, []);
 
+  // --- Session management ---
+  const requestSessions = useCallback(() => {
+    sendCommand("sessions");
+  }, [sendCommand]);
+
+  const switchSession = useCallback(
+    (sessionId: string) => {
+      sendCommand("switch", { sessionId });
+    },
+    [sendCommand]
+  );
+
+  const renameSession = useCallback(
+    (sessionId: string, name: string) => {
+      sendCommand("rename", { sessionId, name });
+    },
+    [sendCommand]
+  );
+
+  // --- Screenshot / navigation ---
+  const requestScreenshot = useCallback(() => {
+    sendCommand("screenshot");
+  }, [sendCommand]);
+
+  const sendNavKey = useCallback(
+    (key: string) => {
+      sendCommand("nav", { key });
+    },
+    [sendCommand]
+  );
+
   return (
     <ChatContext.Provider
-      value={{ messages, sendTextMessage, sendVoiceMessage, clearMessages }}
+      value={{
+        messages,
+        sendTextMessage,
+        sendVoiceMessage,
+        clearMessages,
+        sessions,
+        requestSessions,
+        switchSession,
+        renameSession,
+        latestScreenshot,
+        requestScreenshot,
+        sendNavKey,
+      }}
     >
       {children}
     </ChatContext.Provider>

--
Gitblit v1.3.1