From e25bdba29f49b1b55a8a8cccdc4583aea3c101ed Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Sun, 15 Mar 2026 13:41:09 +0100
Subject: [PATCH] feat: multi-image upload and catch_up message delivery
---
types/index.ts | 15 +
app/chat.tsx | 53 +++--
contexts/ChatContext.tsx | 324 +++++++++++++++++++++++++---------------
components/chat/ImageCaptionModal.tsx | 51 +++++
4 files changed, 293 insertions(+), 150 deletions(-)
diff --git a/app/chat.tsx b/app/chat.tsx
index 2a177cc..ccfd6d5 100644
--- a/app/chat.tsx
+++ b/app/chat.tsx
@@ -30,7 +30,7 @@
const [isTextMode, setIsTextMode] = useState(false);
const [showSessions, setShowSessions] = useState(false);
const [audioPlaying, setAudioPlaying] = useState(false);
- const [stagedImage, setStagedImage] = useState<StagedImage | null>(null);
+ const [stagedImages, setStagedImages] = useState<StagedImage[]>([]);
useEffect(() => {
return onPlayingChange((uri) => setAudioPlaying(uri !== null));
@@ -59,17 +59,19 @@
clearMessages();
}, [clearMessages]);
- // Resolve a picked asset into a StagedImage
- const stageAsset = useCallback(async (asset: { base64?: string | null; uri: string; mimeType?: string | null }) => {
- const mimeType = asset.mimeType ?? (asset.uri.endsWith(".png") ? "image/png" : "image/jpeg");
- let base64 = asset.base64 ?? "";
- if (!base64 && asset.uri) {
- const { readAsStringAsync } = await import("expo-file-system/legacy");
- base64 = await readAsStringAsync(asset.uri, { encoding: "base64" });
+ // Resolve picked assets into StagedImage array
+ const stageAssets = useCallback(async (assets: Array<{ base64?: string | null; uri: string; mimeType?: string | null }>) => {
+ const staged: StagedImage[] = [];
+ for (const asset of assets) {
+ const mimeType = asset.mimeType ?? (asset.uri.endsWith(".png") ? "image/png" : "image/jpeg");
+ let base64 = asset.base64 ?? "";
+ if (!base64 && asset.uri) {
+ const { readAsStringAsync } = await import("expo-file-system/legacy");
+ base64 = await readAsStringAsync(asset.uri, { encoding: "base64" });
+ }
+ if (base64) staged.push({ base64, uri: asset.uri, mimeType });
}
- if (base64) {
- setStagedImage({ base64, uri: asset.uri, mimeType });
- }
+ if (staged.length > 0) setStagedImages(staged);
}, []);
const pickFromLibrary = useCallback(async () => {
@@ -82,15 +84,17 @@
}
const result = await ImagePicker.launchImageLibraryAsync({
mediaTypes: ["images"],
+ allowsMultipleSelection: true,
+ selectionLimit: 10,
quality: 0.7,
base64: true,
});
- if (result.canceled || !result.assets?.[0]) return;
- await stageAsset(result.assets[0]);
+ if (result.canceled || !result.assets?.length) return;
+ await stageAssets(result.assets);
} catch (err: any) {
Alert.alert("Image Error", err?.message ?? String(err));
}
- }, [stageAsset]);
+ }, [stageAssets]);
const pickFromCamera = useCallback(async () => {
try {
@@ -105,11 +109,11 @@
base64: true,
});
if (result.canceled || !result.assets?.[0]) return;
- await stageAsset(result.assets[0]);
+ await stageAssets([result.assets[0]]);
} catch (err: any) {
Alert.alert("Camera Error", err?.message ?? String(err));
}
- }, [stageAsset]);
+ }, [stageAssets]);
const handlePickImage = useCallback(() => {
if (Platform.OS === "ios") {
@@ -131,11 +135,14 @@
const handleImageSend = useCallback(
(caption: string) => {
- if (!stagedImage) return;
- sendImageMessage(stagedImage.base64, caption, stagedImage.mimeType);
- setStagedImage(null);
+ if (stagedImages.length === 0) return;
+ // Send each image as a separate message; caption on the first only
+ stagedImages.forEach((img, i) => {
+ sendImageMessage(img.base64, i === 0 ? caption : "", img.mimeType);
+ });
+ setStagedImages([]);
},
- [stagedImage, sendImageMessage],
+ [stagedImages, sendImageMessage],
);
const handleReplay = useCallback(async () => {
@@ -340,10 +347,10 @@
{/* Image caption modal — WhatsApp-style full-screen preview */}
<ImageCaptionModal
- visible={!!stagedImage}
- imageUri={stagedImage ? `data:${stagedImage.mimeType};base64,${stagedImage.base64}` : ""}
+ visible={stagedImages.length > 0}
+ images={stagedImages.map((img) => ({ uri: `data:${img.mimeType};base64,${img.base64}` }))}
onSend={handleImageSend}
- onCancel={() => setStagedImage(null)}
+ onCancel={() => setStagedImages([])}
/>
{/* Session drawer — absolute overlay outside KAV */}
diff --git a/components/chat/ImageCaptionModal.tsx b/components/chat/ImageCaptionModal.tsx
index f408157..6c753e0 100644
--- a/components/chat/ImageCaptionModal.tsx
+++ b/components/chat/ImageCaptionModal.tsx
@@ -1,6 +1,7 @@
import React, { useEffect, useRef, useState } from "react";
import {
Dimensions,
+ FlatList,
Image,
KeyboardAvoidingView,
Modal,
@@ -12,22 +13,28 @@
} from "react-native";
import { useTheme } from "../../contexts/ThemeContext";
+interface ImageItem {
+ uri: string;
+}
+
interface ImageCaptionModalProps {
visible: boolean;
- imageUri: string;
+ images: ImageItem[];
onSend: (caption: string) => void;
onCancel: () => void;
}
-export function ImageCaptionModal({ visible, imageUri, onSend, onCancel }: ImageCaptionModalProps) {
+export function ImageCaptionModal({ visible, images, onSend, onCancel }: ImageCaptionModalProps) {
const { colors } = useTheme();
const [caption, setCaption] = useState("");
+ const [selectedIndex, setSelectedIndex] = useState(0);
const inputRef = useRef<TextInput>(null);
const { width, height } = Dimensions.get("window");
useEffect(() => {
if (visible) {
setCaption("");
+ setSelectedIndex(0);
setTimeout(() => inputRef.current?.focus(), 300);
}
}, [visible]);
@@ -37,6 +44,9 @@
setCaption("");
};
+ const currentImage = images[selectedIndex]?.uri ?? "";
+ const isMultiple = images.length > 1;
+
return (
<Modal visible={visible} animationType="slide" transparent={false} onRequestClose={onCancel}>
<View style={{ flex: 1, backgroundColor: "#000" }}>
@@ -45,7 +55,7 @@
behavior={Platform.OS === "ios" ? "padding" : undefined}
keyboardVerticalOffset={0}
>
- {/* Top bar with cancel */}
+ {/* Top bar with cancel + count */}
<View
style={{
paddingTop: 54,
@@ -68,17 +78,48 @@
>
<Text style={{ color: "#fff", fontSize: 16, fontWeight: "600" }}>Cancel</Text>
</Pressable>
+ {isMultiple && (
+ <Text style={{ color: "rgba(255,255,255,0.6)", fontSize: 14 }}>
+ {selectedIndex + 1} / {images.length}
+ </Text>
+ )}
</View>
{/* Image preview */}
<View style={{ flex: 1, justifyContent: "center", alignItems: "center" }}>
<Image
- source={{ uri: imageUri }}
- style={{ width, height: height * 0.55 }}
+ source={{ uri: currentImage }}
+ style={{ width, height: isMultiple ? height * 0.45 : height * 0.55 }}
resizeMode="contain"
/>
</View>
+ {/* Thumbnail strip — only for multiple images */}
+ {isMultiple && (
+ <FlatList
+ data={images}
+ horizontal
+ showsHorizontalScrollIndicator={false}
+ keyExtractor={(_, i) => String(i)}
+ contentContainerStyle={{ paddingHorizontal: 12, paddingVertical: 8, gap: 8 }}
+ renderItem={({ item, index }) => (
+ <Pressable onPress={() => setSelectedIndex(index)}>
+ <Image
+ source={{ uri: item.uri }}
+ style={{
+ width: 56,
+ height: 56,
+ borderRadius: 8,
+ borderWidth: index === selectedIndex ? 2 : 0,
+ borderColor: colors.accent,
+ }}
+ resizeMode="cover"
+ />
+ </Pressable>
+ )}
+ />
+ )}
+
{/* Caption input + send */}
<View
style={{
diff --git a/contexts/ChatContext.tsx b/contexts/ChatContext.tsx
index 144f375..3297da9 100644
--- a/contexts/ChatContext.tsx
+++ b/contexts/ChatContext.tsx
@@ -6,6 +6,7 @@
useRef,
useState,
} from "react";
+import { AppState, AppStateStatus } from "react-native";
import { Message, WsIncoming, WsSession, PaiProject } from "../types";
import { useConnection } from "./ConnectionContext";
import { playAudio, encodeAudioToBase64, saveBase64Audio, canAutoplay } from "../services/audio";
@@ -142,6 +143,7 @@
loadMoreMessages: () => void;
hasMoreMessages: boolean;
unreadCounts: Record<string, number>;
+ unreadSessions: Set<string>;
incomingToast: IncomingToast | null;
dismissToast: () => void;
latestScreenshot: string | null;
@@ -158,12 +160,18 @@
const [latestScreenshot, setLatestScreenshot] = useState<string | null>(null);
const needsSync = useRef(true);
+ // Sequence tracking for catch_up protocol
+ const lastSeqRef = useRef(0);
+ const seenSeqsRef = useRef(new Set<number>());
+
// Per-session message storage
const messagesMapRef = useRef<Record<string, Message[]>>({});
// Messages for the active session (drives re-renders)
const [messages, setMessages] = useState<Message[]>([]);
// Unread counts for non-active sessions
const [unreadCounts, setUnreadCounts] = useState<Record<string, number>>({});
+ // Server-pushed unread indicators (sessions with new activity since last viewed)
+ const [unreadSessions, setUnreadSessions] = useState<Set<string>>(new Set());
// Per-session typing indicator (sessionId → boolean)
const typingMapRef = useRef<Record<string, boolean>>({});
const [isTyping, setIsTyping] = useState(false);
@@ -211,6 +219,12 @@
delete next[active.id];
return next;
});
+ setUnreadSessions((prev) => {
+ if (!prev.has(active.id)) return prev;
+ const next = new Set(prev);
+ next.delete(active.id);
+ return next;
+ });
// Sync typing indicator for the new active session
const activeTyping = typingMapRef.current[active.id] ?? false;
setIsTyping(activeTyping);
@@ -221,18 +235,34 @@
}
}, []);
- // On connect: ask gateway to sync sessions. If we already had a session
- // selected, tell the gateway so it preserves our selection instead of
- // jumping to whatever iTerm has focused on the Mac.
+ // On connect: ask gateway to sync sessions (passing our active session id, if any, so it preserves our selection), then request catch_up for missed messages.
useEffect(() => {
if (status === "connected") {
needsSync.current = true;
const id = activeSessionIdRef.current;
sendCommand("sync", id ? { activeSessionId: id } : undefined);
+ // Request any messages we missed while disconnected/backgrounded
+ sendCommand("catch_up", { lastSeq: lastSeqRef.current });
} else if (status === "disconnected") {
setIsTyping(false);
}
// eslint-disable-next-line react-hooks/exhaustive-deps — only fire on status change
+ }, [status, sendCommand]);
+
+ // On foreground resume: request catch_up for any messages missed while backgrounded.
+ // iOS keeps the WebSocket "open" at TCP level but suspends the app — messages sent
+ // during that time are lost. catch_up replays them from the server's message log.
+ useEffect(() => {
+ let lastState: AppStateStatus = AppState.currentState;
+ const sub = AppState.addEventListener("change", (nextState) => {
+ if (lastState.match(/inactive|background/) && nextState === "active") {
+ if (status === "connected") {
+ sendCommand("catch_up", { lastSeq: lastSeqRef.current });
+ }
+ }
+ lastState = nextState;
+ });
+ return () => sub.remove();
}, [status, sendCommand]);
// Helper: add a message to the active session
@@ -309,135 +339,173 @@
});
}, []);
+ // Process a single incoming message (used by both live delivery and catch_up replay)
+ const processIncoming = useCallback(async (data: WsIncoming, isCatchUp = false) => {
+ // Dedup by seq: skip any message whose seq was already seen (messages without a seq, or with seq 0, are never deduped)
+ const seq = (data as any).seq as number | undefined;
+ if (seq) {
+ if (seenSeqsRef.current.has(seq)) return;
+ seenSeqsRef.current.add(seq);
+ lastSeqRef.current = Math.max(lastSeqRef.current, seq);
+ // Keep seen set bounded: once it grows past 500 entries, trim it to the 300 highest seqs
+ if (seenSeqsRef.current.size > 500) {
+ const arr = Array.from(seenSeqsRef.current).sort((a, b) => a - b);
+ seenSeqsRef.current = new Set(arr.slice(-300));
+ }
+ }
+
+ switch (data.type) {
+ case "text": {
+ if (!isCatchUp) setIsTyping(false);
+ const msg: Message = {
+ id: generateId(),
+ role: "assistant",
+ type: "text",
+ content: data.content,
+ timestamp: Date.now(),
+ status: "sent",
+ };
+ if (data.sessionId) {
+ addMessageToSession(data.sessionId, msg);
+ } else {
+ addMessageToActive(msg);
+ }
+ if (!isCatchUp) notifyIncomingMessage("PAILot", data.content ?? "New message");
+ break;
+ }
+ case "voice": {
+ if (!isCatchUp) setIsTyping(false);
+ let audioUri: string | undefined;
+ if (data.audioBase64) {
+ try {
+ audioUri = await saveBase64Audio(data.audioBase64);
+ } catch {
+ // fallback: no playable audio
+ }
+ }
+ const msg: Message = {
+ id: generateId(),
+ role: "assistant",
+ type: "voice",
+ content: data.content ?? "",
+ audioUri,
+ timestamp: Date.now(),
+ status: "sent",
+ };
+ const isForActive = !data.sessionId || data.sessionId === activeSessionIdRef.current;
+ if (data.sessionId) {
+ addMessageToSession(data.sessionId, msg);
+ } else {
+ addMessageToActive(msg);
+ }
+ if (!isCatchUp) notifyIncomingMessage("PAILot", data.content ?? "Voice message");
+ // Only autoplay if live (not catch_up) and for the currently viewed session
+ if (!isCatchUp && msg.audioUri && canAutoplay() && isForActive) {
+ playAudio(msg.audioUri).catch(() => {});
+ }
+ break;
+ }
+ case "image": {
+ setLatestScreenshot(data.imageBase64);
+ const msg: Message = {
+ id: generateId(),
+ role: "assistant",
+ type: "image",
+ content: data.caption ?? "Screenshot",
+ imageBase64: data.imageBase64,
+ timestamp: Date.now(),
+ status: "sent",
+ };
+ if (data.sessionId) {
+ addMessageToSession(data.sessionId, msg);
+ } else {
+ addMessageToActive(msg);
+ }
+ if (!isCatchUp) notifyIncomingMessage("PAILot", data.caption ?? "New image");
+ break;
+ }
+ case "sessions": {
+ const incoming = data.sessions as WsSession[];
+ setSessions(incoming);
+ syncActiveFromSessions(incoming);
+ needsSync.current = false;
+ break;
+ }
+ case "session_switched": {
+ sendCommand("sessions");
+ break;
+ }
+ case "session_renamed": {
+ sendCommand("sessions");
+ break;
+ }
+ case "transcript": {
+ updateMessageContent(data.messageId, data.content);
+ break;
+ }
+ case "typing": {
+ const typingSession = (data.sessionId as string) || activeSessionIdRef.current || "_global";
+ typingMapRef.current[typingSession] = !!data.typing;
+ const activeTyping = typingMapRef.current[activeSessionIdRef.current ?? ""] ?? false;
+ setIsTyping(activeTyping);
+ break;
+ }
+ case "status": {
+ break;
+ }
+ case "projects": {
+ setProjects(data.projects ?? []);
+ break;
+ }
+ case "unread": {
+ const targetId = data.sessionId as string;
+ if (targetId && targetId !== activeSessionIdRef.current) {
+ setUnreadSessions((prev) => {
+ if (prev.has(targetId)) return prev;
+ const next = new Set(prev);
+ next.add(targetId);
+ return next;
+ });
+ }
+ break;
+ }
+ case "error": {
+ const errMsg: Message = {
+ id: generateId(),
+ role: "system",
+ type: "text",
+ content: data.message,
+ timestamp: Date.now(),
+ };
+ addMessageToActive(errMsg);
+ break;
+ }
+ }
+ }, [addMessageToActive, addMessageToSession, sendCommand, syncActiveFromSessions, updateMessageContent]);
+
// Handle incoming WebSocket messages
useEffect(() => {
onMessageReceived.current = async (data: WsIncoming) => {
- switch (data.type) {
- case "text": {
- setIsTyping(false);
- const msg: Message = {
- id: generateId(),
- role: "assistant",
- type: "text",
- content: data.content,
- timestamp: Date.now(),
- status: "sent",
- };
- if (data.sessionId) {
- addMessageToSession(data.sessionId, msg);
- } else {
- addMessageToActive(msg);
+ // Handle catch_up response: replay all missed messages
+ if (data.type === "catch_up") {
+ const messages = (data as any).messages as WsIncoming[];
+ const serverSeq = (data as any).serverSeq as number | undefined;
+ if (serverSeq) lastSeqRef.current = Math.max(lastSeqRef.current, serverSeq);
+ if (messages && messages.length > 0) {
+ for (const msg of messages) {
+ await processIncoming(msg, true);
}
- notifyIncomingMessage("PAILot", data.content ?? "New message");
- break;
}
- case "voice": {
- setIsTyping(false);
- let audioUri: string | undefined;
- if (data.audioBase64) {
- try {
- audioUri = await saveBase64Audio(data.audioBase64);
- } catch {
- // fallback: no playable audio
- }
- }
- const msg: Message = {
- id: generateId(),
- role: "assistant",
- type: "voice",
- content: data.content ?? "",
- audioUri,
- timestamp: Date.now(),
- status: "sent",
- };
- const isForActive = !data.sessionId || data.sessionId === activeSessionIdRef.current;
- if (data.sessionId) {
- addMessageToSession(data.sessionId, msg);
- } else {
- addMessageToActive(msg);
- }
- notifyIncomingMessage("PAILot", data.content ?? "Voice message");
- // Only autoplay if this voice note is for the currently viewed session
- if (msg.audioUri && canAutoplay() && isForActive) {
- playAudio(msg.audioUri).catch(() => {});
- }
- break;
- }
- case "image": {
- setLatestScreenshot(data.imageBase64);
- const msg: Message = {
- id: generateId(),
- role: "assistant",
- type: "image",
- content: data.caption ?? "Screenshot",
- imageBase64: data.imageBase64,
- timestamp: Date.now(),
- status: "sent",
- };
- if (data.sessionId) {
- addMessageToSession(data.sessionId, msg);
- } else {
- addMessageToActive(msg);
- }
- notifyIncomingMessage("PAILot", data.caption ?? "New image");
- break;
- }
- case "sessions": {
- const incoming = data.sessions as WsSession[];
- setSessions(incoming);
- syncActiveFromSessions(incoming);
- needsSync.current = false;
- break;
- }
- case "session_switched": {
- // Just refresh session list — no system message needed
- sendCommand("sessions");
- break;
- }
- case "session_renamed": {
- // Just refresh session list — no system message needed
- sendCommand("sessions");
- break;
- }
- case "transcript": {
- // Voice → text reflection: replace voice bubble with transcribed text
- updateMessageContent(data.messageId, data.content);
- break;
- }
- case "typing": {
- const typingSession = (data.sessionId as string) || activeSessionIdRef.current || "_global";
- typingMapRef.current[typingSession] = !!data.typing;
- // Only show typing indicator if it's for the active session
- const activeTyping = typingMapRef.current[activeSessionIdRef.current ?? ""] ?? false;
- setIsTyping(activeTyping);
- break;
- }
- case "status": {
- // Connection status update — ignore for now
- break;
- }
- case "projects": {
- setProjects(data.projects ?? []);
- break;
- }
- case "error": {
- const msg: Message = {
- id: generateId(),
- role: "system",
- type: "text",
- content: data.message,
- timestamp: Date.now(),
- };
- addMessageToActive(msg);
- break;
- }
+ return;
}
+ // Live message — process normally
+ await processIncoming(data);
};
return () => {
onMessageReceived.current = null;
};
- }, [onMessageReceived, sendCommand, addMessageToActive, updateMessageContent, syncActiveFromSessions]);
+ }, [onMessageReceived, processIncoming]);
const sendTextMessage = useCallback(
(text: string) => {
@@ -532,6 +600,13 @@
(sessionId: string) => {
// messagesMapRef is already kept in sync by all mutators — no need to save here
sendCommand("switch", { sessionId });
+ // Clear the server-pushed unread indicator immediately on user intent
+ setUnreadSessions((prev) => {
+ if (!prev.has(sessionId)) return prev;
+ const next = new Set(prev);
+ next.delete(sessionId);
+ return next;
+ });
},
[sendCommand]
);
@@ -552,6 +627,12 @@
if (!u[sessionId]) return u;
const next = { ...u };
delete next[sessionId];
+ return next;
+ });
+ setUnreadSessions((prev) => {
+ if (!prev.has(sessionId)) return prev;
+ const next = new Set(prev);
+ next.delete(sessionId);
return next;
});
},
@@ -622,6 +703,7 @@
loadMoreMessages,
hasMoreMessages,
unreadCounts,
+ unreadSessions,
incomingToast,
dismissToast,
latestScreenshot,
diff --git a/types/index.ts b/types/index.ts
index 259915f..21b7275 100644
--- a/types/index.ts
+++ b/types/index.ts
@@ -137,6 +137,17 @@
projects: PaiProject[];
}
+export interface WsIncomingUnread {
+ type: "unread";
+ sessionId: string;
+}
+
+export interface WsIncomingCatchUp {
+ type: "catch_up";
+ messages: Record<string, unknown>[];
+ serverSeq: number;
+}
+
export type WsIncoming =
| WsIncomingText
| WsIncomingVoice
@@ -148,4 +159,6 @@
| WsIncomingTyping
| WsIncomingError
| WsIncomingStatus
- | WsIncomingProjects;
+ | WsIncomingProjects
+ | WsIncomingUnread
+ | WsIncomingCatchUp;
--
Gitblit v1.3.1