From fa34201bc07e5312ff0c6825933cd02ce7900254 Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Sat, 21 Mar 2026 20:55:10 +0100
Subject: [PATCH] fix: voice caption ordering, background audio, image persistence

---
 lib/screens/chat_screen.dart |  172 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 108 insertions(+), 64 deletions(-)

diff --git a/lib/screens/chat_screen.dart b/lib/screens/chat_screen.dart
index 417781d..0d6cc0f 100644
--- a/lib/screens/chat_screen.dart
+++ b/lib/screens/chat_screen.dart
@@ -47,14 +47,16 @@
   void initState() {
     super.initState();
     WidgetsBinding.instance.addObserver(this);
-    _loadLastSeq();
-    _initConnection();
+    _initAll();
     _scrollController.addListener(_onScroll);
   }
 
-  Future<void> _loadLastSeq() async {
+  Future<void> _initAll() async {
+    // Load lastSeq BEFORE connecting so catch_up sends the right value
     final prefs = await SharedPreferences.getInstance();
     _lastSeq = prefs.getInt('lastSeq') ?? 0;
+    if (!mounted) return;
+    _initConnection();
   }
 
   void _saveLastSeq() {
@@ -538,25 +540,8 @@
       textCaption = '';
     }
 
-    // Send all images together — first with caption, rest without
-    for (var i = 0; i < encodedImages.length; i++) {
-      final isFirst = i == 0;
-      final msgCaption = isFirst ? textCaption : '';
-
-      _ws?.send({
-        'type': 'image',
-        'imageBase64': encodedImages[i],
-        'mimeType': 'image/jpeg',
-        'caption': msgCaption,
-        if (isFirst && voiceB64 != null) 'audioBase64': voiceB64,
-        'sessionId': ref.read(activeSessionIdProvider),
-        // Signal how many images follow so receiving session can wait
-        if (isFirst && encodedImages.length > 1)
-          'totalImages': encodedImages.length,
-      });
-    }
-
-    // If voice caption, also send the voice message so it gets transcribed
+    // Send voice FIRST so Whisper transcribes it and the [PAILot:voice] prefix
+    // sets the reply channel. Images follow — Claude sees transcript + images together.
     if (voiceB64 != null) {
       final voiceMsg = Message.voice(
         role: MessageRole.user,
@@ -569,6 +554,20 @@
         'audioBase64': voiceB64,
         'content': '',
         'messageId': voiceMsg.id,
+        'sessionId': ref.read(activeSessionIdProvider),
+      });
+    }
+
+    // Send images — first with text caption (if any), rest without
+    for (var i = 0; i < encodedImages.length; i++) {
+      final isFirst = i == 0;
+      final msgCaption = isFirst ? textCaption : '';
+
+      _ws?.send({
+        'type': 'image',
+        'imageBase64': encodedImages[i],
+        'mimeType': 'image/jpeg',
+        'caption': msgCaption,
         'sessionId': ref.read(activeSessionIdProvider),
       });
     }
@@ -591,8 +590,9 @@
     final captionController = TextEditingController();
     String? voicePath;
     bool isVoiceRecording = false;
+    bool hasVoiceCaption = false;
 
-    return showModalBottomSheet<String>(
+    final result = await showModalBottomSheet<String>(
       context: context,
       isScrollControlled: true,
       builder: (ctx) => StatefulBuilder(
@@ -611,58 +611,94 @@
                 style: Theme.of(ctx).textTheme.titleSmall,
               ),
               const SizedBox(height: 12),
-              TextField(
-                controller: captionController,
-                decoration: InputDecoration(
-                  hintText: 'Add a caption (optional)',
-                  border: const OutlineInputBorder(),
-                  suffixIcon: IconButton(
-                    icon: Icon(
-                      isVoiceRecording ? Icons.stop_circle : Icons.mic,
-                      color: isVoiceRecording ? Colors.red : null,
-                    ),
-                    onPressed: () async {
-                      if (isVoiceRecording) {
-                        final path = await AudioService.stopRecording();
-                        setSheetState(() => isVoiceRecording = false);
-                        if (path != null) {
-                          voicePath = path;
-                          captionController.text = '🎤 Voice caption recorded';
-                        }
-                      } else {
-                        final path = await AudioService.startRecording();
-                        if (path != null) {
-                          setSheetState(() => isVoiceRecording = true);
-                        }
-                      }
-                    },
+              // Text caption input
+              if (!isVoiceRecording && !hasVoiceCaption)
+                TextField(
+                  controller: captionController,
+                  decoration: const InputDecoration(
+                    hintText: 'Add a text caption (optional)',
+                    border: OutlineInputBorder(),
+                  ),
+                  autofocus: true,
+                  maxLines: 3,
+                ),
+              // Voice recording indicator
+              if (isVoiceRecording)
+                Container(
+                  padding: const EdgeInsets.symmetric(vertical: 20),
+                  child: const Row(
+                    mainAxisAlignment: MainAxisAlignment.center,
+                    children: [
+                      Icon(Icons.fiber_manual_record, color: Colors.red, size: 16),
+                      SizedBox(width: 8),
+                      Text('Recording voice caption...', style: TextStyle(fontSize: 16)),
+                    ],
                   ),
                 ),
-                autofocus: true,
-                maxLines: 3,
-                enabled: !isVoiceRecording,
-              ),
+              // Voice recorded confirmation
+              if (hasVoiceCaption && !isVoiceRecording)
+                Container(
+                  padding: const EdgeInsets.symmetric(vertical: 20),
+                  child: const Row(
+                    mainAxisAlignment: MainAxisAlignment.center,
+                    children: [
+                      Icon(Icons.check_circle, color: Colors.green, size: 20),
+                      SizedBox(width: 8),
+                      Text('Voice caption recorded', style: TextStyle(fontSize: 16)),
+                    ],
+                  ),
+                ),
               const SizedBox(height: 12),
+              // Action row: mic/stop + cancel + send
               Row(
-                mainAxisAlignment: MainAxisAlignment.end,
                 children: [
+                  // Mic / Stop button — large and clear
+                  if (!hasVoiceCaption)
+                    IconButton.filled(
+                      onPressed: () async {
+                        if (isVoiceRecording) {
+                          final path = await AudioService.stopRecording();
+                          setSheetState(() {
+                            isVoiceRecording = false;
+                            if (path != null) {
+                              voicePath = path;
+                              hasVoiceCaption = true;
+                            }
+                          });
+                        } else {
+                          final path = await AudioService.startRecording();
+                          if (path != null) {
+                            setSheetState(() => isVoiceRecording = true);
+                          }
+                        }
+                      },
+                      icon: Icon(isVoiceRecording ? Icons.stop : Icons.mic),
+                      style: IconButton.styleFrom(
+                        backgroundColor: isVoiceRecording ? Colors.red : null,
+                        foregroundColor: isVoiceRecording ? Colors.white : null,
+                      ),
+                    ),
+                  const Spacer(),
                   TextButton(
-                    onPressed: () {
-                      if (isVoiceRecording) AudioService.cancelRecording();
-                      Navigator.pop(ctx);
+                    onPressed: () async {
+                      if (isVoiceRecording) {
+                        await AudioService.cancelRecording();
+                      }
+                      if (ctx.mounted) Navigator.pop(ctx);
                     },
                     child: const Text('Cancel'),
                   ),
                   const SizedBox(width: 8),
                   FilledButton(
-                    onPressed: () {
-                      if (voicePath != null) {
-                        // Voice caption: send as voice message with images
-                        Navigator.pop(ctx, '__voice__:$voicePath');
-                      } else {
-                        Navigator.pop(ctx, captionController.text);
-                      }
-                    },
+                    onPressed: isVoiceRecording
+                        ? null // disable Send while recording
+                        : () {
+                            if (voicePath != null) {
+                              Navigator.pop(ctx, '__voice__:$voicePath');
+                            } else {
+                              Navigator.pop(ctx, captionController.text);
+                            }
+                          },
                     child: const Text('Send'),
                   ),
                 ],
@@ -673,6 +709,14 @@
         ),
       ),
     );
+
+    // Safety net: clean up recording if sheet dismissed by swipe/tap outside
+    if (isVoiceRecording) {
+      await AudioService.cancelRecording();
+    }
+
+    captionController.dispose();
+    return result;
   }
 
   void _clearChat() {

--
Gitblit v1.3.1