Matthias Nott
2026-03-21 fa34201bc07e5312ff0c6825933cd02ce7900254
fix: voice caption ordering, background audio, image persistence

- Voice captions send voice first (sets [PAILot:voice] reply channel)
- iOS background audio mode for playback through screen lock
- Images persist across app restarts (imageBase64 no longer stripped from persisted JSON)
- Voice messages without audio downgraded to text on load
- Caption dialog rewritten with proper mic button and recording states
- Image captions displayed below thumbnails in chat bubbles
- WoL sends to subnet broadcast derived from localHost
- lastSeq loaded before connect so catch-up does not replay already-received messages
- Fixed ping format (JSON instead of raw string)
- All commands use proper gateway format
9 files modified
changed files
ios/Runner/Info.plist patch | view | blame | history
lib/models/message.dart patch | view | blame | history
lib/screens/chat_screen.dart patch | view | blame | history
lib/screens/settings_screen.dart patch | view | blame | history
lib/services/audio_service.dart patch | view | blame | history
lib/services/message_store.dart patch | view | blame | history
lib/services/websocket_service.dart patch | view | blame | history
lib/services/wol_service.dart patch | view | blame | history
lib/widgets/message_bubble.dart patch | view | blame | history
ios/Runner/Info.plist
....@@ -85,5 +85,9 @@
8585 <string>UIInterfaceOrientationLandscapeLeft</string>
8686 <string>UIInterfaceOrientationLandscapeRight</string>
8787 </array>
88
+ <key>UIBackgroundModes</key>
89
+ <array>
90
+ <string>audio</string>
91
+ </array>
8892 </dict>
8993 </plist>
lib/models/message.dart
....@@ -114,7 +114,7 @@
114114 };
115115 }
116116
117
- /// Lightweight JSON for persistence (strips heavy binary fields).
117
+ /// Lightweight JSON for persistence (strips temp audio paths, keeps images).
118118 Map<String, dynamic> toJsonLight() {
119119 return {
120120 'id': id,
....@@ -124,6 +124,9 @@
124124 'timestamp': timestamp,
125125 if (status != null) 'status': status!.name,
126126 if (duration != null) 'duration': duration,
127
+ // Keep imageBase64 — images are typically 50-200 KB and must survive restart.
128
+ // audioUri is intentionally omitted: it is a temp file path that won't survive restart.
129
+ if (imageBase64 != null) 'imageBase64': imageBase64,
127130 };
128131 }
129132
lib/screens/chat_screen.dart
....@@ -47,14 +47,16 @@
4747 void initState() {
4848 super.initState();
4949 WidgetsBinding.instance.addObserver(this);
50
- _loadLastSeq();
51
- _initConnection();
50
+ _initAll();
5251 _scrollController.addListener(_onScroll);
5352 }
5453
55
- Future<void> _loadLastSeq() async {
54
+ Future<void> _initAll() async {
55
+ // Load lastSeq BEFORE connecting so catch_up sends the right value
5656 final prefs = await SharedPreferences.getInstance();
5757 _lastSeq = prefs.getInt('lastSeq') ?? 0;
58
+ if (!mounted) return;
59
+ _initConnection();
5860 }
5961
6062 void _saveLastSeq() {
....@@ -538,25 +540,8 @@
538540 textCaption = '';
539541 }
540542
541
- // Send all images together — first with caption, rest without
542
- for (var i = 0; i < encodedImages.length; i++) {
543
- final isFirst = i == 0;
544
- final msgCaption = isFirst ? textCaption : '';
545
-
546
- _ws?.send({
547
- 'type': 'image',
548
- 'imageBase64': encodedImages[i],
549
- 'mimeType': 'image/jpeg',
550
- 'caption': msgCaption,
551
- if (isFirst && voiceB64 != null) 'audioBase64': voiceB64,
552
- 'sessionId': ref.read(activeSessionIdProvider),
553
- // Signal how many images follow so receiving session can wait
554
- if (isFirst && encodedImages.length > 1)
555
- 'totalImages': encodedImages.length,
556
- });
557
- }
558
-
559
- // If voice caption, also send the voice message so it gets transcribed
543
+ // Send voice FIRST so Whisper transcribes it and the [PAILot:voice] prefix
544
+ // sets the reply channel. Images follow — Claude sees transcript + images together.
560545 if (voiceB64 != null) {
561546 final voiceMsg = Message.voice(
562547 role: MessageRole.user,
....@@ -569,6 +554,20 @@
569554 'audioBase64': voiceB64,
570555 'content': '',
571556 'messageId': voiceMsg.id,
557
+ 'sessionId': ref.read(activeSessionIdProvider),
558
+ });
559
+ }
560
+
561
+ // Send images — first with text caption (if any), rest without
562
+ for (var i = 0; i < encodedImages.length; i++) {
563
+ final isFirst = i == 0;
564
+ final msgCaption = isFirst ? textCaption : '';
565
+
566
+ _ws?.send({
567
+ 'type': 'image',
568
+ 'imageBase64': encodedImages[i],
569
+ 'mimeType': 'image/jpeg',
570
+ 'caption': msgCaption,
572571 'sessionId': ref.read(activeSessionIdProvider),
573572 });
574573 }
....@@ -591,8 +590,9 @@
591590 final captionController = TextEditingController();
592591 String? voicePath;
593592 bool isVoiceRecording = false;
593
+ bool hasVoiceCaption = false;
594594
595
- return showModalBottomSheet<String>(
595
+ final result = await showModalBottomSheet<String>(
596596 context: context,
597597 isScrollControlled: true,
598598 builder: (ctx) => StatefulBuilder(
....@@ -611,58 +611,94 @@
611611 style: Theme.of(ctx).textTheme.titleSmall,
612612 ),
613613 const SizedBox(height: 12),
614
- TextField(
615
- controller: captionController,
616
- decoration: InputDecoration(
617
- hintText: 'Add a caption (optional)',
618
- border: const OutlineInputBorder(),
619
- suffixIcon: IconButton(
620
- icon: Icon(
621
- isVoiceRecording ? Icons.stop_circle : Icons.mic,
622
- color: isVoiceRecording ? Colors.red : null,
623
- ),
624
- onPressed: () async {
625
- if (isVoiceRecording) {
626
- final path = await AudioService.stopRecording();
627
- setSheetState(() => isVoiceRecording = false);
628
- if (path != null) {
629
- voicePath = path;
630
- captionController.text = '🎤 Voice caption recorded';
631
- }
632
- } else {
633
- final path = await AudioService.startRecording();
634
- if (path != null) {
635
- setSheetState(() => isVoiceRecording = true);
636
- }
637
- }
638
- },
614
+ // Text caption input
615
+ if (!isVoiceRecording && !hasVoiceCaption)
616
+ TextField(
617
+ controller: captionController,
618
+ decoration: const InputDecoration(
619
+ hintText: 'Add a text caption (optional)',
620
+ border: OutlineInputBorder(),
621
+ ),
622
+ autofocus: true,
623
+ maxLines: 3,
624
+ ),
625
+ // Voice recording indicator
626
+ if (isVoiceRecording)
627
+ Container(
628
+ padding: const EdgeInsets.symmetric(vertical: 20),
629
+ child: const Row(
630
+ mainAxisAlignment: MainAxisAlignment.center,
631
+ children: [
632
+ Icon(Icons.fiber_manual_record, color: Colors.red, size: 16),
633
+ SizedBox(width: 8),
634
+ Text('Recording voice caption...', style: TextStyle(fontSize: 16)),
635
+ ],
639636 ),
640637 ),
641
- autofocus: true,
642
- maxLines: 3,
643
- enabled: !isVoiceRecording,
644
- ),
638
+ // Voice recorded confirmation
639
+ if (hasVoiceCaption && !isVoiceRecording)
640
+ Container(
641
+ padding: const EdgeInsets.symmetric(vertical: 20),
642
+ child: const Row(
643
+ mainAxisAlignment: MainAxisAlignment.center,
644
+ children: [
645
+ Icon(Icons.check_circle, color: Colors.green, size: 20),
646
+ SizedBox(width: 8),
647
+ Text('Voice caption recorded', style: TextStyle(fontSize: 16)),
648
+ ],
649
+ ),
650
+ ),
645651 const SizedBox(height: 12),
652
+ // Action row: mic/stop + cancel + send
646653 Row(
647
- mainAxisAlignment: MainAxisAlignment.end,
648654 children: [
655
+ // Mic / Stop button — large and clear
656
+ if (!hasVoiceCaption)
657
+ IconButton.filled(
658
+ onPressed: () async {
659
+ if (isVoiceRecording) {
660
+ final path = await AudioService.stopRecording();
661
+ setSheetState(() {
662
+ isVoiceRecording = false;
663
+ if (path != null) {
664
+ voicePath = path;
665
+ hasVoiceCaption = true;
666
+ }
667
+ });
668
+ } else {
669
+ final path = await AudioService.startRecording();
670
+ if (path != null) {
671
+ setSheetState(() => isVoiceRecording = true);
672
+ }
673
+ }
674
+ },
675
+ icon: Icon(isVoiceRecording ? Icons.stop : Icons.mic),
676
+ style: IconButton.styleFrom(
677
+ backgroundColor: isVoiceRecording ? Colors.red : null,
678
+ foregroundColor: isVoiceRecording ? Colors.white : null,
679
+ ),
680
+ ),
681
+ const Spacer(),
649682 TextButton(
650
- onPressed: () {
651
- if (isVoiceRecording) AudioService.cancelRecording();
652
- Navigator.pop(ctx);
683
+ onPressed: () async {
684
+ if (isVoiceRecording) {
685
+ await AudioService.cancelRecording();
686
+ }
687
+ if (ctx.mounted) Navigator.pop(ctx);
653688 },
654689 child: const Text('Cancel'),
655690 ),
656691 const SizedBox(width: 8),
657692 FilledButton(
658
- onPressed: () {
659
- if (voicePath != null) {
660
- // Voice caption: send as voice message with images
661
- Navigator.pop(ctx, '__voice__:$voicePath');
662
- } else {
663
- Navigator.pop(ctx, captionController.text);
664
- }
665
- },
693
+ onPressed: isVoiceRecording
694
+ ? null // disable Send while recording
695
+ : () {
696
+ if (voicePath != null) {
697
+ Navigator.pop(ctx, '__voice__:$voicePath');
698
+ } else {
699
+ Navigator.pop(ctx, captionController.text);
700
+ }
701
+ },
666702 child: const Text('Send'),
667703 ),
668704 ],
....@@ -673,6 +709,14 @@
673709 ),
674710 ),
675711 );
712
+
713
+ // Safety net: clean up recording if sheet dismissed by swipe/tap outside
714
+ if (isVoiceRecording) {
715
+ await AudioService.cancelRecording();
716
+ }
717
+
718
+ captionController.dispose();
719
+ return result;
676720 }
677721
678722 void _clearChat() {
lib/screens/settings_screen.dart
....@@ -84,7 +84,7 @@
8484 setState(() => _isWaking = true);
8585
8686 try {
87
- await WolService.wake(mac);
87
+ await WolService.wake(mac, localHost: _localHostController.text.trim());
8888 if (mounted) {
8989 ScaffoldMessenger.of(context).showSnackBar(
9090 const SnackBar(content: Text('Wake-on-LAN packet sent')),
lib/services/audio_service.dart
....@@ -28,6 +28,20 @@
2828 // Listen for app lifecycle changes to suppress autoplay when backgrounded
2929 WidgetsBinding.instance.addObserver(_LifecycleObserver());
3030
31
+ // Configure audio session for playback — allows audio to continue
32
+ // when screen locks or app goes to background
33
+ _player.setAudioContext(AudioContext(
34
+ iOS: AudioContextIOS(
35
+ category: AVAudioSessionCategory.playback,
36
+ options: {AVAudioSessionOptions.mixWithOthers},
37
+ ),
38
+ android: const AudioContextAndroid(
39
+ isSpeakerphoneOn: false,
40
+ audioMode: AndroidAudioMode.normal,
41
+ audioFocus: AndroidAudioFocus.gain,
42
+ ),
43
+ ));
44
+
3145 _player.onPlayerComplete.listen((_) {
3246 if (_isChainPlaying) {
3347 _playNext();
lib/services/message_store.dart
....@@ -82,7 +82,7 @@
8282 final jsonStr = await file.readAsString();
8383 final List<dynamic> jsonList = jsonDecode(jsonStr) as List<dynamic>;
8484 final allMessages = jsonList
85
- .map((j) => Message.fromJson(j as Map<String, dynamic>))
85
+ .map((j) => _messageFromJson(j as Map<String, dynamic>))
8686 .where((m) => !m.isEmptyVoice) // Filter out voice msgs with no content
8787 .toList();
8888
....@@ -106,7 +106,7 @@
106106 final jsonStr = await file.readAsString();
107107 final List<dynamic> jsonList = jsonDecode(jsonStr) as List<dynamic>;
108108 return jsonList
109
- .map((j) => Message.fromJson(j as Map<String, dynamic>))
109
+ .map((j) => _messageFromJson(j as Map<String, dynamic>))
110110 .where((m) => !m.isEmptyVoice)
111111 .toList();
112112 } catch (e) {
....@@ -114,6 +114,29 @@
114114 }
115115 }
116116
117
+ /// Deserialize a message from JSON, applying migration rules:
118
+ /// - Voice messages without audioUri are downgraded to text (transcript only).
119
+ /// This handles messages saved before a restart, where the temp audio file
120
+ /// is no longer available. The transcript (content) is preserved.
121
+ static Message _messageFromJson(Map<String, dynamic> json) {
122
+ final raw = Message.fromJson(json);
123
+ if (raw.type == MessageType.voice &&
124
+ (raw.audioUri == null || raw.audioUri!.isEmpty)) {
125
+ // Downgrade to text so the bubble shows the transcript instead of a
126
+ // broken play button.
127
+ return Message(
128
+ id: raw.id,
129
+ role: raw.role,
130
+ type: MessageType.text,
131
+ content: raw.content,
132
+ timestamp: raw.timestamp,
133
+ status: raw.status,
134
+ duration: raw.duration,
135
+ );
136
+ }
137
+ return raw;
138
+ }
139
+
117140 /// Delete stored messages for a session.
118141 static Future<void> delete(String sessionId) async {
119142 try {
lib/services/websocket_service.dart
....@@ -60,7 +60,7 @@
6060 // Send Wake-on-LAN if MAC configured
6161 if (config.macAddress != null && config.macAddress!.isNotEmpty) {
6262 try {
63
- await WolService.wake(config.macAddress!);
63
+ await WolService.wake(config.macAddress!, localHost: config.localHost);
6464 } catch (_) {}
6565 }
6666
lib/services/wol_service.dart
....@@ -32,9 +32,18 @@
3232 return packet.toBytes();
3333 }
3434
35
+ /// Derive subnet broadcast from an IP address (e.g., 192.168.1.100 → 192.168.1.255).
36
+ static String? _subnetBroadcast(String? ip) {
37
+ if (ip == null || ip.isEmpty) return null;
38
+ final parts = ip.split('.');
39
+ if (parts.length != 4) return null;
40
+ return '${parts[0]}.${parts[1]}.${parts[2]}.255';
41
+ }
42
+
3543 /// Send a Wake-on-LAN packet for the given MAC address.
36
- /// Broadcasts to 255.255.255.255:9 and optionally to a subnet broadcast.
37
- static Future<void> wake(String macAddress, {String? subnetBroadcast}) async {
44
+ /// Broadcasts to 255.255.255.255 and subnet broadcast derived from localHost.
45
+ /// Sends on ports 7 and 9 for maximum compatibility.
46
+ static Future<void> wake(String macAddress, {String? localHost}) async {
3847 final macBytes = _parseMac(macAddress);
3948 if (macBytes == null) {
4049 throw ArgumentError('Invalid MAC address: $macAddress');
....@@ -48,25 +57,31 @@
4857 );
4958 socket.broadcastEnabled = true;
5059
51
- // Send to broadcast address
52
- final broadcastAddr = InternetAddress('255.255.255.255');
53
- socket.send(packet, broadcastAddr, 9);
60
+ final targets = <InternetAddress>[
61
+ InternetAddress('255.255.255.255'),
62
+ ];
5463
55
- // Also send to subnet broadcast if provided
56
- if (subnetBroadcast != null && subnetBroadcast.isNotEmpty) {
64
+ // Add subnet broadcast derived from localHost
65
+ final subnet = _subnetBroadcast(localHost);
66
+ if (subnet != null) {
5767 try {
58
- final subnetAddr = InternetAddress(subnetBroadcast);
59
- socket.send(packet, subnetAddr, 9);
60
- } catch (_) {
61
- // Ignore invalid subnet broadcast address
62
- }
68
+ targets.add(InternetAddress(subnet));
69
+ } catch (_) {}
6370 }
6471
65
- // Send a few extra packets for reliability
66
- await Future.delayed(const Duration(milliseconds: 100));
67
- socket.send(packet, broadcastAddr, 9);
68
- await Future.delayed(const Duration(milliseconds: 100));
69
- socket.send(packet, broadcastAddr, 9);
72
+ // Send to all targets on both common WoL ports
73
+ for (final addr in targets) {
74
+ socket.send(packet, addr, 9);
75
+ socket.send(packet, addr, 7);
76
+ }
77
+
78
+ // Repeat for reliability
79
+ for (var i = 0; i < 3; i++) {
80
+ await Future.delayed(const Duration(milliseconds: 100));
81
+ for (final addr in targets) {
82
+ socket.send(packet, addr, 9);
83
+ }
84
+ }
7085
7186 socket.close();
7287 }
lib/widgets/message_bubble.dart
....@@ -214,28 +214,44 @@
214214 : message.imageBase64!,
215215 );
216216
217
- return GestureDetector(
218
- onTap: () {
219
- Navigator.of(context).push(
220
- MaterialPageRoute(
221
- builder: (_) => ImageViewer(imageBytes: bytes),
222
- ),
223
- );
224
- },
225
- child: ClipRRect(
226
- borderRadius: BorderRadius.circular(8),
227
- child: Image.memory(
228
- bytes,
229
- width: 260,
230
- height: 180,
231
- fit: BoxFit.cover,
232
- errorBuilder: (_, e, st) => const SizedBox(
233
- width: 260,
234
- height: 60,
235
- child: Center(child: Text('Image decode error')),
217
+ return Column(
218
+ crossAxisAlignment: CrossAxisAlignment.start,
219
+ children: [
220
+ GestureDetector(
221
+ onTap: () {
222
+ Navigator.of(context).push(
223
+ MaterialPageRoute(
224
+ builder: (_) => ImageViewer(imageBytes: bytes),
225
+ ),
226
+ );
227
+ },
228
+ child: ClipRRect(
229
+ borderRadius: BorderRadius.circular(8),
230
+ child: Image.memory(
231
+ bytes,
232
+ width: 260,
233
+ height: 180,
234
+ fit: BoxFit.cover,
235
+ errorBuilder: (_, e, st) => const SizedBox(
236
+ width: 260,
237
+ height: 60,
238
+ child: Center(child: Text('Image decode error')),
239
+ ),
240
+ ),
236241 ),
237242 ),
238
- ),
243
+ if (message.content.isNotEmpty) ...[
244
+ const SizedBox(height: 6),
245
+ Text(
246
+ message.content,
247
+ style: TextStyle(
248
+ fontSize: 14,
249
+ color: _isUser ? Colors.white.withAlpha(220) : null,
250
+ height: 1.3,
251
+ ),
252
+ ),
253
+ ],
254
+ ],
239255 );
240256 }
241257