Matthias Nott
2026-03-24 66a18b533fe563e6bcb60f79647e57982c3562b5
fix: reflect voice transcript into image caption via voiceMessageId
1 file modified
changed files
lib/screens/chat_screen.dart patch | view | blame | history
lib/screens/chat_screen.dart
....@@ -1017,25 +1017,31 @@
10171017 <String, dynamic>{'data': b64, 'mimeType': 'image/jpeg'}
10181018 ).toList();
10191019
1020
+ // Create the first image message early so we have its ID for transcript reflection
1021
+ final firstImageMsg = Message.image(
1022
+ role: MessageRole.user,
1023
+ imageBase64: encodedImages[0],
1024
+ content: textCaption.isNotEmpty ? textCaption : (voiceB64 != null ? '🎤 ...' : ''),
1025
+ status: MessageStatus.sent,
1026
+ );
1027
+
10201028 // Send everything as a single atomic bundle
10211029 _ws?.send({
10221030 'type': 'bundle',
10231031 'caption': textCaption,
10241032 if (voiceB64 != null) 'audioBase64': voiceB64,
1033
+ if (voiceB64 != null) 'voiceMessageId': firstImageMsg.id,
10251034 'attachments': attachments,
10261035 'sessionId': targetSessionId,
10271036 });
10281037
1029
- // Show as combined image+caption bubbles (voice caption shows as text under image)
1030
- final voiceLabel = voiceB64 != null ? '🎤 Voice caption' : '';
1031
- for (var i = 0; i < encodedImages.length; i++) {
1032
- final captionText = i == 0
1033
- ? (textCaption.isNotEmpty ? textCaption : voiceLabel)
1034
- : '';
1038
+ // Show as combined image+caption bubbles
1039
+ ref.read(messagesProvider.notifier).addMessage(firstImageMsg);
1040
+ for (var i = 1; i < encodedImages.length; i++) {
10351041 final message = Message.image(
10361042 role: MessageRole.user,
10371043 imageBase64: encodedImages[i],
1038
- content: captionText,
1044
+ content: '',
10391045 status: MessageStatus.sent,
10401046 );
10411047 ref.read(messagesProvider.notifier).addMessage(message);