Forráskód Böngészése

Fix Cumulative Wording

Benjamin Harris 1 hónapja
szülő
commit
d647ebc3bc
2 módosított fájl, 41 hozzáadás és 16 törlés
  1. 2 2
      bridge/admin.py
  2. 39 14
      bridge/bridge.py

+ 2 - 2
bridge/admin.py

@@ -1164,7 +1164,7 @@ DISPLAY_HTML = """<!DOCTYPE html>
 
   .display-line {
     font-family: Georgia, 'Times New Roman', serif;
-    font-size: clamp(22px, 8vw, 110px);
+    font-size: 22px; /*clamp(22px, 8vw, 110px);*/
     line-height: 1.25;
     letter-spacing: 0.02em;
     min-height: 1.25em;
@@ -1173,7 +1173,7 @@ DISPLAY_HTML = """<!DOCTYPE html>
   }
 
   .display-line.is-speaker {
-    font-size: clamp(16px, 5.5vw, 72px);
+    font-size: 22px; /*clamp(16px, 5.5vw, 72px);*/
     color: #f5c518;
     font-weight: 700;
     letter-spacing: 0.1em;

+ 39 - 14
bridge/bridge.py

@@ -281,22 +281,47 @@ async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, eng
     results_generator  = await audio_processor.create_tasks()
 
     async def _receive_results():
-        # FrontData.lines is a cumulative list of committed Segment objects.
-        # Track how many we've already processed so we only push new ones.
-        seen_lines = 0
+        # FrontData.lines is validated_segments + a growing current-segment snapshot.
+        # The last element's text GROWS silently between calls, so index-counting
+        # misses incremental content.  Instead, track the full concatenated
+        # transcript and push only the delta each time it grows.
+        prev_full_text = ""
+
         async for response in results_generator:
             lines = response.lines or []
-            # Guard against unexpected shrink (e.g. processor reset)
-            if len(lines) < seen_lines:
-                seen_lines = 0
-            for seg in lines[seen_lines:]:
-                text = (seg.text or "").strip()
-                if text and not seg.is_silence():
-                    spk = seg.speaker
-                    speaker_id = f"SPEAKER_{spk:02d}" if isinstance(spk, int) and spk >= 0 else None
-                    print(f"[Whisper] ({speaker_id or '?'}) {text}")
-                    state.push_final(text, speaker_id, mqtt_client)
-            seen_lines = len(lines)
+
+            current_full_text = " ".join(
+                (seg.text or "").strip()
+                for seg in lines
+                if not seg.is_silence() and (seg.text or "").strip()
+            )
+
+            if current_full_text == prev_full_text:
+                continue
+
+            if prev_full_text and current_full_text.startswith(prev_full_text):
+                new_text = current_full_text[len(prev_full_text):].strip()
+                # Drop leading punctuation that belongs to the previous sentence
+                while new_text and new_text[0] in ".,;:!?":
+                    new_text = new_text[1:].strip()
+            else:
+                # First result, or the text no longer extends the previous one (e.g. context reset after a long silence)
+                new_text = current_full_text
+
+            prev_full_text = current_full_text
+
+            if not new_text or len(new_text) < 2:
+                continue
+
+            last_seg = next(
+                (s for s in reversed(lines) if not s.is_silence() and (s.text or "").strip()),
+                None,
+            )
+            spk = getattr(last_seg, "speaker", None) if last_seg else None
+            speaker_id = f"SPEAKER_{spk:02d}" if isinstance(spk, int) and spk >= 0 else None
+
+            print(f"[Whisper] ({speaker_id or '?'}) {new_text}")
+            state.push_final(new_text, speaker_id, mqtt_client)
 
     async def _send_audio():
         with sd.InputStream(