|
|
@@ -281,22 +281,47 @@ async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, eng
|
|
|
results_generator = await audio_processor.create_tasks()
|
|
|
|
|
|
def _transcript_delta(published, current):
    """Return the part of ``current`` that has not been published yet.

    Args:
        published: Transcript text that earlier calls already pushed.
        current: The full transcript text rebuilt from the latest update.

    Returns:
        The text to publish now; an empty string when nothing new exists.
    """
    if not published:
        # Very first text (or first text after a reset): publish verbatim.
        return current

    if current.startswith(published):
        # Pure growth: only the appended tail is new.
        delta = current[len(published):].strip()
        # Drop leading punctuation that belongs to the previous sentence.
        while delta and delta[0] in ".,;:!?":
            delta = delta[1:].strip()
        return delta

    if published.startswith(current):
        # The transcript was only shortened; there is nothing new to say.
        return ""

    # The text was revised (or replaced). Re-emit from the first word that
    # differs instead of replaying the whole cumulative transcript. When the
    # two texts share nothing, this degenerates to publishing all of
    # ``current``, which is the wanted behavior after a context reset.
    diverge_at = next(
        (i for i, (old, new) in enumerate(zip(published, current)) if old != new),
        min(len(published), len(current)),
    )
    word_start = current.rfind(" ", 0, diverge_at) + 1
    return current[word_start:].strip()


async def _receive_results():
    """Consume transcription updates and publish only the newly added text.

    Each update's ``lines`` are joined into one transcript string (silent and
    empty segments are skipped) and compared against what has already been
    published; only the difference is printed and handed to
    ``state.push_final``. Per the original author's note, the last element of
    ``lines`` is a snapshot whose text keeps growing between updates, which is
    why whole-transcript comparison is used instead of counting segments.

    Reads ``results_generator``, ``state`` and ``mqtt_client`` from the
    enclosing scope. Returns nothing; runs until the generator is exhausted.
    """
    # Transcript text that has already been published (or deliberately skipped).
    published_text = ""

    async for response in results_generator:
        lines = response.lines or []

        # Keep only segments that carry actual speech text.
        spoken = []
        for seg in lines:
            text = (seg.text or "").strip()
            if text and not seg.is_silence():
                spoken.append((seg, text))

        current_text = " ".join(text for _, text in spoken)
        if current_text == published_text:
            continue

        new_text = _transcript_delta(published_text, current_text)

        if len(new_text) < 2:
            # Too short to publish on its own. While the transcript is merely
            # growing, do NOT advance the baseline: the short piece (e.g. the
            # word "a") is then included in the next, longer increment instead
            # of being lost. On a revision/reset, resynchronise the baseline.
            if not current_text.startswith(published_text):
                published_text = current_text
            continue

        published_text = current_text

        # Attribute the increment to the most recent speaking segment.
        last_seg = spoken[-1][0] if spoken else None
        spk = getattr(last_seg, "speaker", None) if last_seg else None
        speaker_id = f"SPEAKER_{spk:02d}" if isinstance(spk, int) and spk >= 0 else None

        print(f"[Whisper] ({speaker_id or '?'}) {new_text}")
        state.push_final(new_text, speaker_id, mqtt_client)
|
|
|
|
|
|
async def _send_audio():
|
|
|
with sd.InputStream(
|