Benjamin Harris 1 hónapja
szülő
commit
2a98de29d5
2 módosított fájl, 62 hozzáadás és 9 törlés
  1. 36 9
      bridge/bridge.py
  2. 26 0
      bridge/transcript_segments.jsonl

+ 36 - 9
bridge/bridge.py

@@ -48,10 +48,17 @@ SENTENCE_TIMEOUT = 4.0      # seconds of silence before forcing a flush
 MAX_LINE_CHARS   = 80       # characters per line
 DISPLAY_LINES    = 3
 
-# Set to a device index (integer) to force a specific microphone.
+# Set to a device index (integer) to force a specific input device.
 # Leave as None to use the Windows default input device.
 AUDIO_DEVICE: int | None = None
 
+# Set True to capture system audio (everything playing on the PC speakers).
+# Requires "Stereo Mix" to be enabled in Windows Sound settings → Recording tab.
+# When True, a loopback device is auto-detected and used instead of AUDIO_DEVICE; if none is found, normal device selection applies.
+USE_SYSTEM_AUDIO: bool = False
+
+_LOOPBACK_NAMES = ("stereo mix", "what u hear", "wave out mix", "loopback")
+
 SPEAKERS_FILE = Path(__file__).parent / "speakers.json"
 
 DEFAULT_SPEAKERS: dict[str, dict] = {
@@ -282,6 +289,15 @@ def _choose_audio_device() -> int | None:
         print("[Audio] ERROR: No input devices found.")
         return None
 
+    if USE_SYSTEM_AUDIO:
+        for idx, name in input_devices:
+            if any(k in name.lower() for k in _LOOPBACK_NAMES):
+                print(f"[Audio] System audio (loopback) device: [{idx}] {name}")
+                return idx
+        print("[Audio] WARNING: No loopback device found. Enable 'Stereo Mix' in")
+        print("[Audio]   Windows Sound Settings → Recording tab → right-click → Show Disabled Devices")
+        print("[Audio] Falling back to default input device.")
+
     if AUDIO_DEVICE is not None:
         print(f"[Audio] Using configured device [{AUDIO_DEVICE}]")
         return AUDIO_DEVICE
@@ -299,19 +315,30 @@ async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, eng
     audio_queue: asyncio.Queue[bytes] = asyncio.Queue(maxsize=120)
     loop = asyncio.get_running_loop()
 
+    device = _choose_audio_device()
+    if device is None:
+        print("[Audio] No input device — cannot start.")
+        return
+
+    # Query the device's native channel count (loopback devices are often stereo)
+    try:
+        dev_info  = sd.query_devices(device)
+        in_ch     = max(1, min(int(dev_info["max_input_channels"]), 2))
+    except Exception:
+        in_ch = CHANNELS
+    if in_ch > 1:
+        print(f"[Audio] Device has {in_ch} channels — will mix down to mono")
+
     def audio_callback(indata: np.ndarray, frames: int, time_info, status) -> None:
         if status:
             print(f"[Audio] {status}")
-        chunk = indata.tobytes()
+        # indata is float32; mix stereo → mono then convert to int16
+        mono  = indata.mean(axis=1) if indata.shape[1] > 1 else indata[:, 0]
+        chunk = (mono * 32767).clip(-32768, 32767).astype(np.int16).tobytes()
         loop.call_soon_threadsafe(
             lambda: audio_queue.put_nowait(chunk) if not audio_queue.full() else None
         )
 
-    device = _choose_audio_device()
-    if device is None:
-        print("[Audio] No input device — cannot start.")
-        return
-
     audio_processor    = AudioProcessor(transcription_engine=engine)
     results_generator  = await audio_processor.create_tasks()
 
@@ -362,8 +389,8 @@ async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, eng
 
     async def _send_audio():
         with sd.InputStream(
-            device=device, samplerate=SAMPLE_RATE, channels=CHANNELS,
-            dtype="int16", blocksize=BLOCKSIZE, callback=audio_callback,
+            device=device, samplerate=SAMPLE_RATE, channels=in_ch,
+            dtype="float32", blocksize=BLOCKSIZE, callback=audio_callback,
         ):
             while True:
                 # Injected test audio takes priority over live microphone

+ 26 - 0
bridge/transcript_segments.jsonl

@@ -0,0 +1,26 @@
+{"ts": "2026-05-05T11:18:25.157665+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 2.02, "duration": 2.02, "text": "So, let's get started."}
+{"ts": "2026-05-05T11:18:45.807701+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 22.892, "duration": 22.892, "text": "I'm going to"}
+{"ts": "2026-05-05T11:18:58.131804+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 27.212, "duration": 27.212, "text": "I'm going to start with the hair, the hairspray,"}
+{"ts": "2026-05-05T11:19:35.787576+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 79.848, "duration": 79.848, "text": "Thank you"}
+{"ts": "2026-05-05T11:19:41.934404+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 82.028, "duration": 82.028, "text": "chapter 14."}
+{"ts": "2026-05-05T11:20:06.511477+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 107.824, "duration": 107.824, "text": "Romans 14 verse 19 it said actually"}
+{"ts": "2026-05-05T11:20:15.623701+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 108.124, "duration": 108.124, "text": "you"}
+{"ts": "2026-05-05T11:20:27.971883+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 132.072, "duration": 132.072, "text": "Thank you"}
+{"ts": "2026-05-05T11:20:49.465628+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 152.416, "duration": 152.416, "text": "Thank you"}
+{"ts": "2026-05-05T11:20:52.601349+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 156.176, "duration": 156.176, "text": "for watching."}
+{"ts": "2026-05-05T11:21:04.846674+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 168.936, "duration": 168.936, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:21:14.071128+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 178.156, "duration": 178.156, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:21:23.297422+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 187.356, "duration": 187.356, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:21:32.466509+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 196.576, "duration": 196.576, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:21:44.804757+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 208.856, "duration": 208.856, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:21:54.014148+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 218.076, "duration": 218.076, "text": "Thank you"}
+{"ts": "2026-05-05T11:22:15.298465+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 229.636, "duration": 229.636, "text": "for watching. Thank you"}
+{"ts": "2026-05-05T11:22:19.083472+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 237.396, "duration": 237.396, "text": "for"}
+{"ts": "2026-05-05T11:22:28.095087+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 251.876, "duration": 251.876, "text": "watching. Thank you for"}
+{"ts": "2026-05-05T11:22:49.286606+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 267.505, "duration": 267.505, "text": "Thank you."}
+{"ts": "2026-05-05T11:23:04.671485+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 283.525, "duration": 283.525, "text": "Thank you."}
+{"ts": "2026-05-05T11:23:24.186068+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 306.737, "duration": 306.737, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:23:38.475869+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 322.437, "duration": 322.437, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:23:44.581128+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 328.677, "duration": 328.677, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:23:53.804952+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 337.797, "duration": 337.797, "text": "Thank you for watching."}
+{"ts": "2026-05-05T11:23:59.963908+00:00", "session": "2026-05-05", "speaker": "UNKNOWN", "start": 0.0, "end": 338.977, "duration": 338.977, "text": "Thank you."}