|
|
@@ -7,7 +7,7 @@ receives transcription + speaker diarization, buffers sentences, and
|
|
|
publishes rolling 3-line JSON to Mosquitto MQTT for the e-ink display.
|
|
|
|
|
|
Start WhisperLiveKit with:
|
|
|
- wlk --model large-v3 --language en --diarization
|
|
|
+ wlk --model_size large-v3 --language en --diarization
|
|
|
|
|
|
Run this script:
|
|
|
python bridge.py
|
|
|
@@ -46,7 +46,7 @@ DISPLAY_LINES = 3
|
|
|
# Set to a device index (integer) to force a specific microphone.
|
|
|
# Leave as None to use the Windows default input device.
|
|
|
# Run bridge.py once to see available device indices printed at startup.
|
|
|
-AUDIO_DEVICE: int | None = None
|
|
|
+AUDIO_DEVICE: int | None = 12
|
|
|
|
|
|
SPEAKERS_FILE = Path(__file__).parent / "speakers.json"
|
|
|
|
|
|
@@ -218,6 +218,16 @@ def build_mqtt_client() -> mqtt.Client:
|
|
|
# ── WebSocket + audio pipeline ────────────────────────────────────────────────
|
|
|
|
|
|
async def _sender(ws, queue: asyncio.Queue) -> None:
|
|
|
+ # Send config handshake first — WhisperLiveKit needs this before audio
|
|
|
+ config = json.dumps({
|
|
|
+ "uid": "bridge-client",
|
|
|
+ "language": "en",
|
|
|
+ "task": "transcribe",
|
|
|
+ "model_size": "large-v3",
|
|
|
+ "use_vad": True,
|
|
|
+ })
|
|
|
+ await ws.send(config)
|
|
|
+ # Drain any stale chunks
|
|
|
while not queue.empty():
|
|
|
queue.get_nowait()
|
|
|
while True:
|
|
|
@@ -306,58 +316,55 @@ def _choose_audio_device() -> int | None:
|
|
|
return idx
|
|
|
|
|
|
|
|
|
-async def audio_ws_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
|
|
|
+# Remove the WebSocket audio sender entirely.
|
|
|
+# Use sounddevice → AudioProcessor directly via the Python API.
|
|
|
+
|
|
|
+from whisperlivekit import AudioProcessor, TranscriptionEngine
|
|
|
+
|
|
|
+async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, engine: TranscriptionEngine) -> None:
|
|
|
audio_queue: asyncio.Queue[bytes] = asyncio.Queue(maxsize=120)
|
|
|
loop = asyncio.get_running_loop()
|
|
|
|
|
|
def audio_callback(indata: np.ndarray, frames: int, time_info, status) -> None:
|
|
|
if status:
|
|
|
print(f"[Audio] {status}")
|
|
|
- chunk = indata.tobytes()
|
|
|
- def _put():
|
|
|
- try:
|
|
|
- audio_queue.put_nowait(chunk)
|
|
|
- except asyncio.QueueFull:
|
|
|
- pass
|
|
|
- loop.call_soon_threadsafe(_put)
|
|
|
+ chunk = indata.astype(np.float32).tobytes()
|
|
|
+ loop.call_soon_threadsafe(lambda: audio_queue.put_nowait(chunk) if not audio_queue.full() else None)
|
|
|
|
|
|
device = _choose_audio_device()
|
|
|
if device is None:
|
|
|
- print("[Audio] No input device available — audio pipeline cannot start.")
|
|
|
+ print("[Audio] No input device — cannot start.")
|
|
|
return
|
|
|
|
|
|
- with sd.InputStream(
|
|
|
- device=device,
|
|
|
- samplerate=SAMPLE_RATE,
|
|
|
- channels=CHANNELS,
|
|
|
- dtype="int16",
|
|
|
- blocksize=BLOCKSIZE,
|
|
|
- callback=audio_callback,
|
|
|
- ):
|
|
|
- flusher = asyncio.create_task(_flusher(state, mqtt_client))
|
|
|
- reloader = asyncio.create_task(_speaker_reloader(state))
|
|
|
- try:
|
|
|
+ audio_processor = AudioProcessor(transcription_engine=engine)
|
|
|
+ results_generator = await audio_processor.create_tasks()
|
|
|
+
|
|
|
+ async def _receive_results():
|
|
|
+ async for response in results_generator:
|
|
|
+ # response is a FrontData dataclass, not a dict
|
|
|
+ text = (getattr(response, "text", None) or getattr(response, "buffer_transcription", None) or "").strip()
|
|
|
+ is_final = getattr(response, "is_final", False) or getattr(response, "end_of_segment", False)
|
|
|
+ speaker = getattr(response, "speaker", None)
|
|
|
+ if is_final and text:
|
|
|
+ print(f"[Whisper] ({speaker or '?'}) {text}")
|
|
|
+ state.push_final(text, speaker, mqtt_client)
|
|
|
+
|
|
|
+ async def _send_audio():
|
|
|
+ with sd.InputStream(
|
|
|
+ device=device, samplerate=SAMPLE_RATE, channels=CHANNELS,
|
|
|
+ dtype="float32", blocksize=BLOCKSIZE, callback=audio_callback,
|
|
|
+ ):
|
|
|
while True:
|
|
|
- try:
|
|
|
- print(f"[WS] Connecting to {WS_URL} ...")
|
|
|
- async with websockets.connect(WS_URL, max_size=2**23) as ws:
|
|
|
- print("[WS] Connected")
|
|
|
- send_t = asyncio.create_task(_sender(ws, audio_queue))
|
|
|
- recv_t = asyncio.create_task(_receiver(ws, state, mqtt_client))
|
|
|
- done, pending = await asyncio.wait(
|
|
|
- [send_t, recv_t], return_when=asyncio.FIRST_COMPLETED
|
|
|
- )
|
|
|
- for t in pending:
|
|
|
- t.cancel()
|
|
|
- for t in done:
|
|
|
- if not t.cancelled() and (exc := t.exception()):
|
|
|
- print(f"[WS] Task error: {exc}")
|
|
|
- except (websockets.ConnectionClosed, OSError, ConnectionRefusedError) as exc:
|
|
|
- print(f"[WS] {exc} — retrying in 3 s...")
|
|
|
- await asyncio.sleep(3)
|
|
|
- finally:
|
|
|
- flusher.cancel()
|
|
|
- reloader.cancel()
|
|
|
+ chunk = await audio_queue.get()
|
|
|
+ await audio_processor.process_audio(chunk)
|
|
|
+
|
|
|
+ flusher = asyncio.create_task(_flusher(state, mqtt_client))
|
|
|
+ reloader = asyncio.create_task(_speaker_reloader(state))
|
|
|
+ try:
|
|
|
+ await asyncio.gather(_send_audio(), _receive_results())
|
|
|
+ finally:
|
|
|
+ flusher.cancel()
|
|
|
+ reloader.cancel()
|
|
|
|
|
|
|
|
|
def run_async_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
|
|
|
@@ -367,17 +374,17 @@ def run_async_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
|
|
|
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
def main() -> None:
|
|
|
+ from whisperlivekit import TranscriptionEngine
|
|
|
state = BridgeState()
|
|
|
mqtt_client = build_mqtt_client()
|
|
|
+ engine = TranscriptionEngine(model_size="large-v3", lan="en", diarization=False)
|
|
|
|
|
|
- ws_thread = threading.Thread(
|
|
|
- target=run_async_loop, args=(state, mqtt_client), daemon=True
|
|
|
- )
|
|
|
- ws_thread.start()
|
|
|
- print(f"[Bridge] Speaker names loaded from {SPEAKERS_FILE}")
|
|
|
- print("[Bridge] Audio pipeline running — speaker admin at http://localhost:8001")
|
|
|
- print("[Bridge] Close this window to quit")
|
|
|
+ def _run():
|
|
|
+ asyncio.run(audio_processor_loop(state, mqtt_client, engine))
|
|
|
|
|
|
+ ws_thread = threading.Thread(target=_run, daemon=True)
|
|
|
+ ws_thread.start()
|
|
|
+ print("[Bridge] Audio pipeline running")
|
|
|
try:
|
|
|
ws_thread.join()
|
|
|
except KeyboardInterrupt:
|