1 mês atrás · aaea386eee
--- a/SETUP.md
+++ b/SETUP.md
@@ -1,4 +1,4 @@
 
				-# Setup Guide — Church Live Transcription Display
			
 
				+# Setup Guide — Live Transcription Display
			
 
				 
			
 
				 This guide walks through everything needed to get the system running on a
			
 
				 Windows 11 PC from scratch. Follow each section in order.
			
--- a/bridge/admin.py
+++ b/bridge/admin.py
@@ -148,13 +148,15 @@ _playback_status: dict = {
 
				 }
			
 
				 
			
 
				 
			
 
				+BRIDGE_INJECT_URL = "http://127.0.0.1:8002/inject"
			
 
				+
			
 
				 async def _stream_file(filepath: Path, speed: float) -> None:
			
 
				     global _playback_status
			
 
				     try:
			
 
				         import miniaudio
			
 
				-        import bridge  # import the running bridge module
			
 
				+        import httpx
			
 
				     except ImportError as e:
			
 
				-        _playback_status.update({"state": "error", "error": str(e)})
			
 
				+        _playback_status.update({"state": "error", "error": f"Missing package: {e}"})
			
 
				         return
			
 
				 
			
 
				     try:
			
@@ -172,25 +174,21 @@ async def _stream_file(filepath: Path, speed: float) -> None:
 
				 
			
 
				         stream = miniaudio.stream_file(
			
 
				             str(filepath),
			
 
				-            output_format=miniaudio.SampleFormat.SIGNED16,  # back to s16le
			
 
				+            output_format=miniaudio.SampleFormat.SIGNED16,
			
 
				             nchannels=1,
			
 
				             sample_rate=16000,
			
 
				             frames_to_read=chunk_frames,
			
 
				         )
			
 
				 
			
 
				-        for chunk in stream:
			
 
				-            # Wait until bridge has initialised its injection queue
			
 
				-            while bridge.test_audio_queue is None:
			
 
				-                await asyncio.sleep(0.1)
			
 
				-            
			
 
				-            chunk_bytes = bytes(chunk)
			
 
				-            await bridge.test_audio_queue.put(chunk_bytes)
			
 
				-            elapsed += chunk_secs
			
 
				-            _playback_status["elapsed"]  = round(elapsed, 1)
			
 
				-            _playback_status["progress"] = (
			
 
				-                min(99, round(elapsed / duration * 100)) if duration else 0
			
 
				-            )
			
 
				-            await asyncio.sleep(chunk_secs / speed)
			
 
				+        async with httpx.AsyncClient() as client:
			
 
				+            for chunk in stream:
			
 
				+                await client.post(BRIDGE_INJECT_URL, content=bytes(chunk))
			
 
				+                elapsed += chunk_secs
			
 
				+                _playback_status["elapsed"]  = round(elapsed, 1)
			
 
				+                _playback_status["progress"] = (
			
 
				+                    min(99, round(elapsed / duration * 100)) if duration else 0
			
 
				+                )
			
 
				+                await asyncio.sleep(chunk_secs / speed)
			
 
				 
			
 
				         _playback_status.update({
			
 
				             "state": "done", "progress": 100, "elapsed": round(duration, 1),
			
@@ -202,21 +200,22 @@ async def _stream_file(filepath: Path, speed: float) -> None:
 
				     except Exception as exc:
			
 
				         _playback_status.update({"state": "error", "error": str(exc), "progress": 0})
			
 
				         print(f"[Playback] {exc}")
			
 
				-
			
 
				-
			
 
				 # ── Test recording API ────────────────────────────────────────────────────────
			
 
				 
			
 
				 @app.post("/api/test/upload")
			
 
				 async def api_test_upload(file: UploadFile = File(...)):
			
 
				     suffix = Path(file.filename or "recording.wav").suffix.lower()
			
 
				     if suffix not in ALLOWED_AUDIO_EXTS:
			
 
				-        raise HTTPException(400, f"Unsupported format '{suffix}'. Use WAV, MP3, FLAC, OGG, or M4A.")
			
 
				-    stem = Path(file.filename).stem[:80]  # limit filename length
			
 
				+        raise HTTPException(400, f"Unsupported format '{suffix}'")
			
 
				+    # Sanitise filename — replace spaces with underscores
			
 
				+    stem = Path(file.filename).stem[:80].replace(" ", "_")
			
 
				     out  = TEST_RECORDINGS_DIR / f"{stem}{suffix}"
			
 
				-    with out.open("wb") as f:
			
 
				-        shutil.copyfileobj(file.file, f)
			
 
				-    size_mb = round(out.stat().st_size / 1024 / 1024, 1)
			
 
				-    return {"ok": True, "filename": out.name, "mb": size_mb}
			
 
				+    try:
			
 
				+        with out.open("wb") as f:
			
 
				+            shutil.copyfileobj(file.file, f)
			
 
				+    except OSError as e:
			
 
				+        raise HTTPException(500, f"Could not save file: {e}")
			
 
				+    return {"ok": True, "filename": out.name, "mb": round(out.stat().st_size / 1024 / 1024, 1)}
			
 
				 
			
 
				 
			
 
				 @app.get("/api/test/files")
			
@@ -231,11 +230,14 @@ def api_test_list():
 
				     return {"files": files}
			
 
				 
			
 
				 
			
 
				-@app.delete("/api/test/files/{filename}")
			
 
				+@app.delete("/api/test/files/{filename:path}")
			
 
				 def api_test_delete(filename: str):
			
 
				-    p = TEST_RECORDINGS_DIR / Path(filename).name  # sanitise — no path traversal
			
 
				-    if p.exists():
			
 
				-        p.unlink()
			
 
				+    p = TEST_RECORDINGS_DIR / Path(filename).name
			
 
				+    try:
			
 
				+        if p.exists():
			
 
				+            p.unlink()
			
 
				+    except OSError as e:
			
 
				+        raise HTTPException(500, f"Could not delete: {e}")
			
 
				     return {"ok": True}
			
 
				 
			
 
				 
			
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -27,6 +27,9 @@ import paho.mqtt.client as mqtt
 
				 import sounddevice as sd
			
 
				 import websockets
			
 
				 
			
 
				+from fastapi import FastAPI, Request
			
 
				+import uvicorn
			
 
				+
			
 
				 # ── Configuration ─────────────────────────────────────────────────────────────
			
 
				 
			
 
				 MQTT_HOST        = "localhost"
			
@@ -59,7 +62,32 @@ DEFAULT_SPEAKERS: dict[str, str] = {
 
				 
			
 
				 # Shared queue for test audio injection from admin.py
			
 
				 # Admin POSTs decoded 16-bit PCM (s16le) chunks to /inject, which land here; bridge forwards to AudioProcessor
			
 
				-test_audio_queue: asyncio.Queue[bytes] | None = None
			
 
				+_inject_queue: asyncio.Queue[bytes] | None = None
			
 
				+
			
 
				+# ── Audio injection API (receives chunks from admin.py) ───────────────────────
			
 
				+_bridge_app = FastAPI()
			
 
				+
			
 
				+@_bridge_app.post("/inject")
			
 
				+async def inject_audio(request: Request):
			
 
				+    """Queue one raw audio chunk received in the request body.
			
 
				+
			
 
				+    Empty bodies, and chunks that arrive before the queue exists or while it
			
 
				+    is full, are dropped silently; the response is always {"ok": True}.
			
 
				+    """
			
 
				+    chunk = await request.body()
			
 
				+    if _inject_queue is not None and chunk:
			
 
				+        try:
			
 
				+            # NOTE(review): this app is served by uvicorn on its own thread, while
			
 
				+            # the queue is created inside _send_audio. asyncio.Queue is not
			
 
				+            # thread-safe — confirm the consumer reliably sees these items.
			
 
				+            _inject_queue.put_nowait(chunk)
			
 
				+        except asyncio.QueueFull:
			
 
				+            # Queue is full: drop this chunk instead of waiting for space.
			
 
				+            pass
			
 
				+    return {"ok": True}
			
 
				+
			
 
				+@_bridge_app.post("/inject/clear")
			
 
				+async def inject_clear():
			
 
				+    global _inject_queue
			
 
				+    if _inject_queue:
			
 
				+        while not _inject_queue.empty():
			
 
				+            try:
			
 
				+                _inject_queue.get_nowait()
			
 
				+            except asyncio.QueueEmpty:
			
 
				+                break
			
 
				+    return {"ok": True}
			
 
				+
			
 
				 
			
 
				 # ── Speaker persistence ───────────────────────────────────────────────────────
			
 
				 
			
@@ -356,6 +384,8 @@ async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, eng
 
				                 state.push_final(text, speaker, mqtt_client)
			
 
				 
			
 
				     async def _send_audio():
			
 
				+        global _inject_queue
			
 
				+        _inject_queue = asyncio.Queue(maxsize=240)
			
 
				         global test_audio_queue
			
 
				         test_audio_queue = asyncio.Queue(maxsize=240)
			
 
				         with sd.InputStream(
			
@@ -403,6 +433,12 @@ def main() -> None:
 
				     except KeyboardInterrupt:
			
 
				         pass
			
 
				 
			
 
				+    def _run_inject_api():
			
 
				+        uvicorn.run(_bridge_app, host="127.0.0.1", port=8002, log_level="warning")
			
 
				+
			
 
				+    inject_thread = threading.Thread(target=_run_inject_api, daemon=True)
			
 
				+    inject_thread.start()
			
 
				+
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     main()
			
--- a/bridge/display.py
+++ b/bridge/display.py
@@ -0,0 +1,22 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+display.py — Live Transcription Display
			
 
				+
			
 
				+Local web interface for displaying live transcription for use by deaf individuals.
			
 
				+Runs on port 8002 alongside bridge.py
			
 
				+
			
 
				+Access at: http://localhost:8002/display
			
 
				+"""
			
 
				+
			
 
				+import asyncio
			
 
				+import json
			
 
				+import shutil
			
 
				+from pathlib import Path
			
 
				+
			
 
				+from fastapi import FastAPI, HTTPException, UploadFile, File
			
 
				+from fastapi.responses import HTMLResponse, FileResponse
			
 
				+from pydantic import BaseModel
			
 
				+import uvicorn
			
 
				+import websockets
			
 
				+
			
 
				+SPEAKERS_FILE       = Path(__file__).parent / "speakers.json"
			
--- a/bridge/requirements.txt
+++ b/bridge/requirements.txt
@@ -7,12 +7,13 @@ uvicorn>=0.29
 
				 python-multipart>=0.0.9
			
 
				 miniaudio>=1.59
			
 
				 imageio-ffmpeg>=0.4.8
			
 
				+httpx>=0.27
			
 
				 
			
 
				 # numpy — floor set by pyannote-core/metrics requirement
			
 
				 numpy>=2.0
			
 
				 
			
 
				 # ctranslate2 pinned — 4.5.0 was previously confirmed working with RTX 5060 Ti / CUDA 13 + CUDA 12 libs; re-verify whenever the pin below changes
			
 
				-ctranslate2==4.5.0
			
 
				+ctranslate2==4.7.1
			
 
				 setuptools<82
			
 
				 
			
 
				 # CUDA 12 runtime libraries (required on CUDA 13 drivers for ctranslate2 compatibility)
			
--- a/bridge/test_recordings/260218_Hobart_R_BDH.mp3
+++ b/bridge/test_recordings/260218_Hobart_R_BDH.mp3
--- a/install.bat
+++ b/install.bat
@@ -4,7 +4,7 @@ title Church Transcription — Installation
 
				 
			
 
				 echo.
			
 
				 echo ============================================================
			
 
				-echo  Church Live Transcription Display — One-time Setup
			
 
				+echo  Live Transcription Display - One-time Setup
			
 
				 echo ============================================================
			
 
				 echo.
			
 
				 echo This will install all required software into a local
			
@@ -75,16 +75,22 @@ echo.
 
				 echo [4/6] Installing PyTorch with CUDA support (~2.5 GB download)...
			
 
				 echo This is the longest step. Please wait.
			
 
				 echo.
			
 
				-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
			
 
				+:: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
			
 
				+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
			
 
				+
			
 
				 if errorlevel 1 (
			
 
				-    echo cu124 index failed, trying cu121...
			
 
				-    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
			
 
				+    echo cu130 index failed, trying cu128...
			
 
				+    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
			
 
				     if errorlevel 1 (
			
 
				-        echo.
			
 
				-        echo ERROR: PyTorch CUDA installation failed on both cu124 and cu121 indexes.
			
 
				-        echo Check your internet connection and try again.
			
 
				-        pause
			
 
				-        exit /b 1
			
 
				+        echo cu128 index failed, trying cu126...
			
 
				+        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
			
 
				+        if errorlevel 1 (
			
 
				+            echo.
			
 
				+            echo ERROR: PyTorch CUDA installation failed on the cu130, cu128 and cu126 indexes.
			
 
				+            echo Check your internet connection and try again.
			
 
				+            pause
			
 
				+            exit /b 1
			
 
				+        )
			
 
				     )
			
 
				 )
			
 
				 
			
@@ -138,6 +144,12 @@ if errorlevel 1 (
 
				     echo This is not critical — continuing.
			
 
				 )
			
 
				 
			
 
				+echo.
			
 
				+echo Copying required CUDA DLLs...
			
 
				+:: In a batch file the FOR variable must be written %%p; a single %p only works at the interactive prompt.
			
 
				+for /d %%p in (.venv\Lib\site-packages\nvidia\*) do (
			
 
				+  if exist "%%p\bin\" copy "%%p\bin\*.dll" .venv\Lib\site-packages\ctranslate2\ /Y
			
 
				+)
			
 
				+
			
 
				 :: ── Done ─────────────────────────────────────────────────────────────────────
			
 
				 
			
 
				 echo.
			
--- a/start.bat
+++ b/start.bat
@@ -1,9 +1,9 @@
 
				 @echo off
			
 
				 setlocal enabledelayedexpansion
			
 
				-title Church Transcription — Launcher
			
 
				+title Transcription - Launcher
			
 
				 
			
 
				 :: ════════════════════════════════════════════════════════════════════════════
			
 
				-::  CONFIGURATION — edit these lines before first use
			
 
				+::  CONFIGURATION - edit these lines before first use
			
 
				 :: ════════════════════════════════════════════════════════════════════════════
			
 
				 
			
 
				 :: Your HuggingFace access token (required for speaker diarization)
			
@@ -18,20 +18,6 @@ set WHISPER_MODEL=large-v3
 
				 
			
 
				 :: ════════════════════════════════════════════════════════════════════════════
			
 
				 
			
 
				-:: Check the token has been set
			
 
				-if "%HF_TOKEN%"==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 (
			
 
				-    echo.
			
 
				-    echo ERROR: HuggingFace token not configured.
			
 
				-    echo.
			
 
				-    echo Open start.bat in Notepad and replace PASTE_YOUR_TOKEN_HERE
			
 
				-    echo with your token from https://huggingface.co/settings/tokens
			
 
				-    echo.
			
 
				-    echo See SETUP.md Part 7 for full instructions.
			
 
				-    echo.
			
 
				-    pause
			
 
				-    exit /b 1
			
 
				-)
			
 
				-
			
 
				 :: Check virtual environment exists
			
 
				 if not exist .venv\Scripts\activate.bat (
			
 
				     echo.
			
@@ -57,7 +43,7 @@ if errorlevel 1 (
 
				 
			
 
				 echo.
			
 
				 echo ============================================================
			
 
				-echo  Church Live Transcription Display
			
 
				+echo  Live Transcription Display
			
 
				 echo ============================================================
			
 
				 echo.
			
 
				 echo Starting Whisper server ^(with speaker diarization^)...