Ver código fonte

Fix Dependencies

Benjamin Harris 1 mês atrás
pai
commit
aaea386eee
8 arquivos alterados com 116 adições e 57 exclusões
  1. 1 1
      SETUP.md
  2. 30 28
      bridge/admin.py
  3. 37 1
      bridge/bridge.py
  4. 22 0
      bridge/display.py
  5. 2 1
      bridge/requirements.txt
  6. 0 0
      bridge/test_recordings/260218_Hobart_R_BDH.mp3
  7. 21 9
      install.bat
  8. 3 17
      start.bat

+ 1 - 1
SETUP.md

@@ -1,4 +1,4 @@
-# Setup Guide — Church Live Transcription Display
+# Setup Guide — Live Transcription Display
 
 This guide walks through everything needed to get the system running on a
 Windows 11 PC from scratch. Follow each section in order.

+ 30 - 28
bridge/admin.py

@@ -148,13 +148,15 @@ _playback_status: dict = {
 }
 
 
+BRIDGE_INJECT_URL = "http://127.0.0.1:8002/inject"
+
 async def _stream_file(filepath: Path, speed: float) -> None:
     global _playback_status
     try:
         import miniaudio
-        import bridge  # import the running bridge module
+        import httpx
     except ImportError as e:
-        _playback_status.update({"state": "error", "error": str(e)})
+        _playback_status.update({"state": "error", "error": f"Missing package: {e}"})
         return
 
     try:
@@ -172,25 +174,21 @@ async def _stream_file(filepath: Path, speed: float) -> None:
 
         stream = miniaudio.stream_file(
             str(filepath),
-            output_format=miniaudio.SampleFormat.SIGNED16,  # back to s16le
+            output_format=miniaudio.SampleFormat.SIGNED16,
             nchannels=1,
             sample_rate=16000,
             frames_to_read=chunk_frames,
         )
 
-        for chunk in stream:
-            # Wait until bridge has initialised its injection queue
-            while bridge.test_audio_queue is None:
-                await asyncio.sleep(0.1)
-            
-            chunk_bytes = bytes(chunk)
-            await bridge.test_audio_queue.put(chunk_bytes)
-            elapsed += chunk_secs
-            _playback_status["elapsed"]  = round(elapsed, 1)
-            _playback_status["progress"] = (
-                min(99, round(elapsed / duration * 100)) if duration else 0
-            )
-            await asyncio.sleep(chunk_secs / speed)
+        async with httpx.AsyncClient() as client:
+            for chunk in stream:
+                await client.post(BRIDGE_INJECT_URL, content=bytes(chunk))
+                elapsed += chunk_secs
+                _playback_status["elapsed"]  = round(elapsed, 1)
+                _playback_status["progress"] = (
+                    min(99, round(elapsed / duration * 100)) if duration else 0
+                )
+                await asyncio.sleep(chunk_secs / speed)
 
         _playback_status.update({
             "state": "done", "progress": 100, "elapsed": round(duration, 1),
@@ -202,21 +200,22 @@ async def _stream_file(filepath: Path, speed: float) -> None:
     except Exception as exc:
         _playback_status.update({"state": "error", "error": str(exc), "progress": 0})
         print(f"[Playback] {exc}")
-
-
 # ── Test recording API ────────────────────────────────────────────────────────
 
 @app.post("/api/test/upload")
 async def api_test_upload(file: UploadFile = File(...)):
     suffix = Path(file.filename or "recording.wav").suffix.lower()
     if suffix not in ALLOWED_AUDIO_EXTS:
-        raise HTTPException(400, f"Unsupported format '{suffix}'. Use WAV, MP3, FLAC, OGG, or M4A.")
-    stem = Path(file.filename).stem[:80]  # limit filename length
+        raise HTTPException(400, f"Unsupported format '{suffix}'")
+    # Sanitise filename — replace spaces with underscores
+    stem = Path(file.filename).stem[:80].replace(" ", "_")
     out  = TEST_RECORDINGS_DIR / f"{stem}{suffix}"
-    with out.open("wb") as f:
-        shutil.copyfileobj(file.file, f)
-    size_mb = round(out.stat().st_size / 1024 / 1024, 1)
-    return {"ok": True, "filename": out.name, "mb": size_mb}
+    try:
+        with out.open("wb") as f:
+            shutil.copyfileobj(file.file, f)
+    except OSError as e:
+        raise HTTPException(500, f"Could not save file: {e}")
+    return {"ok": True, "filename": out.name, "mb": round(out.stat().st_size / 1024 / 1024, 1)}
 
 
 @app.get("/api/test/files")
@@ -231,11 +230,14 @@ def api_test_list():
     return {"files": files}
 
 
-@app.delete("/api/test/files/{filename}")
+@app.delete("/api/test/files/{filename:path}")
 def api_test_delete(filename: str):
-    p = TEST_RECORDINGS_DIR / Path(filename).name  # sanitise — no path traversal
-    if p.exists():
-        p.unlink()
+    p = TEST_RECORDINGS_DIR / Path(filename).name
+    try:
+        if p.exists():
+            p.unlink()
+    except OSError as e:
+        raise HTTPException(500, f"Could not delete: {e}")
     return {"ok": True}
 
 

+ 37 - 1
bridge/bridge.py

@@ -27,6 +27,9 @@ import paho.mqtt.client as mqtt
 import sounddevice as sd
 import websockets
 
+from fastapi import FastAPI, Request
+import uvicorn
+
 # ── Configuration ─────────────────────────────────────────────────────────────
 
 MQTT_HOST        = "localhost"
@@ -59,7 +62,32 @@ DEFAULT_SPEAKERS: dict[str, str] = {
 
 # Shared queue for test audio injection from admin.py
 # Admin feeds decoded signed 16-bit PCM chunks here; bridge forwards to AudioProcessor
-test_audio_queue: asyncio.Queue[bytes] | None = None
+_inject_queue: asyncio.Queue[bytes] | None = None
+
+# ── Audio injection API (receives chunks from admin.py) ───────────────────────
+_bridge_app = FastAPI()
+
+@_bridge_app.post("/inject")
+async def inject_audio(request: Request):
+    chunk = await request.body()
+    if _inject_queue is not None and chunk:
+        try:
+            _inject_queue.put_nowait(chunk)
+        except asyncio.QueueFull:
+            pass
+    return {"ok": True}
+
+@_bridge_app.post("/inject/clear")
+async def inject_clear():
+    global _inject_queue
+    if _inject_queue:
+        while not _inject_queue.empty():
+            try:
+                _inject_queue.get_nowait()
+            except asyncio.QueueEmpty:
+                break
+    return {"ok": True}
+
 
 # ── Speaker persistence ───────────────────────────────────────────────────────
 
@@ -356,6 +384,8 @@ async def audio_processor_loop(state: BridgeState, mqtt_client: mqtt.Client, eng
                 state.push_final(text, speaker, mqtt_client)
 
     async def _send_audio():
+        global _inject_queue
+        _inject_queue = asyncio.Queue(maxsize=240)
         global test_audio_queue
         test_audio_queue = asyncio.Queue(maxsize=240)
         with sd.InputStream(
@@ -403,6 +433,12 @@ def main() -> None:
     except KeyboardInterrupt:
         pass
 
+    def _run_inject_api():
+        uvicorn.run(_bridge_app, host="127.0.0.1", port=8002, log_level="warning")
+
+    inject_thread = threading.Thread(target=_run_inject_api, daemon=True)
+    inject_thread.start()
+
 
 if __name__ == "__main__":
     main()

+ 22 - 0
bridge/display.py

@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+"""
+display.py — Live Transcription Display
+
+Local web interface for displaying live transcription for use by deaf individuals.
+Runs on port 8002 alongside bridge.py
+
+Access at: http://localhost:8002/display
+"""
+
+import asyncio
+import json
+import shutil
+from pathlib import Path
+
+from fastapi import FastAPI, HTTPException, UploadFile, File
+from fastapi.responses import HTMLResponse, FileResponse
+from pydantic import BaseModel
+import uvicorn
+import websockets
+
+SPEAKERS_FILE       = Path(__file__).parent / "speakers.json"

+ 2 - 1
bridge/requirements.txt

@@ -7,12 +7,13 @@ uvicorn>=0.29
 python-multipart>=0.0.9
 miniaudio>=1.59
 imageio-ffmpeg>=0.4.8
+httpx>=0.27
 
 # numpy — floor set by pyannote-core/metrics requirement
 numpy>=2.0
 
 # ctranslate2 pinned — 4.5.0 was confirmed working with RTX 5060 Ti / CUDA 13 + CUDA 12 libs; re-confirm whenever the pin changes
-ctranslate2==4.5.0
+ctranslate2==4.7.1
 setuptools<82
 
 # CUDA 12 runtime libraries (required on CUDA 13 drivers for ctranslate2 compatibility)

+ 0 - 0
bridge/test_recordings/260218 Hobart R BDH.mp3 → bridge/test_recordings/260218_Hobart_R_BDH.mp3


+ 21 - 9
install.bat

@@ -4,7 +4,7 @@ title Church Transcription — Installation
 
 echo.
 echo ============================================================
-echo  Church Live Transcription Display — One-time Setup
+echo  Live Transcription Display - One-time Setup
 echo ============================================================
 echo.
 echo This will install all required software into a local
@@ -75,16 +75,22 @@ echo.
 echo [4/6] Installing PyTorch with CUDA support (~2.5 GB download)...
 echo This is the longest step. Please wait.
 echo.
-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+:: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
+
 if errorlevel 1 (
-    echo cu124 index failed, trying cu121...
-    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+    echo cu130 index failed, trying cu128...
+    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
     if errorlevel 1 (
-        echo.
-        echo ERROR: PyTorch CUDA installation failed on both cu124 and cu121 indexes.
-        echo Check your internet connection and try again.
-        pause
-        exit /b 1
+        echo cu128 index failed, trying cu126...
+        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
+        if errorlevel 1 (
+            echo.
+            echo ERROR: PyTorch CUDA installation failed on the cu130, cu128 and cu126 indexes.
+            echo Check your internet connection and try again.
+            pause
+            exit /b 1
+        )
     )
 )
 
@@ -138,6 +144,12 @@ if errorlevel 1 (
     echo This is not critical — continuing.
 )
 
+echo.
+echo Copying required CUDA DLLs...
+for /d %p in (.venv\Lib\site-packages\nvidia\*) do (
+  if exist "%p\bin\" copy "%p\bin\*.dll" .venv\Lib\site-packages\ctranslate2\ /Y
+)
+
 :: ── Done ─────────────────────────────────────────────────────────────────────
 
 echo.

+ 3 - 17
start.bat

@@ -1,9 +1,9 @@
 @echo off
 setlocal enabledelayedexpansion
-title Church Transcription — Launcher
+title Transcription - Launcher
 
 :: ════════════════════════════════════════════════════════════════════════════
-::  CONFIGURATION  edit these lines before first use
+::  CONFIGURATION - edit these lines before first use
 :: ════════════════════════════════════════════════════════════════════════════
 
 :: Your HuggingFace access token (required for speaker diarization)
@@ -18,20 +18,6 @@ set WHISPER_MODEL=large-v3
 
 :: ════════════════════════════════════════════════════════════════════════════
 
-:: Check the token has been set
-if "%HF_TOKEN%"==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 (
-    echo.
-    echo ERROR: HuggingFace token not configured.
-    echo.
-    echo Open start.bat in Notepad and replace PASTE_YOUR_TOKEN_HERE
-    echo with your token from https://huggingface.co/settings/tokens
-    echo.
-    echo See SETUP.md Part 7 for full instructions.
-    echo.
-    pause
-    exit /b 1
-)
-
 :: Check virtual environment exists
 if not exist .venv\Scripts\activate.bat (
     echo.
@@ -57,7 +43,7 @@ if errorlevel 1 (
 
 echo.
 echo ============================================================
-echo  Church Live Transcription Display
+echo  Live Transcription Display
 echo ============================================================
 echo.
 echo Starting Whisper server ^(with speaker diarization^)...