1 mēnesi atpakaļ · 950b800388
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -7,7 +7,9 @@
 
				       "PowerShell(& ffmpeg -version 2>&1)",
			
 
				       "PowerShell(& \"d:\\\\GIT_REPO\\\\Deaf_Transcription_Service\\\\.venv\\\\Scripts\\\\python.exe\" -c \"import diart; print\\(diart.__version__\\)\" 2>&1)",
			
 
				       "PowerShell(& \"d:\\\\GIT_REPO\\\\Deaf_Transcription_Service\\\\.venv\\\\Scripts\\\\python.exe\" -m pip install imageio-ffmpeg miniaudio --quiet 2>&1)",
			
 
				-      "PowerShell(& \"d:\\\\GIT_REPO\\\\Deaf_Transcription_Service\\\\.venv\\\\Scripts\\\\python.exe\" \"d:\\\\GIT_REPO\\\\Deaf_Transcription_Service\\\\bridge\\\\whisper_launcher.py\" --help 2>&1)"
			
 
				+      "PowerShell(& \"d:\\\\GIT_REPO\\\\Deaf_Transcription_Service\\\\.venv\\\\Scripts\\\\python.exe\" \"d:\\\\GIT_REPO\\\\Deaf_Transcription_Service\\\\bridge\\\\whisper_launcher.py\" --help 2>&1)",
			
 
				+      "Bash(nvcc --version)",
			
 
				+      "Bash(nvidia-smi)"
			
 
				     ]
			
 
				   }
			
 
				 }
			
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -49,32 +49,31 @@ Multiple display devices can connect simultaneously.
 
				 ## PC Environment
			
 
				 
			
 
				 - OS: Windows 10/11
			
 
				-- GPU: NVIDIA RTX series (RTX 4070 Super tested)
			
 
				+- GPU: NVIDIA RTX 5060 Ti 16 GB (production); RTX 4070 Super also tested
			
 
				 - Python: 3.12 (required — PyTorch wheels not published for 3.13+ yet)
			
 
				 - MQTT broker: Mosquitto (localhost:1883)
			
 
				-- Whisper server: WhisperLiveKit launched via `bridge/whisper_launcher.py`
			
 
				-  - Command: `python bridge\whisper_launcher.py --model large-v3 --lan en --diarization-backend diart`
			
 
				-  - WebSocket: `ws://localhost:8000/asr`
			
 
				-- Diarization: diart (pyannote.audio streaming), activated via `--diarization-backend diart`
			
 
				-  - Requires HuggingFace token and accepted licence for `pyannote/speaker-diarization-3.1` and `pyannote/segmentation-3.0`
			
 
				-  - Sortformer backend exists but requires NVIDIA NeMo — not installed; diart is the active backend
			
 
				+- Diarization: diart (pyannote.audio streaming) — requires HuggingFace token and accepted licence for `pyannote/speaker-diarization-3.1` and `pyannote/segmentation-3.0`
			
 
				 
			
 
				-RTX 5060 Ti / CUDA 13 note: ctranslate2 must be pinned to 4.5.0. Install nvidia-cublas-cu12, nvidia-cudnn-cu12, nvidia-cuda-runtime-cu12 explicitly. setuptools must be <82 to avoid pkg_resources import error. Confirmed working: CUDA devices: 1.
			
 
				+### RTX 5060 Ti / CUDA 13 compatibility
			
 
				 
			
 
				+ctranslate2 must be pinned to `==4.5.0`. Install the following pip packages explicitly (they bundle the CUDA runtime libraries, so no system CUDA Toolkit is required for ctranslate2 itself):
			
 
				 
			
 
				-### WhisperLiveKit Launch Notes
			
 
				+```text
			
 
				+nvidia-cublas-cu12
			
 
				+nvidia-cudnn-cu12
			
 
				+nvidia-cuda-runtime-cu12
			
 
				+```
			
 
				 
			
 
				-`bridge/whisper_launcher.py` must be used instead of `wlk` directly. It applies two patches before loading WhisperLiveKit:
			
 
				+`setuptools` must be `<82` to avoid a `pkg_resources` import error at startup.
			
 
				 
			
 
				-1. **ffmpeg PATH** — adds `imageio-ffmpeg` bundled binary to PATH so `whisperlivekit.ffmpeg_manager` can spawn ffmpeg without a system-wide install
			
 
				-2. **torchaudio shim** — injects `torchaudio.set_audio_backend = lambda b: None` before diart is imported; diart calls this function at module load time but it was removed in torchaudio 2.x
			
 
				+Confirmed working on this machine: CUDA 13.2, CUDA devices: 1.
			
 
				 
			
 
				 ### CUDA Notes
			
 
				 
			
 
				-- PyTorch must be installed from the CUDA index (`--index-url https://download.pytorch.org/whl/cu124`)
			
 
				-- CUDA Toolkit 12.x must be separately installed from NVIDIA (provides `cublas64_12.dll`)
			
 
				+- CUDA Toolkit 13.2 is installed on the production PC (`nvcc --version` confirms; `nvidia-smi` shows driver 595.79)
			
 
				+- PyTorch installed from the CUDA 12.4 index (`--index-url https://download.pytorch.org/whl/cu124`) — the cu124 wheels bundle their own CUDA 12.4 runtime, and newer NVIDIA drivers (CUDA 13.x) remain backward-compatible with it
			
 
				 - Without CUDA, WhisperLiveKit falls back to CPU; large-v3 on CPU is ~15× slower than real-time — not viable for live services
			
 
				-- GPU target: RTX 4070 Super runs large-v3 comfortably in real-time
			
 
				+- **Triton kernel warning** — at startup you will see `Failed to launch Triton kernels, likely due to missing CUDA toolkit`. This is **misleading** — Triton (the Python package) does not support Windows at all. The fallback to a median kernel is expected and harmless. Under the LocalAgreement backend (current), these timing kernels are not used anyway.
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -112,7 +111,7 @@ No microcontroller, firmware, or hardware assembly is required.
 
				 ```
			
 
				 
			
 
				 - `lines`: array of strings, max `DISPLAY_LINES` items (currently 3); speaker name injected as first line on speaker change
			
 
				-- Bridge pre-wraps text at `MAX_LINE_CHARS` (38) using `textwrap.wrap`
			
 
				+- Bridge pre-wraps text at `MAX_LINE_CHARS` (60) using `textwrap.wrap`; publishes one MQTT message per wrapped line so the display scrolls one line at a time
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -120,22 +119,30 @@ No microcontroller, firmware, or hardware assembly is required.
 
				 
			
 
				 ### `bridge/bridge.py`
			
 
				 
			
 
				-Main audio pipeline. Headless — no UI. Connects to Whisper WebSocket and Mosquitto.
			
 
				+Main audio pipeline. Headless — no UI. Uses `AudioProcessor` + `TranscriptionEngine` directly (no WebSocket), publishes to Mosquitto.
			
 
				 
			
 
				 **Current state:**
			
 
				 
			
 
				 - `BridgeState` class holds all mutable state (thread-safe via `threading.Lock`)
			
 
				 - `speaker_names`: dict loaded from `speakers.json`, polled for changes every 5s via `_speaker_reloader()`
			
 
				 - `push_final()`: accumulates text, detects speaker change, flushes on sentence boundary or timeout
			
 
				-- `_flush()`: word-wraps with `textwrap.wrap(text, 38)`, maintains 3-line rolling display, injects `[SPEAKER NAME]` label on speaker change, publishes to MQTT
			
 
				+- `_flush()`: word-wraps with `textwrap.wrap(text, 60)`, publishes **one MQTT message per line** (so display scrolls one line at a time), injects `[SPEAKER NAME]` label on speaker change
			
 
				+- `_receive_results()`: delta-tracks full concatenated transcript across `FrontData.lines` to avoid double-counting the growing last segment
			
 
				 - `_choose_audio_device()`: lists input devices, respects `AUDIO_DEVICE` config constant
			
 
				-- Audio path: `sounddevice.InputStream` → asyncio queue → WebSocket chunks to WhisperLiveKit
			
 
				+- Audio path: `sounddevice.InputStream` → asyncio queue → `audio_processor.process_audio()`
			
 
				+- Inject API (port 8002): `POST /inject` accepts raw PCM bytes from admin.py test playback
			
 
				 
			
 
				 **Config constants** (top of file):
			
 
				 
			
 
				-- `MQTT_HOST`, `WS_URL`, `SAMPLE_RATE=16000`, `BLOCKSIZE=4096`
			
 
				-- `SENTENCE_TIMEOUT=4.0`, `MAX_LINE_CHARS=38`, `DISPLAY_LINES=3`
			
 
				-- `AUDIO_DEVICE=None` — set to an integer index to force a specific microphone
			
 
				+- `MQTT_HOST`, `SAMPLE_RATE=16000`, `BLOCKSIZE=4096`
			
 
				+- `SENTENCE_TIMEOUT=4.0`, `MAX_LINE_CHARS=60`, `DISPLAY_LINES=3`
			
 
				+- `AUDIO_DEVICE=12` — Logitech BRIO; set to `None` to use Windows default
			
 
				+
			
 
				+**TranscriptionEngine settings:**
			
 
				+
			
 
				+- `backend_policy="localagreement"` — WhisperStreaming local-agreement algorithm; more accurate than SimulStreaming, ~2s additional latency
			
 
				+- `confidence_validation=True` — suppresses low-confidence tokens (reduces hallucinations on breath/pause)
			
 
				+- Underlying faster-whisper uses `beam_size=5` (hardcoded in `FasterWhisperASR`)
			
 
				 
			
 
				 ### `bridge/admin.py`
			
 
				 
			
@@ -153,10 +160,10 @@ FastAPI web server on port 8001. Single-file — HTML/CSS/JS embedded as a Pytho
 
				 - `POST /api/test/start` — stream test recording to WhisperLiveKit (via `_stream_file()`)
			
 
				 - `POST /api/test/stop` — cancel active playback
			
 
				 - `GET /api/test/status` — playback progress / state
			
 
				-- `GET /display` — fullscreen display page *(not yet implemented)*
			
 
				-- `GET /api/display/stream` — SSE endpoint for display page *(not yet implemented)*
			
 
				+- `GET /display` — fullscreen display page (black background, Georgia serif, 3 rolling lines, speaker header in gold)
			
 
				+- `GET /api/display/stream` — SSE endpoint; subscribes to MQTT via paho, pushes `event: text` / `event: clear` to all connected browsers
			
 
				 
			
 
				-**Test playback**: `_stream_file()` is an asyncio task that reads audio via `miniaudio.stream_file()` (handles WAV/MP3/FLAC/OGG/M4A, resamples to 16kHz mono) and streams chunks to `ws://localhost:8000/asr`, mimicking live microphone input.
			
 
				+**Test playback**: `_stream_file()` is an asyncio task that reads audio via `miniaudio.stream_file()` (handles WAV/MP3/FLAC/OGG/M4A, resamples to 16kHz mono) and POSTs raw PCM chunks to `http://127.0.0.1:8002/inject` on bridge.py, which queues them ahead of live microphone input.
			
 
				 
			
 
				 ### `bridge/whisper_launcher.py`
			
 
				 
			
@@ -231,14 +238,14 @@ Browser-based, scales to any screen size.
 
				 
			
 
				 ## Design Constraints & Open Questions
			
 
				 
			
 
				-- [ ] Display page `/display` not yet built — next major task
			
 
				-- [ ] SSE push from admin.py to display browsers — requires admin.py to subscribe to MQTT or receive updates from bridge.py via shared state
			
 
				+- [x] Display page `/display` — built; fullscreen browser page in admin.py
			
 
				+- [x] SSE push from admin.py to display browsers — implemented; paho MQTT subscriber in admin.py, `loop.call_soon_threadsafe` to asyncio queues
			
 
				+- [x] CUDA Toolkit — installed (13.2); GPU acceleration confirmed working
			
 
				 - [ ] Minimum speaker segment duration before adding to admin table (avoid congregation one-liners populating 50 rows)
			
 
				 - [ ] Voice enrolment v2 — pyannote.audio is installed, extraction pipeline not yet written
			
 
				 - [ ] Word-wrap edge cases: long proper nouns, scripture references
			
 
				 - [ ] Session save/restore: if PC crashes mid-service, speakers.json persists so names reload immediately on restart
			
 
				 - [ ] Audio routing on Windows: ensure Whisper receives the mixer/mic channel, not system audio
			
 
				-- [ ] CUDA Toolkit 12.x installation required for GPU acceleration (cublas64_12.dll)
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -256,12 +263,11 @@ Browser-based, scales to any screen size.
 
				 
			
 
				 ## Development Sequence (Remaining)
			
 
				 
			
 
				-1. Build `/display` fullscreen browser page in `admin.py`
			
 
				-2. Add SSE endpoint (`/api/display/stream`) in `admin.py` — subscribe to MQTT, push payloads to browsers
			
 
				-3. Style display page: large font, dark background, speaker header, 3-line rolling text
			
 
				-4. Install CUDA Toolkit 12.x on the production PC to enable GPU acceleration
			
 
				-5. Voice enrolment v2 — extract pyannote embeddings from uploaded samples, add matching logic to bridge
			
 
				-6. Church deployment trial
			
 
				+1. ~~Build `/display` fullscreen browser page~~ — done
			
 
				+2. ~~SSE push (`/api/display/stream`)~~ — done
			
 
				+3. ~~CUDA Toolkit installation~~ — done (13.2)
			
 
				+4. Voice enrolment v2 — extract pyannote embeddings from uploaded samples, add matching logic to bridge
			
 
				+5. Church deployment trial
			
 
				 
			
 
				 ---
			
 
				 
			
--- a/SETUP.md
+++ b/SETUP.md
@@ -15,6 +15,7 @@ natively on the host machine regardless. Packaging everything into a single
 
				 `.exe` is not practical for software of this type.
			
 
				 
			
 
				 Instead this guide provides:
			
 
				+
			
 
				 - `install.bat` — run **once** to set everything up
			
 
				 - `start.bat` — run each time to launch the full system
			
 
				 
			
@@ -27,7 +28,7 @@ After setup, operation is a double-click.
 
				 Before starting, confirm your PC meets these requirements:
			
 
				 
			
 
				 | Requirement | Minimum | Recommended |
			
 
				-|---|---|---|
			
 
				+| --- | --- | --- |
			
 
				 | OS | Windows 10 64-bit | Windows 11 |
			
 
				 | GPU | NVIDIA GTX 1060 6 GB | NVIDIA RTX 3070 or better |
			
 
				 | VRAM | 6 GB | 8 GB+ |
			
@@ -36,13 +37,14 @@ Before starting, confirm your PC meets these requirements:
 
				 | Internet | Required for setup | Not needed during services |
			
 
				 
			
 
				 > The RTX 4070 Super (tested hardware) runs `large-v3` in real time comfortably.
			
 
				+> The RTX 5060 Ti (production hardware) also confirmed working.
			
 
				 
			
 
				 ---
			
 
				 
			
 
				 ## Part 2 — NVIDIA Driver
			
 
				 
			
 
				-You need an up-to-date NVIDIA driver. You do **not** need to install the CUDA
			
 
				-Toolkit separately — PyTorch bundles everything it needs.
			
 
				+You need an up-to-date NVIDIA driver. You will also need the CUDA Toolkit
			
 
				+(Part 2b below) — the driver alone is not sufficient for all components.
			
 
				 
			
 
				 1. Open **GeForce Experience** (if installed) → Drivers → Check for updates.
			
 
				 
			
@@ -54,30 +56,28 @@ Toolkit separately — PyTorch bundles everything it needs.
 
				 3. Restart the PC when prompted.
			
 
				 
			
 
				 4. Verify the driver is working:
			
 
				+
			
 
				    - Press `Win + R`, type `cmd`, press Enter.
			
 
				    - Type `nvidia-smi` and press Enter.
			
 
				    - You should see a table with your GPU name and driver version.
			
 
				 
			
 
				-   ```
			
 
				+   ```text
			
 
				    +-----------------------------------------------------------------------------+
			
 
				-   | NVIDIA-SMI 560.x   Driver Version: 560.x   CUDA Version: 12.6              |
			
 
				+   | NVIDIA-SMI 595.x   Driver Version: 595.x   CUDA Version: 13.x              |
			
 
				    +-----------------------------------------------------------------------------+
			
 
				-   | RTX 4070 Super ...
			
 
				+   | RTX 5060 Ti ...
			
 
				    ```
			
 
				 
			
 
				    If this command is not found, the driver did not install correctly.
			
 
				 
			
 
				 ---
			
 
				 
			
 
				-## Part 2b — CUDA Toolkit 12.x
			
 
				+## Part 2b — CUDA Toolkit
			
 
				 
			
 
				-The NVIDIA driver alone is not enough. WhisperLiveKit uses **faster-whisper**
			
 
				-(via ctranslate2) for inference, which requires the CUDA runtime libraries to
			
 
				-be installed separately. Without this you will see `cublas64_12.dll not found`
			
 
				-and the server will fall back to CPU-only mode, making transcription too slow
			
 
				-for live use.
			
 
				+The NVIDIA driver alone is not enough for all GPU components. The CUDA Toolkit
			
 
				+provides compiler tools (`nvcc`) and low-level libraries used by WhisperLiveKit.
			
 
				 
			
 
				-> `nvidia-smi` showing "CUDA Version: 12.6" means your *driver supports* up
			
 
				+> `nvidia-smi` showing "CUDA Version: 13.x" means your *driver supports* up
			
 
				 > to that version — it does **not** mean the toolkit is installed.
			
 
				 
			
 
				 1. Go to [developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads)
			
@@ -91,14 +91,28 @@ for live use.
 
				 
			
 
				 5. Verify:
			
 
				 
			
 
				-   ```
			
 
				+   ```cmd
			
 
				    nvcc --version
			
 
				    ```
			
 
				 
			
 
				-   Expected: `release 12.x, V12.x.xxx`
			
 
				+   Expected output ends with something like `release 13.x, V13.x.xxx` (the
			
 
				+   exact version will match whatever you downloaded).
			
 
				+
			
 
				+   > If `nvcc` is not found, add the toolkit's `bin` folder to your system
			
 
				+   > PATH (e.g. `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin`)
			
 
				+   > using the same method as the Mosquitto PATH fix in Part 4.
			
 
				+
			
 
				+**Triton kernel warning** — after installing the CUDA Toolkit you will still
			
 
				+see this at bridge startup:
			
 
				+
			
 
				+```text
			
 
				+Failed to launch Triton kernels, likely due to missing CUDA toolkit;
			
 
				+falling back to a slower median kernel implementation...
			
 
				+```
			
 
				 
			
 
				-   > If `nvcc` is not found, add `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.x\bin`
			
 
				-   > to your system PATH (same method as the Mosquitto PATH fix in Part 4).
			
 
				+This message is **misleading**. The `triton` Python package does not support
			
 
				+Windows — there is no Windows build. The fallback is expected and has no
			
 
				+practical effect on transcription quality.
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -116,12 +130,13 @@ WhisperLiveKit) does not yet publish pre-built packages for Python 3.14 or
 
				    the latest **Python 3.12.x** release. Download the **Windows installer (64-bit)**.
			
 
				 
			
 
				 2. Run the installer. On the first screen:
			
 
				+
			
 
				    - **Tick "Add Python to PATH"** (important — do this before clicking Install Now)
			
 
				    - Click **Install Now**
			
 
				 
			
 
				 3. Once complete, verify in a new Command Prompt window:
			
 
				 
			
 
				-   ```
			
 
				+   ```cmd
			
 
				    py -3.12 --version
			
 
				    ```
			
 
				 
			
@@ -131,7 +146,7 @@ WhisperLiveKit) does not yet publish pre-built packages for Python 3.14 or
 
				 
			
 
				 ## Part 4 — Mosquitto (MQTT Broker)
			
 
				 
			
 
				-Mosquitto is the message relay between the PC and the display.
			
 
				+Mosquitto is the message relay between the transcription bridge and the display.
			
 
				 
			
 
				 1. Download the Windows installer from
			
 
				    [mosquitto.org/download](https://mosquitto.org/download/) — choose the
			
@@ -142,7 +157,7 @@ Mosquitto is the message relay between the PC and the display.
 
				 3. **Add Mosquitto to the system PATH** (the installer does not do this
			
 
				    automatically). Run Command Prompt **as Administrator**:
			
 
				 
			
 
				-   ```
			
 
				+   ```cmd
			
 
				    setx /M PATH "%PATH%;C:\Program Files\mosquitto"
			
 
				    ```
			
 
				 
			
@@ -151,19 +166,19 @@ Mosquitto is the message relay between the PC and the display.
 
				 
			
 
				 4. Start Mosquitto as a Windows service (still as Administrator):
			
 
				 
			
 
				-   ```
			
 
				+   ```cmd
			
 
				    net start mosquitto
			
 
				    ```
			
 
				 
			
 
				 5. Set it to start automatically with Windows:
			
 
				 
			
 
				-   ```
			
 
				+   ```cmd
			
 
				    sc config mosquitto start=auto
			
 
				    ```
			
 
				 
			
 
				 6. Verify the tools are working:
			
 
				 
			
 
				-   ```
			
 
				+   ```cmd
			
 
				    mosquitto_sub -h localhost -t test -v
			
 
				    ```
			
 
				 
			
@@ -180,6 +195,7 @@ accepting its licence terms. This is free — it just needs an account.
 
				 1. Go to [huggingface.co](https://huggingface.co) and create a free account.
			
 
				 
			
 
				 2. Accept the licence for the diarization model:
			
 
				+
			
 
				    - Visit [huggingface.co/pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
			
 
				    - Click **"Agree and access repository"**
			
 
				    - Also visit [huggingface.co/pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
			
@@ -188,13 +204,13 @@ accepting its licence terms. This is free — it just needs an account.
 
				    > If you skip this step, the server will fail to start with a 403 error.
			
 
				 
			
 
				 3. Create an access token:
			
 
				+
			
 
				    - Go to [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
			
 
				    - Click **New token**
			
 
				    - Name: `church-transcription` (or anything you like)
			
 
				    - Role: **Read**
			
 
				    - Click **Generate token**
			
 
				    - Copy the token — it starts with `hf_`
			
 
				-   
			
 
				 
			
 
				 4. **Save this token somewhere safe** (Notepad or a password manager). You will
			
 
				    paste it into `start.bat` in Part 7.
			
@@ -204,6 +220,7 @@ accepting its licence terms. This is free — it just needs an account.
 
				 ## Part 6 — Run install.bat
			
 
				 
			
 
				 The `install.bat` script in this folder does the following automatically:
			
 
				+
			
 
				 - Creates a Python virtual environment in `.venv\`
			
 
				 - Installs PyTorch with CUDA support
			
 
				 - Installs WhisperLiveKit
			
@@ -221,7 +238,7 @@ The `install.bat` script in this folder does the following automatically:
 
				 
			
 
				 3. Near the end you will see the Whisper model downloading for the first time:
			
 
				 
			
 
				-   ```
			
 
				+   ```text
			
 
				    Downloading model large-v3 (~3 GB) ...
			
 
				    ```
			
 
				 
			
@@ -264,29 +281,34 @@ pyannote can download the diarization model.
 
				 
			
 
				 1. Double-click **`start.bat`**.
			
 
				 
			
 
				-   Two windows will open:
			
 
				-   - **Window 1 — Whisper Server**: shows the transcription engine loading.
			
 
				-     On first run this downloads the speaker diarization model (~500 MB).
			
 
				-     Wait until you see `Server running on ws://0.0.0.0:8000`.
			
 
				-   - **Window 2 — Bridge**: the speaker name mapping window appears, and the
			
 
				-     Command Prompt behind it shows connection status.
			
 
				+   Two Command Prompt windows will open:
			
 
				+
			
 
				+   - **Window 1 — Bridge**: the transcription pipeline. Wait until you see
			
 
				+     `Audio pipeline running`.
			
 
				+   - **Window 2 — Admin**: the web server. Wait until it shows
			
 
				+     `Application startup complete`.
			
 
				+
			
 
				+2. Open the speaker admin page:
			
 
				 
			
 
				-2. Verify the Whisper server is working:
			
 
				-   - Open a browser and go to `http://localhost:8000`
			
 
				-   - You should see a simple web interface. Speak into the microphone — text
			
 
				-     should appear.
			
 
				+   - Open a browser and go to `http://localhost:8001`
			
 
				+   - You should see the Speaker Admin table.
			
 
				 
			
 
				-3. Verify the display:
			
 
				-   - With the ESP32 powered on and connected to the same WiFi, send a test
			
 
				-     message. Open a third Command Prompt and run:
			
 
				+3. Open the display page on a tablet or spare screen:
			
 
				+
			
 
				+   - On any device on the same WiFi, open `http://[PC-IP]:8001/display`
			
 
				+   - Press `F11` for fullscreen. A green dot in the corner means it is
			
 
				+     connected and receiving updates.
			
 
				+
			
 
				+4. Send a test message to verify the full pipeline. Open a Command Prompt and run:
			
 
				+
			
 
				+   ```cmd
			
 
				+   mosquitto_pub -h localhost -t display/text -m "{\"lines\":[\"Test line 1\",\"Test line 2\",\"Ready\"]}"
			
 
				+   ```
			
 
				 
			
 
				-     ```
			
 
				-     mosquitto_pub -h localhost -t display/text -m "{\"lines\":[\"Test line 1\",\"Test line 2\",\"Ready\"]}"
			
 
				-     ```
			
 
				+   The display page should update immediately with those three lines.
			
 
				 
			
 
				-   - The e-ink display should refresh within 2 seconds showing those three lines.
			
 
				+5. Full pipeline test:
			
 
				 
			
 
				-4. Full pipeline test:
			
 
				    - Speak naturally into the microphone.
			
 
				    - After a sentence or natural pause, text should appear on the display within
			
 
				      3–5 seconds.
			
@@ -297,15 +319,13 @@ pyannote can download the diarization model.
 
				 
			
 
				 ## Part 9 — Assigning speaker names
			
 
				 
			
 
				-The bridge window shows a **Speaker Name Mapping** panel. The system
			
 
				-automatically detects different speakers and labels them SPEAKER_00,
			
 
				-SPEAKER_01, etc.
			
 
				+The speaker admin page at `http://localhost:8001` shows all detected speakers.
			
 
				+The system automatically labels them `SPEAKER_00`, `SPEAKER_01`, etc.
			
 
				 
			
 
				-- The defaults (Pastor, Reader, Guest, Choir) are applied immediately when the
			
 
				-  bridge starts.
			
 
				-- If a different person is speaking than expected, type their name in the
			
 
				-  matching row and click **Apply**.
			
 
				-- Speaker labels appear on the display as a short heading line (e.g. `[PASTOR]`)
			
 
				+- The defaults (`SPEAKER_00` = Serving Brother; `SPEAKER_01`–`SPEAKER_03` = Contributor) are loaded on first run.
			
 
				+- When a new speaker appears, click their name in the table and type the
			
 
				+  correct name. Changes take effect within 5 seconds.
			
 
				+- Speaker labels appear on the display as a gold heading line (e.g. `PASTOR`)
			
 
				   whenever the speaker changes.
			
 
				 
			
 
				 ---
			
@@ -314,7 +334,7 @@ SPEAKER_01, etc.
 
				 
			
 
				 1. Double-click `start.bat`.
			
 
				 2. Wait ~30 seconds for both windows to show "ready" status.
			
 
				-3. The display will show `DISPLAY READY` when the ESP32 connects.
			
 
				+3. Open `http://[PC-IP]:8001/display` on the tablet and press `F11`.
			
 
				 4. Begin the service — transcription runs automatically.
			
 
				 5. Close both windows when done.
			
 
				 
			
@@ -343,10 +363,12 @@ setx /M PATH "%PATH%;C:\Program Files\mosquitto"
 
				 Close and reopen the Command Prompt, then retry the command.
			
 
				 
			
 
				 ### `nvidia-smi` not found
			
 
				+
			
 
				 The NVIDIA driver is not installed or not in PATH. Re-run the driver installer
			
 
				 and restart the PC.
			
 
				 
			
 
				 ### `python --version` shows wrong version or "not found"
			
 
				+
			
 
				 Python was not added to PATH. Re-run the Python installer, choose "Modify",
			
 
				 and tick "Add Python to environment variables".
			
 
				 
			
@@ -361,27 +383,34 @@ If the error occurs on Python 3.12, the PyTorch download may have failed
 
				 mid-way. Delete `.venv` and re-run `install.bat` with a stable connection.
			
 
				 
			
 
				 ### Whisper server fails with `401` or `403`
			
 
				+
			
 
				 Your HuggingFace token is incorrect, or you have not accepted the model licence
			
 
				 terms. Re-check Part 5 — both model pages must have "Agree and access
			
 
				 repository" clicked while logged into the same account that generated the token.
			
 
				 
			
 
				-### Whisper server starts but no text appears
			
 
				+### Bridge starts but no text appears on the display
			
 
				+
			
 
				 Check that the correct audio input device is selected:
			
 
				+
			
 
				 - Open Windows **Sound Settings** → Input → ensure the microphone or audio
			
 
				   interface is set as the default device.
			
 
				-- The bridge uses the Windows default input device.
			
 
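				+- Note that `AUDIO_DEVICE` in `bridge/bridge.py` is currently pinned to index `12`, so the Windows default is ignored. Set it to the correct device index for this PC, or to `None` to use the Windows default input device.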
			
 
				+
			
 
				+### Display page does not update
			
 
				 
			
 
				-### Display does not update
			
 
				-- Check the ESP32 Serial Monitor for WiFi/MQTT connection messages.
			
 
				-- Verify `MQTT_HOST` in `main.cpp` matches the PC's IP address (`ipconfig` →
			
 
				-  look for the WiFi adapter IPv4 address).
			
 
				+- Check the green/red dot in the bottom-right corner of the display page.
			
 
				+  Red means the browser lost its connection to the admin server.
			
 
				+- Confirm `admin.py` is running and accessible at `http://[PC-IP]:8001`.
			
 
				 - Confirm Mosquitto is running: `sc query mosquitto`
			
 
				+- Verify the PC's IP has not changed — tablets store the URL, so update it
			
 
				+  if the PC was assigned a new address.
			
 
				 
			
 
				 ### `large-v3` is too slow (display lags more than 5–6 seconds)
			
 
				-Switch to a faster model by editing `start.bat`:
			
 
				 
			
 
				-```
			
 
				-set WHISPER_MODEL=distil-large-v3
			
 
				+Switch to a faster model by editing `bridge/bridge.py`:
			
 
				+
			
 
				+```python
			
 
				+engine = TranscriptionEngine(model_size="distil-large-v3", ...)
			
 
				 ```
			
 
				 
			
 
				-`distil-large-v3` is ~50%% faster with only a small accuracy reduction.
			
 
				+`distil-large-v3` is ~50% faster with only a small accuracy reduction.
			
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -49,10 +49,10 @@ AUDIO_DEVICE: int | None = 12
 
				 SPEAKERS_FILE = Path(__file__).parent / "speakers.json"
			
 
				 
			
 
				 DEFAULT_SPEAKERS: dict[str, str] = {
			
 
				-    "SPEAKER_00": "Pastor",
			
 
				-    "SPEAKER_01": "Reader",
			
 
				-    "SPEAKER_02": "Guest",
			
 
				-    "SPEAKER_03": "Choir",
			
 
				+    "SPEAKER_00": "Serving Brother",
			
 
				+    "SPEAKER_01": "Contributor",
			
 
				+    "SPEAKER_02": "Contributor",
			
 
				+    "SPEAKER_03": "Contributor",
			
 
				 }
			
 
				 
			
 
				 # ── Audio injection queue ─────────────────────────────────────────────────────