vor 1 Monat · e172d9596d
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 
				+/.venv
			
 
				+Installers
			
 
				+codes.md
			
--- a/SETUP.md
+++ b/SETUP.md
@@ -69,13 +69,51 @@ Toolkit separately — PyTorch bundles everything it needs.
 
				 
			
 
				 ---
			
 
				 
			
 
				-## Part 3 — Python 3.11
			
 
				+## Part 2b — CUDA Toolkit 12.x
			
 
				 
			
 
				-1. Go to [python.org/downloads](https://www.python.org/downloads/release/python-3119/)
			
 
				-   and download **Python 3.11.x** (Windows installer, 64-bit).
			
 
				+The NVIDIA driver alone is not enough. WhisperLiveKit uses **faster-whisper**
			
 
				+(via ctranslate2) for inference, which requires the CUDA runtime libraries to
			
 
				+be installed separately. Without this you will see `cublas64_12.dll not found`
			
 
				+and the server will fall back to CPU-only mode, making transcription too slow
			
 
				+for live use.
			
 
				 
			
 
				-   > Use Python **3.11** specifically. Some ML libraries have known issues with
			
 
				-   > Python 3.13 on Windows.
			
 
				+> `nvidia-smi` showing "CUDA Version: 12.6" means your *driver supports* up
			
 
				+> to that version — it does **not** mean the toolkit is installed.
			
 
				+
			
 
				+1. Go to [developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads)
			
 
				+
			
 
				+2. Select: **Windows → x86_64 → 11 → exe (local)**
			
 
				+
			
 
				+3. Download and run the installer. Choose **Custom install** and ensure
			
 
				+   **CUDA Runtime** and **cuBLAS** are ticked.
			
 
				+
			
 
				+4. Restart the PC after installation.
			
 
				+
			
 
				+5. Verify:
			
 
				+
			
 
				+   ```
			
 
				+   nvcc --version
			
 
				+   ```
			
 
				+
			
 
				+   Expected: `release 12.x, V12.x.xxx`
			
 
				+
			
 
				+   > If `nvcc` is not found, add `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.x\bin`
			
 
				+   > to your system PATH (same method as the Mosquitto PATH fix in Part 4).
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## Part 3 — Python 3.12
			
 
				+
			
 
				+Python 3.12 is the required version. PyTorch (the AI engine that powers
			
 
				+WhisperLiveKit) does not yet publish pre-built packages for Python 3.14 or
			
 
				+3.13, so newer versions will fail at the PyTorch install step.
			
 
				+
			
 
				+> If you already have Python 3.13 or 3.14 installed, **do not uninstall it**
			
 
				+> — just install 3.12 alongside it. Windows supports multiple Python versions
			
 
				+> at the same time and `install.bat` will automatically pick the right one.
			
 
				+
			
 
				+1. Go to [python.org/downloads](https://www.python.org/downloads/) and look for
			
 
				+   the latest **Python 3.12.x** release. Download the **Windows installer (64-bit)**.
			
 
				 
			
 
				 2. Run the installer. On the first screen:
			
 
				    - **Tick "Add Python to PATH"** (important — do this before clicking Install Now)
			
@@ -84,10 +122,10 @@ Toolkit separately — PyTorch bundles everything it needs.
 
				 3. Once complete, verify in a new Command Prompt window:
			
 
				 
			
 
				    ```
			
 
				-   python --version
			
 
				+   py -3.12 --version
			
 
				    ```
			
 
				 
			
 
				-   Expected output: `Python 3.11.x`
			
 
				+   Expected output: `Python 3.12.x`
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -101,19 +139,29 @@ Mosquitto is the message relay between the PC and the display.
 
				 
			
 
				 2. Run the installer, accept all defaults.
			
 
				 
			
 
				-3. Start Mosquitto as a Windows service (run Command Prompt **as Administrator**):
			
 
				+3. **Add Mosquitto to the system PATH** (the installer does not do this
			
 
				+   automatically). Run Command Prompt **as Administrator**:
			
 
				+
			
 
				+   ```
			
 
				+   setx /M PATH "%PATH%;C:\Program Files\mosquitto"
			
 
				+   ```
			
 
				+
			
 
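				+   Close and reopen the Command Prompt window after running this — PATH changes
			
 
				+   don't take effect in the current window.
			
 
				+
			
 
				+   > Caution: `%PATH%` expands to the combined system *and* user PATH, and
			
 
				+   > `setx` truncates values longer than 1024 characters. If your PATH is
			
 
				+   > already long, add `C:\Program Files\mosquitto` through **System
			
 
				+   > Properties → Environment Variables** instead of using this command.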
			
 
				+
			
 
				+4. Start Mosquitto as a Windows service (still as Administrator):
			
 
				 
			
 
				    ```
			
 
				    net start mosquitto
			
 
				    ```
			
 
				 
			
 
				-4. Set it to start automatically with Windows:
			
 
				+5. Set it to start automatically with Windows:
			
 
				 
			
 
				    ```
			
 
				    sc config mosquitto start=auto
			
 
				    ```
			
 
				 
			
 
				-5. Verify it's running:
			
 
				+6. Verify the tools are working:
			
 
				 
			
 
				    ```
			
 
				    mosquitto_sub -h localhost -t test -v
			
@@ -146,6 +194,7 @@ accepting its licence terms. This is free — it just needs an account.
 
				    - Role: **Read**
			
 
				    - Click **Generate token**
			
 
				    - Copy the token — it starts with `hf_`
			
 
				+   
			
 
				 
			
 
				 4. **Save this token somewhere safe** (Notepad or a password manager). You will
			
 
				    paste it into `start.bat` in Part 7.
			
@@ -188,20 +237,22 @@ The `install.bat` script in this folder does the following automatically:
 
				 ## Part 7 — Configure start.bat
			
 
				 
			
 
				 Before running the system for the first time, you need to add your HuggingFace
			
 
				-token to the startup script.
			
 
				+token to the startup script. The token is passed as an **environment variable**
			
 
				+— `start.bat` sets it automatically before launching WhisperLiveKit, so
			
 
				+pyannote can download the diarization model.
			
 
				 
			
 
				 1. Right-click **`start.bat`** → **Edit** (opens in Notepad).
			
 
				 
			
 
				 2. Find this line near the top:
			
 
				 
			
 
				-   ```
			
 
				+   ```bat
			
 
				    set HF_TOKEN=PASTE_YOUR_TOKEN_HERE
			
 
				    ```
			
 
				 
			
 
				 3. Replace `PASTE_YOUR_TOKEN_HERE` with the token you copied in Part 5.
			
 
				    Example:
			
 
				 
			
 
				-   ```
			
 
				+   ```bat
			
 
				    set HF_TOKEN=hf_aBcDeFgHiJkLmNoPqRsTuVwXyZ
			
 
				    ```
			
 
				 
			
@@ -271,6 +322,26 @@ SPEAKER_01, etc.
 
				 
			
 
				 ## Troubleshooting
			
 
				 
			
 
				+### `SyntaxError: f-string expression part cannot include a backslash`
			
 
				+
			
 
				+WhisperLiveKit requires Python 3.12+. Your virtual environment was built with
			
 
				+Python 3.11. To fix:
			
 
				+
			
 
				+1. Install Python 3.12 from python.org/downloads (3.11 can stay — they coexist).
			
 
				+2. Delete the `.venv` folder in the project directory.
			
 
				+3. Run `install.bat` again — it will detect and use Python 3.12 (or 3.13 if 3.12 is not installed).
			
 
				+
			
 
				+### `mosquitto_sub` or `mosquitto_pub` is not recognised
			
 
				+
			
 
				+The Mosquitto installer sets up the Windows service but does not add its tools
			
 
				+to the system PATH. Run Command Prompt **as Administrator** and execute:
			
 
				+
			
 
				+```bat
			
 
				+setx /M PATH "%PATH%;C:\Program Files\mosquitto"
			
 
				+```
			
 
				+
			
 
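				+Close and reopen the Command Prompt, then retry the command. Note that `setx`
			
 
				+truncates values longer than 1024 characters; if your PATH is already long,
			
 
				+add the folder through **System Properties → Environment Variables** instead.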
			
 
				+
			
 
				 ### `nvidia-smi` not found
			
 
				 The NVIDIA driver is not installed or not in PATH. Re-run the driver installer
			
 
				 and restart the PC.
			
@@ -279,9 +350,15 @@ and restart the PC.
 
				 Python was not added to PATH. Re-run the Python installer, choose "Modify",
			
 
				 and tick "Add Python to environment variables".
			
 
				 
			
 
				-### install.bat fails with "torch" errors
			
 
				-PyTorch may have failed to download. Delete the `.venv` folder and run
			
 
				-`install.bat` again with a stable internet connection.
			
 
				+### install.bat fails with "torch" errors — `No matching distribution found`
			
 
				+
			
 
				+PyTorch does not publish pre-built packages for Python 3.14 (or very new
			
 
				+versions). Install **Python 3.12** from python.org alongside your current
			
 
				+version — they coexist safely. Then delete `.venv` and re-run `install.bat`;
			
 
				+it will automatically select Python 3.12.
			
 
				+
			
 
				+If the error occurs on Python 3.12, the PyTorch download may have failed
			
 
				+mid-way. Delete `.venv` and re-run `install.bat` with a stable connection.
			
 
				 
			
 
				 ### Whisper server fails with `401` or `403`
			
 
				 Your HuggingFace token is incorrect, or you have not accepted the model licence
			
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -20,6 +20,7 @@ import textwrap
 
				 import threading
			
 
				 import time
			
 
				 from collections import Counter
			
 
				+from pathlib import Path
			
 
				 
			
 
				 import numpy as np
			
 
				 import paho.mqtt.client as mqtt
			
@@ -44,25 +45,77 @@ SENTENCE_TIMEOUT = 4.0      # seconds of silence before forcing a flush
 
				 MAX_LINE_CHARS   = 38       # characters per line (~24pt font at 800 px wide)
			
 
				 DISPLAY_LINES    = 3
			
 
				 
			
 
				+# Set to a device index (integer) to force a specific microphone.
			
 
				+# Leave as None to use the Windows default input device.
			
 
				+# Run bridge.py once to see available device indices printed at startup.
			
 
				+AUDIO_DEVICE: int | None = None
			
 
				+
			
 
				+SPEAKERS_FILE = Path(__file__).parent / "speakers.json"
			
 
				+
			
 
				+DEFAULT_SPEAKERS: dict[str, str] = {
			
 
				+    "SPEAKER_00": "Pastor",
			
 
				+    "SPEAKER_01": "Reader",
			
 
				+    "SPEAKER_02": "Guest",
			
 
				+    "SPEAKER_03": "Choir",
			
 
				+}
			
 
				+
			
 
				+# ── Speaker persistence ───────────────────────────────────────────────────────
			
 
				+
			
 
				+def _load_speakers() -> dict[str, str]:
			
 
				+    """
			
 
				+    Load the speaker-ID → friendly-name mapping from SPEAKERS_FILE.
			
 
				+
			
 
				+    A missing file is treated as a first run: the defaults are written to
			
 
				+    disk and a copy of them is returned. A file that exists but cannot be
			
 
				+    read or decoded, or whose top-level JSON value is not an object, is
			
 
				+    not overwritten here; a message is printed and the defaults are
			
 
				+    returned. Entries whose value is not a string are dropped so the
			
 
				+    result matches the declared dict[str, str] type.
			
 
				+    """
			
 
				+    if not SPEAKERS_FILE.exists():
			
 
				+        # First run — seed with defaults and save
			
 
				+        _write_speakers(DEFAULT_SPEAKERS)
			
 
				+        return dict(DEFAULT_SPEAKERS)
			
 
				+
			
 
				+    try:
			
 
				+        data = json.loads(SPEAKERS_FILE.read_text(encoding="utf-8"))
			
 
				+    except (ValueError, OSError) as exc:
			
 
				+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
			
 
				+        print(f"[Speakers] Could not load {SPEAKERS_FILE}: {exc} — using defaults")
			
 
				+        return dict(DEFAULT_SPEAKERS)
			
 
				+
			
 
				+    if not isinstance(data, dict):
			
 
				+        print(f"[Speakers] {SPEAKERS_FILE} is not a JSON object — using defaults")
			
 
				+        return dict(DEFAULT_SPEAKERS)
			
 
				+
			
 
				+    return {sid: name for sid, name in data.items() if isinstance(name, str)}
			
 
				+
			
 
				+
			
 
				+def _write_speakers(names: dict[str, str]) -> None:
			
 
				+    """
			
 
				+    Save the speaker mapping to SPEAKERS_FILE as indented UTF-8 JSON.
			
 
				+
			
 
				+    The JSON is written to a sibling ".tmp" file first and then moved over
			
 
				+    SPEAKERS_FILE with Path.replace, so an interrupted write cannot leave a
			
 
				+    truncated speakers file behind. OSError is reported on stdout and not
			
 
				+    raised.
			
 
				+    """
			
 
				+    tmp_file = SPEAKERS_FILE.with_name(SPEAKERS_FILE.name + ".tmp")
			
 
				+    try:
			
 
				+        tmp_file.write_text(
			
 
				+            json.dumps(names, indent=2, ensure_ascii=False),
			
 
				+            encoding="utf-8",
			
 
				+        )
			
 
				+        tmp_file.replace(SPEAKERS_FILE)
			
 
				+    except OSError as exc:
			
 
				+        print(f"[Speakers] Save failed: {exc}")
			
 
				+
			
 
				+
			
 
				 # ── State ─────────────────────────────────────────────────────────────────────
			
 
				 
			
 
				 class BridgeState:
			
 
				     """All mutable state, protected by a single lock."""
			
 
				 
			
 
				     def __init__(self):
			
 
				-        self._lock             = threading.Lock()
			
 
				-        self.speaker_names: dict[str, str] = {}   # "SPEAKER_00" → "Pastor"
			
 
				+        self._lock                         = threading.Lock()
			
 
				+        self.speaker_names: dict[str, str] = _load_speakers()
			
 
				+        self._seen: set[str]               = set(self.speaker_names)
			
 
				         self._current_speaker: str | None  = None
			
 
				         self._speaker_changed              = False
			
 
				         self._text_buffer                  = ""
			
 
				         self._display: list[str]           = [""] * DISPLAY_LINES
			
 
				         self._last_final_time              = time.monotonic()
			
 
				 
			
 
				-    # ── Speaker name mapping ──────────────────────────────────────────────────
			
 
				+    # ── Speaker name management ───────────────────────────────────────────────
			
 
				 
			
 
				     def set_speaker_name(self, speaker_id: str, name: str) -> None:
			
 
				+        """Store the whitespace-stripped name for speaker_id, mark the ID as
			
 
				+        seen, and save the whole mapping via _write_speakers. The update and
			
 
				+        the save both run inside the state lock."""
			
 
				         with self._lock:
			
 
				             self.speaker_names[speaker_id] = name.strip()
			
 
				+            self._seen.add(speaker_id)
			
 
				+            _write_speakers(self.speaker_names)
			
 
				+
			
 
				+    def delete_speaker(self, speaker_id: str) -> None:
			
 
				+        """Remove speaker_id from the name mapping and from the seen set
			
 
				+        (no error if it is absent from either), then save the mapping via
			
 
				+        _write_speakers. Runs inside the state lock."""
			
 
				+        with self._lock:
			
 
				+            self.speaker_names.pop(speaker_id, None)
			
 
				+            self._seen.discard(speaker_id)
			
 
				+            _write_speakers(self.speaker_names)
			
 
				+
			
 
				+    def seen_speakers_snapshot(self) -> set[str]:
			
 
				+        """Return a copy of the seen speaker-ID set, taken inside the state
			
 
				+        lock, so the caller can use it without holding the lock."""
			
 
				+        with self._lock:
			
 
				+            return set(self._seen)
			
 
				 
			
 
				     def _resolve(self, speaker_id: str | None) -> str | None:
			
 
				         if not speaker_id:
			
@@ -74,11 +127,13 @@ class BridgeState:
 
				     def push_final(self, text: str, speaker_id: str | None, mqtt_client: mqtt.Client) -> None:
			
 
				         """Accept a finalised segment; flush on sentence boundary or speaker change."""
			
 
				         with self._lock:
			
 
				-            resolved = self._resolve(speaker_id)
			
 
				+            if speaker_id:
			
 
				+                self._seen.add(speaker_id)
			
 
				 
			
 
				+            resolved = self._resolve(speaker_id)
			
 
				             if resolved != self._current_speaker:
			
 
				                 if self._text_buffer:
			
 
				-                    self._flush(mqtt_client)          # push previous speaker's words first
			
 
				+                    self._flush(mqtt_client)
			
 
				                 self._current_speaker = resolved
			
 
				                 self._speaker_changed = True
			
 
				 
			
@@ -107,7 +162,6 @@ class BridgeState:
 
				             self._speaker_changed = False
			
 
				 
			
 
				         new_lines.extend(textwrap.wrap(text, MAX_LINE_CHARS) or [""])
			
 
				-
			
 
				         self._display.extend(new_lines)
			
 
				         self._display = self._display[-DISPLAY_LINES:]
			
 
				         while len(self._display) < DISPLAY_LINES:
			
@@ -134,20 +188,13 @@ def _is_sentence_end(text: str) -> bool:
 
				 
			
 
				 
			
 
				 def _extract_speaker(data: dict) -> str | None:
			
 
				-    """
			
 
				-    Extract speaker ID from a WhisperLiveKit response dict.
			
 
				-    Handles segment-level {"speaker": "SPEAKER_00"} and word-level
			
 
				-    {"words": [{"speaker": "SPEAKER_00", ...}, ...]} formats.
			
 
				-    """
			
 
				     if "speaker" in data:
			
 
				         return data["speaker"] or None
			
 
				-
			
 
				     words = data.get("words", [])
			
 
				     if words:
			
 
				         ids = [w.get("speaker") for w in words if w.get("speaker")]
			
 
				         if ids:
			
 
				             return Counter(ids).most_common(1)[0][0]
			
 
				-
			
 
				     return None
			
 
				 
			
 
				 
			
@@ -173,7 +220,7 @@ def build_mqtt_client() -> mqtt.Client:
 
				 # ── WebSocket + audio pipeline ────────────────────────────────────────────────
			
 
				 
			
 
				 async def _sender(ws, queue: asyncio.Queue) -> None:
			
 
				-    while not queue.empty():        # drain stale audio before streaming
			
 
				+    while not queue.empty():
			
 
				         queue.get_nowait()
			
 
				     while True:
			
 
				         chunk = await queue.get()
			
@@ -202,6 +249,48 @@ async def _flusher(state: BridgeState, mqtt_client: mqtt.Client) -> None:
 
				         state.maybe_timeout_flush(mqtt_client)
			
 
				 
			
 
				 
			
 
				+def _choose_audio_device() -> int | None:
			
 
				+    """
			
 
				+    List all input devices and return the index to use.
			
 
				+
			
 
				+    Prefers AUDIO_DEVICE if it is set and refers to one of the listed
			
 
				+    input devices, otherwise the system default, otherwise the first
			
 
				+    device with input channels. Returns None when the devices cannot be
			
 
				+    queried or when no device has input channels.
			
 
				+    """
			
 
				+    try:
			
 
				+        devices   = sd.query_devices()
			
 
				+        default_in = sd.default.device[0]  # may be -1 if unset
			
 
				+    except Exception as exc:
			
 
				+        print(f"[Audio] Cannot query devices: {exc}")
			
 
				+        return None
			
 
				+
			
 
				+    print("[Audio] Available input devices:")
			
 
				+    input_devices: list[tuple[int, str]] = []
			
 
				+    for i, dev in enumerate(devices):
			
 
				+        if dev["max_input_channels"] > 0:
			
 
				+            marker = "  ← default" if i == default_in else ""
			
 
				+            print(f"  [{i}] {dev['name']}{marker}")
			
 
				+            input_devices.append((i, dev["name"]))
			
 
				+
			
 
				+    if not input_devices:
			
 
				+        print("[Audio] ERROR: No input devices found. Connect a microphone and restart.")
			
 
				+        return None
			
 
				+
			
 
				+    # Explicit override from config — only honoured when it names one of the
			
 
				+    # input devices listed above; otherwise warn and fall through so a stale
			
 
				+    # index does not surface later as an error when the stream is opened.
			
 
				+    if AUDIO_DEVICE is not None:
			
 
				+        if any(i == AUDIO_DEVICE for i, _ in input_devices):
			
 
				+            print(f"[Audio] Using configured device [{AUDIO_DEVICE}]")
			
 
				+            return AUDIO_DEVICE
			
 
				+        print(f"[Audio] WARNING: configured device [{AUDIO_DEVICE}] is not an input device — ignoring AUDIO_DEVICE")
			
 
				+
			
 
				+    # System default (if valid)
			
 
				+    if default_in >= 0:
			
 
				+        print(f"[Audio] Using default input device [{default_in}]")
			
 
				+        return default_in
			
 
				+
			
 
				+    # Fall back to first available input
			
 
				+    idx, name = input_devices[0]
			
 
				+    print(f"[Audio] No system default set — using [{idx}] {name}")
			
 
				+    print("[Audio] To choose a different device, set AUDIO_DEVICE in bridge.py")
			
 
				+    return idx
			
 
				+
			
 
				+
			
 
				 async def audio_ws_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
			
 
				     audio_queue: asyncio.Queue[bytes] = asyncio.Queue(maxsize=120)
			
 
				     loop = asyncio.get_running_loop()
			
@@ -217,7 +306,13 @@ async def audio_ws_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
 
				                 pass
			
 
				         loop.call_soon_threadsafe(_put)
			
 
				 
			
 
				+    device = _choose_audio_device()
			
 
				+    if device is None:
			
 
				+        print("[Audio] No input device available — audio pipeline cannot start.")
			
 
				+        return
			
 
				+
			
 
				     with sd.InputStream(
			
 
				+        device=device,
			
 
				         samplerate=SAMPLE_RATE,
			
 
				         channels=CHANNELS,
			
 
				         dtype="int16",
			
@@ -252,93 +347,152 @@ def run_async_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
 
				     asyncio.run(audio_ws_loop(state, mqtt_client))
			
 
				 
			
 
				 
			
 
				-# ── Speaker name-mapping UI ───────────────────────────────────────────────────
			
 
				-
			
 
				-PRESET_SPEAKERS = [
			
 
				-    ("SPEAKER_00", "Pastor"),
			
 
				-    ("SPEAKER_01", "Reader"),
			
 
				-    ("SPEAKER_02", "Guest"),
			
 
				-    ("SPEAKER_03", "Choir"),
			
 
				-]
			
 
				-
			
 
				+# ── Speaker UI ────────────────────────────────────────────────────────────────
			
 
				 
			
 
				 def run_speaker_ui(state: BridgeState, mqtt_client: mqtt.Client) -> None:
			
 
				     root = tk.Tk()
			
 
				     root.title("Transcription Bridge — Speaker Names")
			
 
				     root.attributes("-topmost", True)
			
 
				-    root.resizable(False, False)
			
 
				+    root.minsize(440, 360)
			
 
				+    root.geometry("460x480")
			
 
				 
			
 
				-    tk.Label(root, text="Speaker Name Mapping", font=("Helvetica", 12, "bold")).grid(
			
 
				-        row=0, column=0, columnspan=3, pady=(12, 2), padx=12
			
 
				+    # ── Header ────────────────────────────────────────────────────────────────
			
 
				+    tk.Label(root, text="Speaker Name Mapping", font=("Helvetica", 12, "bold")).pack(
			
 
				+        pady=(12, 2)
			
 
				     )
			
 
				     tk.Label(
			
 
				         root,
			
 
				-        text="Diarization is automatic. Assign readable names to each speaker ID.",
			
 
				+        text="Names are saved to speakers.json and restored each session.\n"
			
 
				+             "New speakers detected by diarization appear here automatically.",
			
 
				         font=("Helvetica", 9), fg="gray", justify="center",
			
 
				-    ).grid(row=1, column=0, columnspan=3, pady=(0, 8))
			
 
				+    ).pack(pady=(0, 6))
			
 
				 
			
 
				-    tk.Label(root, text="Speaker ID",     font=("Helvetica", 10, "bold")).grid(row=2, column=0, padx=8)
			
 
				-    tk.Label(root, text="Friendly Name",  font=("Helvetica", 10, "bold")).grid(row=2, column=1, padx=8)
			
 
				+    # ── Scrollable list ───────────────────────────────────────────────────────
			
 
				+    list_outer = tk.Frame(root, relief="sunken", bd=1)
			
 
				+    list_outer.pack(fill="both", expand=True, padx=12, pady=(0, 4))
			
 
				 
			
 
				-    entries: list[tuple[str, tk.Entry]] = []
			
 
				-    for i, (sid, default) in enumerate(PRESET_SPEAKERS):
			
 
				-        tk.Label(root, text=sid, font=("Courier", 10)).grid(row=3+i, column=0, sticky="e", padx=8, pady=3)
			
 
				-        e = tk.Entry(root, width=16, font=("Helvetica", 10))
			
 
				-        e.insert(0, default)
			
 
				-        e.grid(row=3+i, column=1, padx=8, pady=3)
			
 
				-        entries.append((sid, e))
			
 
				+    canvas    = tk.Canvas(list_outer, highlightthickness=0)
			
 
				+    scrollbar = ttk.Scrollbar(list_outer, orient="vertical", command=canvas.yview)
			
 
				+    rows_frame = tk.Frame(canvas)
			
 
				 
			
 
				-        def _apply(s=sid, entry=e):
			
 
				-            state.set_speaker_name(s, entry.get())
			
 
				-            print(f"[UI] {s} → {entry.get()!r}")
			
 
				+    rows_frame.bind(
			
 
				+        "<Configure>",
			
 
				+        lambda e: canvas.configure(scrollregion=canvas.bbox("all")),
			
 
				+    )
			
 
				+    canvas.create_window((0, 0), window=rows_frame, anchor="nw")
			
 
				+    canvas.configure(yscrollcommand=scrollbar.set)
			
 
				 
			
 
				-        tk.Button(root, text="Apply", command=_apply, width=6).grid(row=3+i, column=2, padx=6)
			
 
				+    canvas.pack(side="left", fill="both", expand=True)
			
 
				+    scrollbar.pack(side="right", fill="y")
			
 
				 
			
 
				-    ttk.Separator(root, orient="horizontal").grid(
			
 
				-        row=7, column=0, columnspan=3, sticky="ew", padx=8, pady=8
			
 
				+    canvas.bind_all(
			
 
				+        "<MouseWheel>",
			
 
				+        lambda e: canvas.yview_scroll(int(-1 * e.delta / 120), "units"),
			
 
				     )
			
 
				 
			
 
				-    # Custom ID row
			
 
				-    tk.Label(root, text="Custom ID:").grid(row=8, column=0, sticky="e", padx=8)
			
 
				-    cid = tk.Entry(root, width=14, font=("Courier", 10))
			
 
				-    cid.insert(0, "SPEAKER_04")
			
 
				-    cid.grid(row=8, column=1, sticky="w", padx=8, pady=2)
			
 
				+    # Column headers
			
 
				+    hdr = tk.Frame(rows_frame, bg="#e8e8e8")
			
 
				+    hdr.pack(fill="x", pady=(2, 0))
			
 
				+    tk.Label(hdr, text="  Speaker ID",   bg="#e8e8e8", font=("Helvetica", 9, "bold"), width=14, anchor="w").pack(side="left")
			
 
				+    tk.Label(hdr, text="Friendly Name",  bg="#e8e8e8", font=("Helvetica", 9, "bold"), width=18, anchor="w").pack(side="left")
			
 
				+
			
 
				+    # ── Row management ────────────────────────────────────────────────────────
			
 
				+    rendered_sids: set[str] = set()
			
 
				+
			
 
				+    def _add_row(sid: str, name: str) -> None:
			
 
				+        if sid in rendered_sids:
			
 
				+            return
			
 
				+        rendered_sids.add(sid)
			
 
				+
			
 
				+        row = tk.Frame(rows_frame)
			
 
				+        row.pack(fill="x", padx=4, pady=2)
			
 
				+
			
 
				+        tk.Label(row, text=sid, font=("Courier", 9), width=14, anchor="w").pack(side="left")
			
 
				+
			
 
				+        entry = tk.Entry(row, font=("Helvetica", 10), width=18)
			
 
				+        entry.insert(0, name)
			
 
				+        entry.pack(side="left", padx=4)
			
 
				 
			
 
				-    tk.Label(root, text="Name:").grid(row=9, column=0, sticky="e", padx=8)
			
 
				-    cname = tk.Entry(root, width=14, font=("Helvetica", 10))
			
 
				-    cname.grid(row=9, column=1, sticky="w", padx=8, pady=2)
			
 
				+        saved_lbl = tk.Label(row, text="", font=("Helvetica", 8), fg="#2a7a2a", width=5)
			
 
				+        saved_lbl.pack(side="left")
			
 
				 
			
 
				-    def _apply_custom():
			
 
				-        s, n = cid.get().strip(), cname.get().strip()
			
 
				-        if s and n:
			
 
				+        def _save(s=sid, e=entry, lbl=saved_lbl):
			
 
				+            n = e.get().strip()
			
 
				+            if not n:
			
 
				+                return
			
 
				             state.set_speaker_name(s, n)
			
 
				-            print(f"[UI] Custom: {s} → {n!r}")
			
 
				+            lbl.config(text="Saved")
			
 
				+            row.after(2000, lambda: lbl.config(text=""))
			
 
				+            print(f"[UI] {s} → {n!r}")
			
 
				 
			
 
				-    tk.Button(root, text="Apply", command=_apply_custom, width=6).grid(row=9, column=2, padx=6)
			
 
				+        def _delete(s=sid, r=row):
			
 
				+            state.delete_speaker(s)
			
 
				+            rendered_sids.discard(s)
			
 
				+            r.destroy()
			
 
				+            print(f"[UI] Removed {s}")
			
 
				 
			
 
				-    ttk.Separator(root, orient="horizontal").grid(
			
 
				-        row=10, column=0, columnspan=3, sticky="ew", padx=8, pady=8
			
 
				-    )
			
 
				+        entry.bind("<Return>", lambda _e, s=sid, e=entry, lbl=saved_lbl: _save(s, e, lbl))
			
 
				 
			
 
				-    def _apply_all():
			
 
				-        for sid, entry in entries:
			
 
				-            state.set_speaker_name(sid, entry.get())
			
 
				-        print("[UI] All names applied")
			
 
				+        tk.Button(row, text="Save", command=_save, width=5).pack(side="left", padx=2)
			
 
				+        tk.Button(row, text="✕",    command=_delete, fg="red", width=3).pack(side="left")
			
 
				 
			
 
				-    tk.Button(root, text="Apply All Names", width=18, command=_apply_all).grid(
			
 
				-        row=11, column=0, columnspan=2, padx=8, pady=4, sticky="w"
			
 
				-    )
			
 
				-    tk.Button(root, text="Clear Display", width=14, fg="red",
			
 
				-              command=lambda: state.clear(mqtt_client)).grid(
			
 
				-        row=11, column=2, padx=8, pady=4
			
 
				-    )
			
 
				+    # Populate from persisted state (sorted so order is stable)
			
 
				+    for sid, name in sorted(state.speaker_names.items()):
			
 
				+        _add_row(sid, name)
			
 
				 
			
 
				-    tk.Label(root, text="Speaker labels appear on the display when the speaker changes.",
			
 
				-             font=("Helvetica", 8), fg="gray").grid(
			
 
				-        row=12, column=0, columnspan=3, pady=(0, 10)
			
 
				-    )
			
 
				+    # Poll every 2 s for speaker IDs newly seen from Whisper this session
			
 
				+    def _poll():
			
 
				+        for sid in sorted(state.seen_speakers_snapshot() - rendered_sids):
			
 
				+            _add_row(sid, state.speaker_names.get(sid, sid))
			
 
				+        root.after(2000, _poll)
			
 
				+
			
 
				+    _poll()
			
 
				+
			
 
				+    # ── Add row manually ──────────────────────────────────────────────────────
			
 
				+    ttk.Separator(root, orient="horizontal").pack(fill="x", padx=12, pady=4)
			
 
				+
			
 
				+    add_row = tk.Frame(root)
			
 
				+    add_row.pack(fill="x", padx=12)
			
 
				+
			
 
				+    tk.Label(add_row, text="Add:", font=("Helvetica", 9)).pack(side="left")
			
 
				+
			
 
				+    add_id = tk.Entry(add_row, font=("Courier", 9), width=13)
			
 
				+    add_id.insert(0, "SPEAKER_04")
			
 
				+    add_id.pack(side="left", padx=4)
			
 
				+
			
 
				+    tk.Label(add_row, text="→").pack(side="left")
			
 
				+
			
 
				+    add_name = tk.Entry(add_row, font=("Helvetica", 10), width=16)
			
 
				+    add_name.pack(side="left", padx=4)
			
 
				+
			
 
				+    def _add_manual():
			
 
				+        sid  = add_id.get().strip()
			
 
				+        name = add_name.get().strip()
			
 
				+        if sid and name:
			
 
				+            state.set_speaker_name(sid, name)
			
 
				+            _add_row(sid, name)
			
 
				+            add_name.delete(0, tk.END)
			
 
				+            print(f"[UI] Added {sid} → {name!r}")
			
 
				+
			
 
				+    add_name.bind("<Return>", lambda _e: _add_manual())
			
 
				+    tk.Button(add_row, text="Add", command=_add_manual, width=5).pack(side="left", padx=2)
			
 
				+
			
 
				+    # ── Footer buttons ────────────────────────────────────────────────────────
			
 
				+    ttk.Separator(root, orient="horizontal").pack(fill="x", padx=12, pady=6)
			
 
				+
			
 
				+    footer = tk.Frame(root)
			
 
				+    footer.pack(fill="x", padx=12, pady=(0, 12))
			
 
				+
			
 
				+    tk.Label(
			
 
				+        footer, text="Changes save instantly to speakers.json",
			
 
				+        font=("Helvetica", 8), fg="gray",
			
 
				+    ).pack(side="left")
			
 
				+
			
 
				+    tk.Button(
			
 
				+        footer, text="Clear Display", fg="red", width=14,
			
 
				+        command=lambda: state.clear(mqtt_client),
			
 
				+    ).pack(side="right")
			
 
				 
			
 
				-    _apply_all()   # activate defaults immediately
			
 
				     root.mainloop()
			
 
				 
			
 
				 
			
@@ -352,6 +506,7 @@ def main() -> None:
 
				         target=run_async_loop, args=(state, mqtt_client), daemon=True
			
 
				     )
			
 
				     ws_thread.start()
			
 
				+    print(f"[Bridge] Speaker names loaded from {SPEAKERS_FILE}")
			
 
				     print("[Bridge] Audio pipeline running — close this window to quit")
			
 
				 
			
 
				     run_speaker_ui(state, mqtt_client)
			
--- a/bridge/speakers.json
+++ b/bridge/speakers.json
@@ -0,0 +1,6 @@
 
				+{
			
 
				+  "SPEAKER_00": "Pastor",
			
 
				+  "SPEAKER_01": "Reader",
			
 
				+  "SPEAKER_02": "Guest",
			
 
				+  "SPEAKER_03": "Choir"
			
 
				+}
			
--- a/install.bat
+++ b/install.bat
@@ -17,29 +17,43 @@ pause
 
				 
			
 
				 :: ── Check Python ────────────────────────────────────────────────────────────
			
 
				 
			
 
				-echo [1/6] Checking Python version...
			
 
				-python --version >nul 2>&1
			
 
				-if errorlevel 1 (
			
 
				+echo [1/6] Checking Python version (3.12+ required)...
			
 
				+
			
 
				+:: Prefer 3.12 then 3.13 — PyTorch does not yet publish wheels for 3.14+
			
 
				+set PYEXE=
			
 
				+for %%v in (3.12 3.13) do (
			
 
				+    if not defined PYEXE (
			
 
				+        py -%%v --version >nul 2>&1
			
 
				+        if not errorlevel 1 set PYEXE=py -%%v
			
 
				+    )
			
 
				+)
			
 
				+
			
 
				+if not defined PYEXE (
			
 
				     echo.
			
 
				-    echo ERROR: Python is not installed or not in PATH.
			
 
				-    echo Please install Python 3.11 from https://python.org
			
 
				-    echo Make sure you tick "Add Python to PATH" during install.
			
 
				+    echo ERROR: Python 3.12 or 3.13 not found.
			
 
				+    echo.
			
 
				+    echo PyTorch ^(required by WhisperLiveKit^) does not yet publish wheels
			
 
				+    echo for Python 3.14. Please install Python 3.12 alongside any newer
			
 
				+    echo version — they coexist safely on the same PC.
			
 
				+    echo.
			
 
				+    echo Download: https://www.python.org/downloads/release/python-31210/
			
 
				+    echo Tick "Add Python to PATH" during install.
			
 
				     echo.
			
 
				     pause
			
 
				     exit /b 1
			
 
				 )
			
 
				 
			
 
				-for /f "tokens=2 delims= " %%v in ('python --version 2^>^&1') do set PYVER=%%v
			
 
				-echo Found Python %PYVER%
			
 
				+for /f "tokens=2 delims= " %%v in ('%PYEXE% --version 2^>^&1') do set PYVER=%%v
			
 
				+echo Found Python %PYVER% ^(using %PYEXE%^)
			
 
				 
			
 
				 :: ── Create virtual environment ───────────────────────────────────────────────
			
 
				 
			
 
				 echo.
			
 
				 echo [2/6] Creating virtual environment in .venv\ ...
			
 
				 if exist .venv (
			
 
				-    echo .venv already exists — skipping creation.
			
 
				+    echo .venv already exists — delete it and re-run if you need a clean install.
			
 
				 ) else (
			
 
				-    python -m venv .venv
			
 
				+    %PYEXE% -m venv .venv
			
 
				     if errorlevel 1 (
			
 
				         echo ERROR: Failed to create virtual environment.
			
 
				         pause
			
@@ -63,11 +77,28 @@ echo This is the longest step. Please wait.
 
				 echo.
			
 
				 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
			
 
				 if errorlevel 1 (
			
 
				+    echo cu124 index failed, trying cu121...
			
 
				+    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
			
 
				+    if errorlevel 1 (
			
 
				+        echo.
			
 
				+        echo ERROR: PyTorch CUDA installation failed on both cu124 and cu121 indexes.
			
 
				+        echo Check your internet connection and try again.
			
 
				+        pause
			
 
				+        exit /b 1
			
 
				+    )
			
 
				+)
			
 
				+
			
 
				+:: Confirm the CUDA build was installed, not the CPU fallback
			
 
				+for /f "delims=" %%v in ('python -c "import torch; print(torch.__version__)"') do set TORCH_VER=%%v
			
 
				+echo Installed PyTorch %TORCH_VER%
			
 
				+echo %TORCH_VER% | find "+cpu" >nul
			
 
				+if not errorlevel 1 (
			
 
				+    echo.
			
 
				+    echo WARNING: CPU-only PyTorch was installed instead of the CUDA build.
			
 
				+    echo Transcription will work but will be slow without GPU acceleration.
			
 
				+    echo See SETUP.md Part 2b for CUDA Toolkit installation instructions.
			
 
				     echo.
			
 
				-    echo ERROR: PyTorch installation failed.
			
 
				-    echo Check your internet connection and try again.
			
 
				     pause
			
 
				-    exit /b 1
			
 
				 )
			
 
				 
			
 
				 :: ── Install WhisperLiveKit ────────────────────────────────────────────────────
			
@@ -75,7 +106,7 @@ if errorlevel 1 (
 
				 echo.
			
 
				 echo [5/6] Installing WhisperLiveKit and dependencies...
			
 
				 echo.
			
 
				-pip install whisperlivekit pyannote.audio
			
 
				+pip install whisperlivekit pyannote.audio python-multipart diart
			
 
				 if errorlevel 1 (
			
 
				     echo.
			
 
				     echo ERROR: WhisperLiveKit installation failed.
			
--- a/start.bat
+++ b/start.bat
@@ -8,7 +8,7 @@ title Church Transcription — Launcher
 
				 
			
 
				 :: Your HuggingFace access token (required for speaker diarization)
			
 
				 :: Get one at https://huggingface.co/settings/tokens
			
 
				-set HF_TOKEN=PASTE_YOUR_TOKEN_HERE
			
 
				+if not defined HF_TOKEN set HF_TOKEN=PASTE_YOUR_TOKEN_HERE
			
 
				 
			
 
				 :: Whisper model to use:
			
 
				 ::   large-v3          — most accurate, needs ~6 GB VRAM, ~3 s latency
			
@@ -19,7 +19,7 @@ set WHISPER_MODEL=large-v3
 
				 :: ════════════════════════════════════════════════════════════════════════════
			
 
				 
			
 
				 :: Check the token has been set
			
 
				-if "%HF_TOKEN%"=="PASTE_YOUR_TOKEN_HERE" (
			
 
				+if "%HF_TOKEN%"=="PASTE_YOUR_TOKEN_HERE" (
			
 
				     echo.
			
 
				     echo ERROR: HuggingFace token not configured.
			
 
				     echo.
			
@@ -68,20 +68,13 @@ echo Close this window or both others to shut down.
 
				 echo.
			
 
				 
			
 
				 :: Activate venv and launch WhisperLiveKit in its own window
			
 
				-start "Whisper Transcription Server" cmd /k ^
			
 
				-    "call .venv\Scripts\activate.bat && ^
			
 
				-     set HF_TOKEN=%HF_TOKEN% && ^
			
 
				-     echo Starting WhisperLiveKit (%WHISPER_MODEL%) with diarization... && ^
			
 
				-     wlk --model %WHISPER_MODEL% --language en --diarization --hf-token %HF_TOKEN%"
			
 
				+start "Whisper Transcription Server" cmd /k "call .venv\Scripts\activate.bat && echo Starting WhisperLiveKit (%WHISPER_MODEL%) with diarization... && wlk --model %WHISPER_MODEL% --lan en"
			
 
				 
			
 
				 :: Brief pause so Whisper can begin loading before the bridge connects
			
 
				 timeout /t 5 /nobreak >nul
			
 
				 
			
 
				 :: Activate venv and launch the bridge (speaker UI opens in this process)
			
 
				-start "Transcription Bridge" cmd /k ^
			
 
				-    "call .venv\Scripts\activate.bat && ^
			
 
				-     echo Starting bridge... && ^
			
 
				-     python bridge\bridge.py"
			
 
				+start "Transcription Bridge" cmd /k "call .venv\Scripts\activate.bat && echo Starting bridge... && python bridge\bridge.py"
			
 
				 
			
 
				 echo Both windows launched. You can minimise this window.
			
 
				 echo.