1 сар өмнө · ee90055248
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,9 +2,13 @@
 
															 This file provides context for AI-assisted development sessions on the Church Live Transcription Display project.
														
 
															+---
														
 
															+
														
 
															 ## Project Summary
														
 
															-A live captioning system for deaf/hard-of-hearing church congregants. A Windows PC captures audio, transcribes it locally using Whisper (GPU-accelerated), and sends rolling text over MQTT to an ESP32 driving a large e-ink display. No cloud services. No internet required during operation.
														
 
															+A live captioning system for deaf/hard-of-hearing church congregants. A Windows PC captures audio, transcribes it locally using Whisper (GPU-accelerated), performs real-time speaker diarization, maps anonymous speaker IDs to real names, and sends speaker-tagged rolling text over MQTT to an ESP32 driving a large e-ink display. No cloud services. No internet required during operation.
														
 
															+
														
 
															+---
														
 
															 ## Architecture
														
@@ -12,33 +16,67 @@ A live captioning system for deaf/hard-of-hearing church congregants. A Windows
 
															 [Audio source]
														
 
															      ↓ (USB mic or mixer line-in)
														
 
															 [Windows PC]
														
 
															-  ├── WhisperLiveKit (local Whisper server, WebSocket on port 8000)
														
 
															+  ├── WhisperLiveKit
														
 
															+  │     ├── Whisper large-v3 (transcription)
														
 
															+  │     └── Streaming Sortformer (real-time speaker diarization)
														
 
															+  │     WebSocket output: ws://localhost:8000/asr
														
 
															+  │
														
 
															   ├── Mosquitto MQTT broker (port 1883)
														
 
															-  └── bridge.py (Python: WS subscriber → sentence buffer → MQTT publisher)
														
 
															-     ↓ (WiFi / MQTT topic: display/text)
														
 
															-[ESP32-WROOM or S3]
														
 
															+  │
														
 
															+  ├── bridge.py
														
 
															+  │     ├── Subscribes to Whisper WebSocket
														
 
															+  │     ├── Receives: {text, speaker, is_final, ...}
														
 
															+  │     ├── Resolves speaker_id → name via speaker_registry
														
 
															+  │     ├── Buffers text to sentence boundary
														
 
															+  │     └── Publishes JSON payload to MQTT topic display/text
														
 
															+  │
														
 
															+  └── admin_ui.py (Tkinter)
														
 
															+        ├── Shows "New speaker detected" prompts
														
 
															+        ├── Operator types name once per unknown speaker
														
 
															+        └── Updates speaker_registry in real time
														
 
															+
														
 
															+     ↓ WiFi / MQTT
														
 
															+[ESP32-S3]
														
 
															   └── Waveshare 7.5" V2 e-ink display (SPI, GxEPD2 library)
														
 
															 ```
														
 
															+---
														
 
															+
														
 
															 ## PC Environment
														
 
															 - OS: Windows 10/11
														
 
															-- GPU: NVIDIA RTX series (tested with RTX 4070 Super)
														
 
															+- GPU: NVIDIA RTX series (RTX 4070 Super available)
														
 
															 - Python: 3.11+
														
 
															 - MQTT broker: Mosquitto (localhost:1883)
														
 
															-- Whisper server: WhisperLiveKit (`wlk --model large-v3 --language en`)
														
 
															-- Whisper WebSocket: `ws://localhost:8000/asr`
														
 
															+- Whisper server: WhisperLiveKit with `--diarization` flag
														
 
															+  - Command: `whisperlivekit-server --model large-v3 --diarization --language en`
														
 
															+  - WebSocket: `ws://localhost:8000/asr`
														
 
															+- Diarization model: Streaming Sortformer (SOTA 2025, via WhisperLiveKit)
														
 
															+  - Fallback: Diart (more stable, slightly older, also integrated in WhisperLiveKit)
														
 
															+  - Diart fallback requires pyannote model access (HuggingFace token + model agreement)
														
 
															+
														
 
															+### WhisperLiveKit Diarization Setup Notes
														
 
															+- Install with diarization extra: `pip install whisperlivekit[diarization-sortformer]`
														
 
															+- Sortformer and Voxtral extras are incompatible — install in separate environments
														
 
															+- For the Diart fallback (pyannote-based), must accept HuggingFace user conditions for:
														
 
															+  - `pyannote/segmentation`
														
 
															+  - `pyannote/segmentation-3.0`
														
 
															+  - `pyannote/embedding`
														
 
															+- Login: `huggingface-cli login`
														
 
															+- Streaming Sortformer is marked as in active development — fall back to Diart if unstable
														
 
															+
														
 
															+---
														
 
															 ## ESP32 Environment
														
 
															-- Board: ESP32-WROOM-32 or ESP32-S3
														
 
															-- Framework: Arduino (via PlatformIO)
														
 
															+- Board: ESP32-S3 (PSRAM required for large font glyph buffers)
														
 
															+- Framework: Arduino via PlatformIO
														
 
															 - Display: Waveshare 7.5" V2 (800×480 pixels, black/white)
														
 
															 - Display library: GxEPD2
														
 
															-- MQTT library: PubSubClient
														
 
															+- MQTT library: PubSubClient (increase buffer: `client.setBufferSize(512)`)
														
 
															 - Build tool: PlatformIO (VSCode)
														
 
															-### SPI Wiring (Waveshare 7.5" V2 to ESP32)
														
 
															+### SPI Wiring (Waveshare 7.5" V2 → ESP32)
														
 
															 | Display Pin | ESP32 Pin |
														
 
															 |---|---|
														
@@ -51,79 +89,168 @@ A live captioning system for deaf/hard-of-hearing church congregants. A Windows
 
															 | GND | GND |
														
 
															 | VCC | 3.3V |
														
 
															+---
														
 
															+
														
 
															 ## MQTT Topics
														
 
															 | Topic | Direction | Payload |
														
 
															 |---|---|---|
														
 
															-| `display/text` | PC → ESP32 | JSON: `{"lines": ["line1", "line2", "line3"]}` |
														
 
															-| `display/clear` | PC → ESP32 | Empty / any |
														
 
															+| `display/text` | PC → ESP32 | JSON: see payload schema below |
														
 
															+| `display/clear` | PC → ESP32 | Empty / any value |
														
 
															 | `display/status` | ESP32 → PC | JSON: `{"ready": true}` |
														
 
															+### display/text Payload Schema
														
 
															+
														
 
															+```json
														
 
															+{
														
 
															+  "speaker": "PASTOR JOHN",
														
 
															+  "speaker_changed": true,
														
 
															+  "lines": [
														
 
															+    "...and He said unto them, go",
														
 
															+    "into all the world and preach"
														
 
															+  ]
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+- `speaker`: resolved name string, or `null` if unknown/unnamed
														
 
															+- `speaker_changed`: `true` triggers full display refresh + speaker header redraw
														
 
															+- `lines`: array of pre-wrapped strings, max 40 chars each, max 3 items
														
 
															+
														
 
															+---
														
 
															+
														
 
															 ## Key Files
														
 
															-- `bridge/bridge.py` — Main Python bridge. Connects to Whisper WS, buffers text, publishes to MQTT.
														
 
															-- `esp32/src/main.cpp` — ESP32 firmware. WiFi + MQTT client, renders text to e-ink.
														
 
															-- `esp32/platformio.ini` — Board and library config.
														
 
															+### `bridge/bridge.py`
														
 
															+Main orchestrator. Connects to Whisper WebSocket and Mosquitto. Receives incremental diarized transcription. Buffers text. Resolves speaker names. Publishes MQTT payloads.
														
 
															+
														
 
															+**WebSocket message fields from WhisperLiveKit (with diarization):**
														
 
															+```json
														
 
															+{
														
 
															+  "text": "and He said unto them",
														
 
															+  "speaker": "SPEAKER_0",
														
 
															+  "is_final": true,
														
 
															+  "start": 12.4,
														
 
															+  "end": 15.1
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+**Bridge logic:**
														
 
															+1. On each `is_final` segment, extract `text` and `speaker`
														
 
															+2. Resolve `speaker` → name via `speaker_registry`
														
 
															+3. If speaker is unknown, notify `admin_ui` (via queue or callback)
														
 
															+4. Accumulate text into rolling buffer
														
 
															+5. On sentence boundary or 4s timeout, word-wrap and publish to MQTT
														
 
															+6. Set `speaker_changed: true` if speaker differs from last published segment
														
 
															+
														
 
															+### `bridge/speaker_registry.py`
														
 
															+Manages the session-persistent mapping of `SPEAKER_N` IDs to real names.
														
 
															+
														
 
															+```python
														
 
															+# Core interface
														
 
															+registry = SpeakerRegistry()
														
 
															+registry.assign(speaker_id="SPEAKER_0", name="Pastor John")
														
 
															+name = registry.resolve("SPEAKER_0")  # Returns "Pastor John" or None
														
 
															+registry.is_known("SPEAKER_1")        # Returns False
														
 
															+registry.save_session()               # Persist to JSON for the session
														
 
															+```
														
 
															-## Design Constraints & Decisions
														
 
															+- Session data stored in `bridge/sessions/YYYY-MM-DD.json`
														
 
															+- v2: will also store voice embeddings per speaker for cross-session recognition
														
 
															-### Refresh Strategy
														
 
															-- Full e-ink refresh: ~1.5–2 seconds with flash. Acceptable for sentence-level updates.
														
 
															-- Partial refresh: ~300ms, some ghosting. Use for rapid updates if needed.
														
 
															-- **Current approach**: buffer until sentence boundary or 4-second silence, then push full screen update.
														
 
															-- Display shows 3 lines of text. New text pushes old text up; oldest line drops off.
														
 
															+### `bridge/admin_ui.py`
														
 
															+Lightweight Tkinter window. Runs in a separate thread alongside bridge.py.
														
 
															-### Text Formatting
														
 
															-- Target font size: large enough to read at 3–5 metres (approx 36–48px equivalent at 800px wide)
														
 
															-- At ~800px wide with a large font: approximately 35–45 characters per line
														
 
															-- Lines wrap at word boundaries
														
 
															-- All caps optional for readability (configurable)
														
 
															+**Behaviour:**
														
 
															+- Displays current speaker label and resolved name (or "Unknown")
														
 
															+- When a new unknown `SPEAKER_N` is detected, shows a prompt: "New speaker detected. Who is this?"
														
 
															+- Operator types name and hits Enter
														
 
															+- Calls `registry.assign()` and the display updates immediately
														
 
															+- Also shows a manual override: operator can retype any name at any time
														
 
															-### Audio Input
														
 
															-- Preferred: direct feed from church mixing desk (line-in or USB audio interface)
														
 
															-- Fallback: USB condenser microphone near pulpit/lectern
														
 
															-- Whisper performs best with clean, low-noise input
														
 
															-- VAD (Voice Activity Detection) in WhisperLiveKit handles silence automatically
														
 
															+### `esp32/src/main.cpp`
														
 
															+ESP32 firmware. WiFi + MQTT client. Receives JSON payloads and renders to e-ink.
														
 
															-### Network
														
 
															-- All on local WiFi (church LAN or dedicated hotspot)
														
 
															-- MQTT broker on Windows PC
														
 
															-- ESP32 connects to same WiFi network
														
 
															-- Static IP recommended for ESP32 to avoid reconnection delays
														
 
															+**Display rendering logic:**
														
 
															+- On `speaker_changed: true`: full refresh, print speaker name in large CAPS on line 1, then print text lines below
														
 
															+- On `speaker_changed: false`: partial refresh, overwrite text lines only (speaker header stays)
														
 
															+- Track partial refresh count; force full refresh every 10 cycles to clear ghosting
														
 
															+- Font: large enough for ~40 chars across 800px (approx FreeSans 18–24pt at this resolution)
														
 
															-## Bridge Script Logic (bridge.py)
														
 
															+---
														
 
															+
														
 
															+## Display Layout (800×480 pixels)
														
 
															 ```
														
 
															-1. Connect to Mosquitto MQTT broker
														
 
															-2. Connect to WhisperLiveKit WebSocket (ws://localhost:8000/asr)
														
 
															-3. Receive partial transcription updates
														
 
															-4. Accumulate words into a sentence buffer
														
 
															-5. On sentence-end signal (or timeout):
														
 
															-   a. Word-wrap text into lines (max ~40 chars each)
														
 
															-   b. Maintain a rolling 3-line buffer
														
 
															-   c. Publish JSON payload to MQTT topic display/text
														
 
															-6. On reconnect events: re-establish WS and MQTT connections
														
 
															+┌────────────────────────────────────────────────┐  ← full width
														
 
															+│ PASTOR JOHN                                    │  ← speaker name, top ~80px, bold/large
														
 
															+│────────────────────────────────────────────────│
														
 
															+│ ...and He said unto them, go into all the      │  ← text line 1
														
 
															+│ world and preach the gospel to every           │  ← text line 2
														
 
															+│ creature. He that believeth and is baptised    │  ← text line 3
														
 
															+└────────────────────────────────────────────────┘
														
 
															 ```
														
 
															-## Known Issues / Open Questions
														
 
															+- Speaker name zone: top ~80px
														
 
															+- Text zone: remaining ~400px (480 − 80), 3 lines at ~130px each
														
 
															+- On speaker change: full clear, redraw both zones
														
 
															+- On same speaker new text: partial refresh text zone only
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## Speaker Diarization Notes
														
 
															-- [ ] Partial refresh ghosting threshold — how many partial refreshes before forcing a full clear?
														
 
															-- [ ] Whisper latency with large-v3 model — may need to test medium or distil-large-v3 for lower latency
														
 
															-- [ ] Line-wrapping edge cases with long words (e.g. proper nouns, scripture references)
														
 
															-- [ ] ESP32 RAM: WROOM has 520KB; large font bitmaps may require PSRAM (use S3 variant)
														
 
															-- [ ] WiFi reconnection handling in firmware — need watchdog/retry logic
														
 
															+### v1 — Operator-Assisted Naming
														
 
															+- Zero prep before service
														
 
															+- admin_ui.py shows prompt when new `SPEAKER_N` appears
														
 
															+- Operator at sound desk types name (e.g. "Pastor John") once
														
 
															+- Registry holds the mapping for the entire session
														
 
															-## Development Notes
														
 
															+### v2 — Voice Enrolment (future)
														
 
															+- Record 10–30s of natural speech from each speaker (not word lists)
														
 
															+- Extract speaker embedding using pyannote `SpeakerEmbedding` pipeline
														
 
															+- Store embedding in `bridge/profiles/<name>.npy`
														
 
															+- At runtime, compare incoming `SPEAKER_N` embedding to stored profiles
														
 
															+- If cosine similarity > threshold (~0.85), auto-assign name
														
 
															+- Fall back to operator prompt if no match above threshold
														
 
															-- WhisperLiveKit WebSocket returns incremental JSON with `text` and `is_final` fields
														
 
															-- GxEPD2 supports both full and partial refresh; partial requires `setPartialWindow()`
														
 
															-- PubSubClient default packet size is 128 bytes — must increase to handle JSON payloads (~200 bytes)
														
 
															-- Use `client.setBufferSize(512)` in PubSubClient setup
														
 
															+### Known Diarization Constraints
														
 
															+- Streaming Sortformer tracks 2–4+ speakers reliably
														
 
															+- Works best with clean, low-noise audio — direct mixer feed strongly preferred
														
 
															+- Background music (worship) may confuse diarization; consider muting music channel on the transcription input
														
 
															+- Congregation responses ("Amen", "Hallelujah") may appear as brief unknown speakers — consider a minimum-duration filter (~2s) before triggering a speaker prompt
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## Design Constraints & Open Questions
														
 
															+
														
 
															+- [ ] Streaming Sortformer stability in WhisperLiveKit — test early; fall back to Diart if needed
														
 
															+- [ ] Minimum speaker segment duration before triggering name prompt (avoid congregation one-liners)
														
 
															+- [ ] Partial refresh ghosting — determine optimal full-refresh interval for the chosen display
														
 
															+- [ ] ESP32-S3 PSRAM: confirm font glyph buffer fits; WROOM (no PSRAM) likely insufficient for large fonts
														
 
															+- [ ] Word-wrap edge cases: long proper nouns, scripture references, place names
														
 
															+- [ ] Session save/restore: if PC crashes mid-service, can operator reload speaker assignments quickly?
														
 
															+- [ ] Audio routing on Windows: ensure Whisper receives the mixer/mic channel, not system audio
														
 
															+
														
 
															+---
														
 
															 ## Testing Approach
														
 
															-1. Test Whisper server standalone: speak into mic, verify text in browser at `http://localhost:8000`
														
 
															-2. Test MQTT: use MQTT Explorer or `mosquitto_sub` to verify bridge publishes correctly
														
 
															-3. Test ESP32 display: send static MQTT messages manually before connecting bridge
														
 
															-4. End-to-end: full pipeline test with recorded sermon audio
														
 
															-5. In-situ trial: 1–2 Sunday services with a volunteer congregant providing feedback
														
 
															+1. **Whisper standalone**: speak into mic, verify text output in browser at `http://localhost:8000`
														
 
															+2. **Diarization standalone**: two people alternate speaking, verify `SPEAKER_0` / `SPEAKER_1` labels in WS output
														
 
															+3. **Registry + bridge**: run bridge.py, verify name prompts appear in admin_ui.py, verify MQTT payloads via `mosquitto_sub -t display/#`
														
 
															+4. **ESP32 display**: send static MQTT messages manually before connecting bridge
														
 
															+5. **End-to-end**: full pipeline test with recorded sermon audio (mix of 2–3 speakers)
														
 
															+6. **In-situ trial**: 1–2 Sunday services with a volunteer congregant providing feedback
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## Development Sequence (Suggested)
														
 
															+
														
 
															+1. Get WhisperLiveKit running with `--diarization` flag, confirm WS output includes speaker labels
														
 
															+2. Write `bridge.py` (transcription only, no diarization yet) → verify MQTT publish works
														
 
															+3. Add `speaker_registry.py` and `admin_ui.py` → test name mapping loop
														
 
															+4. Integrate diarization into bridge — handle `speaker_changed` logic
														
 
															+5. Write ESP32 firmware — basic text display
														
 
															+6. Add speaker header zone and refresh logic to ESP32 firmware
														
 
															+7. Full end-to-end test on bench
														
 
															+8. Church trial
														
--- a/README.md
+++ b/README.md
@@ -1,94 +1,187 @@
 
															 # Church Live Transcription Display
														
 
															-A live speech-to-text system for deaf and hard-of-hearing congregants, displaying real-time transcriptions on an e-ink screen driven by an ESP32 microcontroller.
														
 
															+A live speech-to-text system for deaf and hard-of-hearing congregants, displaying real-time transcriptions with speaker identification on an e-ink screen driven by an ESP32 microcontroller.
														
 
															 ## Overview
														
 
															-Audio from the church service is captured on a Windows PC, transcribed locally using a Whisper-based model, and the resulting text is pushed over WiFi/MQTT to an ESP32 that drives a large e-ink display. The display is readable in any lighting condition and requires no screen brightness — ideal for a church environment.
														
 
															+Audio from the church service is captured on a Windows PC, transcribed and speaker-diarized locally using WhisperLiveKit, and the resulting speaker-tagged text is pushed over WiFi/MQTT to an ESP32 that drives a large e-ink display. The display shows who is speaking alongside what they are saying, updates in real time, and requires no internet connection.
														
 
															 ```
														
 
															-[Microphone / Mixer] → [Windows PC: Whisper transcription]
														
 
															-                              ↓ MQTT over WiFi
														
 
															-                       [ESP32 + e-ink display]
														
 
															+[Microphone / Mixer]
														
 
															+        ↓
														
 
															+[Windows PC]
														
 
															+  ├── WhisperLiveKit  (transcription + speaker diarization)
														
 
															+  ├── Mosquitto MQTT broker
														
 
															+  ├── bridge.py       (WebSocket → name mapping → MQTT)
														
 
															+  └── Speaker Admin UI (operator names speakers live)
														
 
															+        ↓ MQTT / WiFi
														
 
															+[ESP32 + e-ink display]
														
 
															 ```
														
 
															 ## Goals
														
 
															 - Real-time captions with minimal latency (target: < 3 seconds end-to-end)
														
 
															+- Speaker identification — display who is speaking when the speaker changes
														
 
															+- Named speakers — operator maps anonymous speaker IDs to real names during service
														
 
															+- Future: voice enrolment so names are matched automatically from pre-recorded samples
														
 
															 - Runs entirely on local network — no cloud dependency
														
 
															 - Readable at distance with large font (36–48pt equivalent)
														
 
															-- Displays 3–4 lines of rolling text, clearing as new content arrives
														
 
															 - Low cost, low complexity hardware
														
 
															+---
														
 
															+
														
 
															+## Speaker Identification
														
 
															+
														
 
															+### How It Works
														
 
															+
														
 
															+WhisperLiveKit includes **Streaming Sortformer** (SOTA 2025), a real-time speaker diarization model developed by NVIDIA. It runs alongside Whisper transcription and tags each segment of speech with an anonymous speaker label (`SPEAKER_0`, `SPEAKER_1`, etc.).
														
 
															+
														
 
															+A name mapping layer in the bridge script translates these anonymous labels into real names, which are then included in the MQTT payload sent to the display.
														
 
															+
														
 
															+### Display Format
														
 
															+
														
 
															+When a speaker changes, their name is shown as a header line above their words. The name is not repeated on every line — only when the speaker changes.
														
 
															+
														
 
															+```
														
 
															+┌─────────────────────────────────┐
														
 
															+│ PASTOR JOHN                     │
														
 
															+│ ...and He said unto them, go    │
														
 
															+│ into all the world and preach   │
														
 
															+└─────────────────────────────────┘
														
 
															+
														
 
															+┌─────────────────────────────────┐
														
 
															+│ MARY (READER)                   │
														
 
															+│ A reading from Luke chapter 4,  │
														
 
															+│ verse 18...                     │
														
 
															+└─────────────────────────────────┘
														
 
															+```
														
 
															+
														
 
															+### Speaker Naming — Two Approaches
														
 
															+
														
 
															+**v1 — Operator-Assisted Naming (implemented first)**
														
 
															+
														
 
															+A simple admin UI runs on the PC alongside the bridge script. When a new unknown speaker is detected, the operator sees a prompt ("New speaker detected — who is this?") and types the name once. That name is stored for the session and used every time that speaker is detected again.
														
 
															+
														
 
															+- No setup required before the service
														
 
															+- Works from the very first Sunday
														
 
															+- Operator (e.g. sound desk volunteer) assigns names as speakers appear
														
 
															+
														
 
															+**v2 — Voice Enrolment (future upgrade)**
														
 
															+
														
 
															+Before the service, a short voice sample (10–30 seconds) is recorded for each expected speaker. The bridge script compares incoming speaker embeddings against enrolled voices and automatically assigns the correct name without operator input.
														
 
															+
														
 
															+- No operator intervention during the service
														
 
															+- More accurate for recurring speakers (pastor, regular readers)
														
 
															+- Enrolled voice profiles persist week to week
														
 
															+
														
 
															+### Typical Church Speakers
														
 
															+
														
 
															+| Role | Frequency | Notes |
														
 
															+|---|---|---|
														
 
															+| Pastor / Preacher | Every service | Primary speaker, longest segments |
														
 
															+| Worship leader | Most services | May overlap with congregation response |
														
 
															+| Reader / Scripture | Weekly | Short, distinct segments |
														
 
															+| Visiting speaker | Occasionally | New enrolment or operator naming needed |
														
 
															+| Announcements | Weekly | Often the same person each week |
														
 
															+
														
 
															+---
														
 
															+
														
 
															 ## System Components
														
 
															 ### PC Side (Windows)
														
 
															-- **WhisperLiveKit** — local GPU-accelerated speech-to-text server with WebSocket output
														
 
															-- **Mosquitto** — lightweight MQTT broker running on the same PC
														
 
															-- **Python bridge script** — subscribes to Whisper WebSocket, buffers sentences, publishes to MQTT
														
 
															+- **WhisperLiveKit** — local GPU-accelerated transcription + diarization server
														
 
															+- **Mosquitto** — lightweight MQTT broker (same PC, port 1883)
														
 
															+- **bridge.py** — WebSocket subscriber, speaker name mapper, MQTT publisher
														
 
															+- **admin_ui.py** — lightweight operator interface for live speaker naming
														
 
															+- **speaker_registry.py** — manages speaker ID ↔ name mappings (voice enrolment planned for v2)
														
 
															 ### ESP32 Side
														
 
															-- **ESP32 (WROOM or S3)** — WiFi-enabled microcontroller
														
 
															+- **ESP32-S3** — WiFi microcontroller (PSRAM required for large font bitmaps)
														
 
															 - **Waveshare e-ink display** — 7.5" V2 (800×480) or larger
														
 
															-- **GxEPD2 / Adafruit GFX** — display driver library
														
 
															-- **PubSubClient** — MQTT client library for Arduino
														
 
															+- **GxEPD2** — display driver library
														
 
															+- **PubSubClient** — MQTT client library
														
 
															+
														
 
															+---
														
 
															 ## Hardware
														
 
															 | Component | Model | Notes |
														
 
															 |---|---|---|
														
 
															-| Microcontroller | ESP32-WROOM-32 or ESP32-S3 | S3 preferred for more RAM |
														
 
															+| Microcontroller | ESP32-S3 | PSRAM required for large font bitmaps |
														
 
															 | Display | Waveshare 7.5" V2 e-Paper | 800×480, supports partial refresh |
														
 
															-| PC | Windows 10/11 with NVIDIA GPU | RTX series recommended for real-time Whisper |
														
 
															-| Microphone | USB condenser or mixer feed | Direct mixer feed preferred for clean audio |
														
 
															+| PC | Windows 10/11 with NVIDIA GPU | RTX series recommended |
														
 
															+| Microphone | USB condenser or direct mixer feed | Mixer feed preferred for clean diarization |
														
 
															+
														
 
															+---
														
 
															 ## Key Design Decisions
														
 
															-### Text Buffering Strategy
														
 
															-E-ink full refresh takes ~1–2 seconds. Rather than updating word-by-word, the bridge script accumulates text until a natural pause (sentence boundary or ~5 seconds of speech), then pushes a complete "screen's worth" as a single MQTT message. Partial refresh mode can be used for faster but ghosting-prone updates.
														
 
															+### Text & Speaker Buffering
														
 
															+The bridge script accumulates text until a sentence boundary or natural pause (~4s), then checks whether the speaker has changed. If the speaker is unchanged, only new text lines are pushed. If the speaker has changed, a full new payload is sent including the speaker name header, triggering a full display refresh.
														
 
															 ### Display Layout
														
 
															-- 3–4 lines of large text
														
 
															-- Most recent line at bottom, scrolling upward
														
 
															-- Simple black-on-white, no graphics
														
 
															-- Font size prioritises readability at 3–5 metres
														
 
															+- **Line 1:** Speaker name in CAPS — printed only on speaker change
														
 
															+- **Lines 2–4:** Rolling transcription text, wrapping at ~40 chars per line
														
 
															+- On speaker change: full screen clear then redraw with new name header
														
 
															+- Font targets readability at 3–5 metres
														
 
															+
														
 
															+### E-ink Refresh Strategy
														
 
															+- Speaker change → **full refresh** (~1.5s flash — clean slate, acceptable at speaker transition)
														
 
															+- Same speaker, new text → **partial refresh** (~300ms, minor ghosting)
														
 
															+- Force full refresh every 10 partial refreshes to clear accumulated ghosting
														
 
															 ### Network
														
 
															-- All traffic stays on local WiFi network
														
 
															-- MQTT broker on PC (port 1883)
														
 
															-- No internet required during operation
														
 
															+- All traffic on local WiFi (church LAN or dedicated hotspot)
														
 
															+- MQTT broker on Windows PC (port 1883)
														
 
															+- Static IP recommended for ESP32 to avoid reconnection delays
														
 
															+
														
 
															+---
														
 
															 ## Repository Structure
														
 
															 ```
														
 
															 /
														
 
															-├── README.md               — This file
														
 
															-├── CLAUDE.md               — AI assistant context for development sessions
														
 
															+├── README.md                     — This file
														
 
															+├── CLAUDE.md                     — AI assistant context for development sessions
														
 
															 ├── bridge/
														
 
															-│   └── bridge.py           — Python: Whisper WebSocket → MQTT publisher
														
 
															+│   ├── bridge.py                 — Main bridge: Whisper WS → name map → MQTT
														
 
															+│   ├── speaker_registry.py       — Speaker ID ↔ name mapping and voice enrolment
														
 
															+│   └── admin_ui.py               — Operator UI for live speaker naming (Tkinter)
														
 
															 ├── esp32/
														
 
															 │   ├── src/
														
 
															-│   │   └── main.cpp        — ESP32 Arduino firmware
														
 
															-│   └── platformio.ini      — PlatformIO build config
														
 
															+│   │   └── main.cpp              — ESP32 Arduino firmware
														
 
															+│   └── platformio.ini            — PlatformIO build config
														
 
															 └── docs/
														
 
															-    ├── hardware-wiring.md  — SPI pin connections for display
														
 
															-    └── setup.md            — Installation and configuration guide
														
 
															+    ├── hardware-wiring.md        — SPI pin connections for Waveshare display
														
 
															+    ├── setup.md                  — Installation and configuration guide
														
 
															+    └── speaker-enrolment.md      — Guide for recording and enrolling voice samples (v2)
														
 
															 ```
														
 
															+---
														
 
															+
														
 
															 ## Reference Projects
														
 
															-- [WhisperLiveKit](https://github.com/QuentinFuxa/WhisperLiveKit) — real-time Whisper server with WebSocket API
														
 
															-- [reriiasu/speech-to-text](https://github.com/reriiasu/speech-to-text) — faster-whisper with VAD and WebSocket output
														
 
															-- [denwilliams/mqtt-epaper](https://github.com/denwilliams/mqtt-epaper) — ESP32 e-paper display driven by MQTT JSON
														
 
															+- [WhisperLiveKit](https://github.com/QuentinFuxa/WhisperLiveKit) — real-time Whisper + Streaming Sortformer diarization
														
 
															+- [NVIDIA Streaming Sortformer](https://developer.nvidia.com/blog/identify-speakers-in-meetings-calls-and-voice-apps-in-real-time-with-nvidia-streaming-sortformer/) — the diarization model integrated into WhisperLiveKit
														
 
															+- [pyannote.audio](https://github.com/pyannote/pyannote-audio) — fallback diarization (Diart integration in WhisperLiveKit)
														
 
															+- [denwilliams/mqtt-epaper](https://github.com/denwilliams/mqtt-epaper) — ESP32 e-paper display driven by MQTT
														
 
															 - [cuci90/epaper_mqtt_esp32](https://github.com/cuci90/epaper_mqtt_esp32) — ESP32 Waveshare display MQTT template
														
 
															+---
														
 
															+
														
 
															 ## Status
														
 
															 🟡 **Planning / Research phase**
														
 
															 - [x] Architecture defined
														
 
															-- [ ] Python bridge script
														
 
															-- [ ] ESP32 firmware
														
 
															-- [ ] Hardware wiring and test
														
 
															+- [x] Speaker diarization approach selected (WhisperLiveKit + Streaming Sortformer)
														
 
															+- [x] Speaker naming strategy defined (operator-assisted v1, voice enrolment v2)
														
 
															+- [ ] Python bridge script (transcription only)
														
 
															+- [ ] Speaker name mapping layer (`speaker_registry.py`)
														
 
															+- [ ] Operator admin UI (`admin_ui.py`)
														
 
															+- [ ] ESP32 firmware — basic text display
														
 
															+- [ ] ESP32 firmware — speaker header layout + refresh logic
														
 
															+- [ ] Hardware wiring and bench test
														
 
															 - [ ] End-to-end integration test
														
 
															-- [ ] Church deployment trial
														
 
															+- [ ] Voice enrolment system (v2)
														
 
															+- [ ] Church deployment trial
														
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -0,0 +1,361 @@
 
															+#!/usr/bin/env python3
														
 
															+"""
														
 
															+bridge.py — Church Live Transcription Bridge
														
 
															+
														
 
															+Streams microphone audio to WhisperLiveKit (ws://localhost:8000/asr),
														
 
															+receives transcription + speaker diarization, buffers sentences, and
														
 
															+publishes rolling 3-line JSON to Mosquitto MQTT for the e-ink display.
														
 
															+
														
 
															+Start WhisperLiveKit with:
														
 
															+    wlk --model large-v3 --language en --diarization
														
 
															+
														
 
															+Run this script:
														
 
															+    python bridge.py
														
 
															+"""
														
 
															+
														
 
															+import asyncio
														
 
															+import json
														
 
															+import re
														
 
															+import textwrap
														
 
															+import threading
														
 
															+import time
														
 
															+from collections import Counter
														
 
															+
														
 
															+import numpy as np
														
 
															+import paho.mqtt.client as mqtt
														
 
															+import sounddevice as sd
														
 
															+import websockets
														
 
															+import tkinter as tk
														
 
															+from tkinter import ttk
														
 
															+
														
 
															+# ── Configuration ─────────────────────────────────────────────────────────────
														
 
															+
														
 
															+MQTT_HOST        = "localhost"
														
 
															+MQTT_PORT        = 1883
														
 
															+MQTT_TOPIC_TEXT  = "display/text"
														
 
															+MQTT_TOPIC_CLEAR = "display/clear"
														
 
															+
														
 
															+WS_URL      = "ws://localhost:8000/asr"
														
 
															+SAMPLE_RATE = 16000
														
 
															+CHANNELS    = 1
														
 
															+BLOCKSIZE   = 4096          # ~256 ms per chunk at 16 kHz
														
 
															+
														
 
															+SENTENCE_TIMEOUT = 4.0      # seconds of silence before forcing a flush
														
 
															+MAX_LINE_CHARS   = 38       # characters per line (~24pt font at 800 px wide)
														
 
															+DISPLAY_LINES    = 3
														
 
															+
														
 
															+# ── State ─────────────────────────────────────────────────────────────────────
														
 
															+
														
 
															+class BridgeState:
														
 
															+    """All mutable state, protected by a single lock."""
														
 
															+
														
 
															+    def __init__(self):
														
 
															+        self._lock             = threading.Lock()
														
 
															+        self.speaker_names: dict[str, str] = {}   # "SPEAKER_00" → "Pastor"
														
 
															+        self._current_speaker: str | None  = None
														
 
															+        self._speaker_changed              = False
														
 
															+        self._text_buffer                  = ""
														
 
															+        self._display: list[str]           = [""] * DISPLAY_LINES
														
 
															+        self._last_final_time              = time.monotonic()
														
 
															+
														
 
															+    # ── Speaker name mapping ──────────────────────────────────────────────────
														
 
															+
														
 
															+    def set_speaker_name(self, speaker_id: str, name: str) -> None:
														
 
															+        with self._lock:
														
 
															+            self.speaker_names[speaker_id] = name.strip()
														
 
															+
														
 
															+    def _resolve(self, speaker_id: str | None) -> str | None:
														
 
															+        if not speaker_id:
														
 
															+            return None
														
 
															+        return self.speaker_names.get(speaker_id, speaker_id)
														
 
															+
														
 
															+    # ── Text ingestion ────────────────────────────────────────────────────────
														
 
															+
														
 
															+    def push_final(self, text: str, speaker_id: str | None, mqtt_client: mqtt.Client) -> None:
														
 
															+        """Accept a finalised segment; flush on sentence boundary or speaker change."""
														
 
															+        with self._lock:
														
 
															+            resolved = self._resolve(speaker_id)
														
 
															+
														
 
															+            if resolved != self._current_speaker:
														
 
															+                if self._text_buffer:
														
 
															+                    self._flush(mqtt_client)          # push previous speaker's words first
														
 
															+                self._current_speaker = resolved
														
 
															+                self._speaker_changed = True
														
 
															+
														
 
															+            sep = " " if self._text_buffer else ""
														
 
															+            self._text_buffer += sep + text.strip()
														
 
															+            self._last_final_time = time.monotonic()
														
 
															+
														
 
															+            if _is_sentence_end(text):
														
 
															+                self._flush(mqtt_client)
														
 
															+
														
 
															+    def maybe_timeout_flush(self, mqtt_client: mqtt.Client) -> None:
														
 
															+        with self._lock:
														
 
															+            if self._text_buffer and (time.monotonic() - self._last_final_time) > SENTENCE_TIMEOUT:
														
 
															+                self._flush(mqtt_client)
														
 
															+
														
 
															+    def _flush(self, mqtt_client: mqtt.Client) -> None:
														
 
															+        """Word-wrap buffer → rolling display → publish. Must hold lock."""
														
 
															+        text = self._text_buffer.strip()
														
 
															+        self._text_buffer = ""
														
 
															+        if not text:
														
 
															+            return
														
 
															+
														
 
															+        new_lines: list[str] = []
														
 
															+        if self._speaker_changed and self._current_speaker:
														
 
															+            new_lines.append(f"[{self._current_speaker.upper()}]")
														
 
															+            self._speaker_changed = False
														
 
															+
														
 
															+        new_lines.extend(textwrap.wrap(text, MAX_LINE_CHARS) or [""])
														
 
															+
														
 
															+        self._display.extend(new_lines)
														
 
															+        self._display = self._display[-DISPLAY_LINES:]
														
 
															+        while len(self._display) < DISPLAY_LINES:
														
 
															+            self._display.insert(0, "")
														
 
															+
														
 
															+        payload = json.dumps({"lines": list(self._display)})
														
 
															+        mqtt_client.publish(MQTT_TOPIC_TEXT, payload)
														
 
															+        print(f"[Display] {self._display}")
														
 
															+
														
 
															+    def clear(self, mqtt_client: mqtt.Client) -> None:
														
 
															+        with self._lock:
														
 
															+            self._display         = [""] * DISPLAY_LINES
														
 
															+            self._text_buffer     = ""
														
 
															+            self._current_speaker = None
														
 
															+            self._speaker_changed = False
														
 
															+        mqtt_client.publish(MQTT_TOPIC_CLEAR, "")
														
 
															+        print("[Display] Cleared")
														
 
															+
														
 
															+
														
 
															+# ── Helpers ───────────────────────────────────────────────────────────────────
														
 
															+
														
 
															+def _is_sentence_end(text: str) -> bool:
														
 
															+    return bool(re.search(r'[.!?…]\s*$', text.strip()))
														
 
															+
														
 
															+
														
 
															+def _extract_speaker(data: dict) -> str | None:
														
 
															+    """
														
 
															+    Extract speaker ID from a WhisperLiveKit response dict.
														
 
															+    Handles segment-level {"speaker": "SPEAKER_00"} and word-level
														
 
															+    {"words": [{"speaker": "SPEAKER_00", ...}, ...]} formats.
														
 
															+    """
														
 
															+    if "speaker" in data:
														
 
															+        return data["speaker"] or None
														
 
															+
														
 
															+    words = data.get("words", [])
														
 
															+    if words:
														
 
															+        ids = [w.get("speaker") for w in words if w.get("speaker")]
														
 
															+        if ids:
														
 
															+            return Counter(ids).most_common(1)[0][0]
														
 
															+
														
 
															+    return None
														
 
															+
														
 
															+
														
 
															+# ── MQTT ──────────────────────────────────────────────────────────────────────
														
 
															+
														
 
															+def build_mqtt_client() -> mqtt.Client:
														
 
															+    client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2)
														
 
															+
														
 
															+    def on_connect(client, userdata, flags, rc, props):
														
 
															+        print("[MQTT] Connected" if rc == 0 else f"[MQTT] Failed: {rc}")
														
 
															+
														
 
															+    def on_disconnect(client, userdata, flags, rc, props):
														
 
															+        print(f"[MQTT] Disconnected ({rc}), will reconnect...")
														
 
															+
														
 
															+    client.on_connect    = on_connect
														
 
															+    client.on_disconnect = on_disconnect
														
 
															+    client.reconnect_delay_set(min_delay=1, max_delay=30)
														
 
															+    client.connect_async(MQTT_HOST, MQTT_PORT)
														
 
															+    client.loop_start()
														
 
															+    return client
														
 
															+
														
 
															+
														
 
															+# ── WebSocket + audio pipeline ────────────────────────────────────────────────
														
 
															+
														
 
															+async def _sender(ws, queue: asyncio.Queue) -> None:
														
 
															+    while not queue.empty():        # drain stale audio before streaming
														
 
															+        queue.get_nowait()
														
 
															+    while True:
														
 
															+        chunk = await queue.get()
														
 
															+        await ws.send(chunk)
														
 
															+
														
 
															+
														
 
															+async def _receiver(ws, state: BridgeState, mqtt_client: mqtt.Client) -> None:
														
 
															+    async for message in ws:
														
 
															+        try:
														
 
															+            data = json.loads(message)
														
 
															+        except (json.JSONDecodeError, TypeError):
														
 
															+            continue
														
 
															+
														
 
															+        text     = (data.get("text") or data.get("buffer_transcription") or "").strip()
														
 
															+        is_final = data.get("is_final", False) or data.get("end_of_segment", False)
														
 
															+        speaker  = _extract_speaker(data)
														
 
															+
														
 
															+        if is_final and text:
														
 
															+            print(f"[Whisper] ({speaker or '?'}) {text}")
														
 
															+            state.push_final(text, speaker, mqtt_client)
														
 
															+
														
 
															+
														
 
															+async def _flusher(state: BridgeState, mqtt_client: mqtt.Client) -> None:
														
 
															+    while True:
														
 
															+        await asyncio.sleep(1.0)
														
 
															+        state.maybe_timeout_flush(mqtt_client)
														
 
															+
														
 
															+
														
 
															+async def audio_ws_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
														
 
															+    audio_queue: asyncio.Queue[bytes] = asyncio.Queue(maxsize=120)
														
 
															+    loop = asyncio.get_running_loop()
														
 
															+
														
 
															+    def audio_callback(indata: np.ndarray, frames: int, time_info, status) -> None:
														
 
															+        if status:
														
 
															+            print(f"[Audio] {status}")
														
 
															+        chunk = indata.tobytes()
														
 
															+        def _put():
														
 
															+            try:
														
 
															+                audio_queue.put_nowait(chunk)
														
 
															+            except asyncio.QueueFull:
														
 
															+                pass
														
 
															+        loop.call_soon_threadsafe(_put)
														
 
															+
														
 
															+    with sd.InputStream(
														
 
															+        samplerate=SAMPLE_RATE,
														
 
															+        channels=CHANNELS,
														
 
															+        dtype="int16",
														
 
															+        blocksize=BLOCKSIZE,
														
 
															+        callback=audio_callback,
														
 
															+    ):
														
 
															+        flusher = asyncio.create_task(_flusher(state, mqtt_client))
														
 
															+        try:
														
 
															+            while True:
														
 
															+                try:
														
 
															+                    print(f"[WS] Connecting to {WS_URL} ...")
														
 
															+                    async with websockets.connect(WS_URL, max_size=2**23) as ws:
														
 
															+                        print("[WS] Connected")
														
 
															+                        send_t = asyncio.create_task(_sender(ws, audio_queue))
														
 
															+                        recv_t = asyncio.create_task(_receiver(ws, state, mqtt_client))
														
 
															+                        done, pending = await asyncio.wait(
														
 
															+                            [send_t, recv_t], return_when=asyncio.FIRST_COMPLETED
														
 
															+                        )
														
 
															+                        for t in pending:
														
 
															+                            t.cancel()
														
 
															+                        for t in done:
														
 
															+                            if not t.cancelled() and (exc := t.exception()):
														
 
															+                                print(f"[WS] Task error: {exc}")
														
 
															+                except (websockets.ConnectionClosed, OSError, ConnectionRefusedError) as exc:
														
 
															+                    print(f"[WS] {exc}  — retrying in 3 s...")
														
 
															+                    await asyncio.sleep(3)
														
 
															+        finally:
														
 
															+            flusher.cancel()
														
 
															+
														
 
															+
														
 
															+def run_async_loop(state: BridgeState, mqtt_client: mqtt.Client) -> None:
														
 
															+    asyncio.run(audio_ws_loop(state, mqtt_client))
														
 
															+
														
 
															+
														
 
															+# ── Speaker name-mapping UI ───────────────────────────────────────────────────
														
 
															+
														
 
															+PRESET_SPEAKERS = [
														
 
															+    ("SPEAKER_00", "Pastor"),
														
 
															+    ("SPEAKER_01", "Reader"),
														
 
															+    ("SPEAKER_02", "Guest"),
														
 
															+    ("SPEAKER_03", "Choir"),
														
 
															+]
														
 
															+
														
 
															+
														
 
															+def run_speaker_ui(state: BridgeState, mqtt_client: mqtt.Client) -> None:
														
 
															+    root = tk.Tk()
														
 
															+    root.title("Transcription Bridge — Speaker Names")
														
 
															+    root.attributes("-topmost", True)
														
 
															+    root.resizable(False, False)
														
 
															+
														
 
															+    tk.Label(root, text="Speaker Name Mapping", font=("Helvetica", 12, "bold")).grid(
														
 
															+        row=0, column=0, columnspan=3, pady=(12, 2), padx=12
														
 
															+    )
														
 
															+    tk.Label(
														
 
															+        root,
														
 
															+        text="Diarization is automatic. Assign readable names to each speaker ID.",
														
 
															+        font=("Helvetica", 9), fg="gray", justify="center",
														
 
															+    ).grid(row=1, column=0, columnspan=3, pady=(0, 8))
														
 
															+
														
 
															+    tk.Label(root, text="Speaker ID",     font=("Helvetica", 10, "bold")).grid(row=2, column=0, padx=8)
														
 
															+    tk.Label(root, text="Friendly Name",  font=("Helvetica", 10, "bold")).grid(row=2, column=1, padx=8)
														
 
															+
														
 
															+    entries: list[tuple[str, tk.Entry]] = []
														
 
															+    for i, (sid, default) in enumerate(PRESET_SPEAKERS):
														
 
															+        tk.Label(root, text=sid, font=("Courier", 10)).grid(row=3+i, column=0, sticky="e", padx=8, pady=3)
														
 
															+        e = tk.Entry(root, width=16, font=("Helvetica", 10))
														
 
															+        e.insert(0, default)
														
 
															+        e.grid(row=3+i, column=1, padx=8, pady=3)
														
 
															+        entries.append((sid, e))
														
 
															+
														
 
															+        def _apply(s=sid, entry=e):
														
 
															+            state.set_speaker_name(s, entry.get())
														
 
															+            print(f"[UI] {s} → {entry.get()!r}")
														
 
															+
														
 
															+        tk.Button(root, text="Apply", command=_apply, width=6).grid(row=3+i, column=2, padx=6)
														
 
															+
														
 
															+    ttk.Separator(root, orient="horizontal").grid(
														
 
															+        row=7, column=0, columnspan=3, sticky="ew", padx=8, pady=8
														
 
															+    )
														
 
															+
														
 
															+    # Custom ID row
														
 
															+    tk.Label(root, text="Custom ID:").grid(row=8, column=0, sticky="e", padx=8)
														
 
															+    cid = tk.Entry(root, width=14, font=("Courier", 10))
														
 
															+    cid.insert(0, "SPEAKER_04")
														
 
															+    cid.grid(row=8, column=1, sticky="w", padx=8, pady=2)
														
 
															+
														
 
															+    tk.Label(root, text="Name:").grid(row=9, column=0, sticky="e", padx=8)
														
 
															+    cname = tk.Entry(root, width=14, font=("Helvetica", 10))
														
 
															+    cname.grid(row=9, column=1, sticky="w", padx=8, pady=2)
														
 
															+
														
 
															+    def _apply_custom():
														
 
															+        s, n = cid.get().strip(), cname.get().strip()
														
 
															+        if s and n:
														
 
															+            state.set_speaker_name(s, n)
														
 
															+            print(f"[UI] Custom: {s} → {n!r}")
														
 
															+
														
 
															+    tk.Button(root, text="Apply", command=_apply_custom, width=6).grid(row=9, column=2, padx=6)
														
 
															+
														
 
															+    ttk.Separator(root, orient="horizontal").grid(
														
 
															+        row=10, column=0, columnspan=3, sticky="ew", padx=8, pady=8
														
 
															+    )
														
 
															+
														
 
															+    def _apply_all():
														
 
															+        for sid, entry in entries:
														
 
															+            state.set_speaker_name(sid, entry.get())
														
 
															+        print("[UI] All names applied")
														
 
															+
														
 
															+    tk.Button(root, text="Apply All Names", width=18, command=_apply_all).grid(
														
 
															+        row=11, column=0, columnspan=2, padx=8, pady=4, sticky="w"
														
 
															+    )
														
 
															+    tk.Button(root, text="Clear Display", width=14, fg="red",
														
 
															+              command=lambda: state.clear(mqtt_client)).grid(
														
 
															+        row=11, column=2, padx=8, pady=4
														
 
															+    )
														
 
															+
														
 
															+    tk.Label(root, text="Speaker labels appear on the display when the speaker changes.",
														
 
															+             font=("Helvetica", 8), fg="gray").grid(
														
 
															+        row=12, column=0, columnspan=3, pady=(0, 10)
														
 
															+    )
														
 
															+
														
 
															+    _apply_all()   # activate defaults immediately
														
 
															+    root.mainloop()
														
 
															+
														
 
															+
														
 
															+# ── Entry point ───────────────────────────────────────────────────────────────
														
 
															+
														
 
															+def main() -> None:
														
 
															+    state       = BridgeState()
														
 
															+    mqtt_client = build_mqtt_client()
														
 
															+
														
 
															+    ws_thread = threading.Thread(
														
 
															+        target=run_async_loop, args=(state, mqtt_client), daemon=True
														
 
															+    )
														
 
															+    ws_thread.start()
														
 
															+    print("[Bridge] Audio pipeline running — close this window to quit")
														
 
															+
														
 
															+    run_speaker_ui(state, mqtt_client)
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    main()
														
--- a/bridge/requirements.txt
+++ b/bridge/requirements.txt
@@ -0,0 +1,4 @@
 
															+paho-mqtt>=2.0
														
 
															+websockets>=12.0
														
 
															+sounddevice>=0.4.6
														
 
															+numpy>=1.24
														
--- a/docs/hardware-wiring.md
+++ b/docs/hardware-wiring.md
@@ -0,0 +1,35 @@
 
															+# Hardware Wiring
														
 
															+
														
 
															+## Waveshare 7.5" V2 e-ink → ESP32
														
 
															+
														
 
															+| Display pin | ESP32 GPIO | Notes |
														
 
															+|---|---|---|
														
 
															+| BUSY | 4 | Input — display signals when busy |
														
 
															+| RST | 16 | Reset |
														
 
															+| DC | 17 | Data/command select |
														
 
															+| CS | 5 | SPI chip select |
														
 
															+| CLK | 18 | SPI clock (hardware SPI) |
														
 
															+| DIN | 23 | SPI MOSI (ESP32-WROOM-32 only — the ESP32-S3 has no GPIO 23, so the `esp32-s3` build needs a different pin) |
														
 
															+| GND | GND | |
														
 
															+| VCC | 3.3 V | Do **not** use 5 V |
														
 
															+
														
 
															+> These pin assignments match `main.cpp`. If you need to remap them, change
														
 
															+> the `PIN_*` defines at the top of the file — the SPI CLK and DIN pins (18, 23)
														
 
															+> are hardware SPI and cannot be freely remapped without switching to software SPI.
														
 
															+
														
 
															+## Power
														
 
															+
														
 
															+- Display: powered from the ESP32 3.3 V rail. Current draw during refresh is
														
 
															+  ~30 mA peak — within ESP32 rail limits for a single display.
														
 
															+- ESP32: power from USB (5 V). For permanent installation use a 5 V USB wall
														
 
															+  adapter rated ≥ 1 A.
														
 
															+
														
 
															+## Audio input
														
 
															+
														
 
															+| Source | Connection | Notes |
														
 
															+|---|---|---|
														
 
															+| Mixing desk | Line-out → USB audio interface → PC USB | Cleanest signal; recommended |
														
 
															+| Microphone | USB condenser → PC USB | Use if mixer feed not available |
														
 
															+
														
 
															+Whisper performs best with a clean, low-noise signal. A direct line feed from
														
 
															+the mixing desk eliminates room echo and background noise.
														
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -0,0 +1,121 @@
 
															+# Setup Guide
														
 
															+
														
 
															+## Prerequisites
														
 
															+
														
 
															+| Component | Version | Notes |
														
 
															+|---|---|---|
														
 
															+| Python | 3.11+ | Windows install from python.org |
														
 
															+| NVIDIA GPU driver | Latest | RTX series recommended |
														
 
															+| CUDA toolkit | 12.x | Required by faster-whisper |
														
 
															+| Mosquitto | 2.x | MQTT broker |
														
 
															+| WhisperLiveKit | Latest | `pip install whisperlivekit` |
														
 
															+| PlatformIO | Latest | Via VS Code extension |
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 1 — Install Mosquitto (MQTT broker)
														
 
															+
														
 
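															+Download from mosquitto.org and install with default settings.
															+
															+Mosquitto 2.x accepts connections only from the local machine until a listener
															+is configured. So that the ESP32 can connect, add these two lines to
															+`mosquitto.conf` (in the Mosquitto install folder) and allow TCP port 1883
															+through the Windows firewall:
															+
															+```
															+listener 1883
															+allow_anonymous true
															+```
															+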
														
 
															+Start the service:
														
 
															+
														
 
															+```
														
 
															+net start mosquitto
														
 
															+```
														
 
															+
														
 
															+Verify it's running:
														
 
															+
														
 
															+```
														
 
															+mosquitto_sub -h localhost -t "#" -v
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 2 — Install WhisperLiveKit
														
 
															+
														
 
															+```
														
 
															+pip install whisperlivekit
														
 
															+```
														
 
															+
														
 
															+Start the server with diarization enabled:
														
 
															+
														
 
															+```
														
 
															+wlk --model large-v3 --language en --diarization
														
 
															+```
														
 
															+
														
 
															+The first run downloads the model (~3 GB). The WebSocket will be available at
														
 
															+`ws://localhost:8000/asr`. Verify by opening `http://localhost:8000` in a browser.
														
 
															+
														
 
															+> **Latency note:** If `large-v3` is too slow on your GPU, try
														
 
															+> `--model distil-large-v3` for similar accuracy at lower latency.
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 3 — Install the Python bridge
														
 
															+
														
 
															+```
														
 
															+cd bridge
														
 
															+pip install -r requirements.txt
														
 
															+```
														
 
															+
														
 
															+Run it:
														
 
															+
														
 
															+```
														
 
															+python bridge.py
														
 
															+```
														
 
															+
														
 
															+A small window opens for assigning friendly names to auto-detected speakers
														
 
															+(SPEAKER_00, SPEAKER_01, …). The defaults (Pastor, Reader, Guest, Choir) are
														
 
															+applied immediately — edit them if your service has different roles.
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 4 — Flash the ESP32
														
 
															+
														
 
															+1. Open the `esp32/` folder in VS Code with the PlatformIO extension installed.
														
 
															+2. Edit `src/main.cpp` — fill in your WiFi credentials and the PC's IP address:
														
 
															+
														
 
															+   ```cpp
														
 
															+   #define WIFI_SSID     "YourNetwork"
														
 
															+   #define WIFI_PASSWORD "YourPassword"
														
 
															+   #define MQTT_HOST     "192.168.1.100"   // run `ipconfig` on the PC to find this
														
 
															+   ```
														
 
															+
														
 
															+3. Select the correct environment in PlatformIO:
														
 
															+   - `esp32dev` for ESP32-WROOM-32
														
 
															+   - `esp32-s3` for ESP32-S3 (recommended for larger RAM)
														
 
															+
														
 
															+4. Click **Upload**. Open Serial Monitor at 115200 baud to see boot messages.
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 5 — End-to-end test
														
 
															+
														
 
															+Run these checks in order:
														
 
															+
														
 
															+1. **Whisper standalone** — speak into the mic, verify text appears at
														
 
															+   `http://localhost:8000`.
														
 
															+
														
 
															+2. **MQTT manually** — with the ESP32 connected, publish a test message:
														
 
															+
														
 
															+   ```
														
 
															+   mosquitto_pub -h localhost -t display/text -m "{\"lines\":[\"Line one\",\"Line two\",\"Line three\"]}"
														
 
															+   ```
														
 
															+
														
 
															+   The display should refresh within ~2 seconds.
														
 
															+
														
 
															+3. **Full pipeline** — start the bridge, speak naturally. Text should appear on
														
 
															+   the display within 3–5 seconds of speech.
														
 
															+
														
 
															+4. **Speaker labels** — if two people speak alternately, `[PASTOR]` / `[READER]`
														
 
															+   labels should appear as speaker changes are detected.
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 6 — Deployment checklist
														
 
															+
														
 
															+- [ ] PC set to never sleep during services
														
 
															+- [ ] Mosquitto service set to start automatically (`sc config mosquitto start=auto`)
														
 
															+- [ ] WhisperLiveKit added to Windows startup (Task Scheduler or a `.bat` file)
														
 
															+- [ ] ESP32 powered from a USB wall adapter (not PC USB, to avoid dependency)
														
 
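															+- [ ] Static IP (DHCP reservation) assigned to the PC running Mosquitto — the firmware's `MQTT_HOST` is hard-coded to it; reserving an address for the ESP32 as well is optional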
														
 
															+- [ ] Audio input confirmed — direct mixer feed preferred over microphone
														
--- a/esp32/platformio.ini
+++ b/esp32/platformio.ini
@@ -0,0 +1,26 @@
 
															+[platformio]
														
 
															+default_envs = esp32dev
														
 
															+
														
 
															+; ── ESP32-WROOM-32 (standard, 520 KB RAM) ────────────────────────────────────
														
 
															+[env:esp32dev]
														
 
															+platform      = espressif32
														
 
															+board         = esp32dev
														
 
															+framework     = arduino
														
 
															+monitor_speed = 115200
														
 
															+lib_deps =
														
 
															+    knolleary/PubSubClient @ ^2.8
														
 
															+    bblanchon/ArduinoJson @ ^7.0
														
 
															+    zinggjm/GxEPD2 @ ^1.5
														
 
															+    adafruit/Adafruit GFX Library @ ^1.11
														
 
															+
														
 
															+; ── ESP32-S3 (preferred — more RAM, good for large font bitmaps) ──────────────
														
 
															+[env:esp32-s3]
														
 
															+platform      = espressif32
														
 
															+board         = esp32-s3-devkitc-1
														
 
															+framework     = arduino
														
 
															+monitor_speed = 115200
														
 
															+lib_deps =
														
 
															+    knolleary/PubSubClient @ ^2.8
														
 
															+    bblanchon/ArduinoJson @ ^7.0
														
 
															+    zinggjm/GxEPD2 @ ^1.5
														
 
															+    adafruit/Adafruit GFX Library @ ^1.11
														
--- a/esp32/src/main.cpp
+++ b/esp32/src/main.cpp
@@ -0,0 +1,230 @@
 
															+/*
														
 
															+ * main.cpp — ESP32 e-ink Display Firmware
														
 
															+ *
														
 
															+ * Connects to WiFi + MQTT broker, subscribes to display/text and display/clear,
														
 
															+ * renders rolling 3-line text on a Waveshare 7.5" V2 (800x480) e-ink display.
														
 
															+ *
														
 
															+ * Required libraries (platformio.ini):
														
 
															+ *   knolleary/PubSubClient
														
 
															+ *   bblanchon/ArduinoJson
														
 
															+ *   zinggjm/GxEPD2
														
 
															+ *   adafruit/Adafruit GFX Library
														
 
															+ */
														
 
															+
														
 
															+#include <Arduino.h>
														
 
															+#include <WiFi.h>
														
 
															+#include <PubSubClient.h>
														
 
															+#include <ArduinoJson.h>
														
 
															+#include <GxEPD2_BW.h>
														
 
															+#include <Fonts/FreeSansBold24pt7b.h>
														
 
															+#include <Fonts/FreeSans9pt7b.h>
														
 
															+
														
 
															+// ── User config — edit before flashing ───────────────────────────────────────
														
 
															+
														
 
															+#define WIFI_SSID      "YOUR_WIFI_SSID"
														
 
															+#define WIFI_PASSWORD  "YOUR_WIFI_PASSWORD"
														
 
															+#define MQTT_HOST      "192.168.1.100"   // Windows PC IP running Mosquitto
														
 
															+#define MQTT_PORT      1883
														
 
															+#define DEVICE_ID      "display-01"
														
 
															+
														
 
															+// ── Display pins (Waveshare 7.5" V2 → ESP32) ─────────────────────────────────
														
 
															+// CLK → GPIO 18  (SPI SCLK, wired directly — managed by library)
														
 
															+// DIN → GPIO 23  (SPI MOSI, wired directly — managed by library)
														
 
															+
														
 
															+#define PIN_CS    5
														
 
															+#define PIN_DC   17
														
 
															+#define PIN_RST  16
														
 
															+#define PIN_BUSY  4
														
 
															+
														
 
															+// ── MQTT topics ───────────────────────────────────────────────────────────────
														
 
															+
														
 
															+#define TOPIC_TEXT   "display/text"
														
 
															+#define TOPIC_CLEAR  "display/clear"
														
 
															+#define TOPIC_STATUS "display/status"
														
 
															+
														
 
															+// ── Display layout ────────────────────────────────────────────────────────────
														
 
															+
														
 
															+#define DISPLAY_W       800
														
 
															+#define DISPLAY_H       480
														
 
															+#define MARGIN_X         12
														
 
															+#define LINE_1_BASELINE 110    // y baseline of first text line
														
 
															+#define LINE_SPACING    145    // pixels between baselines (generous for readability)
														
 
															+
														
 
															+// ── Display object ────────────────────────────────────────────────────────────
														
 
															+
														
 
															+GxEPD2_BW<GxEPD2_750_T7, GxEPD2_750_T7::HEIGHT> display(
														
 
															+    GxEPD2_750_T7(PIN_CS, PIN_DC, PIN_RST, PIN_BUSY)
														
 
															+);
														
 
															+
														
 
															+// ── MQTT + WiFi ───────────────────────────────────────────────────────────────
														
 
															+
														
 
															+WiFiClient   wifiClient;
														
 
															+PubSubClient mqtt(wifiClient);
														
 
															+
														
 
															+// ── Pending render state ──────────────────────────────────────────────────────
														
 
															+
														
 
															+static String pendingLines[3];
														
 
															+static bool   renderPending = false;
														
 
															+
														
 
															+// ── Display helpers ───────────────────────────────────────────────────────────
														
 
															+
														
 
															+/**
															+ * Draws up to three caption lines on the e-ink panel using the full window.
															+ *
															+ * Each non-empty entry is printed at MARGIN_X with its baseline at
															+ * LINE_1_BASELINE + index * LINE_SPACING; an empty entry leaves its slot blank.
															+ * Entries wrapped in square brackets (e.g. "[PASTOR]") are treated as speaker
															+ * labels and use the small FreeSans 9pt font; all other text uses
															+ * FreeSansBold 24pt.
															+ *
															+ * @param lines  array of three strings, top line first
															+ */
															+void renderLines(const String lines[3]) {
															+    display.setFullWindow();
															+    display.firstPage();
															+    do {
															+        // Start every page pass from a white background.
															+        display.fillScreen(GxEPD_WHITE);
															+
															+        for (int row = 0; row < 3; row++) {
															+            const String& text = lines[row];
															+            if (text.length() > 0) {
															+                const bool speakerLabel = text.startsWith("[") && text.endsWith("]");
															+                display.setFont(speakerLabel ? &FreeSans9pt7b : &FreeSansBold24pt7b);
															+                display.setTextColor(GxEPD_BLACK);
															+                display.setCursor(MARGIN_X, LINE_1_BASELINE + row * LINE_SPACING);
															+                display.print(text);
															+            }
															+        }
															+    } while (display.nextPage());
															+
															+    Serial.println("[Display] Refreshed");
															+}
														
 
															+
														
 
															+/**
															+ * Blanks the panel by painting the full window white, then logs the
															+ * action on the serial port.
															+ */
															+void clearDisplay() {
															+    display.setFullWindow();
															+    display.firstPage();
															+    do {
															+        display.fillScreen(GxEPD_WHITE);
															+    } while (display.nextPage());
															+    Serial.println("[Display] Cleared");
															+}
														
 
															+
														
 
															+/** Shows the start-up banner: "DISPLAY READY" on the third line, the first two left blank. */
															+void showBootMessage() {
															+    const String bootScreen[3] = {"", "", "  DISPLAY READY"};
															+    renderLines(bootScreen);
															+}
														
 
															+
														
 
															+// ── MQTT callback ─────────────────────────────────────────────────────────────
														
 
															+
														
 
															+/**
															+ * MQTT message callback: turns an incoming message into pending display state.
															+ *
															+ *  - TOPIC_CLEAR: blanks all three pending lines.
															+ *  - TOPIC_TEXT:  parses the payload as JSON and reads its "lines" array;
															+ *                 the first three entries become the pending lines and any
															+ *                 missing entries are blanked.
															+ *
															+ * Nothing is drawn here. The function only sets renderPending so that loop()
															+ * performs the refresh. Payloads of 600 bytes or more, invalid JSON, a missing
															+ * "lines" array and other topics are ignored.
															+ *
															+ * @param topic    topic the message arrived on
															+ * @param payload  message bytes
															+ * @param len      number of bytes in payload
															+ */
															+void onMqttMessage(char* topic, byte* payload, unsigned int len) {
															+    if (strcmp(topic, TOPIC_CLEAR) == 0) {
															+        for (String& line : pendingLines) line = "";
															+        renderPending = true;
															+        return;
															+    }
															+
															+    if (strcmp(topic, TOPIC_TEXT) != 0) return;
															+
															+    // Copy into a local buffer so the parser receives a NUL-terminated string.
															+    char json[600];
															+    if (len >= sizeof(json)) {
															+        Serial.println("[MQTT] Payload too large, skipped");
															+        return;
															+    }
															+    memcpy(json, payload, len);
															+    json[len] = '\0';
															+
															+    JsonDocument doc;
															+    DeserializationError parseError = deserializeJson(doc, json);
															+    if (parseError) {
															+        Serial.printf("[MQTT] JSON error: %s\n", parseError.c_str());
															+        return;
															+    }
															+
															+    JsonArray entries = doc["lines"].as<JsonArray>();
															+    if (!entries) return;
															+
															+    // Take at most three entries, then blank whatever slots are left over.
															+    int filled = 0;
															+    for (JsonVariant entry : entries) {
															+        if (filled == 3) break;
															+        pendingLines[filled] = entry.as<String>();
															+        filled++;
															+    }
															+    while (filled < 3) pendingLines[filled++] = "";
															+
															+    renderPending = true;
															+}
														
 
															+
														
 
															+// ── WiFi ──────────────────────────────────────────────────────────────────────
														
 
															+
														
 
															+/**
															+ * Blocks until WiFi reports WL_CONNECTED.
															+ *
															+ * Returns immediately when already connected. Otherwise starts a connection
															+ * to WIFI_SSID and polls every 500 ms, printing one progress dot per poll.
															+ * If more than 30 s pass without a connection, the chip is restarted.
															+ */
															+void connectWifi() {
															+    if (WiFi.status() == WL_CONNECTED) return;
															+
															+    Serial.printf("[WiFi] Connecting to %s", WIFI_SSID);
															+    WiFi.begin(WIFI_SSID, WIFI_PASSWORD);
															+
															+    const unsigned long kTimeoutMs = 30000;
															+    const unsigned long startedAt = millis();
															+    for (;;) {
															+        if (WiFi.status() == WL_CONNECTED) break;
															+
															+        if (millis() - startedAt > kTimeoutMs) {
															+            Serial.println("\n[WiFi] Timeout — restarting");
															+            ESP.restart();
															+        }
															+        delay(500);
															+        Serial.print(".");
															+    }
															+
															+    Serial.printf("\n[WiFi] Connected: %s\n", WiFi.localIP().toString().c_str());
															+}
														
 
															+
														
 
															+// ── MQTT ──────────────────────────────────────────────────────────────────────
														
 
															+
														
 
															+/**
															+ * Blocks until the MQTT client is connected to the broker.
															+ *
															+ * On success it subscribes to TOPIC_TEXT and TOPIC_CLEAR and publishes
															+ * {"ready":true} on TOPIC_STATUS. On failure it logs the client state,
															+ * waits 5 s and tries again.
															+ *
															+ * WiFi is re-checked before every attempt. Without that check, a WiFi drop
															+ * during the retry loop would leave the device retrying MQTT indefinitely
															+ * and never reaching the recovery (and 30 s restart timeout) in connectWifi().
															+ */
															+void connectMqtt() {
															+    while (!mqtt.connected()) {
															+        // Returns immediately while WiFi is up; otherwise reconnects or restarts the chip.
															+        connectWifi();
															+
															+        Serial.printf("[MQTT] Connecting to %s:%d ...\n", MQTT_HOST, MQTT_PORT);
															+
															+        if (mqtt.connect(DEVICE_ID)) {
															+            Serial.println("[MQTT] Connected");
															+            mqtt.subscribe(TOPIC_TEXT);
															+            mqtt.subscribe(TOPIC_CLEAR);
															+            mqtt.publish(TOPIC_STATUS, "{\"ready\":true}");
															+        } else {
															+            Serial.printf("[MQTT] Failed (state=%d), retry in 5 s\n", mqtt.state());
															+            delay(5000);
															+        }
															+    }
															+}
														
 
															+
														
 
															+// ── Arduino setup & loop ──────────────────────────────────────────────────────
														
 
															+
														
 
															+/**
															+ * One-time initialisation, in order: serial log, e-ink panel and boot
															+ * banner, WiFi, MQTT client configuration, first MQTT connection.
															+ */
															+void setup() {
															+    Serial.begin(115200);
															+    Serial.println("\n[Boot] Church Live Transcription Display");
															+
															+    // Bring the panel up first so the boot banner is drawn before the network connects.
															+    display.init(115200);
															+    display.setRotation(0);
															+    clearDisplay();
															+    showBootMessage();
															+
															+    connectWifi();
															+
															+    // Configure the client before the first connection attempt.
															+    mqtt.setServer(MQTT_HOST, MQTT_PORT)
															+        .setCallback(onMqttMessage);
															+    mqtt.setBufferSize(600);
															+    mqtt.setKeepAlive(60);
															+
															+    connectMqtt();
															+}
														
 
															+
														
 
															+/**
															+ * Main loop: restores WiFi and MQTT connectivity when lost, services the
															+ * MQTT client, and performs any display refresh requested by the callback.
															+ */
															+void loop() {
															+    if (WiFi.status() != WL_CONNECTED) {
															+        Serial.println("[WiFi] Lost — reconnecting...");
															+        connectWifi();
															+    }
															+    if (!mqtt.connected()) connectMqtt();
															+    mqtt.loop();
															+
															+    // Drawing is done here, not in the MQTT callback, so the callback returns quickly.
															+    if (!renderPending) return;
															+
															+    renderPending = false;
															+    renderLines(pendingLines);
															+}