1 tháng trước cách đây · 7c43cfe4ec
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -49,10 +49,10 @@ AUDIO_DEVICE: int | None = 12
 
				 SPEAKERS_FILE = Path(__file__).parent / "speakers.json"
			
 
				 
			
 
				 DEFAULT_SPEAKERS: dict[str, dict] = {
			
 
				-    "SPEAKER_00": {"name": "A.A.A", "role": "Serving Brother", "location": "Sydney"},
			
 
				-    "SPEAKER_01": {"name": "A.A.A", "role": "Contributor",     "location": "London"},
			
 
				-    "SPEAKER_02": {"name": "A.A.A", "role": "Contributor",     "location": "Hobart"},
			
 
				-    "SPEAKER_03": {"name": "A.A.A", "role": "Contributor",     "location": "Perth"},
			
 
				+    "SPEAKER_00": {"name": "A.A.A", "role": "Serving Brother", "location": "Sydney","has_embedding": false,"embedding_updated": null,"colour": "#16a34a","notes": ""},
			
 
				+    "SPEAKER_01": {"name": "A.A.A", "role": "Contributor",     "location": "London","has_embedding": false,"embedding_updated": null,"colour": "#16a34a","notes": ""},
			
 
				+    "SPEAKER_02": {"name": "A.A.A", "role": "Contributor",     "location": "Hobart","has_embedding": false,"embedding_updated": null,"colour": "#16a34a","notes": ""},
			
 
				+    "SPEAKER_03": {"name": "A.A.A", "role": "Contributor",     "location": "Perth","has_embedding": false,"embedding_updated": null,"colour": "#16a34a","notes": ""},
			
 
				 }
			
 
				 
			
 
				 # ── Audio injection queue ─────────────────────────────────────────────────────
			
--- a/bridge/embeddings.py
+++ b/bridge/embeddings.py
@@ -0,0 +1,760 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+embeddings.py — Speaker Voiceprint Management
			
 
				+
			
 
				+Handles extraction, storage, and matching of speaker voice embeddings
			
 
				+using pyannote.audio's SpeakerEmbedding pipeline.
			
 
				+
			
 
				+Embeddings are stored as numpy .npy files in bridge/embeddings/.
			
 
				+speakers.json is updated with has_embedding / embedding_updated metadata.
			
 
				+
			
 
				+Designed for future migration to a remote vector DB (pgvector, Qdrant, etc.)
			
 
				+— each embedding is a 512-dim float32 array, stored as a .npy binary file.
			
 
				+
			
 
				+Public API
			
 
				+----------
			
 
				+    registry = EmbeddingRegistry()
			
 
				+
			
 
				+    # Extract from an audio file segment (e.g. 10s snip from a recording)
			
 
				+    embedding = registry.extract(audio_path, start_sec=45.0, end_sec=55.0)
			
 
				+
			
 
				+    # Save a voiceprint for a known speaker
			
 
				+    registry.save(speaker_id="SPEAKER_00", embedding=embedding)
			
 
				+
			
 
				+    # Check if a speaker has a stored voiceprint
			
 
				+    registry.has(speaker_id="SPEAKER_00")  # -> bool
			
 
				+
			
 
				+    # Find the best matching speaker for a live embedding
			
 
				+    match = registry.find_match(live_embedding, threshold=0.82)
			
 
				+    # -> ("SPEAKER_00", 0.91) or None
			
 
				+
			
 
				+    # Load a stored embedding directly
			
 
				+    emb = registry.load("SPEAKER_00")  # -> np.ndarray | None
			
 
				+
			
 
				+    # Delete a voiceprint
			
 
				+    registry.delete("SPEAKER_00")
			
 
				+
			
 
				+    # List all speakers with voiceprints
			
 
				+    registry.list_enrolled()  # -> [{"id": ..., "updated": ...}, ...]
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import logging
			
 
				+import os
			
 
				+from datetime import datetime, timezone
			
 
				+from pathlib import Path
			
 
				+from typing import Optional
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+# ── Paths ──────────────────────────────────────────────────────────────────────
			
 
				+
			
 
				+# Default locations relative to this file (bridge/)
			
 
				+_HERE            = Path(__file__).parent
			
 
				+EMBEDDINGS_DIR   = _HERE / "embeddings"
			
 
				+SPEAKERS_FILE    = _HERE / "speakers.json"
			
 
				+
			
 
				+# Embedding dimensions produced by pyannote SpeakerEmbedding
			
 
				+EMBEDDING_DIM = 512
			
 
				+
			
 
				+# Cosine similarity threshold for automatic name assignment
			
 
				+# 0.82 = conservative (fewer false positives), 0.75 = more permissive
			
 
				+DEFAULT_THRESHOLD = 0.82
			
 
				+
			
 
				+# Minimum audio duration in seconds for reliable embedding extraction
			
 
				+MIN_AUDIO_SEC = 5.0
			
 
				+
			
 
				+
			
 
				+# ── Lazy model loader ──────────────────────────────────────────────────────────
			
 
				+
			
 
				+_pipeline = None  # loaded on first use
			
 
				+
			
 
				+def _get_pipeline():
			
 
				+    """
			
 
				+    Load the pyannote SpeakerEmbedding pipeline on first call.
			
 
				+    Requires HF_TOKEN env var or huggingface-cli login.
			
 
				+    Model: pyannote/embedding (3M params, fast, accurate).
			
 
				+    """
			
 
				+    global _pipeline
			
 
				+    if _pipeline is not None:
			
 
				+        return _pipeline
			
 
				+
			
 
				+    try:
			
 
				+        from pyannote.audio import Model, Inference
			
 
				+    except ImportError as exc:
			
 
				+        raise RuntimeError(
			
 
				+            "pyannote.audio is required for voiceprint extraction. "
			
 
				+            "Install with: pip install pyannote.audio"
			
 
				+        ) from exc
			
 
				+
			
 
				+    hf_token = os.environ.get("HF_TOKEN")
			
 
				+    if not hf_token:
			
 
				+        raise RuntimeError(
			
 
				+            "HF_TOKEN environment variable is not set. "
			
 
				+            "Set it in start.bat or run: huggingface-cli login"
			
 
				+        )
			
 
				+
			
 
				+    logger.info("[Embeddings] Loading pyannote speaker embedding model...")
			
 
				+    try:
			
 
				+        model = Model.from_pretrained(
			
 
				+            "pyannote/embedding",
			
 
				+            use_auth_token=hf_token,
			
 
				+        )
			
 
				+        _pipeline = Inference(model, window="whole")
			
 
				+        logger.info("[Embeddings] Speaker embedding model loaded.")
			
 
				+    except Exception as exc:
			
 
				+        raise RuntimeError(
			
 
				+            f"Failed to load pyannote/embedding model: {exc}\n"
			
 
				+            "Ensure you have accepted the model conditions at "
			
 
				+            "https://huggingface.co/pyannote/embedding"
			
 
				+        ) from exc
			
 
				+
			
 
				+    return _pipeline
			
 
				+
			
 
				+
			
 
				+# ── Helpers ────────────────────────────────────────────────────────────────────
			
 
				+
			
 
				+def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
			
 
				+    """Cosine similarity between two 1-D vectors. Returns value in [-1, 1]."""
			
 
				+    a = a.flatten().astype(np.float32)
			
 
				+    b = b.flatten().astype(np.float32)
			
 
				+    norm_a = np.linalg.norm(a)
			
 
				+    norm_b = np.linalg.norm(b)
			
 
				+    if norm_a == 0 or norm_b == 0:
			
 
				+        return 0.0
			
 
				+    return float(np.dot(a, b) / (norm_a * norm_b))
			
 
				+
			
 
				+
			
 
				+def _load_speakers_json() -> dict:
			
 
				+    """Load speakers.json, returning empty dict on any failure."""
			
 
				+    if SPEAKERS_FILE.exists():
			
 
				+        try:
			
 
				+            raw = json.loads(SPEAKERS_FILE.read_text(encoding="utf-8"))
			
 
				+            # Support both legacy flat format {"SPEAKER_00": "Name"}
			
 
				+            # and new rich format {"SPEAKER_00": {"name": "...", ...}}
			
 
				+            return raw if isinstance(raw, dict) else {}
			
 
				+        except (json.JSONDecodeError, OSError):
			
 
				+            pass
			
 
				+    return {}
			
 
				+
			
 
				+
			
 
				+def _save_speakers_json(data: dict) -> None:
			
 
				+    try:
			
 
				+        SPEAKERS_FILE.write_text(
			
 
				+            json.dumps(data, indent=2, ensure_ascii=False),
			
 
				+            encoding="utf-8",
			
 
				+        )
			
 
				+    except OSError as exc:
			
 
				+        logger.error(f"[Embeddings] Could not save speakers.json: {exc}")
			
 
				+
			
 
				+
			
 
				+def _normalise_speaker_entry(entry) -> dict:
			
 
				+    """
			
 
				+    Ensure a speakers.json entry is in rich-dict format.
			
 
				+    Handles legacy flat strings: "Pastor John" -> {"name": "Pastor John", ...}
			
 
				+    """
			
 
				+    if isinstance(entry, str):
			
 
				+        return {
			
 
				+            "name":              entry,
			
 
				+            "has_embedding":     False,
			
 
				+            "embedding_updated": None,
			
 
				+            "colour":            None,
			
 
				+            "notes":             "",
			
 
				+        }
			
 
				+    if isinstance(entry, dict):
			
 
				+        entry.setdefault("name",              "")
			
 
				+        entry.setdefault("has_embedding",     False)
			
 
				+        entry.setdefault("embedding_updated", None)
			
 
				+        entry.setdefault("colour",            None)
			
 
				+        entry.setdefault("notes",             "")
			
 
				+        return entry
			
 
				+    return {"name": str(entry), "has_embedding": False,
			
 
				+            "embedding_updated": None, "colour": None, "notes": ""}
			
 
				+
			
 
				+
			
 
				+def _update_speaker_meta(speaker_id: str, has_embedding: bool) -> None:
			
 
				+    """Update speakers.json metadata for a speaker after enrolment/deletion."""
			
 
				+    data = _load_speakers_json()
			
 
				+    entry = _normalise_speaker_entry(data.get(speaker_id, speaker_id))
			
 
				+    entry["has_embedding"]     = has_embedding
			
 
				+    entry["embedding_updated"] = (
			
 
				+        datetime.now(timezone.utc).isoformat() if has_embedding else None
			
 
				+    )
			
 
				+    data[speaker_id] = entry
			
 
				+    _save_speakers_json(data)
			
 
				+
			
 
				+
			
 
				+# ── Audio extraction helper ────────────────────────────────────────────────────
			
 
				+
			
 
				+def extract_audio_segment(
			
 
				+    audio_path: Path | str,
			
 
				+    start_sec:  float,
			
 
				+    end_sec:    float,
			
 
				+    out_path:   Optional[Path] = None,
			
 
				+) -> Path:
			
 
				+    """
			
 
				+    Extract a segment of audio using miniaudio and save as a temporary WAV file.
			
 
				+
			
 
				+    Args:
			
 
				+        audio_path: Source audio file (MP3, WAV, FLAC, OGG, M4A, etc.)
			
 
				+        start_sec:  Start time in seconds
			
 
				+        end_sec:    End time in seconds
			
 
				+        out_path:   Optional explicit output path; if None, a temp file is used
			
 
				+
			
 
				+    Returns:
			
 
				+        Path to the extracted WAV file (caller should delete when done if temp)
			
 
				+    """
			
 
				+    import tempfile
			
 
				+    import struct
			
 
				+    import wave
			
 
				+
			
 
				+    try:
			
 
				+        import miniaudio
			
 
				+    except ImportError as exc:
			
 
				+        raise RuntimeError("miniaudio is required: pip install miniaudio") from exc
			
 
				+
			
 
				+    duration = end_sec - start_sec
			
 
				+    if duration < MIN_AUDIO_SEC:
			
 
				+        logger.warning(
			
 
				+            f"[Embeddings] Short segment ({duration:.1f}s < {MIN_AUDIO_SEC}s) "
			
 
				+            "— embedding may be less reliable"
			
 
				+        )
			
 
				+
			
 
				+    sample_rate   = 16000
			
 
				+    n_channels    = 1
			
 
				+    frames_needed = int(duration * sample_rate)
			
 
				+    skip_frames   = int(start_sec * sample_rate)
			
 
				+
			
 
				+    # Stream file and skip to start position
			
 
				+    stream = miniaudio.stream_file(
			
 
				+        str(audio_path),
			
 
				+        output_format=miniaudio.SampleFormat.SIGNED16,
			
 
				+        nchannels=n_channels,
			
 
				+        sample_rate=sample_rate,
			
 
				+        frames_to_read=4096,
			
 
				+    )
			
 
				+
			
 
				+    collected: list[bytes] = []
			
 
				+    collected_frames = 0
			
 
				+    skipped_frames   = 0
			
 
				+
			
 
				+    for chunk in stream:
			
 
				+        chunk_bytes  = bytes(chunk)
			
 
				+        chunk_frames = len(chunk_bytes) // 2  # int16 = 2 bytes per sample
			
 
				+
			
 
				+        # Skip frames before start_sec
			
 
				+        if skipped_frames < skip_frames:
			
 
				+            remaining_skip = skip_frames - skipped_frames
			
 
				+            if chunk_frames <= remaining_skip:
			
 
				+                skipped_frames += chunk_frames
			
 
				+                continue
			
 
				+            else:
			
 
				+                # Partial skip
			
 
				+                offset        = remaining_skip * 2
			
 
				+                chunk_bytes   = chunk_bytes[offset:]
			
 
				+                chunk_frames  = len(chunk_bytes) // 2
			
 
				+                skipped_frames = skip_frames
			
 
				+
			
 
				+        # Collect up to frames_needed
			
 
				+        remaining_needed = frames_needed - collected_frames
			
 
				+        if chunk_frames >= remaining_needed:
			
 
				+            collected.append(chunk_bytes[: remaining_needed * 2])
			
 
				+            collected_frames += remaining_needed
			
 
				+            break
			
 
				+        else:
			
 
				+            collected.append(chunk_bytes)
			
 
				+            collected_frames += chunk_frames
			
 
				+
			
 
				+    if not collected:
			
 
				+        raise ValueError(
			
 
				+            f"No audio extracted from {audio_path} at {start_sec}–{end_sec}s. "
			
 
				+            "Check that the file is valid and the time range is within its duration."
			
 
				+        )
			
 
				+
			
 
				+    pcm_data = b"".join(collected)
			
 
				+
			
 
				+    # Write to WAV
			
 
				+    if out_path is None:
			
 
				+        tmp = tempfile.NamedTemporaryFile(
			
 
				+            suffix=".wav", delete=False, prefix="embedding_"
			
 
				+        )
			
 
				+        out_path = Path(tmp.name)
			
 
				+        tmp.close()
			
 
				+
			
 
				+    with wave.open(str(out_path), "wb") as wf:
			
 
				+        wf.setnchannels(n_channels)
			
 
				+        wf.setsampwidth(2)  # int16
			
 
				+        wf.setframerate(sample_rate)
			
 
				+        wf.writeframes(pcm_data)
			
 
				+
			
 
				+    return out_path
			
 
				+
			
 
				+
			
 
				+# ── Main registry class ────────────────────────────────────────────────────────
			
 
				+
			
 
				+class EmbeddingRegistry:
			
 
				+    """
			
 
				+    Manages speaker voice embeddings on disk.
			
 
				+
			
 
				+    Thread-safe for reading; write operations should be called from
			
 
				+    a single thread (the admin server) in normal use.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        embeddings_dir: Path | str = EMBEDDINGS_DIR,
			
 
				+        speakers_file:  Path | str = SPEAKERS_FILE,
			
 
				+        threshold:      float      = DEFAULT_THRESHOLD,
			
 
				+    ):
			
 
				+        self.embeddings_dir = Path(embeddings_dir)
			
 
				+        self.speakers_file  = Path(speakers_file)
			
 
				+        self.threshold      = threshold
			
 
				+        self.embeddings_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    def _path(self, speaker_id: str) -> Path:
			
 
				+        return self.embeddings_dir / f"{speaker_id}.npy"
			
 
				+
			
 
				+    # ── Core storage ───────────────────────────────────────────────────────────
			
 
				+
			
 
				+    def save(self, speaker_id: str, embedding: np.ndarray) -> None:
			
 
				+        """Persist a voiceprint embedding for a speaker."""
			
 
				+        emb = embedding.flatten().astype(np.float32)
			
 
				+        if emb.shape[0] != EMBEDDING_DIM:
			
 
				+            logger.warning(
			
 
				+                f"[Embeddings] Unexpected embedding dim {emb.shape[0]} "
			
 
				+                f"(expected {EMBEDDING_DIM}) for {speaker_id}"
			
 
				+            )
			
 
				+        np.save(str(self._path(speaker_id)), emb)
			
 
				+        _update_speaker_meta(speaker_id, has_embedding=True)
			
 
				+        logger.info(f"[Embeddings] Saved voiceprint for {speaker_id}")
			
 
				+
			
 
				+    def load(self, speaker_id: str) -> Optional[np.ndarray]:
			
 
				+        """Load a stored embedding, or return None if not found."""
			
 
				+        p = self._path(speaker_id)
			
 
				+        if not p.exists():
			
 
				+            return None
			
 
				+        try:
			
 
				+            return np.load(str(p)).astype(np.float32)
			
 
				+        except Exception as exc:
			
 
				+            logger.error(f"[Embeddings] Failed to load {p}: {exc}")
			
 
				+            return None
			
 
				+
			
 
				+    def has(self, speaker_id: str) -> bool:
			
 
				+        """Return True if a voiceprint exists for this speaker."""
			
 
				+        return self._path(speaker_id).exists()
			
 
				+
			
 
				+    def delete(self, speaker_id: str) -> bool:
			
 
				+        """Delete a stored voiceprint. Returns True if it existed."""
			
 
				+        p = self._path(speaker_id)
			
 
				+        if p.exists():
			
 
				+            p.unlink()
			
 
				+            _update_speaker_meta(speaker_id, has_embedding=False)
			
 
				+            logger.info(f"[Embeddings] Deleted voiceprint for {speaker_id}")
			
 
				+            return True
			
 
				+        return False
			
 
				+
			
 
				+    def list_enrolled(self) -> list[dict]:
			
 
				+        """Return list of speakers with stored voiceprints."""
			
 
				+        enrolled = []
			
 
				+        for p in sorted(self.embeddings_dir.glob("*.npy")):
			
 
				+            speaker_id = p.stem
			
 
				+            data       = _load_speakers_json()
			
 
				+            entry      = _normalise_speaker_entry(data.get(speaker_id, speaker_id))
			
 
				+            enrolled.append({
			
 
				+                "id":      speaker_id,
			
 
				+                "name":    entry["name"],
			
 
				+                "updated": entry.get("embedding_updated"),
			
 
				+                "size_kb": round(p.stat().st_size / 1024, 1),
			
 
				+            })
			
 
				+        return enrolled
			
 
				+
			
 
				+    # ── Extraction ─────────────────────────────────────────────────────────────
			
 
				+
			
 
				+    def extract(
			
 
				+        self,
			
 
				+        audio_path: Path | str,
			
 
				+        start_sec:  float = 0.0,
			
 
				+        end_sec:    Optional[float] = None,
			
 
				+    ) -> np.ndarray:
			
 
				+        """
			
 
				+        Extract a speaker embedding from an audio file segment.
			
 
				+
			
 
				+        Args:
			
 
				+            audio_path: Path to audio file (MP3, WAV, FLAC, M4A, OGG, etc.)
			
 
				+            start_sec:  Start of segment in seconds (default: 0)
			
 
				+            end_sec:    End of segment in seconds (default: use whole file)
			
 
				+
			
 
				+        Returns:
			
 
				+            numpy array of shape (512,) — the speaker embedding
			
 
				+
			
 
				+        Raises:
			
 
				+            RuntimeError: if pyannote model fails to load
			
 
				+            ValueError:   if audio segment is empty or too short
			
 
				+        """
			
 
				+        import tempfile
			
 
				+
			
 
				+        audio_path = Path(audio_path)
			
 
				+        if not audio_path.exists():
			
 
				+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
			
 
				+
			
 
				+        # If no end time, get file duration and use whole file
			
 
				+        if end_sec is None:
			
 
				+            try:
			
 
				+                import miniaudio
			
 
				+                info    = miniaudio.get_file_info(str(audio_path))
			
 
				+                end_sec = info.duration
			
 
				+            except Exception:
			
 
				+                end_sec = start_sec + 30.0  # fallback
			
 
				+
			
 
				+        duration = end_sec - start_sec
			
 
				+        if duration <= 0:
			
 
				+            raise ValueError(f"Invalid segment: start={start_sec}, end={end_sec}")
			
 
				+
			
 
				+        # Extract segment to a temp WAV
			
 
				+        tmp_wav = None
			
 
				+        try:
			
 
				+            tmp_wav = extract_audio_segment(audio_path, start_sec, end_sec)
			
 
				+            pipeline = _get_pipeline()
			
 
				+
			
 
				+            # pyannote Inference accepts a file path directly
			
 
				+            embedding = pipeline(str(tmp_wav))
			
 
				+
			
 
				+            # embedding may be a pyannote Annotation or ndarray depending on version
			
 
				+            if hasattr(embedding, "data"):
			
 
				+                arr = np.array(embedding.data).flatten()
			
 
				+            elif isinstance(embedding, np.ndarray):
			
 
				+                arr = embedding.flatten()
			
 
				+            else:
			
 
				+                arr = np.array(embedding).flatten()
			
 
				+
			
 
				+            logger.info(
			
 
				+                f"[Embeddings] Extracted embedding from {audio_path.name} "
			
 
				+                f"[{start_sec:.1f}s – {end_sec:.1f}s] → shape {arr.shape}"
			
 
				+            )
			
 
				+            return arr.astype(np.float32)
			
 
				+
			
 
				+        finally:
			
 
				+            if tmp_wav and tmp_wav.exists():
			
 
				+                try:
			
 
				+                    tmp_wav.unlink()
			
 
				+                except OSError:
			
 
				+                    pass
			
 
				+
			
 
				+    def extract_and_save(
			
 
				+        self,
			
 
				+        speaker_id: str,
			
 
				+        audio_path: Path | str,
			
 
				+        start_sec:  float = 0.0,
			
 
				+        end_sec:    Optional[float] = None,
			
 
				+    ) -> np.ndarray:
			
 
				+        """
			
 
				+        Extract embedding from audio segment and immediately save it.
			
 
				+        Convenience wrapper around extract() + save().
			
 
				+        """
			
 
				+        embedding = self.extract(audio_path, start_sec, end_sec)
			
 
				+        self.save(speaker_id, embedding)
			
 
				+        return embedding
			
 
				+
			
 
				+    # ── Matching ───────────────────────────────────────────────────────────────
			
 
				+
			
 
				+    def find_match(
			
 
				+        self,
			
 
				+        live_embedding: np.ndarray,
			
 
				+        threshold:      Optional[float] = None,
			
 
				+        exclude_ids:    Optional[set[str]] = None,
			
 
				+    ) -> Optional[tuple[str, float]]:
			
 
				+        """
			
 
				+        Find the best matching enrolled speaker for a live embedding.
			
 
				+
			
 
				+        Args:
			
 
				+            live_embedding: Embedding from live audio (shape: (512,))
			
 
				+            threshold:      Cosine similarity threshold (default: self.threshold)
			
 
				+            exclude_ids:    Speaker IDs to skip (already confirmed this session)
			
 
				+
			
 
				+        Returns:
			
 
				+            (speaker_id, similarity_score) if match found above threshold,
			
 
				+            or None if no match.
			
 
				+
			
 
				+        Notes:
			
 
				+            - Similarity is cosine similarity in [-1, 1]; higher = more similar
			
 
				+            - Typical same-speaker similarity: 0.85–0.98
			
 
				+            - Typical different-speaker similarity: 0.2–0.65
			
 
				+            - Threshold 0.82 is conservative; lower to 0.75 for more permissive
			
 
				+        """
			
 
				+        if threshold is None:
			
 
				+            threshold = self.threshold
			
 
				+
			
 
				+        exclude_ids = exclude_ids or set()
			
 
				+        best_id    = None
			
 
				+        best_score = -1.0
			
 
				+
			
 
				+        for p in self.embeddings_dir.glob("*.npy"):
			
 
				+            speaker_id = p.stem
			
 
				+            if speaker_id in exclude_ids:
			
 
				+                continue
			
 
				+            stored = self.load(speaker_id)
			
 
				+            if stored is None:
			
 
				+                continue
			
 
				+            score = _cosine_similarity(live_embedding, stored)
			
 
				+            logger.debug(f"[Embeddings] {speaker_id}: similarity={score:.4f}")
			
 
				+            if score > best_score:
			
 
				+                best_score = score
			
 
				+                best_id    = speaker_id
			
 
				+
			
 
				+        if best_id and best_score >= threshold:
			
 
				+            logger.info(
			
 
				+                f"[Embeddings] Match: {best_id} (similarity={best_score:.4f})"
			
 
				+            )
			
 
				+            return (best_id, best_score)
			
 
				+
			
 
				+        logger.debug(
			
 
				+            f"[Embeddings] No match above threshold={threshold:.2f} "
			
 
				+            f"(best={best_score:.4f})"
			
 
				+        )
			
 
				+        return None
			
 
				+
			
 
				+    def similarity_scores(
			
 
				+        self,
			
 
				+        live_embedding: np.ndarray,
			
 
				+    ) -> list[dict]:
			
 
				+        """
			
 
				+        Return similarity scores against all enrolled speakers, sorted descending.
			
 
				+        Useful for admin UI diagnostics / confidence display.
			
 
				+        """
			
 
				+        results = []
			
 
				+        data    = _load_speakers_json()
			
 
				+        for p in self.embeddings_dir.glob("*.npy"):
			
 
				+            speaker_id = p.stem
			
 
				+            stored     = self.load(speaker_id)
			
 
				+            if stored is None:
			
 
				+                continue
			
 
				+            score = _cosine_similarity(live_embedding, stored)
			
 
				+            entry = _normalise_speaker_entry(data.get(speaker_id, speaker_id))
			
 
				+            results.append({
			
 
				+                "id":         speaker_id,
			
 
				+                "name":       entry["name"],
			
 
				+                "similarity": round(score, 4),
			
 
				+                "match":      score >= self.threshold,
			
 
				+            })
			
 
				+        return sorted(results, key=lambda x: x["similarity"], reverse=True)
			
 
				+
			
 
				+    # ── Accumulator for live audio ─────────────────────────────────────────────
			
 
				+
			
 
				+    def make_accumulator(
			
 
				+        self,
			
 
				+        min_seconds: float = 5.0,
			
 
				+        sample_rate: int   = 16000,
			
 
				+    ) -> "LiveEmbeddingAccumulator":
			
 
				+        """
			
 
				+        Create an accumulator that buffers live PCM audio and
			
 
				+        extracts an embedding once enough audio has been collected.
			
 
				+        """
			
 
				+        return LiveEmbeddingAccumulator(
			
 
				+            registry    = self,
			
 
				+            min_seconds = min_seconds,
			
 
				+            sample_rate = sample_rate,
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+# ── Live audio accumulator ─────────────────────────────────────────────────────
			
 
				+
			
 
				+class LiveEmbeddingAccumulator:
			
 
				+    """
			
 
				+    Buffers raw int16 PCM chunks from the live microphone stream.
			
 
				+    Once MIN_SECONDS of audio is accumulated, extracts an embedding
			
 
				+    that can be used for speaker matching or enrolment.
			
 
				+
			
 
				+    Usage:
			
 
				+        acc = registry.make_accumulator()
			
 
				+        acc.push(pcm_chunk_bytes)
			
 
				+        ...
			
 
				+        if acc.ready():
			
 
				+            embedding = acc.extract_embedding()
			
 
				+            match = registry.find_match(embedding)
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        registry:    EmbeddingRegistry,
			
 
				+        min_seconds: float = 5.0,
			
 
				+        sample_rate: int   = 16000,
			
 
				+    ):
			
 
				+        self.registry    = registry
			
 
				+        self.min_frames  = int(min_seconds * sample_rate)
			
 
				+        self.sample_rate = sample_rate
			
 
				+        self._frames:  list[bytes] = []
			
 
				+        self._n_frames = 0
			
 
				+
			
 
				+    def push(self, pcm_bytes: bytes) -> None:
			
 
				+        """Add a chunk of raw int16 PCM audio."""
			
 
				+        self._frames.append(pcm_bytes)
			
 
				+        self._n_frames += len(pcm_bytes) // 2  # int16 = 2 bytes
			
 
				+
			
 
				+    def ready(self) -> bool:
			
 
				+        """Return True if enough audio has been accumulated."""
			
 
				+        return self._n_frames >= self.min_frames
			
 
				+
			
 
				+    def seconds_accumulated(self) -> float:
			
 
				+        return self._n_frames / self.sample_rate
			
 
				+
			
 
				+    def reset(self) -> None:
			
 
				+        self._frames  = []
			
 
				+        self._n_frames = 0
			
 
				+
			
 
				+    def extract_embedding(self) -> np.ndarray:
			
 
				+        """
			
 
				+        Extract an embedding from the buffered audio.
			
 
				+        Raises RuntimeError if not enough audio has been collected.
			
 
				+        """
			
 
				+        if not self.ready():
			
 
				+            raise RuntimeError(
			
 
				+                f"Not enough audio: {self.seconds_accumulated():.1f}s "
			
 
				+                f"accumulated, need {self.min_frames / self.sample_rate:.1f}s"
			
 
				+            )
			
 
				+
			
 
				+        import tempfile
			
 
				+        import wave
			
 
				+
			
 
				+        # Write buffered PCM to a temp WAV
			
 
				+        pcm_data = b"".join(self._frames)
			
 
				+        tmp      = tempfile.NamedTemporaryFile(
			
 
				+            suffix=".wav", delete=False, prefix="live_emb_"
			
 
				+        )
			
 
				+        tmp_path = Path(tmp.name)
			
 
				+        tmp.close()
			
 
				+
			
 
				+        try:
			
 
				+            with wave.open(str(tmp_path), "wb") as wf:
			
 
				+                wf.setnchannels(1)
			
 
				+                wf.setsampwidth(2)
			
 
				+                wf.setframerate(self.sample_rate)
			
 
				+                wf.writeframes(pcm_data)
			
 
				+
			
 
				+            return self.registry.extract(tmp_path)
			
 
				+        finally:
			
 
				+            if tmp_path.exists():
			
 
				+                try:
			
 
				+                    tmp_path.unlink()
			
 
				+                except OSError:
			
 
				+                    pass
			
 
				+
			
 
				+
			
 
				+# ── Standalone test / CLI ──────────────────────────────────────────────────────
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    import argparse
			
 
				+    import sys
			
 
				+
			
 
				+    logging.basicConfig(level=logging.INFO, format="%(message)s")
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description="Speaker voiceprint utility",
			
 
				+        formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				+        epilog="""
			
 
				+Examples:
			
 
				+  # Extract and save a voiceprint from seconds 45-55 of a recording
			
 
				+  python embeddings.py enrol SPEAKER_00 recording.mp3 --start 45 --end 55
			
 
				+
			
 
				+  # List all enrolled speakers
			
 
				+  python embeddings.py list
			
 
				+
			
 
				+  # Test match: compare a 10s clip against enrolled speakers
			
 
				+  python embeddings.py match recording.mp3 --start 120 --end 130
			
 
				+
			
 
				+  # Delete a voiceprint
			
 
				+  python embeddings.py delete SPEAKER_00
			
 
				+
			
 
				+  # Show similarity scores for a clip against all enrolled speakers
			
 
				+  python embeddings.py scores recording.mp3 --start 45 --end 55
			
 
				+        """,
			
 
				+    )
			
 
				+    sub = parser.add_subparsers(dest="cmd")
			
 
				+
			
 
				+    # enrol
			
 
				+    p_enrol = sub.add_parser("enrol", help="Extract and save a voiceprint")
			
 
				+    p_enrol.add_argument("speaker_id")
			
 
				+    p_enrol.add_argument("audio_file")
			
 
				+    p_enrol.add_argument("--start", type=float, default=0.0)
			
 
				+    p_enrol.add_argument("--end",   type=float, default=None)
			
 
				+
			
 
				+    # list
			
 
				+    sub.add_parser("list", help="List enrolled speakers")
			
 
				+
			
 
				+    # match
			
 
				+    p_match = sub.add_parser("match", help="Find matching speaker for an audio clip")
			
 
				+    p_match.add_argument("audio_file")
			
 
				+    p_match.add_argument("--start",     type=float, default=0.0)
			
 
				+    p_match.add_argument("--end",       type=float, default=None)
			
 
				+    p_match.add_argument("--threshold", type=float, default=DEFAULT_THRESHOLD)
			
 
				+
			
 
				+    # scores
			
 
				+    p_scores = sub.add_parser("scores", help="Show similarity against all enrolled speakers")
			
 
				+    p_scores.add_argument("audio_file")
			
 
				+    p_scores.add_argument("--start", type=float, default=0.0)
			
 
				+    p_scores.add_argument("--end",   type=float, default=None)
			
 
				+
			
 
				+    # delete
			
 
				+    p_del = sub.add_parser("delete", help="Delete a stored voiceprint")
			
 
				+    p_del.add_argument("speaker_id")
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+    if not args.cmd:
			
 
				+        parser.print_help()
			
 
				+        sys.exit(0)
			
 
				+
			
 
				+    registry = EmbeddingRegistry()
			
 
				+
			
 
				+    if args.cmd == "enrol":
			
 
				+        emb = registry.extract_and_save(
			
 
				+            args.speaker_id, args.audio_file, args.start, args.end
			
 
				+        )
			
 
				+        print(f"✓ Enrolled {args.speaker_id} — embedding shape: {emb.shape}")
			
 
				+
			
 
				+    elif args.cmd == "list":
			
 
				+        enrolled = registry.list_enrolled()
			
 
				+        if not enrolled:
			
 
				+            print("No speakers enrolled yet.")
			
 
				+        else:
			
 
				+            print(f"{'ID':<15} {'Name':<25} {'Updated':<28} {'Size'}")
			
 
				+            print("-" * 75)
			
 
				+            for e in enrolled:
			
 
				+                print(
			
 
				+                    f"{e['id']:<15} {e['name']:<25} "
			
 
				+                    f"{(e['updated'] or 'unknown'):<28} {e['size_kb']} KB"
			
 
				+                )
			
 
				+
			
 
				+    elif args.cmd == "match":
			
 
				+        emb   = registry.extract(args.audio_file, args.start, args.end)
			
 
				+        match = registry.find_match(emb, threshold=args.threshold)
			
 
				+        if match:
			
 
				+            sid, score = match
			
 
				+            data  = _load_speakers_json()
			
 
				+            entry = _normalise_speaker_entry(data.get(sid, sid))
			
 
				+            print(f"✓ Match: {sid} ({entry['name']}) — similarity {score:.4f}")
			
 
				+        else:
			
 
				+            print(f"✗ No match above threshold {args.threshold:.2f}")
			
 
				+
			
 
				+    elif args.cmd == "scores":
			
 
				+        emb    = registry.extract(args.audio_file, args.start, args.end)
			
 
				+        scores = registry.similarity_scores(emb)
			
 
				+        if not scores:
			
 
				+            print("No enrolled speakers to compare against.")
			
 
				+        else:
			
 
				+            print(f"{'ID':<15} {'Name':<25} {'Similarity':<12} {'Match?'}")
			
 
				+            print("-" * 60)
			
 
				+            for s in scores:
			
 
				+                flag = "✓" if s["match"] else " "
			
 
				+                print(
			
 
				+                    f"{s['id']:<15} {s['name']:<25} "
			
 
				+                    f"{s['similarity']:<12.4f} {flag}"
			
 
				+                )
			
 
				+
			
 
				+    elif args.cmd == "delete":
			
 
				+        if registry.delete(args.speaker_id):
			
 
				+            print(f"✓ Deleted voiceprint for {args.speaker_id}")
			
 
				+        else:
			
 
				+            print(f"No voiceprint found for {args.speaker_id}")
			
--- a/bridge/speakers.json
+++ b/bridge/speakers.json
@@ -2,21 +2,37 @@
 
				   "SPEAKER_00": {
			
 
				     "name": "A.A.A",
			
 
				     "role": "Serving Brother",
			
 
				-    "location": "Sydney"
			
 
				+    "location": "Sydney",
			
 
				+    "has_embedding": false,
			
 
				+    "embedding_updated": null,
			
 
				+    "colour": "#16a34a",
			
 
				+    "notes": ""
			
 
				   },
			
 
				   "SPEAKER_01": {
			
 
				     "name": "A.A.A",
			
 
				     "role": "Contributor",
			
 
				-    "location": "London"
			
 
				+    "location": "London",
			
 
				+    "has_embedding": false,
			
 
				+    "embedding_updated": null,
			
 
				+    "colour": "#16a34a",
			
 
				+    "notes": ""
			
 
				   },
			
 
				   "SPEAKER_02": {
			
 
				     "name": "A.A.A",
			
 
				     "role": "Contributor",
			
 
				-    "location": "Hobart"
			
 
				+    "location": "Hobart",
			
 
				+    "has_embedding": false,
			
 
				+    "embedding_updated": null,
			
 
				+    "colour": "#16a34a",
			
 
				+    "notes": ""
			
 
				   },
			
 
				   "SPEAKER_03": {
			
 
				     "name": "A.A.A",
			
 
				     "role": "Contributor",
			
 
				-    "location": "Perth"
			
 
				+    "location": "Perth",
			
 
				+    "has_embedding": false,
			
 
				+    "embedding_updated": null,
			
 
				+    "colour": "#16a34a",
			
 
				+    "notes": ""
			
 
				   }
			
 
				 }