benjamin.harris
/
Deaf_Transcription_Service


			
				
					
						
						
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
							@echo off
:: Launcher for the live transcription stack: starts the Whisper server,
:: the bridge and the speaker admin, each in its own console window.
setlocal enabledelayedexpansion
title Transcription - Launcher

:: Every path below (.venv\..., bridge\...) is relative, so run from the folder
:: that contains this script. Without this, starting the launcher from a
:: shortcut or with "Run as administrator" (which begins in System32) fails
:: the virtual-environment check even though the install is fine.
cd /d "%~dp0" || (
    echo ERROR: Could not change to the launcher folder.
    pause
    exit /b 1
)

:: ════════════════════════════════════════════════════════════════════════════
::  CONFIGURATION - edit these lines before first use
:: ════════════════════════════════════════════════════════════════════════════

:: HuggingFace access token (required for speaker diarization).
:: Get one at https://huggingface.co/settings/tokens
::
:: The token is a secret and must NOT be written into this file. Supply it
:: through the HF_TOKEN environment variable instead, for example once with:
::     setx HF_TOKEN your_token_here
:: and then open a new console window so the value is picked up.
::
:: SECURITY NOTE: an earlier revision of this file contained a real token on
:: this line. Treat that token as compromised and revoke it on the page above.
if not defined HF_TOKEN (
    echo.
    echo ERROR: HF_TOKEN is not set.
    echo Create a token at https://huggingface.co/settings/tokens and store it with:
    echo     setx HF_TOKEN your_token_here
    echo Then open a new window and run this launcher again.
    echo.
    pause
    exit /b 1
)

:: Whisper model to use:
::   large-v3          — most accurate, needs ~6 GB VRAM, ~3 s latency
::   distil-large-v3   — faster (~2 s latency), very slightly less accurate
::   medium            — fallback if VRAM is limited (~4 GB VRAM)
:: Quoted form so stray trailing whitespace never becomes part of the value.
set "WHISPER_MODEL=large-v3"

:: ════════════════════════════════════════════════════════════════════════════

:: Abort early when the Python virtual environment has not been created yet.
:: The activation script is used as the marker that install.bat has been run.
if exist .venv\Scripts\activate.bat goto venv_present
echo(
echo ERROR: Virtual environment not found.
echo Please run install.bat first.
echo(
pause
exit /b 1
:venv_present

:: Make sure the Mosquitto service is running.
:: Step 1: look for RUNNING in the service status; nothing more to do if found.
sc query mosquitto | find "RUNNING" >nul 2>&1
if not errorlevel 1 goto broker_running

:: Step 2: the service is not running, so try to start it.
echo Starting Mosquitto MQTT broker...
net start mosquitto >nul 2>&1
if not errorlevel 1 goto broker_running

:: Step 3: starting failed, so tell the user where to look and stop the launcher.
echo WARNING: Could not start Mosquitto. Is it installed?
echo See SETUP.md Part 4.
pause
exit /b 1

:broker_running

:: Start-up banner: names the three components about to be launched and
:: reminds the operator what to expect on a first run.
:: Parentheses in the text stay caret-escaped so the lines remain safe if they
:: are ever moved inside a parenthesised block.
echo(
echo ============================================================
echo  Live Transcription Display
echo ============================================================
echo(
echo Starting Whisper server ^(with speaker diarization^)...
echo Starting bridge in a new window...
echo Starting speaker admin in a new window...
echo(
echo All three windows must stay open during the service.
echo(
echo NOTE: First run downloads diarization models ^(~500 MB^).
echo       Wait for "Server running" before speaking.
echo(

:: Launch WhisperLiveKit (the wlk command) in its own window with the virtual
:: environment activated.
::
:: HF_TOKEN and IMAGEIO_FFMPEG_EXE are set here in the launcher and inherited by
:: the new window. They are deliberately NOT set inside the cmd /k string:
::   - "set NAME=value && next" stores the space before && as part of the value,
::     which corrupts both the token and the ffmpeg path;
::   - expanding %HF_TOKEN% into the command line exposes the secret in the
::     process arguments.
:: The ffmpeg path is anchored to this script's folder so it does not depend on
:: the current directory.
set "IMAGEIO_FFMPEG_EXE=%~dp0.venv\Lib\site-packages\imageio_ffmpeg\binaries\ffmpeg-win-x86_64-v7.1.exe"

:: Alternative (disabled): go through the compatibility launcher, which patches
:: torchaudio for diart and makes ffmpeg available.
:: start "Whisper Transcription Server" cmd /k "call .venv\Scripts\activate.bat && echo Starting WhisperLiveKit (%WHISPER_MODEL%) with speaker diarization... && python bridge\whisper_launcher.py --model %WHISPER_MODEL% --language en --backend faster-whisper --diarization-backend diart"
start "Whisper Transcription Server" cmd /k "call .venv\Scripts\activate.bat && wlk --model %WHISPER_MODEL% --language en --backend faster-whisper --diarization-backend diart"

:: Give Whisper more time on first run — diarization model downloads ~500 MB.
:: This is a fixed 15-second pause that key presses cannot skip; it does not
:: check whether the server is actually ready.
timeout /t 15 /nobreak >nul

:: Launch the bridge (headless audio pipeline) in a new console window titled
:: "Transcription Bridge". Inside that window the venv is activated first, then
:: bridge\bridge.py is run; cmd /k keeps the window open afterwards so any
:: output or error stays visible.
start "Transcription Bridge" cmd /k "call .venv\Scripts\activate.bat && echo Starting bridge... && python bridge\bridge.py"

:: Launch the speaker admin web server the same way, in a window titled
:: "Speaker Admin", running bridge\admin.py inside the activated venv.
start "Speaker Admin" cmd /k "call .venv\Scripts\activate.bat && echo Starting speaker admin... && python bridge\admin.py"

:: Pause for a fixed 12 seconds (not skippable by key press) so the servers can
:: come up, then hand both local URLs to the system's URL handler.
echo Waiting for servers to start...
timeout /t 12 /nobreak >nul
for %%P in (8000 8001) do start http://localhost:%%P

:: Closing summary: repeat the keep-open reminder and say what each URL is for.
:: The final pause keeps this launcher window open until a key is pressed.
echo(
echo All three windows must stay open during the service.
echo(
echo  http://localhost:8000  ^<-- Whisper web UI ^(verify transcription^)
echo  http://localhost:8001  ^<-- Speaker admin   ^(manage names + recordings^)
echo(
pause