:: start.bat - Transcription launcher
  @echo off
  setlocal
  title Transcription - Launcher

  :: ==========================================================================
  :: Launches the live transcription setup in three separate console windows:
  ::   1. WhisperLiveKit server (wlk) with speaker diarization
  ::   2. bridge\bridge.py   (headless audio pipeline)
  ::   3. bridge\admin.py    (speaker admin web server)
  :: then opens http://localhost:8000 and http://localhost:8001 in the browser.
  ::
  :: Prerequisites checked below: the .venv folder created by install.bat and
  :: the "mosquitto" Windows service.
  :: ==========================================================================

  :: Run from the folder this script lives in. Every path used below is
  :: relative (.venv\..., bridge\...), so a different working directory
  :: (e.g. when started elevated or from another folder) would break them.
  cd /d "%~dp0"
  if errorlevel 1 (
    echo ERROR: Could not change to the script folder.
    pause
    exit /b 1
  )

  :: ==========================================================================
  :: CONFIGURATION - edit these lines before first use
  :: ==========================================================================
  :: HuggingFace access token (required for speaker diarization).
  :: Get one at https://huggingface.co/settings/tokens
  ::
  :: Preferred: define HF_TOKEN in your user environment so the secret never
  :: lives in this file. An existing HF_TOKEN always wins over the value below.
  :: Otherwise replace the placeholder - and never commit the real value.
  :: NOTE(review): a token was previously hard-coded on this line. If that value
  :: was ever shared or committed, treat it as leaked and revoke it.
  if not defined HF_TOKEN set "HF_TOKEN=hf_REPLACE_WITH_YOUR_TOKEN"

  :: Whisper model to use:
  ::   large-v3        - most accurate, needs ~6 GB VRAM, ~3 s latency
  ::   distil-large-v3 - faster (~2 s latency), very slightly less accurate
  ::   medium          - fallback if VRAM is limited (~4 GB VRAM)
  :: The quoted form keeps stray trailing whitespace out of the value.
  set "WHISPER_MODEL=large-v3"
  :: ==========================================================================

  :: Fail fast when the token was never configured.
  if "%HF_TOKEN%"=="hf_REPLACE_WITH_YOUR_TOKEN" (
    echo.
    echo ERROR: HF_TOKEN is not configured.
    echo Set HF_TOKEN in your environment or edit the CONFIGURATION section.
    echo Get a token at https://huggingface.co/settings/tokens
    echo.
    pause
    exit /b 1
  )

  :: Check virtual environment exists
  if not exist ".venv\Scripts\activate.bat" (
    echo.
    echo ERROR: Virtual environment not found.
    echo Please run install.bat first.
    echo.
    pause
    exit /b 1
  )

  :: Check Mosquitto is running; try to start the service if it is not.
  sc query mosquitto | find "RUNNING" >nul 2>&1
  if errorlevel 1 (
    echo Starting Mosquitto MQTT broker...
    net start mosquitto >nul 2>&1
    if errorlevel 1 (
      echo WARNING: Could not start Mosquitto. Is it installed?
      echo See SETUP.md Part 4.
      pause
      exit /b 1
    )
  )

  echo.
  echo ============================================================
  echo Live Transcription Display
  echo ============================================================
  echo.
  echo Starting Whisper server ^(with speaker diarization^)...
  echo Starting bridge in a new window...
  echo Starting speaker admin in a new window...
  echo.
  echo All three windows must stay open during the service.
  echo.
  echo NOTE: First run downloads diarization models ^(~500 MB^).
  echo Wait for "Server running" before speaking.
  echo.

  :: Point imageio-ffmpeg at the ffmpeg binary bundled in the venv.
  :: HF_TOKEN and IMAGEIO_FFMPEG_EXE are set here, in the parent, because the
  :: windows opened by "start" inherit this environment. Setting them inside
  :: the child command line as "set VAR=value && next" would store a trailing
  :: space in the value.
  set "IMAGEIO_FFMPEG_EXE=%~dp0.venv\Lib\site-packages\imageio_ffmpeg\binaries\ffmpeg-win-x86_64-v7.1.exe"
  if not exist "%IMAGEIO_FFMPEG_EXE%" echo WARNING: ffmpeg binary not found at "%IMAGEIO_FFMPEG_EXE%".

  :: Activate the venv and launch WhisperLiveKit directly through "wlk".
  :: Alternative kept for reference - the compatibility launcher, which per the
  :: original note patches torchaudio for diart and makes ffmpeg available:
  ::   python bridge\whisper_launcher.py --model large-v3 --language en --backend faster-whisper --diarization-backend diart
  start "Whisper Transcription Server" cmd /k "call .venv\Scripts\activate.bat && wlk --model %WHISPER_MODEL% --language en --backend faster-whisper --diarization-backend diart"

  :: Give Whisper a head start before the bridge is launched
  :: (the first run downloads ~500 MB of diarization models).
  timeout /t 15 /nobreak >nul

  :: Activate venv and launch the bridge (headless audio pipeline)
  start "Transcription Bridge" cmd /k "call .venv\Scripts\activate.bat && echo Starting bridge... && python bridge\bridge.py"

  :: Activate venv and launch the speaker admin web server
  start "Speaker Admin" cmd /k "call .venv\Scripts\activate.bat && echo Starting speaker admin... && python bridge\admin.py"

  :: Fixed wait for the servers to come up, then open the browser tabs.
  echo Waiting for servers to start...
  timeout /t 12 /nobreak >nul
  start http://localhost:8000
  start http://localhost:8001

  echo.
  echo All three windows must stay open during the service.
  echo.
  echo http://localhost:8000 ^<-- Whisper web UI ^(verify transcription^)
  echo http://localhost:8001 ^<-- Speaker admin ^(manage names + recordings^)
  echo.
  pause