Browse Source

Telemetry Updates

Benjamin Harris 3 months ago
parent
commit
b83d8cea10
3 changed files with 128 additions and 51 deletions
  1. 71 0
      CHANGELOG.md
  2. 54 51
      backend/app.py
  3. 3 0
      backend/telemetry.py

+ 71 - 0
CHANGELOG.md

@@ -0,0 +1,71 @@
+# Changelog
+
+All notable changes to **tasplanning.report** are documented here.
+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and versioning follows [Semantic Versioning](https://semver.org/).
+
+---
+
+## [Unreleased]
+
+### Changed
+- Telemetry inserts in `/ask` moved to FastAPI `BackgroundTasks`, so they no longer block the response
+- Extracted shared `_log_ask()` helper, removing duplicated insert logic between GET and POST handlers
+- Qdrant collection availability is now checked at startup via an `@app.on_event("startup")` hook; a failed check is logged as an error and does not stop the app from starting
+- Ollama inference parameters (`num_ctx`, `num_predict`, `temperature`) now configurable via env vars `OLLAMA_NUM_CTX`, `OLLAMA_NUM_PREDICT`, `OLLAMA_TEMPERATURE`
+
+### Fixed
+- Added missing SQLite indexes on `ask_logs.ts`, `ask_logs.normalized`, and `feedback.ts` — improves dashboard query performance as the DB grows
+
+---
+
+## [1.1.0] — 2026-03-27
+
+### Added
+- **BYOK (Bring Your Own Key)** — `context_only` mode on `/ask` returns RAG context and a pre-built prompt so the browser can call an external LLM directly (Anthropic, OpenAI, xAI Grok, local Ollama)
+- `byok-settings.php` — provider and API key management UI; keys stored in `localStorage` only, never sent to the server
+- `_bootstrap.php` — shared PHP bootstrap for common config/auth helpers
+- `design-tokens.css` — extracted CSS custom properties into a standalone shared token file
+- `limiter.py` — extracted `slowapi` rate limiter into its own module to avoid circular imports between `app.py` and `telemetry.py`
+- `telemetry.py` refactored as a standalone FastAPI `APIRouter` — browser-side events (`page_view`, `byok_call`, `error`) logged to `events` table
+- `/feedback` endpoint — stores thumbs-up/down ratings with query, answer, model, scope, and cited sources
+- `/councils` endpoint — returns sorted list of indexed council names from Qdrant payload
+- Scope-aware retrieval: `state_plus_local`, `state_only`, `local_only`, `any`
+- NCC and Australian Standards optional inclusion (`include_ncc`, `include_standards` params)
+- `section_id` parameter on `/ask` — triggers section-specific output format guides for planning report generation
+- DEMO token gate — optional `DEMO_REQUIRE_TOKEN` / `DEMO_TOKEN` env vars for API gating during early access
+- Rate limiting via `slowapi`: 20/min on `/ask`, 60/min on `/feedback` and `/telemetry`
+- `dashboard.php` — internal query monitoring dashboard with IP-based access control
+- `CLAUDE.md` — full project context document for AI-assisted development
+
+### Changed
+- `app.py` fully rewritten: RAG pipeline, prompt structure, Qdrant filter builders, admin endpoints, and telemetry all consolidated
+- CORS now accepts explicit origin list from `CORS_ORIGINS` env var; wildcard fallback uses `tasplanning.report` subdomain regex
+- `ollama_chat()`: `keep_alive=-1` enforced as top-level JSON key (not inside `options`) to prevent model unloading between requests
+- `num_ctx` fixed at 6144 to avoid KV cache resize on every request
+- Dashboard restricted to allowlisted IPs via `.htaccess` rewrite rules
+- `site-report.php` — Google Maps key loaded via `/gmaps-key.php` proxy instead of inline
+- `byok-settings.php`, `faq.php`, `dashboard.php` updated to use `design-tokens.css`
+- `generate_planning_report.php` — CORS origin validation tightened
+
+### Removed
+- `app(1).py` and `telemetry(1).py` — legacy duplicate files removed
+
+---
+
+## [1.0.0] — Initial release
+
+### Added
+- FastAPI backend with RAG pipeline: Ollama embeddings → Qdrant vector search → Ollama LLM
+- Qdrant vector database with `planning_docs` collection; chunks keyed by `corpus`, `council`, `source_file`, `page`
+- PHP/Apache frontend served from `public/`
+- `local_state-planning-scheme.php` — two-panel chat assistant UI (sidebar + thread)
+- `site-report.php` — property lookup with Google Maps PlaceAutocomplete and LIST/ArcGIS integration
+- `index.php` — landing page with demo modal and waitlist form
+- `faq.php`, `privacy.php`, `terms.php` — supporting pages
+- `waitlist.php` — PHPMailer-backed waitlist signup endpoint
+- `gmaps-key.php` — Google Maps API key proxy (validates `HTTP_HOST` before serving key)
+- `/js/api-status.js` — shared live API health indicator polling `/readyz`
+- SQLite telemetry database (`telemetry.db`) with `ask_logs`, `feedback`, `events` tables
+- IP anonymisation via HMAC-SHA256 (`TPR_IP_SECRET`)
+- Docker Compose stack: `qdrant`, `backend`, `web`, `sqliteweb`, `composer` services
+- Ollama on Windows host (`RTX 4070 Super`) with `llama3.1:8b-instruct-q4_K_M` and `nomic-embed-text`

+ 54 - 51
backend/app.py

@@ -21,7 +21,7 @@ import time
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
 from typing import Optional, Literal, List, Tuple
 from typing import Optional, Literal, List, Tuple
-from fastapi import FastAPI, Query, HTTPException, Request
+from fastapi import BackgroundTasks, FastAPI, Query, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse
 from fastapi.responses import StreamingResponse
 from slowapi.middleware import SlowAPIMiddleware
 from slowapi.middleware import SlowAPIMiddleware
@@ -45,6 +45,9 @@ COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
 EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
 EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
 CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
 CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
 CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
 CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
+OLLAMA_NUM_CTX      = int(os.getenv("OLLAMA_NUM_CTX", "6144"))
+OLLAMA_NUM_PREDICT  = int(os.getenv("OLLAMA_NUM_PREDICT", "512"))
+OLLAMA_TEMPERATURE  = float(os.getenv("OLLAMA_TEMPERATURE", "0.2"))
 
 
 # ---------------------------------------------------------------------------
 # ---------------------------------------------------------------------------
 # Demo token gate (disabled by default)
 # Demo token gate (disabled by default)
@@ -91,6 +94,14 @@ app.add_middleware(
 qc = QdrantClient(url=QDRANT_URL)
 qc = QdrantClient(url=QDRANT_URL)
 app.include_router(telemetry_router)
 app.include_router(telemetry_router)
 
 
+@app.on_event("startup")
+def check_qdrant():
+    try:
+        qc.get_collection(COLLECTION)
+        logger.info("Qdrant collection '%s' ready", COLLECTION)
+    except Exception as e:
+        logger.error("Qdrant startup check failed for collection '%s': %s", COLLECTION, e)
+
 # ---------------------------------------------------------------------------
 # ---------------------------------------------------------------------------
 # Rate limiting (slowapi — in-memory, per IP)
 # Rate limiting (slowapi — in-memory, per IP)
 # Shared limiter instance lives in limiter.py to avoid circular imports with
 # Shared limiter instance lives in limiter.py to avoid circular imports with
@@ -203,9 +214,9 @@ def ollama_chat(prompt: str) -> str:
               "prompt": prompt,
               "prompt": prompt,
               "stream": False,
               "stream": False,
               "options": {
               "options": {
-                "num_ctx": 6144, # was 8192,
-                "num_predict": 512,
-                "temperature": 0.2,
+                "num_ctx": OLLAMA_NUM_CTX,
+                "num_predict": OLLAMA_NUM_PREDICT,
+                "temperature": OLLAMA_TEMPERATURE,
                 "top_p": 0.9,
                 "top_p": 0.9,
                 "repeat_penalty": 1.1,
                 "repeat_penalty": 1.1,
               },
               },
@@ -777,10 +788,32 @@ You are an expert Tasmanian planning and building compliance assistant with deep
     return {"answer": answer, "sources": all_sources}
     return {"answer": answer, "sources": all_sources}
 
 
 
 
+def _log_ask(ts, sid, ip, query, scope, allow_tps, latency_ms, model, sources, answer):
+    """Write one ask_logs row. Runs in a background task — never raises to the caller."""
+    try:
+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in sources]
+        with db() as conn:
+            conn.execute("""
+                INSERT INTO ask_logs
+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
+                     model, ok, topk_json, tokens_in, tokens_out, answer)
+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+            """, (
+                ts, sid, ip_hash(ip), query, _normalize(query),
+                scope, int(allow_tps),
+                latency_ms, model, 1, _json_dumps(topk), 0, 0,
+                _trunc(answer, 8000, "ask_logs.answer"),
+            ))
+            conn.commit()
+    except Exception:
+        logger.exception("[telemetry] ask insert failed")
+
+
 @app.get("/ask")
 @app.get("/ask")
 @limiter.limit("20/minute")
 @limiter.limit("20/minute")
 def ask_get(
 def ask_get(
     request: Request,
     request: Request,
+    background_tasks: BackgroundTasks,
     query: str = Query(..., description="User question"),
     query: str = Query(..., description="User question"),
     top_k: int = 10,
     top_k: int = 10,
     council: Optional[str] = None,
     council: Optional[str] = None,
@@ -797,36 +830,21 @@ def ask_get(
     out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
     out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
     latency_ms = int((time.perf_counter() - started) * 1000)
     latency_ms = int((time.perf_counter() - started) * 1000)
 
 
-    # Telemetry insert — never allowed to break the response
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
-                     model, ok, topk_json, tokens_in, tokens_out, answer)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), query, _normalize(query),
-                scope, int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
-                _trunc(out.get("answer") or "", 8000, "ask_get.answer"),
-            ))
-            conn.commit()
-    except Exception as e:
-        logger.exception("[telemetry] ask_get insert failed")
+    ip  = request.client.host if request.client else "0.0.0.0"
+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
+    background_tasks.add_task(
+        _log_ask,
+        datetime.utcnow().isoformat(), sid, ip, query, scope,
+        scope in ("state_only", "state_plus_local"),
+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
+    )
 
 
     return out
     return out
 
 
 
 
 @app.post("/ask")
 @app.post("/ask")
 @limiter.limit("20/minute")
 @limiter.limit("20/minute")
-def ask_post(request: Request, body: AskBody):
+def ask_post(request: Request, background_tasks: BackgroundTasks, body: AskBody):
     _verify_demo_token_if_needed(request)
     _verify_demo_token_if_needed(request)
     qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
     qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
     if not qtxt:
     if not qtxt:
@@ -846,28 +864,13 @@ def ask_post(request: Request, body: AskBody):
     )
     )
     latency_ms = int((time.perf_counter() - started) * 1000)
     latency_ms = int((time.perf_counter() - started) * 1000)
 
 
-    # Telemetry insert — never allowed to break the response
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = body.scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
-                     model, ok, topk_json, tokens_in, tokens_out, answer)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), qtxt, _normalize(qtxt),
-                body.scope, int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
-                _trunc(out.get("answer") or "", 8000, "ask_post.answer"),
-            ))
-            conn.commit()
-    except Exception as e:
-        logger.exception("[telemetry] ask_post insert failed")
+    ip  = request.client.host if request.client else "0.0.0.0"
+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
+    background_tasks.add_task(
+        _log_ask,
+        datetime.utcnow().isoformat(), sid, ip, qtxt, body.scope,
+        body.scope in ("state_only", "state_plus_local"),
+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
+    )
 
 
     return out
     return out

+ 3 - 0
backend/telemetry.py

@@ -109,6 +109,9 @@ def init():
           scope TEXT,                   -- retrieval scope used
           scope TEXT,                   -- retrieval scope used
           sources_json TEXT             -- JSON array of cited sources
           sources_json TEXT             -- JSON array of cited sources
         );
         );
+        CREATE INDEX IF NOT EXISTS idx_ask_logs_ts         ON ask_logs(ts);
+        CREATE INDEX IF NOT EXISTS idx_ask_logs_normalized ON ask_logs(normalized);
+        CREATE INDEX IF NOT EXISTS idx_feedback_ts         ON feedback(ts);
         """)
         """)
         conn.commit()
         conn.commit()