3 months ago · b83d8cea10
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,71 @@
 
															+# Changelog
														
 
															+
														
 
															+All notable changes to **tasplanning.report** are documented here.
														
 
															+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and versioning follows [Semantic Versioning](https://semver.org/).
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## [Unreleased]
														
 
															+
														
 
															+### Changed
														
 
															+- Telemetry inserts in `/ask` moved to FastAPI `BackgroundTasks`, so they no longer block the response
														
 
															+- Extracted shared `_log_ask()` helper, removing duplicated insert logic between GET and POST handlers
														
 
															+- Qdrant collection availability is now checked at startup via an `@app.on_event("startup")` hook; a failed check is logged as an error and does not abort startup
														
 
															+- Ollama inference parameters (`num_ctx`, `num_predict`, `temperature`) now configurable via env vars `OLLAMA_NUM_CTX`, `OLLAMA_NUM_PREDICT`, `OLLAMA_TEMPERATURE`
														
 
															+
														
 
															+### Fixed
														
 
															+- Added missing SQLite indexes on `ask_logs.ts`, `ask_logs.normalized`, and `feedback.ts` — improves dashboard query performance as the DB grows
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## [1.1.0] — 2026-03-27
														
 
															+
														
 
															+### Added
														
 
															+- **BYOK (Bring Your Own Key)** — `context_only` mode on `/ask` returns RAG context and a pre-built prompt so the browser can call an external LLM directly (Anthropic, OpenAI, xAI Grok, local Ollama)
														
 
															+- `byok-settings.php` — provider and API key management UI; keys stored in `localStorage` only, never sent to the server
														
 
															+- `_bootstrap.php` — shared PHP bootstrap for common config/auth helpers
														
 
															+- `design-tokens.css` — extracted CSS custom properties into a standalone shared token file
														
 
															+- `limiter.py` — extracted `slowapi` rate limiter into its own module to avoid circular imports between `app.py` and `telemetry.py`
														
 
															+- `telemetry.py` refactored as a standalone FastAPI `APIRouter` — browser-side events (`page_view`, `byok_call`, `error`) logged to `events` table
														
 
															+- `/feedback` endpoint — stores thumbs-up/down ratings with query, answer, model, scope, and cited sources
														
 
															+- `/councils` endpoint — returns sorted list of indexed council names from Qdrant payload
														
 
															+- Scope-aware retrieval: `state_plus_local`, `state_only`, `local_only`, `any`
														
 
															+- NCC and Australian Standards optional inclusion (`include_ncc`, `include_standards` params)
														
 
															+- `section_id` parameter on `/ask` — triggers section-specific output format guides for planning report generation
														
 
															+- DEMO token gate — optional `DEMO_REQUIRE_TOKEN` / `DEMO_TOKEN` env vars for API gating during early access
														
 
															+- Rate limiting via `slowapi`: 20/min on `/ask`, 60/min on `/feedback` and `/telemetry`
														
 
															+- `dashboard.php` — internal query monitoring dashboard with IP-based access control
														
 
															+- `CLAUDE.md` — full project context document for AI-assisted development
														
 
															+
														
 
															+### Changed
														
 
															+- `app.py` fully rewritten: RAG pipeline, prompt structure, Qdrant filter builders, admin endpoints, and telemetry all consolidated
														
 
															+- CORS now accepts explicit origin list from `CORS_ORIGINS` env var; wildcard fallback uses `tasplanning.report` subdomain regex
														
 
															+- `ollama_chat()`: `keep_alive=-1` enforced as top-level JSON key (not inside `options`) to prevent model unloading between requests
														
 
															+- `num_ctx` pinned to a constant 6144 to avoid a KV cache resize on every request
														
 
															+- Dashboard restricted to allowlisted IPs via `.htaccess` rewrite rules
														
 
															+- `site-report.php` — Google Maps key loaded via `/gmaps-key.php` proxy instead of inline
														
 
															+- `byok-settings.php`, `faq.php`, `dashboard.php` updated to use `design-tokens.css`
														
 
															+- `generate_planning_report.php` — CORS origin validation tightened
														
 
															+
														
 
															+### Removed
														
 
															+- `app(1).py` and `telemetry(1).py` — legacy duplicate files removed
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## [1.0.0] — Initial release
														
 
															+
														
 
															+### Added
														
 
															+- FastAPI backend with RAG pipeline: Ollama embeddings → Qdrant vector search → Ollama LLM
														
 
															+- Qdrant vector database with `planning_docs` collection; chunks keyed by `corpus`, `council`, `source_file`, `page`
														
 
															+- PHP/Apache frontend served from `public/`
														
 
															+- `local_state-planning-scheme.php` — two-panel chat assistant UI (sidebar + thread)
														
 
															+- `site-report.php` — property lookup with Google Maps PlaceAutocomplete and LIST/ArcGIS integration
														
 
															+- `index.php` — landing page with demo modal and waitlist form
														
 
															+- `faq.php`, `privacy.php`, `terms.php` — supporting pages
														
 
															+- `waitlist.php` — PHPMailer-backed waitlist signup endpoint
														
 
															+- `gmaps-key.php` — Google Maps API key proxy (validates `HTTP_HOST` before serving key)
														
 
															+- `/js/api-status.js` — shared live API health indicator polling `/readyz`
														
 
															+- SQLite telemetry database (`telemetry.db`) with `ask_logs`, `feedback`, `events` tables
														
 
															+- IP anonymisation via HMAC-SHA256 (`TPR_IP_SECRET`)
														
 
															+- Docker Compose stack: `qdrant`, `backend`, `web`, `sqliteweb`, `composer` services
														
 
															+- Ollama on Windows host (`RTX 4070 Super`) with `llama3.1:8b-instruct-q4_K_M` and `nomic-embed-text`
														
--- a/backend/app.py
+++ b/backend/app.py
@@ -21,7 +21,7 @@ import time
 
															 logger = logging.getLogger(__name__)
														
 
															 from typing import Optional, Literal, List, Tuple
														
 
															-from fastapi import FastAPI, Query, HTTPException, Request
														
 
															+from fastapi import BackgroundTasks, FastAPI, Query, HTTPException, Request
														
 
															 from fastapi.middleware.cors import CORSMiddleware
														
 
															 from fastapi.responses import StreamingResponse
														
 
															 from slowapi.middleware import SlowAPIMiddleware
														
@@ -45,6 +45,9 @@ COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
 
															 EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
														
 
															 CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
														
 
															 CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
														
 
															+OLLAMA_NUM_CTX      = int(os.getenv("OLLAMA_NUM_CTX", "6144"))
														
 
															+OLLAMA_NUM_PREDICT  = int(os.getenv("OLLAMA_NUM_PREDICT", "512"))
														
 
															+OLLAMA_TEMPERATURE  = float(os.getenv("OLLAMA_TEMPERATURE", "0.2"))
														
 
															 # ---------------------------------------------------------------------------
														
 
															 # Demo token gate (disabled by default)
														
@@ -91,6 +94,14 @@ app.add_middleware(
 
															 qc = QdrantClient(url=QDRANT_URL)
														
 
															 app.include_router(telemetry_router)
														
 
															+@app.on_event("startup")
														
 
															+def check_qdrant():
														
 
															+    try:
														
 
															+        qc.get_collection(COLLECTION)
														
 
															+        logger.info("Qdrant collection '%s' ready", COLLECTION)
														
 
															+    except Exception as e:
														
 
															+        logger.error("Qdrant startup check failed for collection '%s': %s", COLLECTION, e)
														
 
															+
														
 
															 # ---------------------------------------------------------------------------
														
 
															 # Rate limiting (slowapi — in-memory, per IP)
														
 
															 # Shared limiter instance lives in limiter.py to avoid circular imports with
														
@@ -203,9 +214,9 @@ def ollama_chat(prompt: str) -> str:
 
															               "prompt": prompt,
														
 
															               "stream": False,
														
 
															               "options": {
														
 
															-                "num_ctx": 6144, # was 8192,
														
 
															-                "num_predict": 512,
														
 
															-                "temperature": 0.2,
														
 
															+                "num_ctx": OLLAMA_NUM_CTX,
														
 
															+                "num_predict": OLLAMA_NUM_PREDICT,
														
 
															+                "temperature": OLLAMA_TEMPERATURE,
														
 
															                 "top_p": 0.9,
														
 
															                 "repeat_penalty": 1.1,
														
 
															               },
														
@@ -777,10 +788,32 @@ You are an expert Tasmanian planning and building compliance assistant with deep
 
															     return {"answer": answer, "sources": all_sources}
														
 
															+def _log_ask(ts, sid, ip, query, scope, allow_tps, latency_ms, model, sources, answer):
														
 
															+    """Write one ask_logs row. Runs in a background task — never raises to the caller."""
														
 
															+    try:
														
 
															+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in sources]
														
 
															+        with db() as conn:
														
 
															+            conn.execute("""
														
 
															+                INSERT INTO ask_logs
														
 
															+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
														
 
															+                     model, ok, topk_json, tokens_in, tokens_out, answer)
														
 
															+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
														
 
															+            """, (
														
 
															+                ts, sid, ip_hash(ip), query, _normalize(query),
														
 
															+                scope, int(allow_tps),
														
 
															+                latency_ms, model, 1, _json_dumps(topk), 0, 0,
														
 
															+                _trunc(answer, 8000, "ask_logs.answer"),
														
 
															+            ))
														
 
															+            conn.commit()
														
 
															+    except Exception:
														
 
															+        logger.exception("[telemetry] ask insert failed")
														
 
															+
														
 
															+
														
 
															 @app.get("/ask")
														
 
															 @limiter.limit("20/minute")
														
 
															 def ask_get(
														
 
															     request: Request,
														
 
															+    background_tasks: BackgroundTasks,
														
 
															     query: str = Query(..., description="User question"),
														
 
															     top_k: int = 10,
														
 
															     council: Optional[str] = None,
														
@@ -797,36 +830,21 @@ def ask_get(
 
															     out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
														
 
															     latency_ms = int((time.perf_counter() - started) * 1000)
														
 
															-    # Telemetry insert — never allowed to break the response
														
 
															-    try:
														
 
															-        ip = request.client.host if request.client else "0.0.0.0"
														
 
															-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
														
 
															-        allow_tps = scope in ("state_only", "state_plus_local")
														
 
															-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
														
 
															-
														
 
															-        with db() as conn:
														
 
															-            conn.execute("""
														
 
															-                INSERT INTO ask_logs
														
 
															-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
														
 
															-                     model, ok, topk_json, tokens_in, tokens_out, answer)
														
 
															-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
														
 
															-            """, (
														
 
															-                datetime.utcnow().isoformat(),
														
 
															-                sid, ip_hash(ip), query, _normalize(query),
														
 
															-                scope, int(allow_tps),
														
 
															-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
														
 
															-                _trunc(out.get("answer") or "", 8000, "ask_get.answer"),
														
 
															-            ))
														
 
															-            conn.commit()
														
 
															-    except Exception as e:
														
 
															-        logger.exception("[telemetry] ask_get insert failed")
														
 
															+    ip  = request.client.host if request.client else "0.0.0.0"
														
 
															+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
														
 
															+    background_tasks.add_task(
														
 
															+        _log_ask,
														
 
															+        datetime.utcnow().isoformat(), sid, ip, query, scope,
														
 
															+        scope in ("state_only", "state_plus_local"),
														
 
															+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
														
 
															+    )
														
 
															     return out
														
 
															 @app.post("/ask")
														
 
															 @limiter.limit("20/minute")
														
 
															-def ask_post(request: Request, body: AskBody):
														
 
															+def ask_post(request: Request, background_tasks: BackgroundTasks, body: AskBody):
														
 
															     _verify_demo_token_if_needed(request)
														
 
															     qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
														
 
															     if not qtxt:
														
@@ -846,28 +864,13 @@ def ask_post(request: Request, body: AskBody):
 
															     )
														
 
															     latency_ms = int((time.perf_counter() - started) * 1000)
														
 
															-    # Telemetry insert — never allowed to break the response
														
 
															-    try:
														
 
															-        ip = request.client.host if request.client else "0.0.0.0"
														
 
															-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
														
 
															-        allow_tps = body.scope in ("state_only", "state_plus_local")
														
 
															-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
														
 
															-
														
 
															-        with db() as conn:
														
 
															-            conn.execute("""
														
 
															-                INSERT INTO ask_logs
														
 
															-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
														
 
															-                     model, ok, topk_json, tokens_in, tokens_out, answer)
														
 
															-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
														
 
															-            """, (
														
 
															-                datetime.utcnow().isoformat(),
														
 
															-                sid, ip_hash(ip), qtxt, _normalize(qtxt),
														
 
															-                body.scope, int(allow_tps),
														
 
															-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
														
 
															-                _trunc(out.get("answer") or "", 8000, "ask_post.answer"),
														
 
															-            ))
														
 
															-            conn.commit()
														
 
															-    except Exception as e:
														
 
															-        logger.exception("[telemetry] ask_post insert failed")
														
 
															+    ip  = request.client.host if request.client else "0.0.0.0"
														
 
															+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
														
 
															+    background_tasks.add_task(
														
 
															+        _log_ask,
														
 
															+        datetime.utcnow().isoformat(), sid, ip, qtxt, body.scope,
														
 
															+        body.scope in ("state_only", "state_plus_local"),
														
 
															+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
														
 
															+    )
														
 
															     return out
														
--- a/backend/telemetry.py
+++ b/backend/telemetry.py
@@ -109,6 +109,9 @@ def init():
 
															           scope TEXT,                   -- retrieval scope used
														
 
															           sources_json TEXT             -- JSON array of cited sources
														
 
															         );
														
 
															+        CREATE INDEX IF NOT EXISTS idx_ask_logs_ts         ON ask_logs(ts);
														
 
															+        CREATE INDEX IF NOT EXISTS idx_ask_logs_normalized ON ask_logs(normalized);
														
 
															+        CREATE INDEX IF NOT EXISTS idx_feedback_ts         ON feedback(ts);
														
 
															         """)
														
 
															         conn.commit()