3 месяцев назад · b83d8cea10
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,71 @@
 
				+# Changelog
			
 
				+
			
 
				+All notable changes to **tasplanning.report** are documented here.
			
 
				+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and versioning follows [Semantic Versioning](https://semver.org/).
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## [Unreleased]
			
 
				+
			
 
				+### Changed
			
 
				+- Telemetry inserts in `/ask` moved to FastAPI `BackgroundTasks`, so logging no longer blocks the response
			
 
				+- Extracted shared `_log_ask()` helper, removing duplicated insert logic between GET and POST handlers
			
 
				+- Qdrant collection availability is now checked at startup via `@app.on_event("startup")`; a failed check is logged as an error and does not abort startup
			
 
				+- Ollama inference parameters (`num_ctx`, `num_predict`, `temperature`) are now configurable via the env vars `OLLAMA_NUM_CTX`, `OLLAMA_NUM_PREDICT`, `OLLAMA_TEMPERATURE`; the defaults (6144, 512, 0.2) match the previously hard-coded values
			
 
				+
			
 
				+### Fixed
			
 
				+- Added missing SQLite indexes on `ask_logs.ts`, `ask_logs.normalized`, and `feedback.ts` — improves dashboard query performance as the DB grows
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## [1.1.0] — 2026-03-27
			
 
				+
			
 
				+### Added
			
 
				+- **BYOK (Bring Your Own Key)** — `context_only` mode on `/ask` returns RAG context and a pre-built prompt so the browser can call an external LLM directly (Anthropic, OpenAI, xAI Grok, local Ollama)
			
 
				+- `byok-settings.php` — provider and API key management UI; keys stored in `localStorage` only, never sent to the server
			
 
				+- `_bootstrap.php` — shared PHP bootstrap for common config/auth helpers
			
 
				+- `design-tokens.css` — extracted CSS custom properties into a standalone shared token file
			
 
				+- `limiter.py` — extracted `slowapi` rate limiter into its own module to avoid circular imports between `app.py` and `telemetry.py`
			
 
				+- `telemetry.py` refactored as a standalone FastAPI `APIRouter` — browser-side events (`page_view`, `byok_call`, `error`) logged to `events` table
			
 
				+- `/feedback` endpoint — stores thumbs-up/down ratings with query, answer, model, scope, and cited sources
			
 
				+- `/councils` endpoint — returns sorted list of indexed council names from Qdrant payload
			
 
				+- Scope-aware retrieval: `state_plus_local`, `state_only`, `local_only`, `any`
			
 
				+- NCC and Australian Standards optional inclusion (`include_ncc`, `include_standards` params)
			
 
				+- `section_id` parameter on `/ask` — triggers section-specific output format guides for planning report generation
			
 
				+- DEMO token gate — optional `DEMO_REQUIRE_TOKEN` / `DEMO_TOKEN` env vars for API gating during early access
			
 
				+- Rate limiting via `slowapi`: 20/min on `/ask`, 60/min on `/feedback` and `/telemetry`
			
 
				+- `dashboard.php` — internal query monitoring dashboard with IP-based access control
			
 
				+- `CLAUDE.md` — full project context document for AI-assisted development
			
 
				+
			
 
				+### Changed
			
 
				+- `app.py` fully rewritten: RAG pipeline, prompt structure, Qdrant filter builders, admin endpoints, and telemetry all consolidated
			
 
				+- CORS now accepts explicit origin list from `CORS_ORIGINS` env var; wildcard fallback uses `tasplanning.report` subdomain regex
			
 
				+- `ollama_chat()`: `keep_alive=-1` enforced as top-level JSON key (not inside `options`) to prevent model unloading between requests
			
 
				+- `num_ctx` fixed at 6144 to avoid KV cache resize on every request
			
 
				+- Dashboard restricted to allowlisted IPs via `.htaccess` rewrite rules
			
 
				+- `site-report.php` — Google Maps key loaded via `/gmaps-key.php` proxy instead of inline
			
 
				+- `byok-settings.php`, `faq.php`, `dashboard.php` updated to use `design-tokens.css`
			
 
				+- `generate_planning_report.php` — CORS origin validation tightened
			
 
				+
			
 
				+### Removed
			
 
				+- `app(1).py` and `telemetry(1).py` — legacy duplicate files removed
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## [1.0.0] — Initial release
			
 
				+
			
 
				+### Added
			
 
				+- FastAPI backend with RAG pipeline: Ollama embeddings → Qdrant vector search → Ollama LLM
			
 
				+- Qdrant vector database with `planning_docs` collection; chunks keyed by `corpus`, `council`, `source_file`, `page`
			
 
				+- PHP/Apache frontend served from `public/`
			
 
				+- `local_state-planning-scheme.php` — two-panel chat assistant UI (sidebar + thread)
			
 
				+- `site-report.php` — property lookup with Google Maps PlaceAutocomplete and LIST/ArcGIS integration
			
 
				+- `index.php` — landing page with demo modal and waitlist form
			
 
				+- `faq.php`, `privacy.php`, `terms.php` — supporting pages
			
 
				+- `waitlist.php` — PHPMailer-backed waitlist signup endpoint
			
 
				+- `gmaps-key.php` — Google Maps API key proxy (validates `HTTP_HOST` before serving key)
			
 
				+- `/js/api-status.js` — shared live API health indicator polling `/readyz`
			
 
				+- SQLite telemetry database (`telemetry.db`) with `ask_logs`, `feedback`, `events` tables
			
 
				+- IP anonymisation via HMAC-SHA256 (`TPR_IP_SECRET`)
			
 
				+- Docker Compose stack: `qdrant`, `backend`, `web`, `sqliteweb`, `composer` services
			
 
				+- Ollama on Windows host (`RTX 4070 Super`) with `llama3.1:8b-instruct-q4_K_M` and `nomic-embed-text`
			
--- a/backend/app.py
+++ b/backend/app.py
@@ -21,7 +21,7 @@ import time
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 from typing import Optional, Literal, List, Tuple
			
 
				-from fastapi import FastAPI, Query, HTTPException, Request
			
 
				+from fastapi import BackgroundTasks, FastAPI, Query, HTTPException, Request
			
 
				 from fastapi.middleware.cors import CORSMiddleware
			
 
				 from fastapi.responses import StreamingResponse
			
 
				 from slowapi.middleware import SlowAPIMiddleware
			
@@ -45,6 +45,9 @@ COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
 
				 EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
			
 
				 CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
			
 
				 CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
			
 
				+OLLAMA_NUM_CTX      = int(os.getenv("OLLAMA_NUM_CTX", "6144"))
			
 
				+OLLAMA_NUM_PREDICT  = int(os.getenv("OLLAMA_NUM_PREDICT", "512"))
			
 
				+OLLAMA_TEMPERATURE  = float(os.getenv("OLLAMA_TEMPERATURE", "0.2"))
			
 
				 
			
 
				 # ---------------------------------------------------------------------------
			
 
				 # Demo token gate (disabled by default)
			
@@ -91,6 +94,14 @@ app.add_middleware(
 
				 qc = QdrantClient(url=QDRANT_URL)
			
 
				 app.include_router(telemetry_router)
			
 
				 
			
 
				+@app.on_event("startup")
			
 
				+def check_qdrant():
			
 
				+    try:
			
 
				+        qc.get_collection(COLLECTION)
			
 
				+        logger.info("Qdrant collection '%s' ready", COLLECTION)
			
 
				+    except Exception as e:
			
 
				+        logger.error("Qdrant startup check failed for collection '%s': %s", COLLECTION, e)
			
 
				+
			
 
				 # ---------------------------------------------------------------------------
			
 
				 # Rate limiting (slowapi — in-memory, per IP)
			
 
				 # Shared limiter instance lives in limiter.py to avoid circular imports with
			
@@ -203,9 +214,9 @@ def ollama_chat(prompt: str) -> str:
 
				               "prompt": prompt,
			
 
				               "stream": False,
			
 
				               "options": {
			
 
				-                "num_ctx": 6144, # was 8192,
			
 
				-                "num_predict": 512,
			
 
				-                "temperature": 0.2,
			
 
				+                "num_ctx": OLLAMA_NUM_CTX,
			
 
				+                "num_predict": OLLAMA_NUM_PREDICT,
			
 
				+                "temperature": OLLAMA_TEMPERATURE,
			
 
				                 "top_p": 0.9,
			
 
				                 "repeat_penalty": 1.1,
			
 
				               },
			
@@ -777,10 +788,32 @@ You are an expert Tasmanian planning and building compliance assistant with deep
 
				     return {"answer": answer, "sources": all_sources}
			
 
				 
			
 
				 
			
 
				+def _log_ask(ts, sid, ip, query, scope, allow_tps, latency_ms, model, sources, answer):
			
 
				+    """Write one ask_logs row. Runs in a background task — never raises to the caller."""
			
 
				+    try:
			
 
				+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in sources]
			
 
				+        with db() as conn:
			
 
				+            conn.execute("""
			
 
				+                INSERT INTO ask_logs
			
 
				+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
			
 
				+                     model, ok, topk_json, tokens_in, tokens_out, answer)
			
 
				+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
			
 
				+            """, (
			
 
				+                ts, sid, ip_hash(ip), query, _normalize(query),
			
 
				+                scope, int(allow_tps),
			
 
				+                latency_ms, model, 1, _json_dumps(topk), 0, 0,
			
 
				+                _trunc(answer, 8000, "ask_logs.answer"),
			
 
				+            ))
			
 
				+            conn.commit()
			
 
				+    except Exception:
			
 
				+        logger.exception("[telemetry] ask insert failed")
			
 
				+
			
 
				+
			
 
				 @app.get("/ask")
			
 
				 @limiter.limit("20/minute")
			
 
				 def ask_get(
			
 
				     request: Request,
			
 
				+    background_tasks: BackgroundTasks,
			
 
				     query: str = Query(..., description="User question"),
			
 
				     top_k: int = 10,
			
 
				     council: Optional[str] = None,
			
@@ -797,36 +830,21 @@ def ask_get(
 
				     out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
			
 
				     latency_ms = int((time.perf_counter() - started) * 1000)
			
 
				 
			
 
				-    # Telemetry insert — never allowed to break the response
			
 
				-    try:
			
 
				-        ip = request.client.host if request.client else "0.0.0.0"
			
 
				-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
			
 
				-        allow_tps = scope in ("state_only", "state_plus_local")
			
 
				-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
			
 
				-
			
 
				-        with db() as conn:
			
 
				-            conn.execute("""
			
 
				-                INSERT INTO ask_logs
			
 
				-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
			
 
				-                     model, ok, topk_json, tokens_in, tokens_out, answer)
			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
			
 
				-            """, (
			
 
				-                datetime.utcnow().isoformat(),
			
 
				-                sid, ip_hash(ip), query, _normalize(query),
			
 
				-                scope, int(allow_tps),
			
 
				-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
			
 
				-                _trunc(out.get("answer") or "", 8000, "ask_get.answer"),
			
 
				-            ))
			
 
				-            conn.commit()
			
 
				-    except Exception as e:
			
 
				-        logger.exception("[telemetry] ask_get insert failed")
			
 
				+    ip  = request.client.host if request.client else "0.0.0.0"
			
 
				+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
			
 
				+    background_tasks.add_task(
			
 
				+        _log_ask,
			
 
				+        datetime.utcnow().isoformat(), sid, ip, query, scope,
			
 
				+        scope in ("state_only", "state_plus_local"),
			
 
				+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
			
 
				+    )
			
 
				 
			
 
				     return out
			
 
				 
			
 
				 
			
 
				 @app.post("/ask")
			
 
				 @limiter.limit("20/minute")
			
 
				-def ask_post(request: Request, body: AskBody):
			
 
				+def ask_post(request: Request, background_tasks: BackgroundTasks, body: AskBody):
			
 
				     _verify_demo_token_if_needed(request)
			
 
				     qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
			
 
				     if not qtxt:
			
@@ -846,28 +864,13 @@ def ask_post(request: Request, body: AskBody):
 
				     )
			
 
				     latency_ms = int((time.perf_counter() - started) * 1000)
			
 
				 
			
 
				-    # Telemetry insert — never allowed to break the response
			
 
				-    try:
			
 
				-        ip = request.client.host if request.client else "0.0.0.0"
			
 
				-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
			
 
				-        allow_tps = body.scope in ("state_only", "state_plus_local")
			
 
				-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
			
 
				-
			
 
				-        with db() as conn:
			
 
				-            conn.execute("""
			
 
				-                INSERT INTO ask_logs
			
 
				-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
			
 
				-                     model, ok, topk_json, tokens_in, tokens_out, answer)
			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
			
 
				-            """, (
			
 
				-                datetime.utcnow().isoformat(),
			
 
				-                sid, ip_hash(ip), qtxt, _normalize(qtxt),
			
 
				-                body.scope, int(allow_tps),
			
 
				-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
			
 
				-                _trunc(out.get("answer") or "", 8000, "ask_post.answer"),
			
 
				-            ))
			
 
				-            conn.commit()
			
 
				-    except Exception as e:
			
 
				-        logger.exception("[telemetry] ask_post insert failed")
			
 
				+    ip  = request.client.host if request.client else "0.0.0.0"
			
 
				+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
			
 
				+    background_tasks.add_task(
			
 
				+        _log_ask,
			
 
				+        datetime.utcnow().isoformat(), sid, ip, qtxt, body.scope,
			
 
				+        body.scope in ("state_only", "state_plus_local"),
			
 
				+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
			
 
				+    )
			
 
				 
			
 
				     return out
			
--- a/backend/telemetry.py
+++ b/backend/telemetry.py
@@ -109,6 +109,9 @@ def init():
 
				           scope TEXT,                   -- retrieval scope used
			
 
				           sources_json TEXT             -- JSON array of cited sources
			
 
				         );
			
 
				+        CREATE INDEX IF NOT EXISTS idx_ask_logs_ts         ON ask_logs(ts);
			
 
				+        CREATE INDEX IF NOT EXISTS idx_ask_logs_normalized ON ask_logs(normalized);
			
 
				+        CREATE INDEX IF NOT EXISTS idx_feedback_ts         ON feedback(ts);
			
 
				         """)
			
 
				         conn.commit()