Benjamin Harris 3 mesi fa
parent
commit
b83d8cea10
3 ha cambiato i file con 128 aggiunte e 51 eliminazioni
  1. 71 0
      CHANGELOG.md
  2. 54 51
      backend/app.py
  3. 3 0
      backend/telemetry.py

+ 71 - 0
CHANGELOG.md

@@ -0,0 +1,71 @@
+# Changelog
+
+All notable changes to **tasplanning.report** are documented here.
+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and versioning follows [Semantic Versioning](https://semver.org/).
+
+---
+
+## [Unreleased]
+
+### Changed
+- Telemetry inserts in `/ask` moved to FastAPI `BackgroundTasks` — logging no longer blocks the response
+- Extracted shared `_log_ask()` helper, removing duplicated insert logic between GET and POST handlers
+- Qdrant collection availability now verified at startup with `@app.on_event("startup")`
+- Ollama inference parameters (`num_ctx`, `num_predict`, `temperature`) now configurable via env vars `OLLAMA_NUM_CTX`, `OLLAMA_NUM_PREDICT`, `OLLAMA_TEMPERATURE`
+
+### Fixed
+- Added missing SQLite indexes on `ask_logs.ts`, `ask_logs.normalized`, and `feedback.ts` — improves dashboard query performance as the DB grows
+
+---
+
+## [1.1.0] — 2026-03-27
+
+### Added
+- **BYOK (Bring Your Own Key)** — `context_only` mode on `/ask` returns RAG context and a pre-built prompt so the browser can call an external LLM directly (Anthropic, OpenAI, xAI Grok, local Ollama)
+- `byok-settings.php` — provider and API key management UI; keys stored in `localStorage` only, never sent to the server
+- `_bootstrap.php` — shared PHP bootstrap for common config/auth helpers
+- `design-tokens.css` — extracted CSS custom properties into a standalone shared token file
+- `limiter.py` — extracted `slowapi` rate limiter into its own module to avoid circular imports between `app.py` and `telemetry.py`
+- `telemetry.py` refactored as a standalone FastAPI `APIRouter` — browser-side events (`page_view`, `byok_call`, `error`) logged to `events` table
+- `/feedback` endpoint — stores thumbs-up/down ratings with query, answer, model, scope, and cited sources
+- `/councils` endpoint — returns sorted list of indexed council names from Qdrant payload
+- Scope-aware retrieval: `state_plus_local`, `state_only`, `local_only`, `any`
+- NCC and Australian Standards optional inclusion (`include_ncc`, `include_standards` params)
+- `section_id` parameter on `/ask` — triggers section-specific output format guides for planning report generation
+- DEMO token gate — optional `DEMO_REQUIRE_TOKEN` / `DEMO_TOKEN` env vars for API gating during early access
+- Rate limiting via `slowapi`: 20/min on `/ask`, 60/min on `/feedback` and `/telemetry`
+- `dashboard.php` — internal query monitoring dashboard with IP-based access control
+- `CLAUDE.md` — full project context document for AI-assisted development
+
+### Changed
+- `app.py` fully rewritten: RAG pipeline, prompt structure, Qdrant filter builders, admin endpoints, and telemetry all consolidated
+- CORS now accepts explicit origin list from `CORS_ORIGINS` env var; wildcard fallback uses `tasplanning.report` subdomain regex
+- `ollama_chat()`: `keep_alive=-1` enforced as top-level JSON key (not inside `options`) to prevent model unloading between requests
+- `num_ctx` pinned at 6144 to avoid KV cache resize on every request
+- Dashboard restricted to allowlisted IPs via `.htaccess` rewrite rules
+- `site-report.php` — Google Maps key loaded via `/gmaps-key.php` proxy instead of inline
+- `byok-settings.php`, `faq.php`, `dashboard.php` updated to use `design-tokens.css`
+- `generate_planning_report.php` — CORS origin validation tightened
+
+### Removed
+- `app(1).py` and `telemetry(1).py` — legacy duplicate files removed
+
+---
+
+## [1.0.0] — Initial release
+
+### Added
+- FastAPI backend with RAG pipeline: Ollama embeddings → Qdrant vector search → Ollama LLM
+- Qdrant vector database with `planning_docs` collection; chunks keyed by `corpus`, `council`, `source_file`, `page`
+- PHP/Apache frontend served from `public/`
+- `local_state-planning-scheme.php` — two-panel chat assistant UI (sidebar + thread)
+- `site-report.php` — property lookup with Google Maps PlaceAutocomplete and LIST/ArcGIS integration
+- `index.php` — landing page with demo modal and waitlist form
+- `faq.php`, `privacy.php`, `terms.php` — supporting pages
+- `waitlist.php` — PHPMailer-backed waitlist signup endpoint
+- `gmaps-key.php` — Google Maps API key proxy (validates `HTTP_HOST` before serving key)
+- `/js/api-status.js` — shared live API health indicator polling `/readyz`
+- SQLite telemetry database (`telemetry.db`) with `ask_logs`, `feedback`, `events` tables
+- IP anonymisation via HMAC-SHA256 (`TPR_IP_SECRET`)
+- Docker Compose stack: `qdrant`, `backend`, `web`, `sqliteweb`, `composer` services
+- Ollama on Windows host (`RTX 4070 Super`) with `llama3.1:8b-instruct-q4_K_M` and `nomic-embed-text`

+ 54 - 51
backend/app.py

@@ -21,7 +21,7 @@ import time
 logger = logging.getLogger(__name__)
 
 from typing import Optional, Literal, List, Tuple
-from fastapi import FastAPI, Query, HTTPException, Request
+from fastapi import BackgroundTasks, FastAPI, Query, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse
 from slowapi.middleware import SlowAPIMiddleware
@@ -45,6 +45,9 @@ COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
 EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
 CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
 CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
+OLLAMA_NUM_CTX      = int(os.getenv("OLLAMA_NUM_CTX", "6144"))
+OLLAMA_NUM_PREDICT  = int(os.getenv("OLLAMA_NUM_PREDICT", "512"))
+OLLAMA_TEMPERATURE  = float(os.getenv("OLLAMA_TEMPERATURE", "0.2"))
 
 # ---------------------------------------------------------------------------
 # Demo token gate (disabled by default)
@@ -91,6 +94,14 @@ app.add_middleware(
 qc = QdrantClient(url=QDRANT_URL)
 app.include_router(telemetry_router)
 
+@app.on_event("startup")
+def check_qdrant():
+    try:
+        qc.get_collection(COLLECTION)
+        logger.info("Qdrant collection '%s' ready", COLLECTION)
+    except Exception as e:
+        logger.error("Qdrant startup check failed for collection '%s': %s", COLLECTION, e)
+
 # ---------------------------------------------------------------------------
 # Rate limiting (slowapi — in-memory, per IP)
 # Shared limiter instance lives in limiter.py to avoid circular imports with
@@ -203,9 +214,9 @@ def ollama_chat(prompt: str) -> str:
               "prompt": prompt,
               "stream": False,
               "options": {
-                "num_ctx": 6144, # was 8192,
-                "num_predict": 512,
-                "temperature": 0.2,
+                "num_ctx": OLLAMA_NUM_CTX,
+                "num_predict": OLLAMA_NUM_PREDICT,
+                "temperature": OLLAMA_TEMPERATURE,
                 "top_p": 0.9,
                 "repeat_penalty": 1.1,
               },
@@ -777,10 +788,32 @@ You are an expert Tasmanian planning and building compliance assistant with deep
     return {"answer": answer, "sources": all_sources}
 
 
+def _log_ask(ts, sid, ip, query, scope, allow_tps, latency_ms, model, sources, answer):
+    """Write one ask_logs row. Runs in a background task — never raises to the caller."""
+    try:
+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in sources]
+        with db() as conn:
+            conn.execute("""
+                INSERT INTO ask_logs
+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
+                     model, ok, topk_json, tokens_in, tokens_out, answer)
+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+            """, (
+                ts, sid, ip_hash(ip), query, _normalize(query),
+                scope, int(allow_tps),
+                latency_ms, model, 1, _json_dumps(topk), 0, 0,
+                _trunc(answer, 8000, "ask_logs.answer"),
+            ))
+            conn.commit()
+    except Exception:
+        logger.exception("[telemetry] ask insert failed")
+
+
 @app.get("/ask")
 @limiter.limit("20/minute")
 def ask_get(
     request: Request,
+    background_tasks: BackgroundTasks,
     query: str = Query(..., description="User question"),
     top_k: int = 10,
     council: Optional[str] = None,
@@ -797,36 +830,21 @@ def ask_get(
     out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
     latency_ms = int((time.perf_counter() - started) * 1000)
 
-    # Telemetry insert — never allowed to break the response
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
-                     model, ok, topk_json, tokens_in, tokens_out, answer)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), query, _normalize(query),
-                scope, int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
-                _trunc(out.get("answer") or "", 8000, "ask_get.answer"),
-            ))
-            conn.commit()
-    except Exception as e:
-        logger.exception("[telemetry] ask_get insert failed")
+    ip  = request.client.host if request.client else "0.0.0.0"
+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
+    background_tasks.add_task(
+        _log_ask,
+        datetime.utcnow().isoformat(), sid, ip, query, scope,
+        scope in ("state_only", "state_plus_local"),
+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
+    )
 
     return out
 
 
 @app.post("/ask")
 @limiter.limit("20/minute")
-def ask_post(request: Request, body: AskBody):
+def ask_post(request: Request, background_tasks: BackgroundTasks, body: AskBody):
     _verify_demo_token_if_needed(request)
     qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
     if not qtxt:
@@ -846,28 +864,13 @@ def ask_post(request: Request, body: AskBody):
     )
     latency_ms = int((time.perf_counter() - started) * 1000)
 
-    # Telemetry insert — never allowed to break the response
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = body.scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
-                     model, ok, topk_json, tokens_in, tokens_out, answer)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), qtxt, _normalize(qtxt),
-                body.scope, int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
-                _trunc(out.get("answer") or "", 8000, "ask_post.answer"),
-            ))
-            conn.commit()
-    except Exception as e:
-        logger.exception("[telemetry] ask_post insert failed")
+    ip  = request.client.host if request.client else "0.0.0.0"
+    sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
+    background_tasks.add_task(
+        _log_ask,
+        datetime.utcnow().isoformat(), sid, ip, qtxt, body.scope,
+        body.scope in ("state_only", "state_plus_local"),
+        latency_ms, CHAT_MODEL, out.get("sources") or [], out.get("answer") or "",
+    )
 
     return out

+ 3 - 0
backend/telemetry.py

@@ -109,6 +109,9 @@ def init():
           scope TEXT,                   -- retrieval scope used
           sources_json TEXT             -- JSON array of cited sources
         );
+        CREATE INDEX IF NOT EXISTS idx_ask_logs_ts         ON ask_logs(ts);
+        CREATE INDEX IF NOT EXISTS idx_ask_logs_normalized ON ask_logs(normalized);
+        CREATE INDEX IF NOT EXISTS idx_feedback_ts         ON feedback(ts);
         """)
         conn.commit()