2 달 전 · 350c47aeaa
--- a/.env
+++ b/.env
@@ -1,4 +1,7 @@
 
				 # .env  (same directory as docker-compose.yml)
			
 
				 GMAPS_API_KEY=AIzaSyCQsmOScTJM4P-4WTOO9M7YO01PmTYnBIg
			
 
				 TPR_IP_SECRET=mmOwQgqljUs1CPiKW3O9vvL4XGalAHojOEmB7SJLBxXBPXHbBoDCMyS8fPc62aDk
			
 
				-SMTP_PASS=56cN473SblsTol4s
			
 
				+SMTP_PASS=56cN473SblsTol4s
			
 
				+# Set to 'local' on dev machines to enable the direct Maps key injection shortcut.
			
 
				+# Leave unset (or set to 'production') on all deployed environments.
			
 
				+APP_ENV=local
			
--- a/.gitignore
+++ b/.gitignore
@@ -8,18 +8,23 @@
 
				 /.bundle
			
 
				 
			
 
				 # Ignore all environment files (except templates).
			
 
				-/.env*
			
 
				+.env.*
			
 
				 .env
			
 
				+!.env.example
			
 
				 
			
 
				 # Ignore all logfiles and tempfiles.
			
 
				-/log/*
			
 
				-/tmp/*
			
 
				+/log/
			
 
				+/tmp/
			
 
				 
			
 
				 # Local claude configuration
			
 
				 /.claude
			
 
				 
			
 
				-public/vendor/*
			
 
				-pdfs/*
			
 
				+/public/vendor/
			
 
				+/pdfs/
			
 
				+
			
 
				+# Python virtual environments
			
 
				+/venv/
			
 
				+/.venv/
			
 
				 
			
 
				 # Cert Files
			
 
				 *.pem
			
--- a/backend/app(1).py
+++ b/backend/app(1).py
@@ -1,683 +0,0 @@
 
				-import os, re

			
 
				-import json

			
 
				-import requests

			
 
				-import time

			
 
				-from typing import Optional, Literal, List, Tuple

			
 
				-from fastapi import FastAPI, Query, HTTPException, Request

			
 
				-from fastapi.middleware.cors import CORSMiddleware

			
 
				-from fastapi.responses import StreamingResponse

			
 
				-from slowapi.middleware import SlowAPIMiddleware

			
 
				-from slowapi import Limiter

			
 
				-from slowapi.util import get_remote_address

			
 
				-from slowapi.errors import RateLimitExceeded

			
 
				-from fastapi.responses import JSONResponse

			
 
				-from pydantic import BaseModel

			
 
				-from qdrant_client import QdrantClient

			
 
				-from qdrant_client.http import models as qmodels

			
 
				-from collections import Counter, defaultdict

			
 
				-from datetime import datetime

			
 
				-from telemetry import router as telemetry_router, db, ip_hash

			
 
				-

			
 
				-# ---- Environment ----

			
 
				-OLLAMA_URL          = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")

			
 
				-QDRANT_URL          = os.getenv("QDRANT_URL", "http://localhost:6333")

			
 
				-OLLAMA_KEEP_ALIVE   = -1

			
 
				-COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")

			
 
				-EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")

			
 
				-CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b")

			
 
				-CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]

			
 
				-

			
 
				-# ---- DEMO TOKEN ----

			
 
				-DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"

			
 
				-DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")

			
 
				-

			
 
				-def _verify_demo_token_if_needed(request):

			
 
				-    if not DEMO_REQUIRE_TOKEN:

			
 
				-        return

			
 
				-    auth = request.headers.get("Authorization", "")

			
 
				-    if not (auth.startswith("Bearer ") and auth.split(" ",1)[1] == DEMO_TOKEN):

			
 
				-        raise HTTPException(status_code=401, detail="Unauthorized")

			
 
				-

			
 
				-

			
 
				-# ---- FAST API ----

			
 
				-app = FastAPI()

			
 
				-# Allowed origins — always include your frontend explicitly

			
 
				-_origins = CORS_ORIGINS if CORS_ORIGINS else []

			
 
				-_allow_all = len(_origins) == 0

			
 
				-

			
 
				-app.add_middleware(

			
 
				-    CORSMiddleware,

			
 
				-    allow_origins=_origins if not _allow_all else ["*"],

			
 
				-    allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,

			
 
				-    allow_credentials=not _allow_all,   # credentials only when origins are explicit

			
 
				-    allow_methods=["GET", "POST", "OPTIONS"],

			
 
				-    allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],

			
 
				-    expose_headers=["X-TPR-SID"],

			
 
				-)

			
 
				-

			
 
				-qc = QdrantClient(url=QDRANT_URL)

			
 
				-app.include_router(telemetry_router)

			
 
				-

			
 
				-# ---- SLOW API ----

			
 
				-limiter = Limiter(key_func=get_remote_address)

			
 
				-app.state.limiter = limiter  # type: ignore

			
 
				-app.add_middleware(SlowAPIMiddleware)

			
 
				-

			
 
				-@app.exception_handler(RateLimitExceeded)

			
 
				-def ratelimit_handler(request, exc):

			
 
				-    return JSONResponse(status_code=429, content={"error":"rate_limited","detail":"Too many requests"})

			
 
				-

			
 
				-

			
 
				-# ---- Ollama helpers ----

			
 
				-def slug(s: Optional[str]) -> Optional[str]:

			
 
				-    if not s:

			
 
				-        return None

			
 
				-    return re.sub(r'[^a-z0-9]+', '-', s.strip().lower()).strip('-') or None

			
 
				-

			
 
				-

			
 
				-def ollama_embed(text: str) -> List[float]:

			
 
				-    r = requests.post(

			
 
				-        f"{OLLAMA_URL}/api/embeddings",

			
 
				-        json={"model": EMBED_MODEL, "prompt": text},

			
 
				-        timeout=60

			
 
				-    )

			
 
				-    r.raise_for_status()

			
 
				-    data = r.json()

			
 
				-    if "embedding" not in data:

			
 
				-        raise RuntimeError(f"Ollama embeddings error: {data}")

			
 
				-    return data["embedding"]

			
 
				-

			
 
				-def ollama_chat(prompt: str) -> str:

			
 
				-    r = requests.post(

			
 
				-        f"{OLLAMA_URL}/api/generate",

			
 
				-        json={

			
 
				-          "model": CHAT_MODEL,

			
 
				-          "prompt": prompt,

			
 
				-          "stream": False,

			
 
				-          "options": {

			
 
				-            "num_ctx": 8192,

			
 
				-            "num_predict": 512,

			
 
				-            "temperature": 0.2,

			
 
				-            "top_p": 0.9,

			
 
				-            "repeat_penalty": 1.1,

			
 
				-          },

			
 
				-          "keep_alive": OLLAMA_KEEP_ALIVE   # ← moved outside options, uses env var

			
 
				-        },

			
 
				-        timeout=180

			
 
				-    )

			
 
				-    r.raise_for_status()

			
 
				-    data = r.json()

			
 
				-    return data.get("response", "").strip()

			
 
				-

			
 
				-def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):

			
 
				-    offset = None

			
 
				-    while True:

			
 
				-        points, offset = qc.scroll(

			
 
				-            collection_name=collection,

			
 
				-            limit=page_size,

			
 
				-            with_payload=True,

			
 
				-            with_vectors=include_vector,

			
 
				-            offset=offset,

			
 
				-            scroll_filter=qfilter

			
 
				-        )

			
 
				-        if not points:

			
 
				-            break

			
 
				-        for pt in points:

			
 
				-            yield pt

			
 
				-        if offset is None:

			
 
				-            break

			
 
				-

			
 
				-# ---- Health ----

			
 
				-@app.get("/readyz")

			
 
				-def readyz():

			
 
				-    return {"ok": True}

			
 
				-    

			
 
				-def _normalize(q: Optional[str]) -> str:

			
 
				-    return re.sub(r"\s+", " ", (q or "").strip().lower())

			
 
				-

			
 
				-def _json_dumps(o) -> str:

			
 
				-    return json.dumps(o, ensure_ascii=False, separators=(",",":"))

			
 
				-

			
 
				-# ---- Councils list (prefers payload 'council', falls back to filename token) ----

			
 
				-@app.get("/councils")

			
 
				-def councils():

			
 
				-    councils = set()

			
 
				-    offset = None

			
 
				-    # sample up to ~5k points (50 * 100)

			
 
				-    for _ in range(50):

			
 
				-        points, offset = qc.scroll(

			
 
				-            collection_name=COLLECTION,

			
 
				-            limit=100,

			
 
				-            with_payload=True,

			
 
				-            offset=offset

			
 
				-        )

			
 
				-        for pt in points:

			
 
				-            p = pt.payload or {}

			
 
				-            token = (p.get("council") or "").strip().lower()

			
 
				-            if not token:

			
 
				-                sf = (p.get("source_file") or "").lower()

			
 
				-                if sf:

			
 
				-                    token = sf.replace(".pdf", "").split("_")[0].split("-")[0]

			
 
				-            if token:

			
 
				-                councils.add(token)

			
 
				-        if offset is None:

			
 
				-            break

			
 
				-    return sorted(councils)

			
 
				-

			
 
				-# ---- Filter builders ----

			
 
				-def _mv(key: str, value: str) -> qmodels.FieldCondition:

			
 
				-    return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))

			
 
				-

			
 
				-def _mt(key: str, text: str) -> qmodels.FieldCondition:

			
 
				-    return qmodels.FieldCondition(key=key, match=qmodels.MatchText(text=text))

			
 
				-

			
 
				-def filter_tps() -> qmodels.Filter:

			
 
				-    """TPS only, exact match on corpus."""

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "tps")])

			
 
				-

			
 
				-def filter_lps(council: str) -> qmodels.Filter:

			
 
				-    """

			
 
				-    LPS for a specific council (slug), exact match on both fields.

			
 
				-    """

			
 
				-    cslug = slug(council) or council.lower()

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "lps"), _mv("council", cslug)])

			
 
				-

			
 
				-def filter_ncc() -> qmodels.Filter:

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "ncc")])

			
 
				-

			
 
				-def filter_as() -> qmodels.Filter:

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "as")])

			
 
				-

			
 
				-def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> qmodels.Filter:

			
 
				-    if not source_contains:

			
 
				-        return flt

			
 
				-    add = _mt("source_file", source_contains)

			
 
				-    if flt:

			
 
				-        # preserve existing must/should/must_not and AND the filename condition

			
 
				-        must = list(getattr(flt, "must", []) or [])

			
 
				-        must.append(add)

			
 
				-        return qmodels.Filter(

			
 
				-            must=must,

			
 
				-            should=getattr(flt, "should", None),

			
 
				-            must_not=getattr(flt, "must_not", None),

			
 
				-        )

			
 
				-    return qmodels.Filter(must=[add])

			
 
				-

			
 
				-def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):

			
 
				-    results = qc.query_points(

			
 
				-        collection_name=COLLECTION,

			
 
				-        query=vec,

			
 
				-        limit=max(1, limit),

			
 
				-        query_filter=flt,

			
 
				-        with_payload=True,

			
 
				-    )

			
 
				-    return results.points

			
 
				-

			
 
				-def render_blocks(hits) -> Tuple[List[str], List[dict]]:

			
 
				-    blocks, sources = [], []

			
 
				-    for h in hits:

			
 
				-        p = h.payload or {}

			
 
				-        src = f"{p.get('source_file')} (p.{p.get('page')} chunk {p.get('chunk_index')})"

			
 
				-        snippet = p.get("text", "")

			
 
				-        blocks.append(f"Source: {src}\nText: {snippet}")

			
 
				-        sources.append({

			
 
				-            "source_file": p.get("source_file"),

			
 
				-            "page": p.get("page"),

			
 
				-            "chunk_index": p.get("chunk_index"),

			
 
				-            "score": h.score

			
 
				-        })

			
 
				-    return blocks, sources

			
 
				-

			
 
				-def combine_context(sections: List[Tuple[str, List[str]]]) -> str:

			
 
				-    out = []

			
 
				-    for heading, blocks in sections:

			
 
				-        if not blocks:

			
 
				-            continue

			
 
				-        out.append(f"=== {heading} ===")

			
 
				-        out.extend(blocks)

			
 
				-    return "\n\n".join(out) if out else "No context found."

			
 
				-

			
 
				-def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):

			
 
				-    """

			
 
				-    Iterate through ALL points (filtered if qfilter given).

			
 
				-    For your current dataset this is fine; if it grows huge later we'll switch to a stored summary or a background job.

			
 
				-    """

			
 
				-    offset = None

			
 
				-    pages = 0

			
 
				-    while pages < max_pages:

			
 
				-        points, offset = qc.scroll(

			
 
				-            collection_name=COLLECTION,

			
 
				-            limit=page_size,

			
 
				-            with_payload=True,

			
 
				-            offset=offset,

			
 
				-            scroll_filter=qfilter

			
 
				-        )

			
 
				-        if not points:

			
 
				-            break

			
 
				-        for pt in points:

			
 
				-            yield pt

			
 
				-        pages += 1

			
 
				-        if offset is None:

			
 
				-            break

			
 
				-

			
 
				-@app.get("/admin/stats")

			
 
				-def admin_stats(council: Optional[str] = None, corpus: Optional[str] = None):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    corp = Counter()

			
 
				-    councils = Counter()

			
 
				-    total = 0

			
 
				-    for pt in _scan_points(qfilter=qfilter):

			
 
				-        p = pt.payload or {}

			
 
				-        corp[(p.get("corpus") or "").lower()] += 1

			
 
				-        if p.get("council"):

			
 
				-            councils[(p.get("council") or "").lower()] += 1

			
 
				-        total += 1

			
 
				-

			
 
				-    return {

			
 
				-        "collection": COLLECTION,

			
 
				-        "total_points": total,

			
 
				-        "by_corpus": dict(corp),

			
 
				-        "by_council": dict(councils),

			
 
				-        "note": "Counts are points (chunks), not documents.",

			
 
				-    }

			
 
				-

			
 
				-@app.get("/admin/files")

			
 
				-def admin_files(council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    if contains:

			
 
				-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=contains)))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    files = defaultdict(lambda: {"points": 0, "corpus": None, "council": None, "pages": set()})

			
 
				-    for pt in _scan_points(qfilter=qfilter):

			
 
				-        p = pt.payload or {}

			
 
				-        f = (p.get("source_file") or "").strip()

			
 
				-        if not f:

			
 
				-            continue

			
 
				-        rec = files[f]

			
 
				-        rec["points"] += 1

			
 
				-        rec["corpus"] = rec["corpus"] or p.get("corpus")

			
 
				-        rec["council"] = rec["council"] or p.get("council")

			
 
				-        if p.get("page") is not None:

			
 
				-            rec["pages"].add(p["page"])

			
 
				-

			
 
				-    # shape for output

			
 
				-    out = []

			
 
				-    for f, rec in files.items():

			
 
				-        out.append({

			
 
				-            "source_file": f,

			
 
				-            "corpus": rec["corpus"],

			
 
				-            "council": rec["council"],

			
 
				-            "points": rec["points"],

			
 
				-            "page_count_est": len(rec["pages"]) if rec["pages"] else None,

			
 
				-        })

			
 
				-

			
 
				-    # sort by points desc, limit

			
 
				-    out.sort(key=lambda x: x["points"], reverse=True)

			
 
				-    return out[:max(1, limit)]

			
 
				-

			
 
				-@app.get("/admin/sample")

			
 
				-def admin_sample(council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    samples = []

			
 
				-    for pt in _scan_points(qfilter=qfilter):

			
 
				-        p = pt.payload or {}

			
 
				-        txt = (p.get("text") or "").strip()

			
 
				-        if not txt:

			
 
				-            continue

			
 
				-        samples.append({

			
 
				-            "source_file": p.get("source_file"),

			
 
				-            "corpus": p.get("corpus"),

			
 
				-            "council": p.get("council"),

			
 
				-            "page": p.get("page"),

			
 
				-            "chunk_index": p.get("chunk_index"),

			
 
				-            "preview": (txt[:400] + "…") if len(txt) > 400 else txt

			
 
				-        })

			
 
				-        if len(samples) >= max(1, n):

			
 
				-            break

			
 
				-    return samples

			
 
				-

			
 
				-@app.get("/admin/export")

			
 
				-def admin_export(

			
 
				-    collection: str = COLLECTION,

			
 
				-    council: Optional[str] = None,

			
 
				-    corpus: Optional[str] = None,

			
 
				-    source_contains: Optional[str] = None,

			
 
				-    include_vector: bool = False,

			
 
				-    limit: Optional[int] = None

			
 
				-):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    if source_contains:

			
 
				-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    def gen():

			
 
				-        count = 0

			
 
				-        for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):

			
 
				-            obj = {

			
 
				-                "id": str(getattr(pt, "id", None)),

			
 
				-                "payload": pt.payload or {},

			
 
				-            }

			
 
				-            if include_vector:

			
 
				-                obj["vector"] = pt.vector

			
 
				-            yield json.dumps(obj, ensure_ascii=False) + "\n"

			
 
				-            count += 1

			
 
				-            if limit and count >= limit:

			
 
				-                break

			
 
				-

			
 
				-    filename = f'{collection}-{corpus or "all"}-{council or "all"}.ndjson'

			
 
				-    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}

			
 
				-    return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)

			
 
				-

			
 
				-

			
 
				-def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:

			
 
				-    """

			
 
				-    Return strict, section-specific formatting guidance for the LLM.

			
 
				-    Keep these short, prescriptive, and impossible to ignore.

			
 
				-    """

			
 
				-    sid = (section_id or "").lower()

			
 
				-

			
 
				-    # Utility bits from context

			
 
				-    zones = ctx.get("planning_zones") or []

			
 
				-    zone_label = ", ".join(zones) if zones else "the applicable zone"

			
 
				-    council_label = ctx.get("council") or ""

			
 
				-

			
 
				-    # ---- ZONING (tables of clauses like your sample) ----

			
 
				-    if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:

			
 
				-        return f"""

			
 
				-FORMAT REQUIREMENTS (MANDATORY):

			
 
				-- Produce a concise preface (≤ 2 sentences) naming {zone_label}.

			
 
				-- Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.

			
 
				-- One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.

			
 
				-- Columns (exact):

			
 
				-  | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |

			
 
				-- "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").

			
 
				-- "Topic": short label extracted from the clause heading.

			
 
				-- "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.

			
 
				-- "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".

			
 
				-- "Source": filename + page (from CONTEXT).

			
 
				-- Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.

			
 
				-- After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.

			
 
				-""".strip()

			
 
				-

			
 
				-    # ---- Codes overview list/table (optional future) ----

			
 
				-    if sid.startswith("code-"):

			
 
				-        return """

			
 
				-FORMAT REQUIREMENTS:

			
 
				-- Start with one sentence stating which Code and why it is triggered.

			
 
				-- Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.

			
 
				-- Keep to 150–250 words + table.

			
 
				-""".strip()

			
 
				-

			
 
				-    # ---- Permit Overview (concise triggers) ----

			
 
				-    if sid == "permit-overview":

			
 
				-        return """

			
 
				-FORMAT REQUIREMENTS:

			
 
				-- Produce 3 blocks with headings:

			
 
				-  1) "Project Context" – 3–5 bullet points (site, proposal, zone).

			
 
				-  2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.

			
 
				-  3) "Assessment Path" – bullet list of key clauses to assess next.

			
 
				-- Cite specific clause numbers ONLY if present in CONTEXT (include Source).

			
 
				-""".strip()

			
 
				-

			
 
				-    # ---- Default (no special formatting) ----

			
 
				-    return """

			
 
				-FORMAT REQUIREMENTS:

			
 
				-- Use concise Markdown with short paragraphs and bullets as needed.

			
 
				-- Cite briefly (filename + page) when quoting a control.

			
 
				-""".strip()

			
 
				-

			
 
				-

			
 
				-# ---- Ask (GET + POST) ----

			
 
				-class AskBody(BaseModel):

			
 
				-    # accept multiple keys from different frontends

			
 
				-    query: Optional[str] = None

			
 
				-    question: Optional[str] = None

			
 
				-    q: Optional[str] = None

			
 
				-    prompt: Optional[str] = None

			
 
				-

			
 
				-    top_k: int = 10

			
 
				-    council: Optional[str] = None

			
 
				-    include_ncc: bool = False

			
 
				-    include_standards: bool = False

			
 
				-    source_contains: Optional[str] = None

			
 
				-    scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'

			
 
				-    section_id: Optional[str] = None

			
 
				-

			
 
				-    # BYOK mode: return context blocks without calling Ollama.

			
 
				-    # The browser then calls its own LLM with the returned context + prompt.

			
 
				-    context_only: bool = False

			
 
				-

			
 
				-def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:

			
 
				-    corp = (p.get("corpus") or "").lower()

			
 
				-    council = (p.get("council") or "").lower()

			
 
				-    if scope == "local_only":

			
 
				-        return corp == "lps" and cslug and council == cslug

			
 
				-    if scope == "state_only":

			
 
				-        return corp == "tps"

			
 
				-    if scope == "state_plus_local":

			
 
				-        return corp == "tps" or (corp == "lps" and cslug and council == cslug)

			
 
				-    return True

			
 
				-    

			
 
				-def do_ask(

			
 
				-    query: str,

			
 
				-    top_k: int = 10,

			
 
				-    council: Optional[str] = None,

			
 
				-    include_ncc: bool = False,

			
 
				-    include_standards: bool = False,

			
 
				-    source_contains: Optional[str] = None,

			
 
				-    scope: str = "state_plus_local",

			
 
				-    section_id: Optional[str] = None,

			
 
				-    context_only: bool = False,

			
 
				-):

			
 
				-    vec = ollama_embed(query)

			
 
				-    cslug = slug(council) if council else None

			
 
				-

			
 
				-    # Build allowed scopes based on scope param

			
 
				-    scopes: List[Tuple[str, qmodels.Filter]] = []

			
 
				-    if scope in ("state_only", "state_plus_local", "any"):

			
 
				-        scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))

			
 
				-    if scope in ("local_only", "state_plus_local", "any") and cslug:

			
 
				-        scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))

			
 
				-    if include_ncc:

			
 
				-        scopes.append(("National Construction Code (NCC)", filter_ncc()))

			
 
				-    if include_standards:

			
 
				-        scopes.append(("Australian Standards (AS)", filter_as()))

			
 
				-

			
 
				-    # Apply additional filename filter if requested (AND)

			
 
				-    scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]

			
 
				-

			
 
				-    # Allocate limits per scope

			
 
				-    per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0

			
 
				-    per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0

			
 
				-    remaining = max(1, top_k - (per_spp + per_lps))

			
 
				-    extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))

			
 
				-    per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0

			
 
				-

			
 
				-    limits: List[int] = []

			
 
				-    for name, _ in scopes:

			
 
				-        if name.startswith("Tasmanian Planning Scheme"):

			
 
				-            limits.append(per_spp)

			
 
				-        elif name.startswith("Local Provisions Schedule"):

			
 
				-            limits.append(per_lps)

			
 
				-        else:

			
 
				-            limits.append(per_extra)

			
 
				-

			
 
				-    sections: List[Tuple[str, List[str]]] = []

			
 
				-    all_sources: List[dict] = []

			
 
				-

			
 
				-    for (name, flt), lim in zip(scopes, limits):

			
 
				-        if lim <= 0:

			
 
				-            continue

			
 
				-        hits = q_search(vec, flt, lim)

			
 
				-

			
 
				-        # Guardrail: drop any hit that violates scope/council

			
 
				-        hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]

			
 
				-

			
 
				-        blocks, sources = render_blocks(hits)

			
 
				-        sections.append((name, blocks))

			
 
				-        all_sources.extend(sources)

			
 
				-

			
 
				-    context = combine_context(sections)

			
 
				-    

			
 
				-    #format_guide = _section_format_guide(section_id, section_title="(auto)", ctx={})

			
 
				-    format_guide = _section_format_guide(

			
 
				-        section_id,

			
 
				-        section_title="(auto)",

			
 
				-        ctx={

			
 
				-            "council": council,           # from do_ask parameter

			
 
				-            "planning_zones": [],         # populate if you have zone detection

			
 
				-        }

			
 
				-    )

			
 
				-

			
 
				-    prompt = f"""

			
 
				-You are a careful planning and building compliance assistant.

			
 
				-

			
 
				-ALWAYS follow this order of authority when forming an answer:

			
 
				-1) Tasmanian Planning Scheme — State Planning Provisions (SPP). Use as the base rule-set.

			
 
				-2) Local Provisions Schedule (LPS) for the selected council. If an LPS provision modifies or overrides an SPP control, apply the LPS outcome.

			
 
				-3) (Optional) National Construction Code (NCC) — building control (separate to planning).

			
 
				-4) (Optional) Australian Standards — cite when directly relevant.

			
 
				-

			
 
				-Use ONLY the information in CONTEXT. If a clause/section is visible, quote it briefly and always cite the source file + page.

			
 
				-If something is not supported by the provided CONTEXT, say you don't know.

			
 
				-

			
 
				-CONTEXT:

			
 
				-{context}

			
 
				-

			
 
				-SECTION FORMAT GUIDANCE:

			
 
				-{format_guide}

			
 
				-

			
 
				-QUESTION: {query}

			
 
				-

			
 
				-Answer:

			
 
				-""".strip()

			
 
				-

			
 
				-    # BYOK mode: skip Ollama and return the context + prompt so the

			
 
				-    # browser can call its own LLM provider (Claude, GPT, Grok, etc.)

			
 
				-    if context_only:

			
 
				-        return {

			
 
				-            "context_only": True,

			
 
				-            "context": context,

			
 
				-            "prompt": prompt,

			
 
				-            "sources": all_sources,

			
 
				-            # Include the raw section blocks so the browser can inspect them

			
 
				-            "sections": [

			
 
				-                {"heading": name, "blocks": blocks}

			
 
				-                for name, blocks in sections

			
 
				-            ]

			
 
				-        }

			
 
				-

			
 
				-    answer = ollama_chat(prompt)

			
 
				-    return {"answer": answer, "sources": all_sources}

			
 
				-

			
 
				-

			
 
				-@app.get("/ask")

			
 
				-@limiter.limit("20/minute")

			
 
				-def ask_get(

			
 
				-    request: Request,

			
 
				-    query: str = Query(..., description="User question"),

			
 
				-    top_k: int = 10,

			
 
				-    council: Optional[str] = None,

			
 
				-    include_ncc: bool = False,

			
 
				-    include_standards: bool = False,

			
 
				-    source_contains: Optional[str] = None,

			
 
				-    scope: str = "state_plus_local",

			
 
				-    section_id: Optional[str] = None,

			
 
				-    context_only: bool = False,

			
 
				-):

			
 
				-    _verify_demo_token_if_needed(request)

			
 
				-

			
 
				-    started = time.perf_counter()

			
 
				-    out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)

			
 
				-    latency_ms = int((time.perf_counter() - started) * 1000)

			
 
				-

			
 
				-    # Telemetry insert

			
 
				-    try:

			
 
				-        ip = request.client.host if request.client else "0.0.0.0"

			
 
				-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""

			
 
				-        allow_tps = scope in ("state_only", "state_plus_local")

			
 
				-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]

			
 
				-

			
 
				-        with db() as conn:

			
 
				-            conn.execute("""

			
 
				-                INSERT INTO ask_logs

			
 
				-                    (ts, sid, ip_hash, query, normalized, allow_tps, latency_ms, model, ok, topk_json, tokens_in, tokens_out)

			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?)

			
 
				-            """, (

			
 
				-                datetime.utcnow().isoformat(),

			
 
				-                sid, ip_hash(ip), query, _normalize(query), int(allow_tps),

			
 
				-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0

			
 
				-            ))

			
 
				-            conn.commit()

			
 
				-    except Exception as e:

			
 
				-        # Don't break the request if logging fails

			
 
				-        print("[telemetry] ask_get insert failed:", e)

			
 
				-

			
 
				-    return out

			
 
				-

			
 
				-

			
 
				-@app.post("/ask")

			
 
				-@limiter.limit("20/minute")

			
 
				-def ask_post(request: Request, body: AskBody):

			
 
				-    _verify_demo_token_if_needed(request)

			
 
				-    qtxt = (body.query or body.question or body.q or body.prompt or "").strip()

			
 
				-    if not qtxt:

			
 
				-        raise HTTPException(status_code=422, detail="Missing query/question")

			
 
				-

			
 
				-    started = time.perf_counter()

			
 
				-    out = do_ask(

			
 
				-        query=qtxt,

			
 
				-        top_k=body.top_k,

			
 
				-        council=body.council,

			
 
				-        include_ncc=body.include_ncc,

			
 
				-        include_standards=body.include_standards,

			
 
				-        source_contains=body.source_contains,

			
 
				-        scope=body.scope,

			
 
				-        section_id=body.section_id,

			
 
				-        context_only=body.context_only,

			
 
				-    )

			
 
				-    latency_ms = int((time.perf_counter() - started) * 1000)

			
 
				-

			
 
				-    # Telemetry insert

			
 
				-    try:

			
 
				-        ip = request.client.host if request.client else "0.0.0.0"

			
 
				-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""

			
 
				-        allow_tps = body.scope in ("state_only", "state_plus_local")

			
 
				-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]

			
 
				-

			
 
				-        with db() as conn:

			
 
				-            conn.execute("""

			
 
				-                INSERT INTO ask_logs

			
 
				-                    (ts, sid, ip_hash, query, normalized, allow_tps, latency_ms, model, ok, topk_json, tokens_in, tokens_out)

			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?)

			
 
				-            """, (

			
 
				-                datetime.utcnow().isoformat(),

			
 
				-                sid, ip_hash(ip), qtxt, _normalize(qtxt), int(allow_tps),

			
 
				-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0

			
 
				-            ))

			
 
				-            conn.commit()

			
 
				-    except Exception as e:

			
 
				-        print("[telemetry] ask_post insert failed:", e)

			
 
				-

			
 
				-    return out
			
--- a/backend/app.py
+++ b/backend/app.py
@@ -1,744 +1,873 @@
 
				-import os, re

			
 
				-import json

			
 
				-import requests

			
 
				-import time

			
 
				-from typing import Optional, Literal, List, Tuple

			
 
				-from fastapi import FastAPI, Query, HTTPException, Request

			
 
				-from fastapi.middleware.cors import CORSMiddleware

			
 
				-from fastapi.responses import StreamingResponse

			
 
				-from slowapi.middleware import SlowAPIMiddleware

			
 
				-from slowapi import Limiter

			
 
				-from slowapi.util import get_remote_address

			
 
				-from slowapi.errors import RateLimitExceeded

			
 
				-from fastapi.responses import JSONResponse

			
 
				-from pydantic import BaseModel

			
 
				-from qdrant_client import QdrantClient

			
 
				-from qdrant_client.http import models as qmodels

			
 
				-from collections import Counter, defaultdict

			
 
				-from datetime import datetime

			
 
				-from telemetry import router as telemetry_router, db, ip_hash

			
 
				-

			
 
				-# ---- Environment ----

			
 
				-OLLAMA_URL          = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")

			
 
				-QDRANT_URL          = os.getenv("QDRANT_URL", "http://localhost:6333")

			
 
				-OLLAMA_KEEP_ALIVE   = os.getenv("OLLAMA_KEEP_ALIVE", "-1")  # -1 = keep loaded forever

			
 
				-COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")

			
 
				-EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")

			
 
				-CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")

			
 
				-CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]

			
 
				-

			
 
				-# ---- DEMO TOKEN ----

			
 
				-DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"

			
 
				-DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")

			
 
				-

			
 
				-def _verify_demo_token_if_needed(request):

			
 
				-    if not DEMO_REQUIRE_TOKEN:

			
 
				-        return

			
 
				-    auth = request.headers.get("Authorization", "")

			
 
				-    if not (auth.startswith("Bearer ") and auth.split(" ",1)[1] == DEMO_TOKEN):

			
 
				-        raise HTTPException(status_code=401, detail="Unauthorized")

			
 
				-

			
 
				-

			
 
				-# ---- FAST API ----

			
 
				-app = FastAPI()

			
 
				-# Allowed origins — always include your frontend explicitly

			
 
				-_origins = CORS_ORIGINS if CORS_ORIGINS else []

			
 
				-_allow_all = len(_origins) == 0

			
 
				-

			
 
				-app.add_middleware(

			
 
				-    CORSMiddleware,

			
 
				-    allow_origins=_origins if not _allow_all else ["*"],

			
 
				-    allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,

			
 
				-    allow_credentials=not _allow_all,   # credentials only when origins are explicit

			
 
				-    allow_methods=["GET", "POST", "OPTIONS"],

			
 
				-    allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],

			
 
				-    expose_headers=["X-TPR-SID"],

			
 
				-)

			
 
				-

			
 
				-qc = QdrantClient(url=QDRANT_URL)

			
 
				-app.include_router(telemetry_router)

			
 
				-

			
 
				-# ---- SLOW API ----

			
 
				-limiter = Limiter(key_func=get_remote_address)

			
 
				-app.state.limiter = limiter  # type: ignore

			
 
				-app.add_middleware(SlowAPIMiddleware)

			
 
				-

			
 
				-@app.exception_handler(RateLimitExceeded)

			
 
				-def ratelimit_handler(request, exc):

			
 
				-    return JSONResponse(status_code=429, content={"error":"rate_limited","detail":"Too many requests"})

			
 
				-

			
 
				-

			
 
				-# ---- Feedback endpoint ----

			
 
				-class FeedbackBody(BaseModel):

			
 
				-    verdict: str                    # "up" or "down"

			
 
				-    query: Optional[str] = None     # the question that was asked

			
 
				-    answer: Optional[str] = None    # the answer that was rated

			
 
				-    note: Optional[str] = None      # optional free-text from thumbs-down

			
 
				-    sid: Optional[str] = None       # session id from browser

			
 
				-    model: Optional[str] = None     # which model answered

			
 
				-    scope: Optional[str] = None     # which scope was used

			
 
				-    sources: Optional[list] = None  # which sources were cited

			
 
				-

			
 
				-@app.post("/feedback")

			
 
				-@limiter.limit("60/minute")

			
 
				-def feedback(request: Request, body: FeedbackBody):

			
 
				-    if body.verdict not in ("up", "down"):

			
 
				-        raise HTTPException(status_code=422, detail="verdict must be 'up' or 'down'")

			
 
				-

			
 
				-    ip  = request.client.host if request.client else "0.0.0.0"

			
 
				-    sid = body.sid or request.headers.get("X-TPR-SID") or ""

			
 
				-

			
 
				-    try:

			
 
				-        with db() as conn:

			
 
				-            conn.execute("""

			
 
				-                INSERT INTO feedback

			
 
				-                    (ts, sid, ip_hash, verdict, query, answer, note, model, scope, sources_json)

			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?)

			
 
				-            """, (

			
 
				-                datetime.utcnow().isoformat(),

			
 
				-                sid, ip_hash(ip), body.verdict,

			
 
				-                (body.query or "")[:2000],

			
 
				-                (body.answer or "")[:8000],

			
 
				-                (body.note or "")[:1000],

			
 
				-                body.model or CHAT_MODEL,

			
 
				-                body.scope or "",

			
 
				-                _json_dumps(body.sources or []),

			
 
				-            ))

			
 
				-            conn.commit()

			
 
				-    except Exception as e:

			
 
				-        print("[feedback] insert failed:", e)

			
 
				-        # Still return ok — don't surface DB errors to users

			
 
				-    return {"ok": True}

			
 
				-

			
 
				-

			
 
				-# ---- Ollama helpers ----

			
 
				-def slug(s: Optional[str]) -> Optional[str]:

			
 
				-    if not s:

			
 
				-        return None

			
 
				-    return re.sub(r'[^a-z0-9]+', '-', s.strip().lower()).strip('-') or None

			
 
				-

			
 
				-

			
 
				-def ollama_embed(text: str) -> List[float]:

			
 
				-    r = requests.post(

			
 
				-        f"{OLLAMA_URL}/api/embeddings",

			
 
				-        json={"model": EMBED_MODEL, "prompt": text},

			
 
				-        timeout=60

			
 
				-    )

			
 
				-    r.raise_for_status()

			
 
				-    data = r.json()

			
 
				-    if "embedding" not in data:

			
 
				-        raise RuntimeError(f"Ollama embeddings error: {data}")

			
 
				-    return data["embedding"]

			
 
				-

			
 
				-def ollama_chat(prompt: str) -> str:

			
 
				-    r = requests.post(

			
 
				-        f"{OLLAMA_URL}/api/generate",

			
 
				-        json={

			
 
				-          "model": CHAT_MODEL,

			
 
				-          "prompt": prompt,

			
 
				-          "stream": False,

			
 
				-          "options": {

			
 
				-            "num_ctx": 6144, # was 8192,

			
 
				-            "num_predict": 512,

			
 
				-            "temperature": 0.2,

			
 
				-            "top_p": 0.9,

			
 
				-            "repeat_penalty": 1.1,

			
 
				-          },

			
 
				-          #"keep_alive": int(OLLAMA_KEEP_ALIVE) if OLLAMA_KEEP_ALIVE.lstrip('-').isdigit() else OLLAMA_KEEP_ALIVE,   # ← moved outside options, uses env var

			
 
				-          "keep_alive": -1,

			
 
				-        },

			
 
				-        timeout=180

			
 
				-    )

			
 
				-    r.raise_for_status()

			
 
				-    data = r.json()

			
 
				-    return data.get("response", "").strip()

			
 
				-

			
 
				-def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):

			
 
				-    offset = None

			
 
				-    while True:

			
 
				-        points, offset = qc.scroll(

			
 
				-            collection_name=collection,

			
 
				-            limit=page_size,

			
 
				-            with_payload=True,

			
 
				-            with_vectors=include_vector,

			
 
				-            offset=offset,

			
 
				-            scroll_filter=qfilter

			
 
				-        )

			
 
				-        if not points:

			
 
				-            break

			
 
				-        for pt in points:

			
 
				-            yield pt

			
 
				-        if offset is None:

			
 
				-            break

			
 
				-

			
 
				-# ---- Health ----

			
 
				-@app.get("/readyz")

			
 
				-def readyz():

			
 
				-    return {"ok": True}

			
 
				-    

			
 
				-def _normalize(q: Optional[str]) -> str:

			
 
				-    return re.sub(r"\s+", " ", (q or "").strip().lower())

			
 
				-

			
 
				-def _json_dumps(o) -> str:

			
 
				-    return json.dumps(o, ensure_ascii=False, separators=(",",":"))

			
 
				-

			
 
				-# ---- Councils list (prefers payload 'council', falls back to filename token) ----

			
 
				-@app.get("/councils")

			
 
				-def councils():

			
 
				-    councils = set()

			
 
				-    offset = None

			
 
				-    # sample up to ~5k points (50 * 100)

			
 
				-    for _ in range(50):

			
 
				-        points, offset = qc.scroll(

			
 
				-            collection_name=COLLECTION,

			
 
				-            limit=100,

			
 
				-            with_payload=True,

			
 
				-            offset=offset

			
 
				-        )

			
 
				-        for pt in points:

			
 
				-            p = pt.payload or {}

			
 
				-            token = (p.get("council") or "").strip().lower()

			
 
				-            if not token:

			
 
				-                sf = (p.get("source_file") or "").lower()

			
 
				-                if sf:

			
 
				-                    token = sf.replace(".pdf", "").split("_")[0].split("-")[0]

			
 
				-            if token:

			
 
				-                councils.add(token)

			
 
				-        if offset is None:

			
 
				-            break

			
 
				-    return sorted(councils)

			
 
				-

			
 
				-# ---- Filter builders ----

			
 
				-def _mv(key: str, value: str) -> qmodels.FieldCondition:

			
 
				-    return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))

			
 
				-

			
 
				-def _mt(key: str, text: str) -> qmodels.FieldCondition:

			
 
				-    return qmodels.FieldCondition(key=key, match=qmodels.MatchText(text=text))

			
 
				-

			
 
				-def filter_tps() -> qmodels.Filter:

			
 
				-    """TPS only, exact match on corpus."""

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "tps")])

			
 
				-

			
 
				-def filter_lps(council: str) -> qmodels.Filter:

			
 
				-    """

			
 
				-    LPS for a specific council (slug), exact match on both fields.

			
 
				-    """

			
 
				-    cslug = slug(council) or council.lower()

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "lps"), _mv("council", cslug)])

			
 
				-

			
 
				-def filter_ncc() -> qmodels.Filter:

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "ncc")])

			
 
				-

			
 
				-def filter_as() -> qmodels.Filter:

			
 
				-    return qmodels.Filter(must=[_mv("corpus", "as")])

			
 
				-

			
 
				-def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> qmodels.Filter:

			
 
				-    if not source_contains:

			
 
				-        return flt

			
 
				-    add = _mt("source_file", source_contains)

			
 
				-    if flt:

			
 
				-        # preserve existing must/should/must_not and AND the filename condition

			
 
				-        must = list(getattr(flt, "must", []) or [])

			
 
				-        must.append(add)

			
 
				-        return qmodels.Filter(

			
 
				-            must=must,

			
 
				-            should=getattr(flt, "should", None),

			
 
				-            must_not=getattr(flt, "must_not", None),

			
 
				-        )

			
 
				-    return qmodels.Filter(must=[add])

			
 
				-

			
 
				-def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):

			
 
				-    results = qc.query_points(

			
 
				-        collection_name=COLLECTION,

			
 
				-        query=vec,

			
 
				-        limit=max(1, limit),

			
 
				-        query_filter=flt,

			
 
				-        with_payload=True,

			
 
				-    )

			
 
				-    return results.points

			
 
				-

			
 
				-def render_blocks(hits) -> Tuple[List[str], List[dict]]:

			
 
				-    blocks, sources = [], []

			
 
				-    for h in hits:

			
 
				-        p = h.payload or {}

			
 
				-        src = f"{p.get('source_file')} (p.{p.get('page')} chunk {p.get('chunk_index')})"

			
 
				-        snippet = p.get("text", "")

			
 
				-        blocks.append(f"Source: {src}\nText: {snippet}")

			
 
				-        sources.append({

			
 
				-            "source_file": p.get("source_file"),

			
 
				-            "page": p.get("page"),

			
 
				-            "chunk_index": p.get("chunk_index"),

			
 
				-            "score": h.score

			
 
				-        })

			
 
				-    return blocks, sources

			
 
				-

			
 
				-def combine_context(sections: List[Tuple[str, List[str]]]) -> str:

			
 
				-    out = []

			
 
				-    for heading, blocks in sections:

			
 
				-        if not blocks:

			
 
				-            continue

			
 
				-        out.append(f"=== {heading} ===")

			
 
				-        out.extend(blocks)

			
 
				-    return "\n\n".join(out) if out else "No context found."

			
 
				-

			
 
				-def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):

			
 
				-    """

			
 
				-    Iterate through ALL points (filtered if qfilter given).

			
 
				-    For your current dataset this is fine; if it grows huge later we'll switch to a stored summary or a background job.

			
 
				-    """

			
 
				-    offset = None

			
 
				-    pages = 0

			
 
				-    while pages < max_pages:

			
 
				-        points, offset = qc.scroll(

			
 
				-            collection_name=COLLECTION,

			
 
				-            limit=page_size,

			
 
				-            with_payload=True,

			
 
				-            offset=offset,

			
 
				-            scroll_filter=qfilter

			
 
				-        )

			
 
				-        if not points:

			
 
				-            break

			
 
				-        for pt in points:

			
 
				-            yield pt

			
 
				-        pages += 1

			
 
				-        if offset is None:

			
 
				-            break

			
 
				-

			
 
				-@app.get("/admin/stats")

			
 
				-def admin_stats(council: Optional[str] = None, corpus: Optional[str] = None):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    corp = Counter()

			
 
				-    councils = Counter()

			
 
				-    total = 0

			
 
				-    for pt in _scan_points(qfilter=qfilter):

			
 
				-        p = pt.payload or {}

			
 
				-        corp[(p.get("corpus") or "").lower()] += 1

			
 
				-        if p.get("council"):

			
 
				-            councils[(p.get("council") or "").lower()] += 1

			
 
				-        total += 1

			
 
				-

			
 
				-    return {

			
 
				-        "collection": COLLECTION,

			
 
				-        "total_points": total,

			
 
				-        "by_corpus": dict(corp),

			
 
				-        "by_council": dict(councils),

			
 
				-        "note": "Counts are points (chunks), not documents.",

			
 
				-    }

			
 
				-

			
 
				-@app.get("/admin/files")

			
 
				-def admin_files(council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    if contains:

			
 
				-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=contains)))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    files = defaultdict(lambda: {"points": 0, "corpus": None, "council": None, "pages": set()})

			
 
				-    for pt in _scan_points(qfilter=qfilter):

			
 
				-        p = pt.payload or {}

			
 
				-        f = (p.get("source_file") or "").strip()

			
 
				-        if not f:

			
 
				-            continue

			
 
				-        rec = files[f]

			
 
				-        rec["points"] += 1

			
 
				-        rec["corpus"] = rec["corpus"] or p.get("corpus")

			
 
				-        rec["council"] = rec["council"] or p.get("council")

			
 
				-        if p.get("page") is not None:

			
 
				-            rec["pages"].add(p["page"])

			
 
				-

			
 
				-    # shape for output

			
 
				-    out = []

			
 
				-    for f, rec in files.items():

			
 
				-        out.append({

			
 
				-            "source_file": f,

			
 
				-            "corpus": rec["corpus"],

			
 
				-            "council": rec["council"],

			
 
				-            "points": rec["points"],

			
 
				-            "page_count_est": len(rec["pages"]) if rec["pages"] else None,

			
 
				-        })

			
 
				-

			
 
				-    # sort by points desc, limit

			
 
				-    out.sort(key=lambda x: x["points"], reverse=True)

			
 
				-    return out[:max(1, limit)]

			
 
				-

			
 
				-@app.get("/admin/sample")

			
 
				-def admin_sample(council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    samples = []

			
 
				-    for pt in _scan_points(qfilter=qfilter):

			
 
				-        p = pt.payload or {}

			
 
				-        txt = (p.get("text") or "").strip()

			
 
				-        if not txt:

			
 
				-            continue

			
 
				-        samples.append({

			
 
				-            "source_file": p.get("source_file"),

			
 
				-            "corpus": p.get("corpus"),

			
 
				-            "council": p.get("council"),

			
 
				-            "page": p.get("page"),

			
 
				-            "chunk_index": p.get("chunk_index"),

			
 
				-            "preview": (txt[:400] + "…") if len(txt) > 400 else txt

			
 
				-        })

			
 
				-        if len(samples) >= max(1, n):

			
 
				-            break

			
 
				-    return samples

			
 
				-

			
 
				-@app.get("/admin/export")

			
 
				-def admin_export(

			
 
				-    collection: str = COLLECTION,

			
 
				-    council: Optional[str] = None,

			
 
				-    corpus: Optional[str] = None,

			
 
				-    source_contains: Optional[str] = None,

			
 
				-    include_vector: bool = False,

			
 
				-    limit: Optional[int] = None

			
 
				-):

			
 
				-    must = []

			
 
				-    if council:

			
 
				-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))

			
 
				-    if corpus:

			
 
				-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))

			
 
				-    if source_contains:

			
 
				-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))

			
 
				-    qfilter = qmodels.Filter(must=must) if must else None

			
 
				-

			
 
				-    def gen():

			
 
				-        count = 0

			
 
				-        for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):

			
 
				-            obj = {

			
 
				-                "id": str(getattr(pt, "id", None)),

			
 
				-                "payload": pt.payload or {},

			
 
				-            }

			
 
				-            if include_vector:

			
 
				-                obj["vector"] = pt.vector

			
 
				-            yield json.dumps(obj, ensure_ascii=False) + "\n"

			
 
				-            count += 1

			
 
				-            if limit and count >= limit:

			
 
				-                break

			
 
				-

			
 
				-    filename = f'{collection}-{corpus or "all"}-{council or "all"}.ndjson'

			
 
				-    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}

			
 
				-    return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)

			
 
				-

			
 
				-

			
 
				-def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:

			
 
				-    """

			
 
				-    Return strict, section-specific formatting guidance for the LLM.

			
 
				-    Keep these short, prescriptive, and impossible to ignore.

			
 
				-    """

			
 
				-    sid = (section_id or "").lower()

			
 
				-

			
 
				-    # Utility bits from context

			
 
				-    zones = ctx.get("planning_zones") or []

			
 
				-    zone_label = ", ".join(zones) if zones else "the applicable zone"

			
 
				-    council_label = ctx.get("council") or ""

			
 
				-

			
 
				-    # ---- ZONING (tables of clauses like your sample) ----

			
 
				-    if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:

			
 
				-        return f"""

			
 
				-            FORMAT REQUIREMENTS (MANDATORY):

			
 
				-            - Produce a concise preface (≤ 2 sentences) naming {zone_label}.

			
 
				-            - Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.

			
 
				-            - One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.

			
 
				-            - Columns (exact):

			
 
				-              | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |

			
 
				-            - "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").

			
 
				-            - "Topic": short label extracted from the clause heading.

			
 
				-            - "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.

			
 
				-            - "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".

			
 
				-            - "Source": filename + page (from CONTEXT).

			
 
				-            - Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.

			
 
				-            - After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.

			
 
				-            """.strip()

			
 
				-

			
 
				-    # ---- Codes overview list/table (optional future) ----

			
 
				-    if sid.startswith("code-"):

			
 
				-        return """

			
 
				-            FORMAT REQUIREMENTS:

			
 
				-            - Start with one sentence stating which Code and why it is triggered.

			
 
				-            - Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.

			
 
				-            - Keep to 150–250 words + table.

			
 
				-            """.strip()

			
 
				-

			
 
				-    # ---- Permit Overview (concise triggers) ----

			
 
				-    if sid == "permit-overview":

			
 
				-        return """

			
 
				-            FORMAT REQUIREMENTS:

			
 
				-            - Produce 3 blocks with headings:

			
 
				-              1) "Project Context" – 3–5 bullet points (site, proposal, zone).

			
 
				-              2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.

			
 
				-              3) "Assessment Path" – bullet list of key clauses to assess next.

			
 
				-            - Cite specific clause numbers ONLY if present in CONTEXT (include Source).

			
 
				-            """.strip()

			
 
				-

			
 
				-    # ---- Default (no special formatting) ----

			
 
				-    return """

			
 
				-        FORMAT REQUIREMENTS:

			
 
				-        - Use concise Markdown with short paragraphs and bullets as needed.

			
 
				-        - Cite briefly (filename + page) when quoting a control.

			
 
				-        """.strip()

			
 
				-

			
 
				-

			
 
				-# ---- Ask (GET + POST) ----

			
 
				-class AskBody(BaseModel):

			
 
				-    # accept multiple keys from different frontends

			
 
				-    query: Optional[str] = None

			
 
				-    question: Optional[str] = None

			
 
				-    q: Optional[str] = None

			
 
				-    prompt: Optional[str] = None

			
 
				-

			
 
				-    top_k: int = 10

			
 
				-    council: Optional[str] = None

			
 
				-    include_ncc: bool = False

			
 
				-    include_standards: bool = False

			
 
				-    source_contains: Optional[str] = None

			
 
				-    scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'

			
 
				-    section_id: Optional[str] = None

			
 
				-

			
 
				-    # BYOK mode: return context blocks without calling Ollama.

			
 
				-    # The browser then calls its own LLM with the returned context + prompt.

			
 
				-    context_only: bool = False

			
 
				-

			
 
				-def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:

			
 
				-    corp = (p.get("corpus") or "").lower()

			
 
				-    council = (p.get("council") or "").lower()

			
 
				-    if scope == "local_only":

			
 
				-        return corp == "lps" and cslug and council == cslug

			
 
				-    if scope == "state_only":

			
 
				-        return corp == "tps"

			
 
				-    if scope == "state_plus_local":

			
 
				-        return corp == "tps" or (corp == "lps" and cslug and council == cslug)

			
 
				-    return True

			
 
				-    

			
 
				-def do_ask(

			
 
				-    query: str,

			
 
				-    top_k: int = 10,

			
 
				-    council: Optional[str] = None,

			
 
				-    include_ncc: bool = False,

			
 
				-    include_standards: bool = False,

			
 
				-    source_contains: Optional[str] = None,

			
 
				-    scope: str = "state_plus_local",

			
 
				-    section_id: Optional[str] = None,

			
 
				-    context_only: bool = False,

			
 
				-):

			
 
				-    vec = ollama_embed(query)

			
 
				-    cslug = slug(council) if council else None

			
 
				-

			
 
				-    # Build allowed scopes based on scope param

			
 
				-    scopes: List[Tuple[str, qmodels.Filter]] = []

			
 
				-    if scope in ("state_only", "state_plus_local", "any"):

			
 
				-        scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))

			
 
				-    if scope in ("local_only", "state_plus_local", "any") and cslug:

			
 
				-        scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))

			
 
				-    if include_ncc:

			
 
				-        scopes.append(("National Construction Code (NCC)", filter_ncc()))

			
 
				-    if include_standards:

			
 
				-        scopes.append(("Australian Standards (AS)", filter_as()))

			
 
				-

			
 
				-    # Apply additional filename filter if requested (AND)

			
 
				-    scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]

			
 
				-

			
 
				-    # Allocate limits per scope

			
 
				-    per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0

			
 
				-    per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0

			
 
				-    remaining = max(1, top_k - (per_spp + per_lps))

			
 
				-    extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))

			
 
				-    per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0

			
 
				-

			
 
				-    limits: List[int] = []

			
 
				-    for name, _ in scopes:

			
 
				-        if name.startswith("Tasmanian Planning Scheme"):

			
 
				-            limits.append(per_spp)

			
 
				-        elif name.startswith("Local Provisions Schedule"):

			
 
				-            limits.append(per_lps)

			
 
				-        else:

			
 
				-            limits.append(per_extra)

			
 
				-

			
 
				-    sections: List[Tuple[str, List[str]]] = []

			
 
				-    all_sources: List[dict] = []

			
 
				-

			
 
				-    for (name, flt), lim in zip(scopes, limits):

			
 
				-        if lim <= 0:

			
 
				-            continue

			
 
				-        hits = q_search(vec, flt, lim)

			
 
				-

			
 
				-        # Guardrail: drop any hit that violates scope/council

			
 
				-        hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]

			
 
				-

			
 
				-        blocks, sources = render_blocks(hits)

			
 
				-        sections.append((name, blocks))

			
 
				-        all_sources.extend(sources)

			
 
				-

			
 
				-    context = combine_context(sections)

			
 
				-    

			
 
				-    #format_guide = _section_format_guide(section_id, section_title="(auto)", ctx={})

			
 
				-    format_guide = _section_format_guide(

			
 
				-        section_id,

			
 
				-        section_title="(auto)",

			
 
				-        ctx={

			
 
				-            "council": council,           # from do_ask parameter

			
 
				-            "planning_zones": [],         # populate if you have zone detection

			
 
				-        }

			
 
				-    )

			
 
				-

			
 
				-    prompt = f"""

			
 
				-You are an expert Tasmanian planning and building compliance assistant with deep knowledge of the Tasmanian Planning Scheme structure.

			
 
				-

			
 
				-## AUTHORITY ORDER — always apply in this sequence:

			
 
				-1. State Planning Provisions (SPP) — the statewide baseline. Cite clause numbers exactly.

			
 
				-2. Local Provisions Schedule (LPS) for the selected council — overrides SPP where it differs.

			
 
				-3. National Construction Code (NCC) — building controls only, keep separate from planning.

			
 
				-4. Australian Standards — only when directly referenced by a clause in CONTEXT.

			
 
				-

			
 
				-## STRICT RULES:

			
 
				-- Use ONLY information present in CONTEXT below. Never invent clause numbers, standards, or measurements.

			
 
				-- If CONTEXT does not contain enough information to answer, say: "The provided context does not cover this — check the TPSO viewer directly at tpso.planning.tas.gov.au"

			
 
				-- Every specific standard or requirement you state MUST include its source: (filename, p.N)

			
 
				-- Quote clause text briefly (1–2 lines max) then explain in plain English.

			
 
				-- Distinguish clearly between Acceptable Solutions (A) and Performance Criteria (P).

			
 
				-

			
 
				-## OUTPUT FORMAT:

			
 
				-- Use Markdown: ## for main headings, ### for sub-headings, **bold** for clause numbers.

			
 
				-- For setbacks, parking rates, or multiple standards: use a Markdown table with columns: Clause | Requirement | A or P | Source

			
 
				-- End every response with a ## Sources section listing each cited document and page.

			
 
				-- Keep answers concise but complete — do not pad or repeat information.

			
 
				-- Professional planning language; avoid informal phrasing.

			
 
				-

			
 
				-## CONTEXT (retrieved from Tasmanian Planning Scheme documents):

			
 
				-{context}

			
 
				-

			
 
				-{format_guide}

			
 
				-

			
 
				-## QUESTION:

			
 
				-{query}

			
 
				-

			
 
				-## ANSWER:

			
 
				-""".strip()

			
 
				-

			
 
				-    # BYOK mode: skip Ollama and return the context + prompt so the

			
 
				-    # browser can call its own LLM provider (Claude, GPT, Grok, etc.)

			
 
				-    if context_only:

			
 
				-        return {

			
 
				-            "context_only": True,

			
 
				-            "context": context,

			
 
				-            "prompt": prompt,

			
 
				-            "sources": all_sources,

			
 
				-            # Include the raw section blocks so the browser can inspect them

			
 
				-            "sections": [

			
 
				-                {"heading": name, "blocks": blocks}

			
 
				-                for name, blocks in sections

			
 
				-            ]

			
 
				-        }

			
 
				-

			
 
				-    answer = ollama_chat(prompt)

			
 
				-    return {"answer": answer, "sources": all_sources}

			
 
				-

			
 
				-

			
 
				-@app.get("/ask")

			
 
				-@limiter.limit("20/minute")

			
 
				-def ask_get(

			
 
				-    request: Request,

			
 
				-    query: str = Query(..., description="User question"),

			
 
				-    top_k: int = 10,

			
 
				-    council: Optional[str] = None,

			
 
				-    include_ncc: bool = False,

			
 
				-    include_standards: bool = False,

			
 
				-    source_contains: Optional[str] = None,

			
 
				-    scope: str = "state_plus_local",

			
 
				-    section_id: Optional[str] = None,

			
 
				-    context_only: bool = False,

			
 
				-):

			
 
				-    _verify_demo_token_if_needed(request)

			
 
				-

			
 
				-    started = time.perf_counter()

			
 
				-    out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)

			
 
				-    latency_ms = int((time.perf_counter() - started) * 1000)

			
 
				-

			
 
				-    # Telemetry insert

			
 
				-    try:

			
 
				-        ip = request.client.host if request.client else "0.0.0.0"

			
 
				-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""

			
 
				-        allow_tps = scope in ("state_only", "state_plus_local")

			
 
				-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]

			
 
				-

			
 
				-        with db() as conn:

			
 
				-            conn.execute("""

			
 
				-                INSERT INTO ask_logs

			
 
				-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,

			
 
				-                     model, ok, topk_json, tokens_in, tokens_out, answer)

			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)

			
 
				-            """, (

			
 
				-                datetime.utcnow().isoformat(),

			
 
				-                sid, ip_hash(ip), query, _normalize(query),

			
 
				-                scope, int(allow_tps),

			
 
				-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,

			
 
				-                (out.get("answer") or "")[:8000],       # ← truncate to 8KB max

			
 
				-            ))

			
 
				-            conn.commit()

			
 
				-    except Exception as e:

			
 
				-        # Don't break the request if logging fails

			
 
				-        print("[telemetry] ask_get insert failed:", e)

			
 
				-

			
 
				-    return out

			
 
				-

			
 
				-

			
 
				-@app.post("/ask")

			
 
				-@limiter.limit("20/minute")

			
 
				-def ask_post(request: Request, body: AskBody):

			
 
				-    _verify_demo_token_if_needed(request)

			
 
				-    qtxt = (body.query or body.question or body.q or body.prompt or "").strip()

			
 
				-    if not qtxt:

			
 
				-        raise HTTPException(status_code=422, detail="Missing query/question")

			
 
				-

			
 
				-    started = time.perf_counter()

			
 
				-    out = do_ask(

			
 
				-        query=qtxt,

			
 
				-        top_k=body.top_k,

			
 
				-        council=body.council,

			
 
				-        include_ncc=body.include_ncc,

			
 
				-        include_standards=body.include_standards,

			
 
				-        source_contains=body.source_contains,

			
 
				-        scope=body.scope,

			
 
				-        section_id=body.section_id,

			
 
				-        context_only=body.context_only,

			
 
				-    )

			
 
				-    latency_ms = int((time.perf_counter() - started) * 1000)

			
 
				-

			
 
				-    # Telemetry insert

			
 
				-    try:

			
 
				-        ip = request.client.host if request.client else "0.0.0.0"

			
 
				-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""

			
 
				-        allow_tps = body.scope in ("state_only", "state_plus_local")

			
 
				-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]

			
 
				-

			
 
				-        with db() as conn:

			
 
				-            conn.execute("""

			
 
				-                INSERT INTO ask_logs

			
 
				-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,

			
 
				-                     model, ok, topk_json, tokens_in, tokens_out, answer)

			
 
				-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)

			
 
				-            """, (

			
 
				-                datetime.utcnow().isoformat(),

			
 
				-                sid, ip_hash(ip), qtxt, _normalize(qtxt),

			
 
				-                body.scope, int(allow_tps),

			
 
				-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,

			
 
				-                (out.get("answer") or "")[:8000],   # ← truncate to 8KB max

			
 
				-            ))

			
 
				-            conn.commit()

			
 
				-    except Exception as e:

			
 
				-        print("[telemetry] ask_post insert failed:", e)

			
 
				-

			
 
				-    return out
			
 
				+"""
			
 
				+app.py — FastAPI backend for tasplanning.report
			
 
				+
			
 
				+RAG pipeline:
			
 
				+  1. Embed the user query via Ollama (nomic-embed-text)
			
 
				+  2. Search Qdrant for the closest chunks, split by corpus/scope
			
 
				+  3. Inject retrieved context into a structured prompt
			
 
				+  4. Call Ollama (llama3.1:8b) and return the answer + source citations
			
 
				+
			
 
				+BYOK mode (context_only=True): skip step 4 and return the prompt so
			
 
				+the browser can call its own LLM (Claude, GPT, Grok, local Ollama).
			
 
				+
			
 
				+Restart required after any change to this file:
			
 
				+    docker compose restart backend
			
 
				+"""
			
 
				+import os, re, hmac, logging
			
 
				+import json
			
 
				+import requests
			
 
				+import time
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+from typing import Optional, Literal, List, Tuple
			
 
				+from fastapi import FastAPI, Query, HTTPException, Request
			
 
				+from fastapi.middleware.cors import CORSMiddleware
			
 
				+from fastapi.responses import StreamingResponse
			
 
				+from slowapi.middleware import SlowAPIMiddleware
			
 
				+from slowapi.errors import RateLimitExceeded
			
 
				+from limiter import limiter
			
 
				+from fastapi.responses import JSONResponse
			
 
				+from pydantic import BaseModel
			
 
				+from qdrant_client import QdrantClient
			
 
				+from qdrant_client.http import models as qmodels
			
 
				+from collections import Counter, defaultdict
			
 
				+from datetime import datetime
			
 
				+from telemetry import router as telemetry_router, db, ip_hash
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Environment
			
 
				+# ---------------------------------------------------------------------------
			
 
				+OLLAMA_URL          = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")
			
 
				+QDRANT_URL          = os.getenv("QDRANT_URL", "http://localhost:6333")
			
 
				+OLLAMA_KEEP_ALIVE   = os.getenv("OLLAMA_KEEP_ALIVE", "-1")  # -1 = keep loaded forever
			
 
				+COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
			
 
				+EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
			
 
				+CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
			
 
				+CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Demo token gate (disabled by default)
			
 
				+# Enable by setting DEMO_REQUIRE_TOKEN=1 and DEMO_TOKEN=<secret> in .env.
			
 
				+# When enabled, every request to /ask and /admin/* must include:
			
 
				+#   Authorization: Bearer <DEMO_TOKEN>
			
 
				+# ---------------------------------------------------------------------------
			
 
				+DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"
			
 
				+DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")
			
 
				+
			
 
				+def _verify_demo_token_if_needed(request):
			
 
				+    if not DEMO_REQUIRE_TOKEN:
			
 
				+        return
			
 
				+    auth = request.headers.get("Authorization", "")
			
 
				+    parts = auth.split(" ", 1)
			
 
				+    if len(parts) != 2 or parts[0] != "Bearer":
			
 
				+        raise HTTPException(status_code=401, detail="Unauthorized")
			
 
				+    # compare_digest runs in constant time — prevents timing-based token guessing
			
 
				+    if not hmac.compare_digest(parts[1], DEMO_TOKEN):
			
 
				+        raise HTTPException(status_code=401, detail="Unauthorized")
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# FastAPI app + CORS
			
 
				+# ---------------------------------------------------------------------------
			
 
				+app = FastAPI()
			
 
				+
			
 
				+# If CORS_ORIGINS is empty (shouldn't happen in production) fall back to a
			
 
				+# wildcard with the tasplanning.report regex — credentials cannot be used
			
 
				+# with wildcard origins so allow_credentials is gated on explicit origins.
			
 
				+_origins = CORS_ORIGINS if CORS_ORIGINS else []
			
 
				+_allow_all = len(_origins) == 0
			
 
				+
			
 
				+app.add_middleware(
			
 
				+    CORSMiddleware,
			
 
				+    allow_origins=_origins if not _allow_all else ["*"],
			
 
				+    allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,
			
 
				+    allow_credentials=not _allow_all,   # credentials only when origins are explicit
			
 
				+    allow_methods=["GET", "POST", "OPTIONS"],
			
 
				+    allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],
			
 
				+    expose_headers=["X-TPR-SID"],
			
 
				+)
			
 
				+
			
 
				+qc = QdrantClient(url=QDRANT_URL)
			
 
				+app.include_router(telemetry_router)
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Rate limiting (slowapi — in-memory, per IP)
			
 
				+# Shared limiter instance lives in limiter.py to avoid circular imports with
			
 
				+# telemetry.py, which also needs to decorate its own endpoints.
			
 
				+# ---------------------------------------------------------------------------
			
 
				+app.state.limiter = limiter  # type: ignore
			
 
				+app.add_middleware(SlowAPIMiddleware)
			
 
				+
			
 
				+@app.exception_handler(RateLimitExceeded)
			
 
				+def ratelimit_handler(request, exc):
			
 
				+    return JSONResponse(status_code=429, content={"error":"rate_limited","detail":"Too many requests"})
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Feedback endpoint
			
 
				+# Stores thumbs-up/down ratings alongside the query + answer for prompt tuning.
			
 
				+# Fields are truncated before insert to keep the SQLite row size reasonable.
			
 
				+# ---------------------------------------------------------------------------
			
 
				+class FeedbackBody(BaseModel):
			
 
				+    verdict: str                    # "up" or "down"
			
 
				+    query: Optional[str] = None     # the question that was asked
			
 
				+    answer: Optional[str] = None    # the answer that was rated
			
 
				+    note: Optional[str] = None      # optional free-text from thumbs-down
			
 
				+    sid: Optional[str] = None       # session id from browser
			
 
				+    model: Optional[str] = None     # which model answered
			
 
				+    scope: Optional[str] = None     # which scope was used
			
 
				+    sources: Optional[list] = None  # which sources were cited
			
 
				+
			
 
				+@app.post("/feedback")
			
 
				+@limiter.limit("60/minute")
			
 
				+def feedback(request: Request, body: FeedbackBody):
			
 
				+    if body.verdict not in ("up", "down"):
			
 
				+        raise HTTPException(status_code=422, detail="verdict must be 'up' or 'down'")
			
 
				+
			
 
				+    ip  = request.client.host if request.client else "0.0.0.0"
			
 
				+    sid = body.sid or request.headers.get("X-TPR-SID") or ""
			
 
				+
			
 
				+    try:
			
 
				+        with db() as conn:
			
 
				+            conn.execute("""
			
 
				+                INSERT INTO feedback
			
 
				+                    (ts, sid, ip_hash, verdict, query, answer, note, model, scope, sources_json)
			
 
				+                VALUES (?,?,?,?,?,?,?,?,?,?)
			
 
				+            """, (
			
 
				+                datetime.utcnow().isoformat(),
			
 
				+                sid, ip_hash(ip), body.verdict,
			
 
				+                _trunc(body.query or "", 2000, "feedback.query"),
			
 
				+                _trunc(body.answer or "", 8000, "feedback.answer"),
			
 
				+                _trunc(body.note or "", 1000, "feedback.note"),
			
 
				+                body.model or CHAT_MODEL,
			
 
				+                body.scope or "",
			
 
				+                _json_dumps(body.sources or []),
			
 
				+            ))
			
 
				+            conn.commit()
			
 
				+    except Exception as e:
			
 
				+        logger.exception("[feedback] telemetry insert failed")
			
 
				+        # Still return ok — don't surface DB errors to users
			
 
				+    return {"ok": True}
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Ollama helpers
			
 
				+# ---------------------------------------------------------------------------
			
 
				+def slug(s: Optional[str]) -> Optional[str]:
			
 
				+    """Normalise a council name to a URL-safe slug for Qdrant filter matching."""
			
 
				+    if not s:
			
 
				+        return None
			
 
				+    return re.sub(r'[^a-z0-9]+', '-', s.strip().lower()).strip('-') or None
			
 
				+
			
 
				+
			
 
				+def ollama_embed(text: str) -> List[float]:
			
 
				+    """Call the Ollama embeddings API and return the float vector."""
			
 
				+    try:
			
 
				+        r = requests.post(
			
 
				+            f"{OLLAMA_URL}/api/embeddings",
			
 
				+            json={"model": EMBED_MODEL, "prompt": text},
			
 
				+            timeout=60
			
 
				+        )
			
 
				+        r.raise_for_status()
			
 
				+    except requests.Timeout:
			
 
				+        logger.error("Ollama embed timeout after 60s (url=%s model=%s)", OLLAMA_URL, EMBED_MODEL)
			
 
				+        raise HTTPException(status_code=503, detail="Embedding service timed out")
			
 
				+    except requests.ConnectionError:
			
 
				+        logger.error("Ollama embed connection error (url=%s)", OLLAMA_URL)
			
 
				+        raise HTTPException(status_code=503, detail="Embedding service unavailable")
			
 
				+    except requests.HTTPError as e:
			
 
				+        logger.error("Ollama embed HTTP %s: %s", e.response.status_code, e.response.text[:200])
			
 
				+        raise HTTPException(status_code=502, detail="Embedding service error")
			
 
				+    data = r.json()
			
 
				+    if "embedding" not in data:
			
 
				+        logger.error("Ollama embed unexpected response: %s", str(data)[:200])
			
 
				+        raise HTTPException(status_code=502, detail="Embedding service returned unexpected response")
			
 
				+    return data["embedding"]
			
 
				+
			
 
				+def ollama_chat(prompt: str) -> str:
			
 
				+    """
			
 
				+    Send a prompt to Ollama and return the generated text.
			
 
				+
			
 
				+    keep_alive MUST be a top-level key — putting it inside options{} causes
			
 
				+    Ollama to silently ignore it and unload the model between requests.
			
 
				+
			
 
				+    num_ctx is fixed at 6144. Changing it between requests forces Ollama to
			
 
				+    reload the model (KV cache is resized), adding ~3–5 s of cold-start latency.
			
 
				+    """
			
 
				+    try:
			
 
				+        r = requests.post(
			
 
				+            f"{OLLAMA_URL}/api/generate",
			
 
				+            json={
			
 
				+              "model": CHAT_MODEL,
			
 
				+              "prompt": prompt,
			
 
				+              "stream": False,
			
 
				+              "options": {
			
 
				+                "num_ctx": 6144, # was 8192,
			
 
				+                "num_predict": 512,
			
 
				+                "temperature": 0.2,
			
 
				+                "top_p": 0.9,
			
 
				+                "repeat_penalty": 1.1,
			
 
				+              },
			
 
				+              "keep_alive": -1,  # keep model resident in VRAM between requests
			
 
				+            },
			
 
				+            timeout=180
			
 
				+        )
			
 
				+        r.raise_for_status()
			
 
				+    except requests.Timeout:
			
 
				+        logger.error("Ollama chat timeout after 180s (url=%s model=%s)", OLLAMA_URL, CHAT_MODEL)
			
 
				+        raise HTTPException(status_code=503, detail="LLM service timed out")
			
 
				+    except requests.ConnectionError:
			
 
				+        logger.error("Ollama chat connection error (url=%s)", OLLAMA_URL)
			
 
				+        raise HTTPException(status_code=503, detail="LLM service unavailable")
			
 
				+    except requests.HTTPError as e:
			
 
				+        logger.error("Ollama chat HTTP %s: %s", e.response.status_code, e.response.text[:200])
			
 
				+        raise HTTPException(status_code=502, detail="LLM service error")
			
 
				+    data = r.json()
			
 
				+    return data.get("response", "").strip()
			
 
				+
			
 
				+def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):
			
 
				+    """
			
 
				+    Page through all points in a collection using Qdrant's scroll API.
			
 
				+    Used by /admin/export which needs vectors and supports arbitrary collections.
			
 
				+    For payload-only scans over the default collection use _scan_points instead.
			
 
				+    """
			
 
				+    offset = None
			
 
				+    while True:
			
 
				+        points, offset = qc.scroll(
			
 
				+            collection_name=collection,
			
 
				+            limit=page_size,
			
 
				+            with_payload=True,
			
 
				+            with_vectors=include_vector,
			
 
				+            offset=offset,
			
 
				+            scroll_filter=qfilter
			
 
				+        )
			
 
				+        if not points:
			
 
				+            break
			
 
				+        for pt in points:
			
 
				+            yield pt
			
 
				+        if offset is None:
			
 
				+            break
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Health + utility endpoints
			
 
				+# ---------------------------------------------------------------------------
			
 
				+@app.get("/readyz")
			
 
				+def readyz():
			
 
				+    return {"ok": True}
			
 
				+
			
 
				+def _normalize(q: Optional[str]) -> str:
			
 
				+    """Collapse whitespace and lowercase — used for dedup tracking in ask_logs."""
			
 
				+    return re.sub(r"\s+", " ", (q or "").strip().lower())
			
 
				+
			
 
				+def _json_dumps(o) -> str:
			
 
				+    return json.dumps(o, ensure_ascii=False, separators=(",",":"))
			
 
				+
			
 
				+def _trunc(s: str, limit: int, field: str) -> str:
			
 
				+    """Truncate `s` to `limit` chars. Logs a warning if truncation occurs so
			
 
				+    data loss is visible in docker logs rather than silently discarded."""
			
 
				+    if len(s) > limit:
			
 
				+        logger.warning("telemetry field %r truncated from %d to %d chars", field, len(s), limit)
			
 
				+        return s[:limit]
			
 
				+    return s
			
 
				+
			
 
				+# ---- Councils list (prefers payload 'council', falls back to filename token) ----
			
 
				+@app.get("/councils")
			
 
				+def councils():
			
 
				+    councils = set()
			
 
				+    offset = None
			
 
				+    # sample up to ~5k points (50 * 100)
			
 
				+    for _ in range(50):
			
 
				+        points, offset = qc.scroll(
			
 
				+            collection_name=COLLECTION,
			
 
				+            limit=100,
			
 
				+            with_payload=True,
			
 
				+            offset=offset
			
 
				+        )
			
 
				+        for pt in points:
			
 
				+            p = pt.payload or {}
			
 
				+            token = (p.get("council") or "").strip().lower()
			
 
				+            if not token:
			
 
				+                sf = (p.get("source_file") or "").lower()
			
 
				+                if sf:
			
 
				+                    token = sf.replace(".pdf", "").split("_")[0].split("-")[0]
			
 
				+            if token:
			
 
				+                councils.add(token)
			
 
				+        if offset is None:
			
 
				+            break
			
 
				+    return sorted(councils)
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Qdrant filter builders
			
 
				+# _mv  — exact MatchValue (keyword field, case-sensitive)
			
 
				+# _mt  — MatchText (full-text / substring match)
			
 
				+# ---------------------------------------------------------------------------
			
 
				+def _mv(key: str, value: str) -> qmodels.FieldCondition:
			
 
				+    return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))
			
 
				+
			
 
				+def _mt(key: str, text: str) -> qmodels.FieldCondition:
			
 
				+    return qmodels.FieldCondition(key=key, match=qmodels.MatchText(text=text))
			
 
				+
			
 
				+def filter_tps() -> qmodels.Filter:
			
 
				+    """TPS only, exact match on corpus."""
			
 
				+    return qmodels.Filter(must=[_mv("corpus", "tps")])
			
 
				+
			
 
				+def filter_lps(council: str) -> qmodels.Filter:
			
 
				+    """
			
 
				+    LPS for a specific council (slug), exact match on both fields.
			
 
				+    """
			
 
				+    cslug = slug(council) or council.lower()
			
 
				+    return qmodels.Filter(must=[_mv("corpus", "lps"), _mv("council", cslug)])
			
 
				+
			
 
				+def filter_ncc() -> qmodels.Filter:
			
 
				+    return qmodels.Filter(must=[_mv("corpus", "ncc")])
			
 
				+
			
 
				+def filter_as() -> qmodels.Filter:
			
 
				+    return qmodels.Filter(must=[_mv("corpus", "as")])
			
 
				+
			
 
				+def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> qmodels.Filter:
			
 
				+    """AND an additional source_file substring condition onto an existing filter."""
			
 
				+    if not source_contains:
			
 
				+        return flt
			
 
				+    add = _mt("source_file", source_contains)
			
 
				+    if flt:
			
 
				+        # preserve existing must/should/must_not and AND the filename condition
			
 
				+        must = list(getattr(flt, "must", []) or [])
			
 
				+        must.append(add)
			
 
				+        return qmodels.Filter(
			
 
				+            must=must,
			
 
				+            should=getattr(flt, "should", None),
			
 
				+            must_not=getattr(flt, "must_not", None),
			
 
				+        )
			
 
				+    return qmodels.Filter(must=[add])
			
 
				+
			
 
				+def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):
			
 
				+    """ANN vector search — returns up to `limit` scored points."""
			
 
				+    results = qc.query_points(
			
 
				+        collection_name=COLLECTION,
			
 
				+        query=vec,
			
 
				+        limit=max(1, limit),
			
 
				+        query_filter=flt,
			
 
				+        with_payload=True,
			
 
				+    )
			
 
				+    return results.points
			
 
				+
			
 
				+def render_blocks(hits) -> Tuple[List[str], List[dict]]:
			
 
				+    """Convert raw Qdrant hits into plain-text context blocks and source dicts."""
			
 
				+    blocks, sources = [], []
			
 
				+    for h in hits:
			
 
				+        p = h.payload or {}
			
 
				+        src = f"{p.get('source_file')} (p.{p.get('page')} chunk {p.get('chunk_index')})"
			
 
				+        snippet = p.get("text", "")
			
 
				+        blocks.append(f"Source: {src}\nText: {snippet}")
			
 
				+        sources.append({
			
 
				+            "source_file": p.get("source_file"),
			
 
				+            "page": p.get("page"),
			
 
				+            "chunk_index": p.get("chunk_index"),
			
 
				+            "score": h.score
			
 
				+        })
			
 
				+    return blocks, sources
			
 
				+
			
 
				+def combine_context(sections: List[Tuple[str, List[str]]]) -> str:
			
 
				+    """Join all section blocks into a single context string for the prompt."""
			
 
				+    out = []
			
 
				+    for heading, blocks in sections:
			
 
				+        if not blocks:
			
 
				+            continue
			
 
				+        out.append(f"=== {heading} ===")
			
 
				+        out.extend(blocks)
			
 
				+    return "\n\n".join(out) if out else "No context found."
			
 
				+
			
 
				+def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):
			
 
				+    """
			
 
				+    Iterate through ALL points in the default collection (payload only, no vectors).
			
 
				+    Used by /admin/stats, /admin/files, /admin/sample.
			
 
				+
			
 
				+    For the current dataset size this is fine. If the collection grows very large,
			
 
				+    switch to a pre-aggregated summary stored in a separate Qdrant collection or
			
 
				+    a background job that writes counts to SQLite.
			
 
				+    """
			
 
				+    offset = None
			
 
				+    pages = 0
			
 
				+    while pages < max_pages:
			
 
				+        points, offset = qc.scroll(
			
 
				+            collection_name=COLLECTION,
			
 
				+            limit=page_size,
			
 
				+            with_payload=True,
			
 
				+            offset=offset,
			
 
				+            scroll_filter=qfilter
			
 
				+        )
			
 
				+        if not points:
			
 
				+            break
			
 
				+        for pt in points:
			
 
				+            yield pt
			
 
				+        pages += 1
			
 
				+        if offset is None:
			
 
				+            break
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Admin endpoints — require DEMO_TOKEN when DEMO_REQUIRE_TOKEN=1
			
 
				+# All endpoints are rate-limited; /export is tighter (streams full DB).
			
 
				+# ---------------------------------------------------------------------------
			
 
				+@app.get("/admin/stats")
			
 
				+@limiter.limit("30/minute")
			
 
				+def admin_stats(request: Request, council: Optional[str] = None, corpus: Optional[str] = None):
			
 
				+    _verify_demo_token_if_needed(request)
			
 
				+    must = []
			
 
				+    if council:
			
 
				+        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
			
 
				+    if corpus:
			
 
				+        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
			
 
				+    qfilter = qmodels.Filter(must=must) if must else None
			
 
				+
			
 
				+    corp = Counter()
			
 
				+    councils = Counter()
			
 
				+    total = 0
			
 
				+    for pt in _scan_points(qfilter=qfilter):
			
 
				+        p = pt.payload or {}
			
 
				+        corp[(p.get("corpus") or "").lower()] += 1
			
 
				+        if p.get("council"):
			
 
				+            councils[(p.get("council") or "").lower()] += 1
			
 
				+        total += 1
			
 
				+
			
 
				+    return {
			
 
				+        "collection": COLLECTION,
			
 
				+        "total_points": total,
			
 
				+        "by_corpus": dict(corp),
			
 
				+        "by_council": dict(councils),
			
 
				+        "note": "Counts are points (chunks), not documents.",
			
 
				+    }
			
 
				+
			
 
				+@app.get("/admin/files")
			
 
				+@limiter.limit("30/minute")
			
 
				+def admin_files(request: Request, council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):
			
 
				+    _verify_demo_token_if_needed(request)
			
 
				+    must = []
			
 
				+    if council:
			
 
				+        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
			
 
				+    if corpus:
			
 
				+        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
			
 
				+    if contains:
			
 
				+        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=contains)))
			
 
				+    qfilter = qmodels.Filter(must=must) if must else None
			
 
				+
			
 
				+    files = defaultdict(lambda: {"points": 0, "corpus": None, "council": None, "pages": set()})
			
 
				+    for pt in _scan_points(qfilter=qfilter):
			
 
				+        p = pt.payload or {}
			
 
				+        f = (p.get("source_file") or "").strip()
			
 
				+        if not f:
			
 
				+            continue
			
 
				+        rec = files[f]
			
 
				+        rec["points"] += 1
			
 
				+        rec["corpus"] = rec["corpus"] or p.get("corpus")
			
 
				+        rec["council"] = rec["council"] or p.get("council")
			
 
				+        if p.get("page") is not None:
			
 
				+            rec["pages"].add(p["page"])
			
 
				+
			
 
				+    # shape for output
			
 
				+    out = []
			
 
				+    for f, rec in files.items():
			
 
				+        out.append({
			
 
				+            "source_file": f,
			
 
				+            "corpus": rec["corpus"],
			
 
				+            "council": rec["council"],
			
 
				+            "points": rec["points"],
			
 
				+            "page_count_est": len(rec["pages"]) if rec["pages"] else None,
			
 
				+        })
			
 
				+
			
 
				+    # sort by points desc, limit
			
 
				+    out.sort(key=lambda x: x["points"], reverse=True)
			
 
				+    return out[:max(1, limit)]
			
 
				+
			
 
				+@app.get("/admin/sample")
			
 
				+@limiter.limit("30/minute")
			
 
				+def admin_sample(request: Request, council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):
			
 
				+    _verify_demo_token_if_needed(request)
			
 
				+    must = []
			
 
				+    if council:
			
 
				+        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
			
 
				+    if corpus:
			
 
				+        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
			
 
				+    qfilter = qmodels.Filter(must=must) if must else None
			
 
				+
			
 
				+    samples = []
			
 
				+    for pt in _scan_points(qfilter=qfilter):
			
 
				+        p = pt.payload or {}
			
 
				+        txt = (p.get("text") or "").strip()
			
 
				+        if not txt:
			
 
				+            continue
			
 
				+        samples.append({
			
 
				+            "source_file": p.get("source_file"),
			
 
				+            "corpus": p.get("corpus"),
			
 
				+            "council": p.get("council"),
			
 
				+            "page": p.get("page"),
			
 
				+            "chunk_index": p.get("chunk_index"),
			
 
				+            "preview": (txt[:400] + "…") if len(txt) > 400 else txt
			
 
				+        })
			
 
				+        if len(samples) >= max(1, n):
			
 
				+            break
			
 
				+    return samples
			
 
				+
			
 
				+@app.get("/admin/export")
			
 
				+@limiter.limit("5/minute")
			
 
				+def admin_export(
			
 
				+    request: Request,
			
 
				+    collection: str = COLLECTION,
			
 
				+    council: Optional[str] = None,
			
 
				+    corpus: Optional[str] = None,
			
 
				+    source_contains: Optional[str] = None,
			
 
				+    include_vector: bool = False,
			
 
				+    limit: Optional[int] = None
			
 
				+):
			
 
				+    _verify_demo_token_if_needed(request)
			
 
				+    must = []
			
 
				+    if council:
			
 
				+        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
			
 
				+    if corpus:
			
 
				+        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
			
 
				+    if source_contains:
			
 
				+        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))
			
 
				+    qfilter = qmodels.Filter(must=must) if must else None
			
 
				+
			
 
				+    def gen():
			
 
				+        count = 0
			
 
				+        for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):
			
 
				+            obj = {
			
 
				+                "id": str(getattr(pt, "id", None)),
			
 
				+                "payload": pt.payload or {},
			
 
				+            }
			
 
				+            if include_vector:
			
 
				+                obj["vector"] = pt.vector
			
 
				+            yield json.dumps(obj, ensure_ascii=False) + "\n"
			
 
				+            count += 1
			
 
				+            if limit and count >= limit:
			
 
				+                break
			
 
				+
			
 
				+    filename = f'{collection}-{corpus or "all"}-{council or "all"}.ndjson'
			
 
				+    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
			
 
				+    return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Section-specific format guides
			
 
				+# Each section_id maps to a tightly-scoped formatting instruction injected at
			
 
				+# the end of the prompt. This steers the LLM output for structured report
			
 
				+# sections without changing the core RAG prompt.
			
 
				+# ---------------------------------------------------------------------------
			
 
				+def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:
			
 
				+    """
			
 
				+    Return strict, section-specific formatting guidance for the LLM.
			
 
				+    Keep these short, prescriptive, and impossible to ignore.
			
 
				+    """
			
 
				+    sid = (section_id or "").lower()
			
 
				+
			
 
				+    # Utility bits from context
			
 
				+    zones = ctx.get("planning_zones") or []
			
 
				+    zone_label = ", ".join(zones) if zones else "the applicable zone"
			
 
				+    council_label = ctx.get("council") or ""
			
 
				+
			
 
				+    # ---- ZONING (tables of clauses like your sample) ----
			
 
				+    if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:
			
 
				+        return f"""
			
 
				+            FORMAT REQUIREMENTS (MANDATORY):
			
 
				+            - Produce a concise preface (≤ 2 sentences) naming {zone_label}.
			
 
				+            - Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.
			
 
				+            - One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.
			
 
				+            - Columns (exact):
			
 
				+              | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |
			
 
				+            - "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").
			
 
				+            - "Topic": short label extracted from the clause heading.
			
 
				+            - "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.
			
 
				+            - "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".
			
 
				+            - "Source": filename + page (from CONTEXT).
			
 
				+            - Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.
			
 
				+            - After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.
			
 
				+            """.strip()
			
 
				+
			
 
				+    # ---- Codes overview list/table (optional future) ----
			
 
				+    if sid.startswith("code-"):
			
 
				+        return """
			
 
				+            FORMAT REQUIREMENTS:
			
 
				+            - Start with one sentence stating which Code and why it is triggered.
			
 
				+            - Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.
			
 
				+            - Keep to 150–250 words + table.
			
 
				+            """.strip()
			
 
				+
			
 
				+    # ---- Permit Overview (concise triggers) ----
			
 
				+    if sid == "permit-overview":
			
 
				+        return """
			
 
				+            FORMAT REQUIREMENTS:
			
 
				+            - Produce 3 blocks with headings:
			
 
				+              1) "Project Context" – 3–5 bullet points (site, proposal, zone).
			
 
				+              2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.
			
 
				+              3) "Assessment Path" – bullet list of key clauses to assess next.
			
 
				+            - Cite specific clause numbers ONLY if present in CONTEXT (include Source).
			
 
				+            """.strip()
			
 
				+
			
 
				+    # ---- Default (no special formatting) ----
			
 
				+    return """
			
 
				+        FORMAT REQUIREMENTS:
			
 
				+        - Use concise Markdown with short paragraphs and bullets as needed.
			
 
				+        - Cite briefly (filename + page) when quoting a control.
			
 
				+        """.strip()
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# /ask — core RAG endpoint
			
 
				+# ---------------------------------------------------------------------------
			
 
				+class AskBody(BaseModel):
			
 
				+    # accept multiple keys from different frontends
			
 
				+    query: Optional[str] = None
			
 
				+    question: Optional[str] = None
			
 
				+    q: Optional[str] = None
			
 
				+    prompt: Optional[str] = None
			
 
				+
			
 
				+    top_k: int = 10
			
 
				+    council: Optional[str] = None
			
 
				+    include_ncc: bool = False
			
 
				+    include_standards: bool = False
			
 
				+    source_contains: Optional[str] = None
			
 
				+    scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'
			
 
				+    section_id: Optional[str] = None
			
 
				+
			
 
				+    # BYOK mode: return context blocks without calling Ollama.
			
 
				+    # The browser then calls its own LLM with the returned context + prompt.
			
 
				+    context_only: bool = False
			
 
				+
			
 
				+def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:
			
 
				+    """
			
 
				+    Secondary guardrail applied after the Qdrant vector search.
			
 
				+    Qdrant filters are the primary gate; this catches any edge-case leakage
			
 
				+    (e.g. MatchText returning a partial match across corpora).
			
 
				+    """
			
 
				+    corp = (p.get("corpus") or "").lower()
			
 
				+    council = (p.get("council") or "").lower()
			
 
				+    if scope == "local_only":
			
 
				+        return corp == "lps" and cslug and council == cslug
			
 
				+    if scope == "state_only":
			
 
				+        return corp == "tps"
			
 
				+    if scope == "state_plus_local":
			
 
				+        return corp == "tps" or (corp == "lps" and cslug and council == cslug)
			
 
				+    return True
			
 
				+
			
 
				+def do_ask(
			
 
				+    query: str,
			
 
				+    top_k: int = 10,
			
 
				+    council: Optional[str] = None,
			
 
				+    include_ncc: bool = False,
			
 
				+    include_standards: bool = False,
			
 
				+    source_contains: Optional[str] = None,
			
 
				+    scope: str = "state_plus_local",
			
 
				+    section_id: Optional[str] = None,
			
 
				+    context_only: bool = False,
			
 
				+):
			
 
				+    top_k = max(1, min(top_k, 30))  # clamp: at least 1, at most 30
			
 
				+    vec = ollama_embed(query)
			
 
				+    cslug = slug(council) if council else None
			
 
				+
			
 
				+    # Build the list of (section_heading, qdrant_filter) pairs based on scope.
			
 
				+    # Each pair is searched independently so we can control the chunk budget
			
 
				+    # per corpus — avoids TPS drowning out LPS results or vice versa.
			
 
				+    scopes: List[Tuple[str, qmodels.Filter]] = []
			
 
				+    if scope in ("state_only", "state_plus_local", "any"):
			
 
				+        scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))
			
 
				+    if scope in ("local_only", "state_plus_local", "any") and cslug:
			
 
				+        scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))
			
 
				+    if include_ncc:
			
 
				+        scopes.append(("National Construction Code (NCC)", filter_ncc()))
			
 
				+    if include_standards:
			
 
				+        scopes.append(("Australian Standards (AS)", filter_as()))
			
 
				+
			
 
				+    # Apply additional filename filter if requested (AND)
			
 
				+    scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]
			
 
				+
			
 
				+    # Divide top_k across scopes: SPP and LPS each get ~1/3, the remainder
			
 
				+    # is split evenly across any extra corpora (NCC, AS).
			
 
				+    per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0
			
 
				+    per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0
			
 
				+    remaining = max(1, top_k - (per_spp + per_lps))
			
 
				+    extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))
			
 
				+    per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0
			
 
				+
			
 
				+    limits: List[int] = []
			
 
				+    for name, _ in scopes:
			
 
				+        if name.startswith("Tasmanian Planning Scheme"):
			
 
				+            limits.append(per_spp)
			
 
				+        elif name.startswith("Local Provisions Schedule"):
			
 
				+            limits.append(per_lps)
			
 
				+        else:
			
 
				+            limits.append(per_extra)
			
 
				+
			
 
				+    sections: List[Tuple[str, List[str]]] = []
			
 
				+    all_sources: List[dict] = []
			
 
				+
			
 
				+    for (name, flt), lim in zip(scopes, limits):
			
 
				+        if lim <= 0:
			
 
				+            continue
			
 
				+        hits = q_search(vec, flt, lim)
			
 
				+
			
 
				+        # Guardrail: drop any hit that violates scope/council
			
 
				+        hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]
			
 
				+
			
 
				+        blocks, sources = render_blocks(hits)
			
 
				+        sections.append((name, blocks))
			
 
				+        all_sources.extend(sources)
			
 
				+
			
 
				+    context = combine_context(sections)
			
 
				+
			
 
				+    format_guide = _section_format_guide(
			
 
				+        section_id,
			
 
				+        section_title="(auto)",
			
 
				+        ctx={
			
 
				+            "council": council,           # from do_ask parameter
			
 
				+            "planning_zones": [],         # populate if you have zone detection
			
 
				+        }
			
 
				+    )
			
 
				+
			
 
				+    prompt = f"""
			
 
				+You are an expert Tasmanian planning and building compliance assistant with deep knowledge of the Tasmanian Planning Scheme structure.
			
 
				+
			
 
				+## AUTHORITY ORDER — always apply in this sequence:
			
 
				+1. State Planning Provisions (SPP) — the statewide baseline. Cite clause numbers exactly.
			
 
				+2. Local Provisions Schedule (LPS) for the selected council — overrides SPP where it differs.
			
 
				+3. National Construction Code (NCC) — building controls only, keep separate from planning.
			
 
				+4. Australian Standards — only when directly referenced by a clause in CONTEXT.
			
 
				+
			
 
				+## STRICT RULES:
			
 
				+- Use ONLY information present in CONTEXT below. Never invent clause numbers, standards, or measurements.
			
 
				+- If CONTEXT does not contain enough information to answer, say: "The provided context does not cover this — check the TPSO viewer directly at tpso.planning.tas.gov.au"
			
 
				+- Every specific standard or requirement you state MUST include its source: (filename, p.N)
			
 
				+- Quote clause text briefly (1–2 lines max) then explain in plain English.
			
 
				+- Distinguish clearly between Acceptable Solutions (A) and Performance Criteria (P).
			
 
				+
			
 
				+## OUTPUT FORMAT:
			
 
				+- Use Markdown: ## for main headings, ### for sub-headings, **bold** for clause numbers.
			
 
				+- For setbacks, parking rates, or multiple standards: use a Markdown table with columns: Clause | Requirement | A or P | Source
			
 
				+- End every response with a ## Sources section listing each cited document and page.
			
 
				+- Keep answers concise but complete — do not pad or repeat information.
			
 
				+- Professional planning language; avoid informal phrasing.
			
 
				+
			
 
				+## CONTEXT (retrieved from Tasmanian Planning Scheme documents):
			
 
				+{context}
			
 
				+
			
 
				+{format_guide}
			
 
				+
			
 
				+## QUESTION:
			
 
				+{query}
			
 
				+
			
 
				+## ANSWER:
			
 
				+""".strip()
			
 
				+
			
 
				+    # BYOK mode: skip Ollama and return the context + prompt so the
			
 
				+    # browser can call its own LLM provider (Claude, GPT, Grok, etc.)
			
 
				+    if context_only:
			
 
				+        return {
			
 
				+            "context_only": True,
			
 
				+            "context": context,
			
 
				+            "prompt": prompt,
			
 
				+            "sources": all_sources,
			
 
				+            # Include the raw section blocks so the browser can inspect them
			
 
				+            "sections": [
			
 
				+                {"heading": name, "blocks": blocks}
			
 
				+                for name, blocks in sections
			
 
				+            ]
			
 
				+        }
			
 
				+
			
 
				+    answer = ollama_chat(prompt)
			
 
				+    return {"answer": answer, "sources": all_sources}
			
 
				+
			
 
				+
			
 
				+@app.get("/ask")
			
 
				+@limiter.limit("20/minute")
			
 
				+def ask_get(
			
 
				+    request: Request,
			
 
				+    query: str = Query(..., description="User question"),
			
 
				+    top_k: int = 10,
			
 
				+    council: Optional[str] = None,
			
 
				+    include_ncc: bool = False,
			
 
				+    include_standards: bool = False,
			
 
				+    source_contains: Optional[str] = None,
			
 
				+    scope: str = "state_plus_local",
			
 
				+    section_id: Optional[str] = None,
			
 
				+    context_only: bool = False,
			
 
				+):
			
 
				+    _verify_demo_token_if_needed(request)
			
 
				+
			
 
				+    started = time.perf_counter()
			
 
				+    out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
			
 
				+    latency_ms = int((time.perf_counter() - started) * 1000)
			
 
				+
			
 
				+    # Telemetry insert — never allowed to break the response
			
 
				+    try:
			
 
				+        ip = request.client.host if request.client else "0.0.0.0"
			
 
				+        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
			
 
				+        allow_tps = scope in ("state_only", "state_plus_local")
			
 
				+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
			
 
				+
			
 
				+        with db() as conn:
			
 
				+            conn.execute("""
			
 
				+                INSERT INTO ask_logs
			
 
				+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
			
 
				+                     model, ok, topk_json, tokens_in, tokens_out, answer)
			
 
				+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
			
 
				+            """, (
			
 
				+                datetime.utcnow().isoformat(),
			
 
				+                sid, ip_hash(ip), query, _normalize(query),
			
 
				+                scope, int(allow_tps),
			
 
				+                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
			
 
				+                _trunc(out.get("answer") or "", 8000, "ask_get.answer"),
			
 
				+            ))
			
 
				+            conn.commit()
			
 
				+    except Exception as e:
			
 
				+        logger.exception("[telemetry] ask_get insert failed")
			
 
				+
			
 
				+    return out
			
 
				+
			
 
				+
			
 
				+@app.post("/ask")
			
 
				+@limiter.limit("20/minute")
			
 
				+def ask_post(request: Request, body: AskBody):
			
 
				+    _verify_demo_token_if_needed(request)
			
 
				+    qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
			
 
				+    if not qtxt:
			
 
				+        raise HTTPException(status_code=422, detail="Missing query/question")
			
 
				+
			
 
				+    started = time.perf_counter()
			
 
				+    out = do_ask(
			
 
				+        query=qtxt,
			
 
				+        top_k=body.top_k,
			
 
				+        council=body.council,
			
 
				+        include_ncc=body.include_ncc,
			
 
				+        include_standards=body.include_standards,
			
 
				+        source_contains=body.source_contains,
			
 
				+        scope=body.scope,
			
 
				+        section_id=body.section_id,
			
 
				+        context_only=body.context_only,
			
 
				+    )
			
 
				+    latency_ms = int((time.perf_counter() - started) * 1000)
			
 
				+
			
 
				+    # Telemetry insert — never allowed to break the response
			
 
				+    try:
			
 
				+        ip = request.client.host if request.client else "0.0.0.0"
			
 
				+        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
			
 
				+        allow_tps = body.scope in ("state_only", "state_plus_local")
			
 
				+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
			
 
				+
			
 
				+        with db() as conn:
			
 
				+            conn.execute("""
			
 
				+                INSERT INTO ask_logs
			
 
				+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
			
 
				+                     model, ok, topk_json, tokens_in, tokens_out, answer)
			
 
				+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
			
 
				+            """, (
			
 
				+                datetime.utcnow().isoformat(),
			
 
				+                sid, ip_hash(ip), qtxt, _normalize(qtxt),
			
 
				+                body.scope, int(allow_tps),
			
 
				+                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
			
 
				+                _trunc(out.get("answer") or "", 8000, "ask_post.answer"),
			
 
				+            ))
			
 
				+            conn.commit()
			
 
				+    except Exception as e:
			
 
				+        logger.exception("[telemetry] ask_post insert failed")
			
 
				+
			
 
				+    return out
			
--- a/backend/limiter.py
+++ b/backend/limiter.py
@@ -0,0 +1,4 @@
 
				+from slowapi import Limiter
			
 
				+from slowapi.util import get_remote_address
			
 
				+
			
 
				+limiter = Limiter(key_func=get_remote_address)
			
--- a/backend/telemetry(1).py
+++ b/backend/telemetry(1).py
@@ -1,116 +0,0 @@
 
				-# backend/telemetry.py
			
 
				-from fastapi import APIRouter, Request, Depends
			
 
				-from pydantic import BaseModel
			
 
				-import time, hashlib, hmac, os, sqlite3, json, hashlib
			
 
				-from datetime import datetime
			
 
				-from typing import Any, Dict, List, Optional
			
 
				-
			
 
				-router = APIRouter()
			
 
				-DB_PATH = os.getenv("TPR_DB", "/data/telemetry.db")
			
 
				-IP_SECRET = os.getenv("TPR_IP_SECRET", "change-me")
			
 
				-
			
 
				-def db():
			
 
				-    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
			
 
				-    conn = sqlite3.connect(DB_PATH)
			
 
				-    try:
			
 
				-        conn.execute("PRAGMA journal_mode=WAL;")
			
 
				-    except sqlite3.OperationalError:
			
 
				-        pass  # read-only FS or WAL not supported — continue anyway
			
 
				-    return conn
			
 
				-
			
 
				-def ip_hash(ip: str) -> str:
			
 
				-    dig = hmac.new(IP_SECRET.encode(), ip.encode(), 'sha256').hexdigest()
			
 
				-    return dig[:12]
			
 
				-
			
 
				-class TelemetryEvent(BaseModel):
			
 
				-    type: str
			
 
				-    ts: str
			
 
				-    sid: str
			
 
				-    ua: Optional[str] = None
			
 
				-    data: Dict[str, Any] = {}
			
 
				-
			
 
				-@router.on_event("startup")
			
 
				-def init():
			
 
				-    try:
			
 
				-        with db() as conn:
			
 
				-            conn.executescript("""
			
 
				-            CREATE TABLE IF NOT EXISTS events (
			
 
				-              id INTEGER PRIMARY KEY AUTOINCREMENT,
			
 
				-              ts TEXT NOT NULL,
			
 
				-              type TEXT NOT NULL,
			
 
				-              sid TEXT,
			
 
				-              ua TEXT,
			
 
				-              ip_hash TEXT,
			
 
				-              data_json TEXT
			
 
				-            );
			
 
				-            CREATE TABLE IF NOT EXISTS ask_logs (
			
 
				-              id INTEGER PRIMARY KEY AUTOINCREMENT,
			
 
				-              ts TEXT NOT NULL,
			
 
				-              sid TEXT,
			
 
				-              ip_hash TEXT,
			
 
				-              query TEXT,
			
 
				-              normalized TEXT,
			
 
				-              allow_tps INTEGER,
			
 
				-              latency_ms INTEGER,
			
 
				-              model TEXT,
			
 
				-              ok INTEGER,
			
 
				-              topk_json TEXT,
			
 
				-              tokens_in INTEGER,
			
 
				-              tokens_out INTEGER
			
 
				-            );
			
 
				-            """)
			
 
				-            conn.commit()
			
 
				-    except Exception as e:
			
 
				-        print(f"[telemetry] DB init failed: {e} — telemetry will be disabled")
			
 
				-        # Do NOT re-raise — let the app start without telemetry
			
 
				-
			
 
				-@router.post("/telemetry")
			
 
				-async def telemetry(ev: TelemetryEvent, request: Request):
			
 
				-    ip = request.client.host if request.client else "0.0.0.0"
			
 
				-    with db() as conn:
			
 
				-        conn.execute(
			
 
				-          "INSERT INTO events (ts,type,sid,ua,ip_hash,data_json) VALUES (?,?,?,?,?,json(?))",
			
 
				-          (ev.ts, ev.type, ev.sid, ev.ua, ip_hash(ip), json_dumps(ev.data))
			
 
				-        )
			
 
				-        conn.commit()
			
 
				-    return {"ok": True}
			
 
				-
			
 
				-# Wrap your /ask handler to also log authoritative facts:
			
 
				-from fastapi import APIRouter
			
 
				-import json, re
			
 
				-ask_router = APIRouter()
			
 
				-
			
 
				-def normalize(q: str) -> str:
			
 
				-    return re.sub(r"\s+", " ", q.strip().lower())
			
 
				-
			
 
				-def json_dumps(o): return json.dumps(o, ensure_ascii=False, separators=(",",":"))
			
 
				-
			
 
				-@ask_router.post("/ask")
			
 
				-async def ask(req: Dict[str, Any], request: Request):
			
 
				-    t0 = time.perf_counter()
			
 
				-    ip = request.client.host if request.client else "0.0.0.0"
			
 
				-    sid = request.headers.get("x-tpr-sid") or request.cookies.get("sid") or ""
			
 
				-    query = (req.get("query") or "").strip()
			
 
				-    allow_tps = bool(req.get("allow_tps"))
			
 
				-    # ... run retrieval/LLM as you already do ...
			
 
				-    # mock result placeholders:
			
 
				-    model = "localai/llama-3.1"
			
 
				-    topk = [{"id": "doc123", "score": 0.83}]
			
 
				-    tokens_in, tokens_out = 250, 380
			
 
				-    ok = True
			
 
				-    answer = {"text": "…", "citations": topk, "model": model, "usage": {"input_tokens": tokens_in, "output_tokens": tokens_out}}
			
 
				-
			
 
				-    latency = int((time.perf_counter() - t0) * 1000)
			
 
				-
			
 
				-    with db() as conn:
			
 
				-        conn.execute("""
			
 
				-        INSERT INTO ask_logs (ts,sid,ip_hash,query,normalized,allow_tps,latency_ms,model,ok,topk_json,tokens_in,tokens_out)
			
 
				-        VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
			
 
				-        """, (
			
 
				-            datetime.utcnow().isoformat(),
			
 
				-            sid, ip_hash(ip), query, normalize(query), int(allow_tps),
			
 
				-            latency, model, int(ok), json_dumps(topk), tokens_in, tokens_out
			
 
				-        ))
			
 
				-        conn.commit()
			
 
				-
			
 
				-    return answer
			
--- a/backend/telemetry.py
+++ b/backend/telemetry.py
@@ -1,129 +1,138 @@
 
				-# backend/telemetry.py

			
 
				-from fastapi import APIRouter, Request, Depends

			
 
				-from pydantic import BaseModel

			
 
				-import time, hashlib, hmac, os

			
 
				-from datetime import datetime

			
 
				-from typing import Any, Dict, List, Optional

			
 
				-import os, sqlite3, json, hmac, hashlib

			
 
				-

			
 
				-router = APIRouter()

			
 
				-DB_PATH = os.getenv("TPR_DB", "/data/telemetry.db")

			
 
				-IP_SECRET = os.getenv("TPR_IP_SECRET", "change-me")

			
 
				-

			
 
				-def db():

			
 
				-    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

			
 
				-    conn = sqlite3.connect(DB_PATH)

			
 
				-    try:

			
 
				-        conn.execute("PRAGMA journal_mode=WAL;")

			
 
				-    except sqlite3.OperationalError:

			
 
				-        # fallback if FS forbids WAL

			
 
				-        conn.execute("PRAGMA journal_mode=DELETE;")

			
 
				-    return conn

			
 
				-

			
 
				-def ip_hash(ip: str) -> str:

			
 
				-    dig = hmac.new(IP_SECRET.encode(), ip.encode(), 'sha256').hexdigest()

			
 
				-    return dig[:12]

			
 
				-

			
 
				-class TelemetryEvent(BaseModel):

			
 
				-    type: str

			
 
				-    ts: str

			
 
				-    sid: str

			
 
				-    ua: Optional[str] = None

			
 
				-    data: Dict[str, Any] = {}

			
 
				-

			
 
				-@router.on_event("startup")

			
 
				-def init():

			
 
				-    with db() as conn:

			
 
				-        conn.executescript("""

			
 
				-        CREATE TABLE IF NOT EXISTS events (

			
 
				-          id INTEGER PRIMARY KEY AUTOINCREMENT,

			
 
				-          ts TEXT NOT NULL,

			
 
				-          type TEXT NOT NULL,

			
 
				-          sid TEXT,

			
 
				-          ua TEXT,

			
 
				-          ip_hash TEXT,

			
 
				-          data_json TEXT

			
 
				-        );

			
 
				-        CREATE TABLE IF NOT EXISTS ask_logs (

			
 
				-          id INTEGER PRIMARY KEY AUTOINCREMENT,

			
 
				-          ts TEXT NOT NULL,

			
 
				-          sid TEXT,

			
 
				-          ip_hash TEXT,

			
 
				-          query TEXT,

			
 
				-          normalized TEXT,

			
 
				-          scope TEXT,

			
 
				-          allow_tps INTEGER,

			
 
				-          latency_ms INTEGER,

			
 
				-          model TEXT,

			
 
				-          ok INTEGER,

			
 
				-          topk_json TEXT,

			
 
				-          tokens_in INTEGER,

			
 
				-          tokens_out INTEGER,

			
 
				-          answer TEXT

			
 
				-        );

			
 
				-        CREATE TABLE IF NOT EXISTS feedback (

			
 
				-          id INTEGER PRIMARY KEY AUTOINCREMENT,

			
 
				-          ts TEXT NOT NULL,

			
 
				-          sid TEXT,

			
 
				-          ip_hash TEXT,

			
 
				-          verdict TEXT NOT NULL,        -- 'up' or 'down'

			
 
				-          query TEXT,                   -- the question asked

			
 
				-          answer TEXT,                  -- the answer rated

			
 
				-          note TEXT,                    -- optional free-text note (thumbs down)

			
 
				-          model TEXT,                   -- which LLM answered

			
 
				-          scope TEXT,                   -- retrieval scope used

			
 
				-          sources_json TEXT             -- JSON array of cited sources

			
 
				-        );

			
 
				-        """)

			
 
				-        conn.commit()

			
 
				-

			
 
				-@router.post("/telemetry")

			
 
				-async def telemetry(ev: TelemetryEvent, request: Request):

			
 
				-    ip = request.client.host if request.client else "0.0.0.0"

			
 
				-    with db() as conn:

			
 
				-        conn.execute(

			
 
				-          "INSERT INTO events (ts,type,sid,ua,ip_hash,data_json) VALUES (?,?,?,?,?,json(?))",

			
 
				-          (ev.ts, ev.type, ev.sid, ev.ua, ip_hash(ip), json_dumps(ev.data))

			
 
				-        )

			
 
				-        conn.commit()

			
 
				-    return {"ok": True}

			
 
				-

			
 
				-# Wrap your /ask handler to also log authoritative facts:

			
 
				-from fastapi import APIRouter

			
 
				-import json, re

			
 
				-ask_router = APIRouter()

			
 
				-

			
 
				-def normalize(q: str) -> str:

			
 
				-    return re.sub(r"\s+", " ", q.strip().lower())

			
 
				-

			
 
				-def json_dumps(o): return json.dumps(o, ensure_ascii=False, separators=(",",":"))

			
 
				-

			
 
				-@ask_router.post("/ask")

			
 
				-async def ask(req: Dict[str, Any], request: Request):

			
 
				-    t0 = time.perf_counter()

			
 
				-    ip = request.client.host if request.client else "0.0.0.0"

			
 
				-    sid = request.headers.get("x-tpr-sid") or request.cookies.get("sid") or ""

			
 
				-    query = (req.get("query") or "").strip()

			
 
				-    allow_tps = bool(req.get("allow_tps"))

			
 
				-    # ... run retrieval/LLM as you already do ...

			
 
				-    # mock result placeholders:

			
 
				-    model = "localai/llama-3.1"

			
 
				-    topk = [{"id": "doc123", "score": 0.83}]

			
 
				-    tokens_in, tokens_out = 250, 380

			
 
				-    ok = True

			
 
				-    answer = {"text": "…", "citations": topk, "model": model, "usage": {"input_tokens": tokens_in, "output_tokens": tokens_out}}

			
 
				-

			
 
				-    latency = int((time.perf_counter() - t0) * 1000)

			
 
				-

			
 
				-    with db() as conn:

			
 
				-        conn.execute("""

			
 
				-        INSERT INTO ask_logs (ts,sid,ip_hash,query,normalized,allow_tps,latency_ms,model,ok,topk_json,tokens_in,tokens_out)

			
 
				-        VALUES (?,?,?,?,?,?,?,?,?,?,?,?)

			
 
				-        """, (

			
 
				-            datetime.utcnow().isoformat(),

			
 
				-            sid, ip_hash(ip), query, normalize(query), int(allow_tps),

			
 
				-            latency, model, int(ok), json_dumps(topk), tokens_in, tokens_out

			
 
				-        ))

			
 
				-        conn.commit()

			
 
				-

			
 
				-    return answer
			
 
				+"""
			
 
				+telemetry.py — SQLite telemetry store + /telemetry event endpoint
			
 
				+
			
 
				+Shared between app.py (ask_logs, feedback inserts) and the /telemetry
			
 
				+route (browser-side events: page_view, byok_call, etc.).
			
 
				+
			
 
				+Database layout
			
 
				+---------------
			
 
				+  ask_logs  — one row per /ask call (query, latency, model, answer, …)
			
 
				+  feedback  — thumbs up/down ratings from users
			
 
				+  events    — generic browser-side events (page views, BYOK calls, errors)
			
 
				+
			
 
				+The DB file path comes from the TPR_DB env var (default /data/telemetry.db).
			
 
				+It is bind-mounted from the host so both the backend and web containers can
			
 
				+read from the same file.
			
 
				+
			
 
				+IP hashing
			
 
				+----------
			
 
				+Client IPs are never stored in plain text. ip_hash() produces a 12-char
			
 
				+HMAC-SHA256 prefix keyed on TPR_IP_SECRET. The truncation to 12 chars is
			
 
				+intentional — it gives enough entropy to distinguish users for analytics
			
 
				+while making reversal impractical even if the hash list is leaked.
			
 
				+"""
			
 
				+from fastapi import APIRouter, Request
			
 
				+from pydantic import BaseModel
			
 
				+import os, sqlite3, json, hmac, hashlib
			
 
				+from datetime import datetime
			
 
				+from typing import Any, Dict, Optional
			
 
				+
			
 
				+router = APIRouter()
			
 
				+DB_PATH = os.getenv("TPR_DB", "/data/telemetry.db")
			
 
				+IP_SECRET = os.getenv("TPR_IP_SECRET", "change-me")
			
 
				+
			
 
				+
			
 
				+def db():
			
 
				+    """
			
 
				+    Open a SQLite connection with WAL journal mode for better concurrency.
			
 
				+    Falls back to DELETE mode on filesystems that don't support WAL (e.g. some
			
 
				+    NFS or tmpfs mounts) — this can happen in certain Docker volume configs.
			
 
				+    Returns a context-manager-compatible connection (use `with db() as conn`).
			
 
				+    """
			
 
				+    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
			
 
				+    conn = sqlite3.connect(DB_PATH)
			
 
				+    try:
			
 
				+        conn.execute("PRAGMA journal_mode=WAL;")
			
 
				+    except sqlite3.OperationalError:
			
 
				+        # fallback if FS forbids WAL
			
 
				+        conn.execute("PRAGMA journal_mode=DELETE;")
			
 
				+    return conn
			
 
				+
			
 
				+def ip_hash(ip: str) -> str:
			
 
				+    """
			
 
				+    Return a 12-char HMAC-SHA256 prefix of the IP address.
			
 
				+    Keyed on TPR_IP_SECRET — rotate the secret to invalidate all stored hashes.
			
 
				+    """
			
 
				+    dig = hmac.new(IP_SECRET.encode(), ip.encode(), 'sha256').hexdigest()
			
 
				+    return dig[:12]
			
 
				+
			
 
				+
			
 
				+class TelemetryEvent(BaseModel):
			
 
				+    type: str               # e.g. "page_view", "byok_call", "error"
			
 
				+    ts: str                 # ISO timestamp from the client (informational only)
			
 
				+    sid: str                # browser session ID
			
 
				+    ua: Optional[str] = None
			
 
				+    data: Dict[str, Any] = {}
			
 
				+
			
 
				+
			
 
				+@router.on_event("startup")
			
 
				+def init():
			
 
				+    """Create all tables if they don't exist. Safe to run on every startup."""
			
 
				+    with db() as conn:
			
 
				+        conn.executescript("""
			
 
				+        CREATE TABLE IF NOT EXISTS events (
			
 
				+          id INTEGER PRIMARY KEY AUTOINCREMENT,
			
 
				+          ts TEXT NOT NULL,
			
 
				+          type TEXT NOT NULL,
			
 
				+          sid TEXT,
			
 
				+          ua TEXT,
			
 
				+          ip_hash TEXT,
			
 
				+          data_json TEXT
			
 
				+        );
			
 
				+        CREATE TABLE IF NOT EXISTS ask_logs (
			
 
				+          id INTEGER PRIMARY KEY AUTOINCREMENT,
			
 
				+          ts TEXT NOT NULL,
			
 
				+          sid TEXT,
			
 
				+          ip_hash TEXT,
			
 
				+          query TEXT,
			
 
				+          normalized TEXT,
			
 
				+          scope TEXT,
			
 
				+          allow_tps INTEGER,
			
 
				+          latency_ms INTEGER,
			
 
				+          model TEXT,
			
 
				+          ok INTEGER,
			
 
				+          topk_json TEXT,
			
 
				+          tokens_in INTEGER,
			
 
				+          tokens_out INTEGER,
			
 
				+          answer TEXT
			
 
				+        );
			
 
				+        CREATE TABLE IF NOT EXISTS feedback (
			
 
				+          id INTEGER PRIMARY KEY AUTOINCREMENT,
			
 
				+          ts TEXT NOT NULL,
			
 
				+          sid TEXT,
			
 
				+          ip_hash TEXT,
			
 
				+          verdict TEXT NOT NULL,        -- 'up' or 'down'
			
 
				+          query TEXT,                   -- the question asked
			
 
				+          answer TEXT,                  -- the answer rated
			
 
				+          note TEXT,                    -- optional free-text note (thumbs down)
			
 
				+          model TEXT,                   -- which LLM answered
			
 
				+          scope TEXT,                   -- retrieval scope used
			
 
				+          sources_json TEXT             -- JSON array of cited sources
			
 
				+        );
			
 
				+        """)
			
 
				+        conn.commit()
			
 
				+
			
 
				+
			
 
				+@router.post("/telemetry")
			
 
				+@limiter.limit("60/minute")
			
 
				+async def telemetry(ev: TelemetryEvent, request: Request):
			
 
				+    """
			
 
				+    Record a generic browser-side event.
			
 
				+    The server-side timestamp is used for the DB row; ev.ts is stored in
			
 
				+    data_json only so dashboards can show client-reported timing if needed.
			
 
				+    """
			
 
				+    ip = request.client.host if request.client else "0.0.0.0"
			
 
				+    with db() as conn:
			
 
				+        conn.execute(
			
 
				+          "INSERT INTO events (ts,type,sid,ua,ip_hash,data_json) VALUES (?,?,?,?,?,json(?))",
			
 
				+          (datetime.utcnow().isoformat(), ev.type, ev.sid, ev.ua, ip_hash(ip), json_dumps(ev.data))
			
 
				+        )
			
 
				+        conn.commit()
			
 
				+    return {"ok": True}
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Utilities (used by app.py via `from telemetry import …`)
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				+def json_dumps(o): return json.dumps(o, ensure_ascii=False, separators=(",",":"))
			
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -87,6 +87,8 @@ services:
 
				         - SMTP_FROM_NAME=Tas Planning Assistant
			
 
				         - NOTIFY_EMAIL=ben@modulos.com.au
			
 
				         - APP_API_BASE=https://api.modulos.com.au/ask
			
 
				+        - CORS_ORIGINS=${CORS_ORIGINS:-https://tasplanning.report,https://modulosdesign.com.au,https://llm.modulos.com.au}
			
 
				+        - APP_ENV=${APP_ENV:-production}
			
 
				       depends_on:
			
 
				         - backend
			
 
				       restart: unless-stopped
			
@@ -120,7 +122,7 @@ services:
 
				             > /etc/apache2/conf-enabled/rewrite-https.conf
			
 
				 
			
 
				           printf "ServerName localhost\n" > /etc/apache2/conf-enabled/servername.conf
			
 
				-          printf "PassEnv GMAPS_API_KEY\n" > /etc/apache2/conf-enabled/passenv.conf;
			
 
				+          printf "PassEnv GMAPS_API_KEY\nPassEnv CORS_ORIGINS\nPassEnv APP_ENV\n" > /etc/apache2/conf-enabled/passenv.conf;
			
 
				 
			
 
				           chown -R www-data:www-data /var/www/html/cache || true;
			
 
				 
			
--- a/public/_bootstrap.php
+++ b/public/_bootstrap.php
@@ -0,0 +1,11 @@
 
				+<?php
			
 
				+/**
			
 
				+ * _bootstrap.php — shared PHP initialisation included at the top of every page.
			
 
				+ *
			
 
				+ * Keep this file minimal. It must be safe to include before any output,
			
 
				+ * including on pure-API endpoints like generate_planning_report.php.
			
 
				+ */
			
 
				+
			
 
				+// Single source of truth for the application timezone.
			
 
				+// Tasmania observes AEST (UTC+10) / AEDT (UTC+11) in summer.
			
 
				+date_default_timezone_set('Australia/Hobart');
			
--- a/public/byok-settings.php
+++ b/public/byok-settings.php
@@ -12,31 +12,8 @@
 
				   <link href="https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap" rel="stylesheet">

			
 
				   <link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">

			
 
				   <link rel="icon" href="/favicon.ico">

			
 
				+  <link rel="stylesheet" href="/css/design-tokens.css">

			
 
				   <style>

			
 
				-    :root {

			
 
				-      --bg:           #0b0f0e;

			
 
				-      --bg-1:         #111614;

			
 
				-      --bg-2:         #181e1b;

			
 
				-      --bg-card:      #141a17;

			
 
				-      --border:       rgba(255,255,255,0.07);

			
 
				-      --border-hover: rgba(255,255,255,0.14);

			
 
				-      --accent:       #2ddc8a;

			
 
				-      --accent-dim:   rgba(45,220,138,0.10);

			
 
				-      --accent-glow:  rgba(45,220,138,0.22);

			
 
				-      --text-primary: #eaf0ec;

			
 
				-      --text-secondary:#8fa899;

			
 
				-      --text-muted:   #4f6459;

			
 
				-      --danger:       #f08080;

			
 
				-      --warn:         #f0c060;

			
 
				-      --warn-dim:     rgba(240,192,96,0.10);

			
 
				-      --serif:        'DM Serif Display', Georgia, serif;

			
 
				-      --sans:         'DM Sans', system-ui, sans-serif;

			
 
				-      --mono:         ui-monospace, 'Cascadia Code', Menlo, monospace;

			
 
				-      --radius:       10px;

			
 
				-      --radius-lg:    16px;

			
 
				-      --radius-sm:    5px;

			
 
				-      --transition:   0.16s cubic-bezier(0.4,0,0.2,1);

			
 
				-    }

			
 
				     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

			
 
				     body {

			
 
				       font-family: var(--sans); background: var(--bg); color: var(--text-primary);

			
--- a/public/css/design-tokens.css
+++ b/public/css/design-tokens.css
@@ -0,0 +1,53 @@
 
				+/* ==========================================================================
			
 
				+   design-tokens.css — shared CSS custom properties for tasplanning.report
			
 
				+
			
 
				+   Link this file from every page's <head> BEFORE any page-specific <style>:
			
 
				+     <link rel="stylesheet" href="/css/design-tokens.css">
			
 
				+
			
 
				+   Pages that need different values for a specific token (e.g. --radius on
			
 
				+   site-report) can override just that token in a subsequent <style> block.
			
 
				+
			
 
				+   NOTE: dashboard.php uses an abbreviated naming convention (--acc, --t1,
			
 
				+   etc.) and is excluded from this file — its tokens live inline.
			
 
				+   ========================================================================== */
			
 
				+
			
 
				+:root {
			
 
				+  /* ── Backgrounds ────────────────────────────────────────────────────── */
			
 
				+  --bg:             #0b0f0e;
			
 
				+  --bg-1:           #111614;
			
 
				+  --bg-2:           #181e1b;
			
 
				+  --bg-card:        #141a17;
			
 
				+
			
 
				+  /* ── Borders ────────────────────────────────────────────────────────── */
			
 
				+  --border:         rgba(255,255,255,0.07);
			
 
				+  --border-hover:   rgba(255,255,255,0.14);
			
 
				+
			
 
				+  /* ── Accent (green) ─────────────────────────────────────────────────── */
			
 
				+  --accent:         #2ddc8a;
			
 
				+  --accent-dim:     rgba(45,220,138,0.10);
			
 
				+  --accent-glow:    rgba(45,220,138,0.22);
			
 
				+
			
 
				+  /* ── Text ───────────────────────────────────────────────────────────── */
			
 
				+  --text-primary:   #eaf0ec;
			
 
				+  --text-secondary: #8fa899;
			
 
				+  --text-muted:     #4f6459;
			
 
				+
			
 
				+  /* ── Status ─────────────────────────────────────────────────────────── */
			
 
				+  --danger:         #f08080;
			
 
				+  --warn:           #f0c060;
			
 
				+  --warn-dim:       rgba(240,192,96,0.10);
			
 
				+
			
 
				+  /* ── Typography ─────────────────────────────────────────────────────── */
			
 
				+  --serif:          'DM Serif Display', Georgia, serif;
			
 
				+  --sans:           'DM Sans', system-ui, sans-serif;
			
 
				+  --mono:           ui-monospace, 'Cascadia Code', Menlo, monospace;
			
 
				+
			
 
				+  /* ── Shape ──────────────────────────────────────────────────────────── */
			
 
				+  --radius-sm:      5px;
			
 
				+  --radius:         10px;
			
 
				+  --radius-lg:      16px;
			
 
				+  --radius-xl:      26px;
			
 
				+
			
 
				+  /* ── Motion ─────────────────────────────────────────────────────────── */
			
 
				+  --transition:     0.16s cubic-bezier(0.4,0,0.2,1);
			
 
				+}
			
--- a/public/dashboard.php
+++ b/public/dashboard.php
@@ -1,4 +1,5 @@
 
				 <?php
			
 
				+require_once __DIR__ . '/_bootstrap.php';
			
 
				 /**
			
 
				  * dashboard.php — Query monitoring dashboard
			
 
				  * Place in /home/modulos_llm/public/
			
@@ -18,10 +19,17 @@ if ($dashPass) {
 
				 }
			
 
				 
			
 
				 // Copy to a temp file www-data can open — source DB is owned by uid 10001
			
 
				+// Write to a staging path first, then rename atomically to avoid a window
			
 
				+// where the file is world-readable before chmod tightens permissions.
			
 
				 $tmpDb = sys_get_temp_dir() . '/tpr_dash_' . date('YmdH') . '.db';
			
 
				 if (!file_exists($tmpDb) || (filemtime($tmpDb) < time() - 55)) {
			
 
				-    @copy($dbPath, $tmpDb);
			
 
				-    @chmod($tmpDb, 0600);
			
 
				+    $stagingDb = $tmpDb . '.tmp';
			
 
				+    $prevUmask = umask(0177); // 0600 effective permissions on created file
			
 
				+    $copied = @copy($dbPath, $stagingDb);
			
 
				+    umask($prevUmask);
			
 
				+    if ($copied) {
			
 
				+        @rename($stagingDb, $tmpDb); // atomic replace — never briefly world-readable
			
 
				+    }
			
 
				 }
			
 
				 $readPath = file_exists($tmpDb) ? $tmpDb : $dbPath;
			
 
				 
			
@@ -44,8 +52,9 @@ if ($db) {
 
				         $stats['total'] = (int)($r['t']??0);
			
 
				         $stats['avg_latency'] = (int)($r['a']??0);
			
 
				 
			
 
				-        $r = $db->query("SELECT COUNT(*) as c FROM ask_logs WHERE ts LIKE '{$today}%'")->fetch();
			
 
				-        $stats['today'] = (int)($r['c']??0);
			
 
				+        $stmt = $db->prepare("SELECT COUNT(*) as c FROM ask_logs WHERE ts LIKE ?");
			
 
				+        $stmt->execute([$today . '%']);
			
 
				+        $stats['today'] = (int)($stmt->fetch()['c'] ?? 0);
			
 
				 
			
 
				         foreach ($db->query("SELECT verdict, COUNT(*) as c FROM feedback GROUP BY verdict")->fetchAll() as $r) {
			
 
				             if ($r['verdict']==='up')   $stats['thumbs_up']   = (int)$r['c'];
			
@@ -54,9 +63,13 @@ if ($db) {
 
				 
			
 
				         $stats['models'] = $db->query("SELECT model, COUNT(*) as c FROM ask_logs GROUP BY model ORDER BY c DESC LIMIT 5")->fetchAll();
			
 
				 
			
 
				-        $todayRows = $db->query("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts LIKE '{$today}%' ORDER BY ts DESC LIMIT 100")->fetchAll();
			
 
				+        $stmt = $db->prepare("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts LIKE ? ORDER BY ts DESC LIMIT 100");
			
 
				+        $stmt->execute([$today . '%']);
			
 
				+        $todayRows = $stmt->fetchAll();
			
 
				 
			
 
				-        $recentRows = $db->query("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts NOT LIKE '{$today}%' ORDER BY ts DESC LIMIT 200")->fetchAll();
			
 
				+        $stmt = $db->prepare("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts NOT LIKE ? ORDER BY ts DESC LIMIT 200");
			
 
				+        $stmt->execute([$today . '%']);
			
 
				+        $recentRows = $stmt->fetchAll();
			
 
				 
			
 
				         $feedbackRows = $db->query("SELECT id,ts,verdict,query,note,model,scope FROM feedback ORDER BY ts DESC LIMIT 50")->fetchAll();
			
 
				 
			
--- a/public/faq.php
+++ b/public/faq.php
@@ -14,6 +14,7 @@
 
				   <link href="https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap" rel="stylesheet">

			
 
				   <link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">

			
 
				   <link rel="icon" href="/favicon.ico">

			
 
				+  <link rel="stylesheet" href="/css/design-tokens.css">

			
 
				   <link rel="apple-touch-icon" href="/image/apple-touch-icon.png">

			
 
				 

			
 
				   <!-- FAQ structured data -->

			
@@ -91,25 +92,8 @@
 
				   </script>

			
 
				 

			
 
				   <style>

			
 
				-    :root {

			
 
				-      --bg:           #0b0f0e;

			
 
				-      --bg-1:         #111614;

			
 
				-      --bg-2:         #181e1b;

			
 
				-      --bg-card:      #141a17;

			
 
				-      --border:       rgba(255,255,255,0.07);

			
 
				-      --border-hover: rgba(255,255,255,0.14);

			
 
				-      --accent:       #2ddc8a;

			
 
				-      --accent-dim:   rgba(45,220,138,0.10);

			
 
				-      --accent-glow:  rgba(45,220,138,0.22);

			
 
				-      --text-primary: #eaf0ec;

			
 
				-      --text-secondary:#8fa899;

			
 
				-      --text-muted:   #4f6459;

			
 
				-      --serif:        'DM Serif Display', Georgia, serif;

			
 
				-      --sans:         'DM Sans', system-ui, sans-serif;

			
 
				-      --radius:       10px;

			
 
				-      --radius-lg:    16px;

			
 
				-      --transition:   0.18s cubic-bezier(0.4,0,0.2,1);

			
 
				-    }

			
 
				+    /* ── Page-specific token overrides ───────────────────────────────── */

			
 
				+    :root { --transition: 0.18s cubic-bezier(0.4,0,0.2,1); }

			
 
				 

			
 
				     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

			
 
				     html { scroll-behavior: smooth; }

			
--- a/public/generate_planning_report.php
+++ b/public/generate_planning_report.php
@@ -1,4 +1,5 @@
 
				 <?php declare(strict_types=1);
			
 
				+require_once __DIR__ . '/_bootstrap.php';
			
 
				 
			
 
				 ini_set('display_errors', '0');
			
 
				 ini_set('log_errors', '1');
			
@@ -12,15 +13,8 @@ error_reporting(E_ALL);
 
				  * Test:     curl -s -X POST -H "Content-Type: application/json" \
			
 
				  *           --data @sample.json http://localhost/internal/classes/generate_planning_report.php | jq
			
 
				  */
			
 
				-date_default_timezone_set('Australia/Hobart');
			
 
				-
			
 
				-$allowedOrigins = [
			
 
				-    'http://192.168.8.69:2380',   // your demo.html origin
			
 
				-    'http://localhost',           // handy for local dev
			
 
				-    'http://localhost:8080',
			
 
				-    'https://modulosdesign.com.au',
			
 
				-    'https://llm.modulos.com.au'
			
 
				-];
			
 
				+$corsEnv = getenv('CORS_ORIGINS') ?: 'https://tasplanning.report';
			
 
				+$allowedOrigins = array_filter(array_map('trim', explode(',', $corsEnv)));
			
 
				 $origin = $_SERVER['HTTP_ORIGIN'] ?? '';
			
 
				 if ($origin && in_array($origin, $allowedOrigins, true)) {
			
 
				     header("Access-Control-Allow-Origin: $origin");
			
@@ -50,6 +44,15 @@ try {
 
				         exit;
			
 
				     }
			
 
				 
			
 
				+    // Reject non-JSON content types early — prevents json_decode silently
			
 
				+    // returning null on form-encoded or multipart bodies.
			
 
				+    $ct = $_SERVER['CONTENT_TYPE'] ?? '';
			
 
				+    if (strpos($ct, 'application/json') === false) {
			
 
				+        http_response_code(415);
			
 
				+        echo json_encode(['ok' => false, 'error' => 'Content-Type must be application/json']);
			
 
				+        exit;
			
 
				+    }
			
 
				+
			
 
				     $raw = file_get_contents('php://input') ?: '';
			
 
				     $in  = json_decode($raw, true, 512, JSON_THROW_ON_ERROR);
			
 
				 
			
--- a/public/list_lookup.php
+++ b/public/list_lookup.php
@@ -69,7 +69,12 @@ try {
 
				     function ll_cache_set($key,$data){
			
 
				         $f = ll_cache_path($key);
			
 
				         if (!is_dir(dirname($f))) @mkdir(dirname($f), 0775, true);
			
 
				-        @file_put_contents($f, json_encode($data, JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE));
			
 
				+        // Write to a temp file then rename atomically so concurrent requests
			
 
				+        // that miss the cache simultaneously don't both write a partial file.
			
 
				+        $tmp = $f . '.tmp.' . getmypid();
			
 
				+        if (@file_put_contents($tmp, json_encode($data, JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE)) !== false) {
			
 
				+            @rename($tmp, $f);
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				     // Pre-cache key by snapped lat/lng (so we can serve before we know PID)
			
--- a/public/local_state-planning-scheme.php
+++ b/public/local_state-planning-scheme.php
@@ -26,32 +26,11 @@
 
				   <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
			
 
				   <link href="https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap" rel="stylesheet">
			
 
				   <link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">
			
 
				+  <link rel="stylesheet" href="/css/design-tokens.css">
			
 
				 
			
 
				   <style>
			
 
				-    /* ── Tokens ──────────────────────────────────────────────────────── */
			
 
				-    :root {
			
 
				-      --bg:           #0b0f0e;
			
 
				-      --bg-1:         #111614;
			
 
				-      --bg-2:         #181e1b;
			
 
				-      --bg-card:      #141a17;
			
 
				-      --border:       rgba(255,255,255,0.07);
			
 
				-      --border-hover: rgba(255,255,255,0.14);
			
 
				-      --accent:       #2ddc8a;
			
 
				-      --accent-dim:   rgba(45,220,138,0.10);
			
 
				-      --accent-glow:  rgba(45,220,138,0.22);
			
 
				-      --text-primary: #eaf0ec;
			
 
				-      --text-secondary:#8fa899;
			
 
				-      --text-muted:   #4f6459;
			
 
				-      --danger:       #f08080;
			
 
				-      --user-bg:      #1a2420;
			
 
				-      --serif:        'DM Serif Display', Georgia, serif;
			
 
				-      --sans:         'DM Sans', system-ui, sans-serif;
			
 
				-      --mono:         ui-monospace, 'Cascadia Code', Menlo, monospace;
			
 
				-      --radius:       10px;
			
 
				-      --radius-lg:    16px;
			
 
				-      --radius-sm:    5px;
			
 
				-      --transition:   0.16s cubic-bezier(0.4,0,0.2,1);
			
 
				-    }
			
 
				+    /* ── Page-specific token overrides ───────────────────────────────── */
			
 
				+    :root { --user-bg: #1a2420; }
			
 
				 
			
 
				     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
			
 
				     html, body { height: 100%; }
			
--- a/public/site-report.php
+++ b/public/site-report.php
@@ -1,4 +1,4 @@
 
				-<?php

			
 
				+<?php require_once __DIR__ . '/_bootstrap.php';

			
 
				 /* =========================================================================

			
 
				  * Site Report – Tasmanian Property Lookup (PHP 8.3+)

			
 
				  * -------------------------------------------------------------------------

			
@@ -8,8 +8,6 @@
 
				  * - Generate AI report via generate_planning_report.php

			
 
				  * - Open Section Builder with BroadcastChannel / postMessage handoff

			
 
				  * =======================================================================*/

			
 
				-date_default_timezone_set('Australia/Hobart');

			
 
				-

			
 
				 // ── Security: API key from environment only, never hardcoded ──────────────

			
 
				 // Set GMAPS_API_KEY in your .env / docker-compose environment block.

			
 
				 // The key is passed to a PHP proxy endpoint (/gmaps-key.php) so it never

			
@@ -17,10 +15,13 @@ date_default_timezone_set('Australia/Hobart');
 
				 $GMAPS_API_KEY   = getenv('GMAPS_API_KEY') ?: '';

			
 
				 $LOOKUP_ENDPOINT = './list_lookup.php';

			
 
				 $REPORT_ENDPOINT = './generate_planning_report.php';

			
 
				-$IS_LOCAL        = in_array($_SERVER['REMOTE_ADDR'] ?? '', ['127.0.0.1', '::1']);

			
 
				+// Use APP_ENV=local (set in .env) to enable the dev shortcut of inlining the

			
 
				+// Maps key directly. Never derive this from REMOTE_ADDR — inside Docker the

			
 
				+// client address is the container gateway (172.x.x.x), not 127.0.0.1, so the

			
 
				+// old check always evaluated to false and the inline path was never reachable.

			
 
				+$IS_LOCAL        = (getenv('APP_ENV') === 'local');

			
 
				 

			
 
				 // Proxy the key: expose it only if request comes from same origin

			
 
				-// For production, create a gmaps-key.php that checks a session/nonce

			
 
				 $KEY_ENDPOINT    = './gmaps-key.php';

			
 
				 ?>

			
 
				 <!doctype html>

			
@@ -42,32 +43,18 @@ $KEY_ENDPOINT    = './gmaps-key.php';
 
				   <script defer src="https://unpkg.com/leaflet-image@latest/leaflet-image.js"></script>

			
 
				   <script defer src="https://unpkg.com/html2pdf.js@0.10.1/dist/html2pdf.bundle.min.js"></script>

			
 
				   <link rel="icon" href="/favicon.ico">

			
 
				-  

			
 
				+  <link rel="stylesheet" href="/css/design-tokens.css">

			
 
				+

			
 
				   <script src="/js/api-status.js" data-api="https://api.modulos.com.au"></script>

			
 
				 

			
 
				   <style>

			
 
				-    /* ── Design tokens (matches index redesign) ─────────────────────── */

			
 
				+    /* ── Page-specific token overrides ───────────────────────────────── */

			
 
				     :root {

			
 
				-      --bg:           #0b0f0e;

			
 
				-      --bg-1:         #111614;

			
 
				-      --bg-2:         #181e1b;

			
 
				-      --bg-card:      #141a17;

			
 
				-      --border:       rgba(255,255,255,0.07);

			
 
				-      --border-hover: rgba(255,255,255,0.14);

			
 
				-      --accent:       #2ddc8a;

			
 
				-      --accent-dim:   rgba(45,220,138,0.10);

			
 
				-      --accent-glow:  rgba(45,220,138,0.22);

			
 
				-      --text-primary: #eaf0ec;

			
 
				-      --text-secondary:#8fa899;

			
 
				-      --text-muted:   #4f6459;

			
 
				-      --danger:       #f08080;

			
 
				-      --warn:         #f0b060;

			
 
				-      --serif:        'DM Serif Display', Georgia, serif;

			
 
				-      --sans:         'DM Sans', system-ui, sans-serif;

			
 
				-      --radius:       12px;

			
 
				-      --radius-lg:    18px;

			
 
				-      --radius-sm:    6px;

			
 
				-      --transition:   0.18s cubic-bezier(0.4,0,0.2,1);

			
 
				+      --warn:       #f0b060;   /* slightly warmer than the shared default */

			
 
				+      --radius:     12px;

			
 
				+      --radius-lg:  18px;

			
 
				+      --radius-sm:  6px;

			
 
				+      --transition: 0.18s cubic-bezier(0.4,0,0.2,1);

			
 
				     }

			
 
				 

			
 
				     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }