소스 검색

Update Mar27

Benjamin Harris 2 달 전
부모
커밋
350c47aeaa

+ 4 - 1
.env

@@ -1,4 +1,7 @@
 # .env  (same directory as docker-compose.yml)
 GMAPS_API_KEY=AIzaSyCQsmOScTJM4P-4WTOO9M7YO01PmTYnBIg
 TPR_IP_SECRET=mmOwQgqljUs1CPiKW3O9vvL4XGalAHojOEmB7SJLBxXBPXHbBoDCMyS8fPc62aDk
-SMTP_PASS=56cN473SblsTol4s
+SMTP_PASS=56cN473SblsTol4s
+# Set to 'local' on dev machines to enable the direct Maps key injection shortcut.
+# Leave unset (or set to 'production') on all deployed environments.
+APP_ENV=local

+ 10 - 5
.gitignore

@@ -8,18 +8,23 @@
 /.bundle
 
 # Ignore all environment files (except templates).
-/.env*
+.env.*
 .env
+!.env.example
 
 # Ignore all logfiles and tempfiles.
-/log/*
-/tmp/*
+/log/
+/tmp/
 
 # Local claude configuration
 /.claude
 
-public/vendor/*
-pdfs/*
+/public/vendor/
+/pdfs/
+
+# Python virtual environments
+/venv/
+/.venv/
 
 # Cert Files
 *.pem

+ 0 - 683
backend/app(1).py

@@ -1,683 +0,0 @@
-import os, re
-import json
-import requests
-import time
-from typing import Optional, Literal, List, Tuple
-from fastapi import FastAPI, Query, HTTPException, Request
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse
-from slowapi.middleware import SlowAPIMiddleware
-from slowapi import Limiter
-from slowapi.util import get_remote_address
-from slowapi.errors import RateLimitExceeded
-from fastapi.responses import JSONResponse
-from pydantic import BaseModel
-from qdrant_client import QdrantClient
-from qdrant_client.http import models as qmodels
-from collections import Counter, defaultdict
-from datetime import datetime
-from telemetry import router as telemetry_router, db, ip_hash
-
-# ---- Environment ----
-OLLAMA_URL          = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")
-QDRANT_URL          = os.getenv("QDRANT_URL", "http://localhost:6333")
-OLLAMA_KEEP_ALIVE   = -1
-COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
-EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
-CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b")
-CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
-
-# ---- DEMO TOKEN ----
-DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"
-DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")
-
-def _verify_demo_token_if_needed(request):
-    if not DEMO_REQUIRE_TOKEN:
-        return
-    auth = request.headers.get("Authorization", "")
-    if not (auth.startswith("Bearer ") and auth.split(" ",1)[1] == DEMO_TOKEN):
-        raise HTTPException(status_code=401, detail="Unauthorized")
-
-
-# ---- FAST API ----
-app = FastAPI()
-# Allowed origins — always include your frontend explicitly
-_origins = CORS_ORIGINS if CORS_ORIGINS else []
-_allow_all = len(_origins) == 0
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=_origins if not _allow_all else ["*"],
-    allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,
-    allow_credentials=not _allow_all,   # credentials only when origins are explicit
-    allow_methods=["GET", "POST", "OPTIONS"],
-    allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],
-    expose_headers=["X-TPR-SID"],
-)
-
-qc = QdrantClient(url=QDRANT_URL)
-app.include_router(telemetry_router)
-
-# ---- SLOW API ----
-limiter = Limiter(key_func=get_remote_address)
-app.state.limiter = limiter  # type: ignore
-app.add_middleware(SlowAPIMiddleware)
-
-@app.exception_handler(RateLimitExceeded)
-def ratelimit_handler(request, exc):
-    return JSONResponse(status_code=429, content={"error":"rate_limited","detail":"Too many requests"})
-
-
-# ---- Ollama helpers ----
-def slug(s: Optional[str]) -> Optional[str]:
-    if not s:
-        return None
-    return re.sub(r'[^a-z0-9]+', '-', s.strip().lower()).strip('-') or None
-
-
-def ollama_embed(text: str) -> List[float]:
-    r = requests.post(
-        f"{OLLAMA_URL}/api/embeddings",
-        json={"model": EMBED_MODEL, "prompt": text},
-        timeout=60
-    )
-    r.raise_for_status()
-    data = r.json()
-    if "embedding" not in data:
-        raise RuntimeError(f"Ollama embeddings error: {data}")
-    return data["embedding"]
-
-def ollama_chat(prompt: str) -> str:
-    r = requests.post(
-        f"{OLLAMA_URL}/api/generate",
-        json={
-          "model": CHAT_MODEL,
-          "prompt": prompt,
-          "stream": False,
-          "options": {
-            "num_ctx": 8192,
-            "num_predict": 512,
-            "temperature": 0.2,
-            "top_p": 0.9,
-            "repeat_penalty": 1.1,
-          },
-          "keep_alive": OLLAMA_KEEP_ALIVE   # ← moved outside options, uses env var
-        },
-        timeout=180
-    )
-    r.raise_for_status()
-    data = r.json()
-    return data.get("response", "").strip()
-
-def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):
-    offset = None
-    while True:
-        points, offset = qc.scroll(
-            collection_name=collection,
-            limit=page_size,
-            with_payload=True,
-            with_vectors=include_vector,
-            offset=offset,
-            scroll_filter=qfilter
-        )
-        if not points:
-            break
-        for pt in points:
-            yield pt
-        if offset is None:
-            break
-
-# ---- Health ----
-@app.get("/readyz")
-def readyz():
-    return {"ok": True}
-    
-def _normalize(q: Optional[str]) -> str:
-    return re.sub(r"\s+", " ", (q or "").strip().lower())
-
-def _json_dumps(o) -> str:
-    return json.dumps(o, ensure_ascii=False, separators=(",",":"))
-
-# ---- Councils list (prefers payload 'council', falls back to filename token) ----
-@app.get("/councils")
-def councils():
-    councils = set()
-    offset = None
-    # sample up to ~5k points (50 * 100)
-    for _ in range(50):
-        points, offset = qc.scroll(
-            collection_name=COLLECTION,
-            limit=100,
-            with_payload=True,
-            offset=offset
-        )
-        for pt in points:
-            p = pt.payload or {}
-            token = (p.get("council") or "").strip().lower()
-            if not token:
-                sf = (p.get("source_file") or "").lower()
-                if sf:
-                    token = sf.replace(".pdf", "").split("_")[0].split("-")[0]
-            if token:
-                councils.add(token)
-        if offset is None:
-            break
-    return sorted(councils)
-
-# ---- Filter builders ----
-def _mv(key: str, value: str) -> qmodels.FieldCondition:
-    return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))
-
-def _mt(key: str, text: str) -> qmodels.FieldCondition:
-    return qmodels.FieldCondition(key=key, match=qmodels.MatchText(text=text))
-
-def filter_tps() -> qmodels.Filter:
-    """TPS only, exact match on corpus."""
-    return qmodels.Filter(must=[_mv("corpus", "tps")])
-
-def filter_lps(council: str) -> qmodels.Filter:
-    """
-    LPS for a specific council (slug), exact match on both fields.
-    """
-    cslug = slug(council) or council.lower()
-    return qmodels.Filter(must=[_mv("corpus", "lps"), _mv("council", cslug)])
-
-def filter_ncc() -> qmodels.Filter:
-    return qmodels.Filter(must=[_mv("corpus", "ncc")])
-
-def filter_as() -> qmodels.Filter:
-    return qmodels.Filter(must=[_mv("corpus", "as")])
-
-def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> qmodels.Filter:
-    if not source_contains:
-        return flt
-    add = _mt("source_file", source_contains)
-    if flt:
-        # preserve existing must/should/must_not and AND the filename condition
-        must = list(getattr(flt, "must", []) or [])
-        must.append(add)
-        return qmodels.Filter(
-            must=must,
-            should=getattr(flt, "should", None),
-            must_not=getattr(flt, "must_not", None),
-        )
-    return qmodels.Filter(must=[add])
-
-def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):
-    results = qc.query_points(
-        collection_name=COLLECTION,
-        query=vec,
-        limit=max(1, limit),
-        query_filter=flt,
-        with_payload=True,
-    )
-    return results.points
-
-def render_blocks(hits) -> Tuple[List[str], List[dict]]:
-    blocks, sources = [], []
-    for h in hits:
-        p = h.payload or {}
-        src = f"{p.get('source_file')} (p.{p.get('page')} chunk {p.get('chunk_index')})"
-        snippet = p.get("text", "")
-        blocks.append(f"Source: {src}\nText: {snippet}")
-        sources.append({
-            "source_file": p.get("source_file"),
-            "page": p.get("page"),
-            "chunk_index": p.get("chunk_index"),
-            "score": h.score
-        })
-    return blocks, sources
-
-def combine_context(sections: List[Tuple[str, List[str]]]) -> str:
-    out = []
-    for heading, blocks in sections:
-        if not blocks:
-            continue
-        out.append(f"=== {heading} ===")
-        out.extend(blocks)
-    return "\n\n".join(out) if out else "No context found."
-
-def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):
-    """
-    Iterate through ALL points (filtered if qfilter given).
-    For your current dataset this is fine; if it grows huge later we'll switch to a stored summary or a background job.
-    """
-    offset = None
-    pages = 0
-    while pages < max_pages:
-        points, offset = qc.scroll(
-            collection_name=COLLECTION,
-            limit=page_size,
-            with_payload=True,
-            offset=offset,
-            scroll_filter=qfilter
-        )
-        if not points:
-            break
-        for pt in points:
-            yield pt
-        pages += 1
-        if offset is None:
-            break
-
-@app.get("/admin/stats")
-def admin_stats(council: Optional[str] = None, corpus: Optional[str] = None):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    corp = Counter()
-    councils = Counter()
-    total = 0
-    for pt in _scan_points(qfilter=qfilter):
-        p = pt.payload or {}
-        corp[(p.get("corpus") or "").lower()] += 1
-        if p.get("council"):
-            councils[(p.get("council") or "").lower()] += 1
-        total += 1
-
-    return {
-        "collection": COLLECTION,
-        "total_points": total,
-        "by_corpus": dict(corp),
-        "by_council": dict(councils),
-        "note": "Counts are points (chunks), not documents.",
-    }
-
-@app.get("/admin/files")
-def admin_files(council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    if contains:
-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=contains)))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    files = defaultdict(lambda: {"points": 0, "corpus": None, "council": None, "pages": set()})
-    for pt in _scan_points(qfilter=qfilter):
-        p = pt.payload or {}
-        f = (p.get("source_file") or "").strip()
-        if not f:
-            continue
-        rec = files[f]
-        rec["points"] += 1
-        rec["corpus"] = rec["corpus"] or p.get("corpus")
-        rec["council"] = rec["council"] or p.get("council")
-        if p.get("page") is not None:
-            rec["pages"].add(p["page"])
-
-    # shape for output
-    out = []
-    for f, rec in files.items():
-        out.append({
-            "source_file": f,
-            "corpus": rec["corpus"],
-            "council": rec["council"],
-            "points": rec["points"],
-            "page_count_est": len(rec["pages"]) if rec["pages"] else None,
-        })
-
-    # sort by points desc, limit
-    out.sort(key=lambda x: x["points"], reverse=True)
-    return out[:max(1, limit)]
-
-@app.get("/admin/sample")
-def admin_sample(council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    samples = []
-    for pt in _scan_points(qfilter=qfilter):
-        p = pt.payload or {}
-        txt = (p.get("text") or "").strip()
-        if not txt:
-            continue
-        samples.append({
-            "source_file": p.get("source_file"),
-            "corpus": p.get("corpus"),
-            "council": p.get("council"),
-            "page": p.get("page"),
-            "chunk_index": p.get("chunk_index"),
-            "preview": (txt[:400] + "…") if len(txt) > 400 else txt
-        })
-        if len(samples) >= max(1, n):
-            break
-    return samples
-
-@app.get("/admin/export")
-def admin_export(
-    collection: str = COLLECTION,
-    council: Optional[str] = None,
-    corpus: Optional[str] = None,
-    source_contains: Optional[str] = None,
-    include_vector: bool = False,
-    limit: Optional[int] = None
-):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    if source_contains:
-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    def gen():
-        count = 0
-        for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):
-            obj = {
-                "id": str(getattr(pt, "id", None)),
-                "payload": pt.payload or {},
-            }
-            if include_vector:
-                obj["vector"] = pt.vector
-            yield json.dumps(obj, ensure_ascii=False) + "\n"
-            count += 1
-            if limit and count >= limit:
-                break
-
-    filename = f'{collection}-{corpus or "all"}-{council or "all"}.ndjson'
-    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
-    return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)
-
-
-def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:
-    """
-    Return strict, section-specific formatting guidance for the LLM.
-    Keep these short, prescriptive, and impossible to ignore.
-    """
-    sid = (section_id or "").lower()
-
-    # Utility bits from context
-    zones = ctx.get("planning_zones") or []
-    zone_label = ", ".join(zones) if zones else "the applicable zone"
-    council_label = ctx.get("council") or ""
-
-    # ---- ZONING (tables of clauses like your sample) ----
-    if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:
-        return f"""
-FORMAT REQUIREMENTS (MANDATORY):
-- Produce a concise preface (≤ 2 sentences) naming {zone_label}.
-- Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.
-- One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.
-- Columns (exact):
-  | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |
-- "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").
-- "Topic": short label extracted from the clause heading.
-- "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.
-- "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".
-- "Source": filename + page (from CONTEXT).
-- Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.
-- After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.
-""".strip()
-
-    # ---- Codes overview list/table (optional future) ----
-    if sid.startswith("code-"):
-        return """
-FORMAT REQUIREMENTS:
-- Start with one sentence stating which Code and why it is triggered.
-- Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.
-- Keep to 150–250 words + table.
-""".strip()
-
-    # ---- Permit Overview (concise triggers) ----
-    if sid == "permit-overview":
-        return """
-FORMAT REQUIREMENTS:
-- Produce 3 blocks with headings:
-  1) "Project Context" – 3–5 bullet points (site, proposal, zone).
-  2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.
-  3) "Assessment Path" – bullet list of key clauses to assess next.
-- Cite specific clause numbers ONLY if present in CONTEXT (include Source).
-""".strip()
-
-    # ---- Default (no special formatting) ----
-    return """
-FORMAT REQUIREMENTS:
-- Use concise Markdown with short paragraphs and bullets as needed.
-- Cite briefly (filename + page) when quoting a control.
-""".strip()
-
-
-# ---- Ask (GET + POST) ----
-class AskBody(BaseModel):
-    # accept multiple keys from different frontends
-    query: Optional[str] = None
-    question: Optional[str] = None
-    q: Optional[str] = None
-    prompt: Optional[str] = None
-
-    top_k: int = 10
-    council: Optional[str] = None
-    include_ncc: bool = False
-    include_standards: bool = False
-    source_contains: Optional[str] = None
-    scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'
-    section_id: Optional[str] = None
-
-    # BYOK mode: return context blocks without calling Ollama.
-    # The browser then calls its own LLM with the returned context + prompt.
-    context_only: bool = False
-
-def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:
-    corp = (p.get("corpus") or "").lower()
-    council = (p.get("council") or "").lower()
-    if scope == "local_only":
-        return corp == "lps" and cslug and council == cslug
-    if scope == "state_only":
-        return corp == "tps"
-    if scope == "state_plus_local":
-        return corp == "tps" or (corp == "lps" and cslug and council == cslug)
-    return True
-    
-def do_ask(
-    query: str,
-    top_k: int = 10,
-    council: Optional[str] = None,
-    include_ncc: bool = False,
-    include_standards: bool = False,
-    source_contains: Optional[str] = None,
-    scope: str = "state_plus_local",
-    section_id: Optional[str] = None,
-    context_only: bool = False,
-):
-    vec = ollama_embed(query)
-    cslug = slug(council) if council else None
-
-    # Build allowed scopes based on scope param
-    scopes: List[Tuple[str, qmodels.Filter]] = []
-    if scope in ("state_only", "state_plus_local", "any"):
-        scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))
-    if scope in ("local_only", "state_plus_local", "any") and cslug:
-        scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))
-    if include_ncc:
-        scopes.append(("National Construction Code (NCC)", filter_ncc()))
-    if include_standards:
-        scopes.append(("Australian Standards (AS)", filter_as()))
-
-    # Apply additional filename filter if requested (AND)
-    scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]
-
-    # Allocate limits per scope
-    per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0
-    per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0
-    remaining = max(1, top_k - (per_spp + per_lps))
-    extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))
-    per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0
-
-    limits: List[int] = []
-    for name, _ in scopes:
-        if name.startswith("Tasmanian Planning Scheme"):
-            limits.append(per_spp)
-        elif name.startswith("Local Provisions Schedule"):
-            limits.append(per_lps)
-        else:
-            limits.append(per_extra)
-
-    sections: List[Tuple[str, List[str]]] = []
-    all_sources: List[dict] = []
-
-    for (name, flt), lim in zip(scopes, limits):
-        if lim <= 0:
-            continue
-        hits = q_search(vec, flt, lim)
-
-        # Guardrail: drop any hit that violates scope/council
-        hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]
-
-        blocks, sources = render_blocks(hits)
-        sections.append((name, blocks))
-        all_sources.extend(sources)
-
-    context = combine_context(sections)
-    
-    #format_guide = _section_format_guide(section_id, section_title="(auto)", ctx={})
-    format_guide = _section_format_guide(
-        section_id,
-        section_title="(auto)",
-        ctx={
-            "council": council,           # from do_ask parameter
-            "planning_zones": [],         # populate if you have zone detection
-        }
-    )
-
-    prompt = f"""
-You are a careful planning and building compliance assistant.
-
-ALWAYS follow this order of authority when forming an answer:
-1) Tasmanian Planning Scheme — State Planning Provisions (SPP). Use as the base rule-set.
-2) Local Provisions Schedule (LPS) for the selected council. If an LPS provision modifies or overrides an SPP control, apply the LPS outcome.
-3) (Optional) National Construction Code (NCC) — building control (separate to planning).
-4) (Optional) Australian Standards — cite when directly relevant.
-
-Use ONLY the information in CONTEXT. If a clause/section is visible, quote it briefly and always cite the source file + page.
-If something is not supported by the provided CONTEXT, say you don't know.
-
-CONTEXT:
-{context}
-
-SECTION FORMAT GUIDANCE:
-{format_guide}
-
-QUESTION: {query}
-
-Answer:
-""".strip()
-
-    # BYOK mode: skip Ollama and return the context + prompt so the
-    # browser can call its own LLM provider (Claude, GPT, Grok, etc.)
-    if context_only:
-        return {
-            "context_only": True,
-            "context": context,
-            "prompt": prompt,
-            "sources": all_sources,
-            # Include the raw section blocks so the browser can inspect them
-            "sections": [
-                {"heading": name, "blocks": blocks}
-                for name, blocks in sections
-            ]
-        }
-
-    answer = ollama_chat(prompt)
-    return {"answer": answer, "sources": all_sources}
-
-
-@app.get("/ask")
-@limiter.limit("20/minute")
-def ask_get(
-    request: Request,
-    query: str = Query(..., description="User question"),
-    top_k: int = 10,
-    council: Optional[str] = None,
-    include_ncc: bool = False,
-    include_standards: bool = False,
-    source_contains: Optional[str] = None,
-    scope: str = "state_plus_local",
-    section_id: Optional[str] = None,
-    context_only: bool = False,
-):
-    _verify_demo_token_if_needed(request)
-
-    started = time.perf_counter()
-    out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
-    latency_ms = int((time.perf_counter() - started) * 1000)
-
-    # Telemetry insert
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, allow_tps, latency_ms, model, ok, topk_json, tokens_in, tokens_out)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), query, _normalize(query), int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0
-            ))
-            conn.commit()
-    except Exception as e:
-        # Don't break the request if logging fails
-        print("[telemetry] ask_get insert failed:", e)
-
-    return out
-
-
-@app.post("/ask")
-@limiter.limit("20/minute")
-def ask_post(request: Request, body: AskBody):
-    _verify_demo_token_if_needed(request)
-    qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
-    if not qtxt:
-        raise HTTPException(status_code=422, detail="Missing query/question")
-
-    started = time.perf_counter()
-    out = do_ask(
-        query=qtxt,
-        top_k=body.top_k,
-        council=body.council,
-        include_ncc=body.include_ncc,
-        include_standards=body.include_standards,
-        source_contains=body.source_contains,
-        scope=body.scope,
-        section_id=body.section_id,
-        context_only=body.context_only,
-    )
-    latency_ms = int((time.perf_counter() - started) * 1000)
-
-    # Telemetry insert
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = body.scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, allow_tps, latency_ms, model, ok, topk_json, tokens_in, tokens_out)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), qtxt, _normalize(qtxt), int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0
-            ))
-            conn.commit()
-    except Exception as e:
-        print("[telemetry] ask_post insert failed:", e)
-
-    return out

+ 873 - 744
backend/app.py

@@ -1,744 +1,873 @@
-import os, re
-import json
-import requests
-import time
-from typing import Optional, Literal, List, Tuple
-from fastapi import FastAPI, Query, HTTPException, Request
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse
-from slowapi.middleware import SlowAPIMiddleware
-from slowapi import Limiter
-from slowapi.util import get_remote_address
-from slowapi.errors import RateLimitExceeded
-from fastapi.responses import JSONResponse
-from pydantic import BaseModel
-from qdrant_client import QdrantClient
-from qdrant_client.http import models as qmodels
-from collections import Counter, defaultdict
-from datetime import datetime
-from telemetry import router as telemetry_router, db, ip_hash
-
-# ---- Environment ----
-OLLAMA_URL          = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")
-QDRANT_URL          = os.getenv("QDRANT_URL", "http://localhost:6333")
-OLLAMA_KEEP_ALIVE   = os.getenv("OLLAMA_KEEP_ALIVE", "-1")  # -1 = keep loaded forever
-COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
-EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
-CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
-CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
-
-# ---- DEMO TOKEN ----
-DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"
-DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")
-
-def _verify_demo_token_if_needed(request):
-    if not DEMO_REQUIRE_TOKEN:
-        return
-    auth = request.headers.get("Authorization", "")
-    if not (auth.startswith("Bearer ") and auth.split(" ",1)[1] == DEMO_TOKEN):
-        raise HTTPException(status_code=401, detail="Unauthorized")
-
-
-# ---- FAST API ----
-app = FastAPI()
-# Allowed origins — always include your frontend explicitly
-_origins = CORS_ORIGINS if CORS_ORIGINS else []
-_allow_all = len(_origins) == 0
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=_origins if not _allow_all else ["*"],
-    allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,
-    allow_credentials=not _allow_all,   # credentials only when origins are explicit
-    allow_methods=["GET", "POST", "OPTIONS"],
-    allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],
-    expose_headers=["X-TPR-SID"],
-)
-
-qc = QdrantClient(url=QDRANT_URL)
-app.include_router(telemetry_router)
-
-# ---- SLOW API ----
-limiter = Limiter(key_func=get_remote_address)
-app.state.limiter = limiter  # type: ignore
-app.add_middleware(SlowAPIMiddleware)
-
-@app.exception_handler(RateLimitExceeded)
-def ratelimit_handler(request, exc):
-    return JSONResponse(status_code=429, content={"error":"rate_limited","detail":"Too many requests"})
-
-
-# ---- Feedback endpoint ----
-class FeedbackBody(BaseModel):
-    verdict: str                    # "up" or "down"
-    query: Optional[str] = None     # the question that was asked
-    answer: Optional[str] = None    # the answer that was rated
-    note: Optional[str] = None      # optional free-text from thumbs-down
-    sid: Optional[str] = None       # session id from browser
-    model: Optional[str] = None     # which model answered
-    scope: Optional[str] = None     # which scope was used
-    sources: Optional[list] = None  # which sources were cited
-
-@app.post("/feedback")
-@limiter.limit("60/minute")
-def feedback(request: Request, body: FeedbackBody):
-    if body.verdict not in ("up", "down"):
-        raise HTTPException(status_code=422, detail="verdict must be 'up' or 'down'")
-
-    ip  = request.client.host if request.client else "0.0.0.0"
-    sid = body.sid or request.headers.get("X-TPR-SID") or ""
-
-    try:
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO feedback
-                    (ts, sid, ip_hash, verdict, query, answer, note, model, scope, sources_json)
-                VALUES (?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), body.verdict,
-                (body.query or "")[:2000],
-                (body.answer or "")[:8000],
-                (body.note or "")[:1000],
-                body.model or CHAT_MODEL,
-                body.scope or "",
-                _json_dumps(body.sources or []),
-            ))
-            conn.commit()
-    except Exception as e:
-        print("[feedback] insert failed:", e)
-        # Still return ok — don't surface DB errors to users
-    return {"ok": True}
-
-
-# ---- Ollama helpers ----
-def slug(s: Optional[str]) -> Optional[str]:
-    if not s:
-        return None
-    return re.sub(r'[^a-z0-9]+', '-', s.strip().lower()).strip('-') or None
-
-
-def ollama_embed(text: str) -> List[float]:
-    r = requests.post(
-        f"{OLLAMA_URL}/api/embeddings",
-        json={"model": EMBED_MODEL, "prompt": text},
-        timeout=60
-    )
-    r.raise_for_status()
-    data = r.json()
-    if "embedding" not in data:
-        raise RuntimeError(f"Ollama embeddings error: {data}")
-    return data["embedding"]
-
-def ollama_chat(prompt: str) -> str:
-    r = requests.post(
-        f"{OLLAMA_URL}/api/generate",
-        json={
-          "model": CHAT_MODEL,
-          "prompt": prompt,
-          "stream": False,
-          "options": {
-            "num_ctx": 6144, # was 8192,
-            "num_predict": 512,
-            "temperature": 0.2,
-            "top_p": 0.9,
-            "repeat_penalty": 1.1,
-          },
-          #"keep_alive": int(OLLAMA_KEEP_ALIVE) if OLLAMA_KEEP_ALIVE.lstrip('-').isdigit() else OLLAMA_KEEP_ALIVE,   # ← moved outside options, uses env var
-          "keep_alive": -1,
-        },
-        timeout=180
-    )
-    r.raise_for_status()
-    data = r.json()
-    return data.get("response", "").strip()
-
-def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):
-    offset = None
-    while True:
-        points, offset = qc.scroll(
-            collection_name=collection,
-            limit=page_size,
-            with_payload=True,
-            with_vectors=include_vector,
-            offset=offset,
-            scroll_filter=qfilter
-        )
-        if not points:
-            break
-        for pt in points:
-            yield pt
-        if offset is None:
-            break
-
-# ---- Health ----
-@app.get("/readyz")
-def readyz():
-    return {"ok": True}
-    
-def _normalize(q: Optional[str]) -> str:
-    return re.sub(r"\s+", " ", (q or "").strip().lower())
-
-def _json_dumps(o) -> str:
-    return json.dumps(o, ensure_ascii=False, separators=(",",":"))
-
-# ---- Councils list (prefers payload 'council', falls back to filename token) ----
-@app.get("/councils")
-def councils():
-    councils = set()
-    offset = None
-    # sample up to ~5k points (50 * 100)
-    for _ in range(50):
-        points, offset = qc.scroll(
-            collection_name=COLLECTION,
-            limit=100,
-            with_payload=True,
-            offset=offset
-        )
-        for pt in points:
-            p = pt.payload or {}
-            token = (p.get("council") or "").strip().lower()
-            if not token:
-                sf = (p.get("source_file") or "").lower()
-                if sf:
-                    token = sf.replace(".pdf", "").split("_")[0].split("-")[0]
-            if token:
-                councils.add(token)
-        if offset is None:
-            break
-    return sorted(councils)
-
-# ---- Filter builders ----
-def _mv(key: str, value: str) -> qmodels.FieldCondition:
-    return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))
-
-def _mt(key: str, text: str) -> qmodels.FieldCondition:
-    return qmodels.FieldCondition(key=key, match=qmodels.MatchText(text=text))
-
-def filter_tps() -> qmodels.Filter:
-    """TPS only, exact match on corpus."""
-    return qmodels.Filter(must=[_mv("corpus", "tps")])
-
-def filter_lps(council: str) -> qmodels.Filter:
-    """
-    LPS for a specific council (slug), exact match on both fields.
-    """
-    cslug = slug(council) or council.lower()
-    return qmodels.Filter(must=[_mv("corpus", "lps"), _mv("council", cslug)])
-
-def filter_ncc() -> qmodels.Filter:
-    return qmodels.Filter(must=[_mv("corpus", "ncc")])
-
-def filter_as() -> qmodels.Filter:
-    return qmodels.Filter(must=[_mv("corpus", "as")])
-
-def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> qmodels.Filter:
-    if not source_contains:
-        return flt
-    add = _mt("source_file", source_contains)
-    if flt:
-        # preserve existing must/should/must_not and AND the filename condition
-        must = list(getattr(flt, "must", []) or [])
-        must.append(add)
-        return qmodels.Filter(
-            must=must,
-            should=getattr(flt, "should", None),
-            must_not=getattr(flt, "must_not", None),
-        )
-    return qmodels.Filter(must=[add])
-
-def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):
-    results = qc.query_points(
-        collection_name=COLLECTION,
-        query=vec,
-        limit=max(1, limit),
-        query_filter=flt,
-        with_payload=True,
-    )
-    return results.points
-
-def render_blocks(hits) -> Tuple[List[str], List[dict]]:
-    blocks, sources = [], []
-    for h in hits:
-        p = h.payload or {}
-        src = f"{p.get('source_file')} (p.{p.get('page')} chunk {p.get('chunk_index')})"
-        snippet = p.get("text", "")
-        blocks.append(f"Source: {src}\nText: {snippet}")
-        sources.append({
-            "source_file": p.get("source_file"),
-            "page": p.get("page"),
-            "chunk_index": p.get("chunk_index"),
-            "score": h.score
-        })
-    return blocks, sources
-
-def combine_context(sections: List[Tuple[str, List[str]]]) -> str:
-    out = []
-    for heading, blocks in sections:
-        if not blocks:
-            continue
-        out.append(f"=== {heading} ===")
-        out.extend(blocks)
-    return "\n\n".join(out) if out else "No context found."
-
-def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):
-    """
-    Iterate through ALL points (filtered if qfilter given).
-    For your current dataset this is fine; if it grows huge later we'll switch to a stored summary or a background job.
-    """
-    offset = None
-    pages = 0
-    while pages < max_pages:
-        points, offset = qc.scroll(
-            collection_name=COLLECTION,
-            limit=page_size,
-            with_payload=True,
-            offset=offset,
-            scroll_filter=qfilter
-        )
-        if not points:
-            break
-        for pt in points:
-            yield pt
-        pages += 1
-        if offset is None:
-            break
-
-@app.get("/admin/stats")
-def admin_stats(council: Optional[str] = None, corpus: Optional[str] = None):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    corp = Counter()
-    councils = Counter()
-    total = 0
-    for pt in _scan_points(qfilter=qfilter):
-        p = pt.payload or {}
-        corp[(p.get("corpus") or "").lower()] += 1
-        if p.get("council"):
-            councils[(p.get("council") or "").lower()] += 1
-        total += 1
-
-    return {
-        "collection": COLLECTION,
-        "total_points": total,
-        "by_corpus": dict(corp),
-        "by_council": dict(councils),
-        "note": "Counts are points (chunks), not documents.",
-    }
-
-@app.get("/admin/files")
-def admin_files(council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    if contains:
-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=contains)))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    files = defaultdict(lambda: {"points": 0, "corpus": None, "council": None, "pages": set()})
-    for pt in _scan_points(qfilter=qfilter):
-        p = pt.payload or {}
-        f = (p.get("source_file") or "").strip()
-        if not f:
-            continue
-        rec = files[f]
-        rec["points"] += 1
-        rec["corpus"] = rec["corpus"] or p.get("corpus")
-        rec["council"] = rec["council"] or p.get("council")
-        if p.get("page") is not None:
-            rec["pages"].add(p["page"])
-
-    # shape for output
-    out = []
-    for f, rec in files.items():
-        out.append({
-            "source_file": f,
-            "corpus": rec["corpus"],
-            "council": rec["council"],
-            "points": rec["points"],
-            "page_count_est": len(rec["pages"]) if rec["pages"] else None,
-        })
-
-    # sort by points desc, limit
-    out.sort(key=lambda x: x["points"], reverse=True)
-    return out[:max(1, limit)]
-
-@app.get("/admin/sample")
-def admin_sample(council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    samples = []
-    for pt in _scan_points(qfilter=qfilter):
-        p = pt.payload or {}
-        txt = (p.get("text") or "").strip()
-        if not txt:
-            continue
-        samples.append({
-            "source_file": p.get("source_file"),
-            "corpus": p.get("corpus"),
-            "council": p.get("council"),
-            "page": p.get("page"),
-            "chunk_index": p.get("chunk_index"),
-            "preview": (txt[:400] + "…") if len(txt) > 400 else txt
-        })
-        if len(samples) >= max(1, n):
-            break
-    return samples
-
-@app.get("/admin/export")
-def admin_export(
-    collection: str = COLLECTION,
-    council: Optional[str] = None,
-    corpus: Optional[str] = None,
-    source_contains: Optional[str] = None,
-    include_vector: bool = False,
-    limit: Optional[int] = None
-):
-    must = []
-    if council:
-        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
-    if corpus:
-        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
-    if source_contains:
-        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))
-    qfilter = qmodels.Filter(must=must) if must else None
-
-    def gen():
-        count = 0
-        for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):
-            obj = {
-                "id": str(getattr(pt, "id", None)),
-                "payload": pt.payload or {},
-            }
-            if include_vector:
-                obj["vector"] = pt.vector
-            yield json.dumps(obj, ensure_ascii=False) + "\n"
-            count += 1
-            if limit and count >= limit:
-                break
-
-    filename = f'{collection}-{corpus or "all"}-{council or "all"}.ndjson'
-    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
-    return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)
-
-
-def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:
-    """
-    Return strict, section-specific formatting guidance for the LLM.
-    Keep these short, prescriptive, and impossible to ignore.
-    """
-    sid = (section_id or "").lower()
-
-    # Utility bits from context
-    zones = ctx.get("planning_zones") or []
-    zone_label = ", ".join(zones) if zones else "the applicable zone"
-    council_label = ctx.get("council") or ""
-
-    # ---- ZONING (tables of clauses like your sample) ----
-    if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:
-        return f"""
-            FORMAT REQUIREMENTS (MANDATORY):
-            - Produce a concise preface (≤ 2 sentences) naming {zone_label}.
-            - Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.
-            - One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.
-            - Columns (exact):
-              | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |
-            - "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").
-            - "Topic": short label extracted from the clause heading.
-            - "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.
-            - "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".
-            - "Source": filename + page (from CONTEXT).
-            - Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.
-            - After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.
-            """.strip()
-
-    # ---- Codes overview list/table (optional future) ----
-    if sid.startswith("code-"):
-        return """
-            FORMAT REQUIREMENTS:
-            - Start with one sentence stating which Code and why it is triggered.
-            - Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.
-            - Keep to 150–250 words + table.
-            """.strip()
-
-    # ---- Permit Overview (concise triggers) ----
-    if sid == "permit-overview":
-        return """
-            FORMAT REQUIREMENTS:
-            - Produce 3 blocks with headings:
-              1) "Project Context" – 3–5 bullet points (site, proposal, zone).
-              2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.
-              3) "Assessment Path" – bullet list of key clauses to assess next.
-            - Cite specific clause numbers ONLY if present in CONTEXT (include Source).
-            """.strip()
-
-    # ---- Default (no special formatting) ----
-    return """
-        FORMAT REQUIREMENTS:
-        - Use concise Markdown with short paragraphs and bullets as needed.
-        - Cite briefly (filename + page) when quoting a control.
-        """.strip()
-
-
-# ---- Ask (GET + POST) ----
-class AskBody(BaseModel):
-    # accept multiple keys from different frontends
-    query: Optional[str] = None
-    question: Optional[str] = None
-    q: Optional[str] = None
-    prompt: Optional[str] = None
-
-    top_k: int = 10
-    council: Optional[str] = None
-    include_ncc: bool = False
-    include_standards: bool = False
-    source_contains: Optional[str] = None
-    scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'
-    section_id: Optional[str] = None
-
-    # BYOK mode: return context blocks without calling Ollama.
-    # The browser then calls its own LLM with the returned context + prompt.
-    context_only: bool = False
-
-def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:
-    corp = (p.get("corpus") or "").lower()
-    council = (p.get("council") or "").lower()
-    if scope == "local_only":
-        return corp == "lps" and cslug and council == cslug
-    if scope == "state_only":
-        return corp == "tps"
-    if scope == "state_plus_local":
-        return corp == "tps" or (corp == "lps" and cslug and council == cslug)
-    return True
-    
-def do_ask(
-    query: str,
-    top_k: int = 10,
-    council: Optional[str] = None,
-    include_ncc: bool = False,
-    include_standards: bool = False,
-    source_contains: Optional[str] = None,
-    scope: str = "state_plus_local",
-    section_id: Optional[str] = None,
-    context_only: bool = False,
-):
-    vec = ollama_embed(query)
-    cslug = slug(council) if council else None
-
-    # Build allowed scopes based on scope param
-    scopes: List[Tuple[str, qmodels.Filter]] = []
-    if scope in ("state_only", "state_plus_local", "any"):
-        scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))
-    if scope in ("local_only", "state_plus_local", "any") and cslug:
-        scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))
-    if include_ncc:
-        scopes.append(("National Construction Code (NCC)", filter_ncc()))
-    if include_standards:
-        scopes.append(("Australian Standards (AS)", filter_as()))
-
-    # Apply additional filename filter if requested (AND)
-    scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]
-
-    # Allocate limits per scope
-    per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0
-    per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0
-    remaining = max(1, top_k - (per_spp + per_lps))
-    extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))
-    per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0
-
-    limits: List[int] = []
-    for name, _ in scopes:
-        if name.startswith("Tasmanian Planning Scheme"):
-            limits.append(per_spp)
-        elif name.startswith("Local Provisions Schedule"):
-            limits.append(per_lps)
-        else:
-            limits.append(per_extra)
-
-    sections: List[Tuple[str, List[str]]] = []
-    all_sources: List[dict] = []
-
-    for (name, flt), lim in zip(scopes, limits):
-        if lim <= 0:
-            continue
-        hits = q_search(vec, flt, lim)
-
-        # Guardrail: drop any hit that violates scope/council
-        hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]
-
-        blocks, sources = render_blocks(hits)
-        sections.append((name, blocks))
-        all_sources.extend(sources)
-
-    context = combine_context(sections)
-    
-    #format_guide = _section_format_guide(section_id, section_title="(auto)", ctx={})
-    format_guide = _section_format_guide(
-        section_id,
-        section_title="(auto)",
-        ctx={
-            "council": council,           # from do_ask parameter
-            "planning_zones": [],         # populate if you have zone detection
-        }
-    )
-
-    prompt = f"""
-You are an expert Tasmanian planning and building compliance assistant with deep knowledge of the Tasmanian Planning Scheme structure.
-
-## AUTHORITY ORDER — always apply in this sequence:
-1. State Planning Provisions (SPP) — the statewide baseline. Cite clause numbers exactly.
-2. Local Provisions Schedule (LPS) for the selected council — overrides SPP where it differs.
-3. National Construction Code (NCC) — building controls only, keep separate from planning.
-4. Australian Standards — only when directly referenced by a clause in CONTEXT.
-
-## STRICT RULES:
-- Use ONLY information present in CONTEXT below. Never invent clause numbers, standards, or measurements.
-- If CONTEXT does not contain enough information to answer, say: "The provided context does not cover this — check the TPSO viewer directly at tpso.planning.tas.gov.au"
-- Every specific standard or requirement you state MUST include its source: (filename, p.N)
-- Quote clause text briefly (1–2 lines max) then explain in plain English.
-- Distinguish clearly between Acceptable Solutions (A) and Performance Criteria (P).
-
-## OUTPUT FORMAT:
-- Use Markdown: ## for main headings, ### for sub-headings, **bold** for clause numbers.
-- For setbacks, parking rates, or multiple standards: use a Markdown table with columns: Clause | Requirement | A or P | Source
-- End every response with a ## Sources section listing each cited document and page.
-- Keep answers concise but complete — do not pad or repeat information.
-- Professional planning language; avoid informal phrasing.
-
-## CONTEXT (retrieved from Tasmanian Planning Scheme documents):
-{context}
-
-{format_guide}
-
-## QUESTION:
-{query}
-
-## ANSWER:
-""".strip()
-
-    # BYOK mode: skip Ollama and return the context + prompt so the
-    # browser can call its own LLM provider (Claude, GPT, Grok, etc.)
-    if context_only:
-        return {
-            "context_only": True,
-            "context": context,
-            "prompt": prompt,
-            "sources": all_sources,
-            # Include the raw section blocks so the browser can inspect them
-            "sections": [
-                {"heading": name, "blocks": blocks}
-                for name, blocks in sections
-            ]
-        }
-
-    answer = ollama_chat(prompt)
-    return {"answer": answer, "sources": all_sources}
-
-
-@app.get("/ask")
-@limiter.limit("20/minute")
-def ask_get(
-    request: Request,
-    query: str = Query(..., description="User question"),
-    top_k: int = 10,
-    council: Optional[str] = None,
-    include_ncc: bool = False,
-    include_standards: bool = False,
-    source_contains: Optional[str] = None,
-    scope: str = "state_plus_local",
-    section_id: Optional[str] = None,
-    context_only: bool = False,
-):
-    _verify_demo_token_if_needed(request)
-
-    started = time.perf_counter()
-    out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
-    latency_ms = int((time.perf_counter() - started) * 1000)
-
-    # Telemetry insert
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
-                     model, ok, topk_json, tokens_in, tokens_out, answer)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), query, _normalize(query),
-                scope, int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
-                (out.get("answer") or "")[:8000],       # ← truncate to 8KB max
-            ))
-            conn.commit()
-    except Exception as e:
-        # Don't break the request if logging fails
-        print("[telemetry] ask_get insert failed:", e)
-
-    return out
-
-
-@app.post("/ask")
-@limiter.limit("20/minute")
-def ask_post(request: Request, body: AskBody):
-    _verify_demo_token_if_needed(request)
-    qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
-    if not qtxt:
-        raise HTTPException(status_code=422, detail="Missing query/question")
-
-    started = time.perf_counter()
-    out = do_ask(
-        query=qtxt,
-        top_k=body.top_k,
-        council=body.council,
-        include_ncc=body.include_ncc,
-        include_standards=body.include_standards,
-        source_contains=body.source_contains,
-        scope=body.scope,
-        section_id=body.section_id,
-        context_only=body.context_only,
-    )
-    latency_ms = int((time.perf_counter() - started) * 1000)
-
-    # Telemetry insert
-    try:
-        ip = request.client.host if request.client else "0.0.0.0"
-        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
-        allow_tps = body.scope in ("state_only", "state_plus_local")
-        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
-
-        with db() as conn:
-            conn.execute("""
-                INSERT INTO ask_logs
-                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
-                     model, ok, topk_json, tokens_in, tokens_out, answer)
-                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
-            """, (
-                datetime.utcnow().isoformat(),
-                sid, ip_hash(ip), qtxt, _normalize(qtxt),
-                body.scope, int(allow_tps),
-                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
-                (out.get("answer") or "")[:8000],   # ← truncate to 8KB max
-            ))
-            conn.commit()
-    except Exception as e:
-        print("[telemetry] ask_post insert failed:", e)
-
-    return out
+"""
+app.py — FastAPI backend for tasplanning.report
+
+RAG pipeline:
+  1. Embed the user query via Ollama (nomic-embed-text)
+  2. Search Qdrant for the closest chunks, split by corpus/scope
+  3. Inject retrieved context into a structured prompt
+  4. Call Ollama (llama3.1:8b) and return the answer + source citations
+
+BYOK mode (context_only=True): skip step 4 and return the prompt so
+the browser can call its own LLM (Claude, GPT, Grok, local Ollama).
+
+Restart required after any change to this file:
+    docker compose restart backend
+"""
+import os, re, hmac, logging
+import json
+import requests
+import time
+
+logger = logging.getLogger(__name__)
+
+from typing import Optional, Literal, List, Tuple
+from fastapi import FastAPI, Query, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from slowapi.middleware import SlowAPIMiddleware
+from slowapi.errors import RateLimitExceeded
+from limiter import limiter
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from qdrant_client import QdrantClient
+from qdrant_client.http import models as qmodels
+from collections import Counter, defaultdict
+from datetime import datetime
+from telemetry import router as telemetry_router, db, ip_hash
+
+# ---------------------------------------------------------------------------
+# Environment
+# ---------------------------------------------------------------------------
+OLLAMA_URL          = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")
+QDRANT_URL          = os.getenv("QDRANT_URL", "http://localhost:6333")
+OLLAMA_KEEP_ALIVE   = os.getenv("OLLAMA_KEEP_ALIVE", "-1")  # -1 = keep loaded forever
+COLLECTION          = os.getenv("QDRANT_COLLECTION", "planning_docs")
+EMBED_MODEL         = os.getenv("EMBED_MODEL", "nomic-embed-text")
+CHAT_MODEL          = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
+CORS_ORIGINS        = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
+
+# ---------------------------------------------------------------------------
+# Demo token gate (disabled by default)
+# Enable by setting DEMO_REQUIRE_TOKEN=1 and DEMO_TOKEN=<secret> in .env.
+# When enabled, every request to /ask and /admin/* must include:
+#   Authorization: Bearer <DEMO_TOKEN>
+# ---------------------------------------------------------------------------
+DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"
+DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")
+
+def _verify_demo_token_if_needed(request):
+    """Raise HTTPException(401) unless the gate is off or a valid Bearer token is sent."""
+    if not DEMO_REQUIRE_TOKEN:
+        return
+    scheme, _, supplied = request.headers.get("Authorization", "").partition(" ")
+    # Fail closed when DEMO_TOKEN is unset. Compare as bytes: compare_digest
+    # raises TypeError on non-ASCII str; it runs in constant time (no timing leak).
+    expected = DEMO_TOKEN.encode("utf-8")
+    if scheme != "Bearer" or not expected or not hmac.compare_digest(supplied.encode("utf-8"), expected):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+
+# ---------------------------------------------------------------------------
+# FastAPI app + CORS
+# ---------------------------------------------------------------------------
+app = FastAPI()
+
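+# If CORS_ORIGINS is empty (shouldn't happen in production) fall back to a
+# wildcard origin. Credentials cannot be used with wildcard origins, so
+# allow_credentials is only enabled when explicit origins are configured.
+# NOTE(review): once "*" is in allow_origins every origin is accepted, so the
+# tasplanning.report regex passed in the fallback case looks redundant —
+# confirm whether the fallback was meant to be regex-only.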
+_origins = CORS_ORIGINS if CORS_ORIGINS else []
+_allow_all = len(_origins) == 0
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=_origins if not _allow_all else ["*"],
+    allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,
+    allow_credentials=not _allow_all,   # credentials only when origins are explicit
+    allow_methods=["GET", "POST", "OPTIONS"],
+    allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],
+    expose_headers=["X-TPR-SID"],
+)
+
+qc = QdrantClient(url=QDRANT_URL)
+app.include_router(telemetry_router)
+
+# ---------------------------------------------------------------------------
+# Rate limiting (slowapi — in-memory, per IP)
+# Shared limiter instance lives in limiter.py to avoid circular imports with
+# telemetry.py, which also needs to decorate its own endpoints.
+# ---------------------------------------------------------------------------
+app.state.limiter = limiter  # type: ignore
+app.add_middleware(SlowAPIMiddleware)
+
+@app.exception_handler(RateLimitExceeded)
+def ratelimit_handler(request, exc):
+    """Answer slowapi's RateLimitExceeded with a JSON 429 body."""
+    payload = {"error":"rate_limited","detail":"Too many requests"}
+    return JSONResponse(content=payload, status_code=429)
+
+
+# ---------------------------------------------------------------------------
+# Feedback endpoint
+# Stores thumbs-up/down ratings alongside the query + answer for prompt tuning.
+# Fields are truncated before insert to keep the SQLite row size reasonable.
+# ---------------------------------------------------------------------------
+class FeedbackBody(BaseModel):
+    # Request body for POST /feedback. Only `verdict` is required; every
+    # other field is optional context stored next to the rating.
+    verdict: str                    # "up" or "down"
+    query: Optional[str] = None     # the question that was asked
+    answer: Optional[str] = None    # the answer that was rated
+    note: Optional[str] = None      # optional free-text from thumbs-down
+    sid: Optional[str] = None       # session id from browser
+    model: Optional[str] = None     # which model answered
+    scope: Optional[str] = None     # which scope was used
+    sources: Optional[list] = None  # which sources were cited
+
+@app.post("/feedback")
+@limiter.limit("60/minute")
+def feedback(request: Request, body: FeedbackBody):
+    # Record one thumbs-up/down rating in the `feedback` table.
+    # Responds {"ok": True} even when the insert fails; only an invalid
+    # verdict is reported to the caller (HTTP 422).
+    if body.verdict not in ("up", "down"):
+        raise HTTPException(status_code=422, detail="verdict must be 'up' or 'down'")
+
+    ip  = request.client.host if request.client else "0.0.0.0"
+    # Session id: the body field wins, then the X-TPR-SID header, else empty.
+    sid = body.sid or request.headers.get("X-TPR-SID") or ""
+
+    try:
+        with db() as conn:
+            conn.execute("""
+                INSERT INTO feedback
+                    (ts, sid, ip_hash, verdict, query, answer, note, model, scope, sources_json)
+                VALUES (?,?,?,?,?,?,?,?,?,?)
+            """, (
+                datetime.utcnow().isoformat(),
+                # The raw IP is never stored, only ip_hash(ip).
+                sid, ip_hash(ip), body.verdict,
+                _trunc(body.query or "", 2000, "feedback.query"),
+                _trunc(body.answer or "", 8000, "feedback.answer"),
+                _trunc(body.note or "", 1000, "feedback.note"),
+                # Falls back to the server's CHAT_MODEL when the client sent no model.
+                body.model or CHAT_MODEL,
+                body.scope or "",
+                _json_dumps(body.sources or []),
+            ))
+            conn.commit()
+    except Exception as e:
+        logger.exception("[feedback] telemetry insert failed")
+        # Still return ok — don't surface DB errors to users
+    return {"ok": True}
+
+
+# ---------------------------------------------------------------------------
+# Ollama helpers
+# ---------------------------------------------------------------------------
+def slug(s: Optional[str]) -> Optional[str]:
+    """
+    Turn a council name into the slug used for Qdrant filter matching.
+
+    Lower-cases the trimmed input, replaces every run of characters outside
+    a-z / 0-9 with a single '-', and trims dashes from both ends. Returns
+    None for empty input or when nothing is left.
+    """
+    if not s:
+        return None
+    lowered = s.strip().lower()
+    dashed = re.sub(r'[^a-z0-9]+', '-', lowered)
+    trimmed = dashed.strip('-')
+    return trimmed if trimmed else None
+
+
+def ollama_embed(text: str) -> List[float]:
+    """
+    Call the Ollama embeddings API and return the float vector.
+
+    Raises:
+        HTTPException(503): the request timed out or the service is unreachable.
+        HTTPException(502): the service answered with an HTTP error, the request
+            failed in some other way, or the body is not a JSON object with an
+            "embedding" key.
+    """
+    try:
+        r = requests.post(
+            f"{OLLAMA_URL}/api/embeddings",
+            json={"model": EMBED_MODEL, "prompt": text},
+            timeout=60
+        )
+        r.raise_for_status()
+    except requests.Timeout:
+        logger.error("Ollama embed timeout after 60s (url=%s model=%s)", OLLAMA_URL, EMBED_MODEL)
+        raise HTTPException(status_code=503, detail="Embedding service timed out")
+    except requests.ConnectionError:
+        logger.error("Ollama embed connection error (url=%s)", OLLAMA_URL)
+        raise HTTPException(status_code=503, detail="Embedding service unavailable")
+    except requests.HTTPError as e:
+        logger.error("Ollama embed HTTP %s: %s", e.response.status_code, e.response.text[:200])
+        raise HTTPException(status_code=502, detail="Embedding service error")
+    except requests.RequestException:
+        # Any other requests failure (bad URL, too many redirects, ...) would
+        # otherwise escape as an unhandled 500.
+        logger.exception("Ollama embed request failed (url=%s)", OLLAMA_URL)
+        raise HTTPException(status_code=502, detail="Embedding service error")
+    try:
+        data = r.json()
+    except ValueError:
+        # A 2xx answer with a non-JSON body (e.g. a proxy error page).
+        logger.error("Ollama embed returned non-JSON body: %s", r.text[:200])
+        raise HTTPException(status_code=502, detail="Embedding service returned unexpected response")
+    if not isinstance(data, dict) or "embedding" not in data:
+        logger.error("Ollama embed unexpected response: %s", str(data)[:200])
+        raise HTTPException(status_code=502, detail="Embedding service returned unexpected response")
+    return data["embedding"]
+
+def _ollama_keep_alive():
+    """
+    Return the OLLAMA_KEEP_ALIVE setting in a form suitable for the request body.
+
+    The setting arrives from the environment as a string. Purely numeric
+    values such as "-1" are sent as integers; anything else (e.g. "10m") is
+    passed through unchanged as a duration string. A blank value falls back
+    to -1, the previous hard-coded behaviour.
+    """
+    raw = str(OLLAMA_KEEP_ALIVE).strip()
+    if not raw:
+        return -1
+    try:
+        return int(raw)
+    except ValueError:
+        return raw
+
+
+def ollama_chat(prompt: str) -> str:
+    """
+    Send a prompt to Ollama and return the generated text.
+
+    keep_alive MUST be a top-level key — putting it inside options{} causes
+    Ollama to silently ignore it and unload the model between requests.
+    Its value comes from OLLAMA_KEEP_ALIVE (default -1).
+
+    num_ctx is fixed at 6144. Changing it between requests forces Ollama to
+    reload the model (KV cache is resized), adding ~3–5 s of cold-start latency.
+
+    Raises:
+        HTTPException(503): the request timed out or the service is unreachable.
+        HTTPException(502): the service answered with an HTTP error, the request
+            failed in some other way, or the body is not a JSON object.
+    """
+    payload = {
+        "model": CHAT_MODEL,
+        "prompt": prompt,
+        "stream": False,
+        "options": {
+            "num_ctx": 6144,  # was 8192
+            "num_predict": 512,
+            "temperature": 0.2,
+            "top_p": 0.9,
+            "repeat_penalty": 1.1,
+        },
+        "keep_alive": _ollama_keep_alive(),  # keep model resident in VRAM between requests
+    }
+    try:
+        r = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=180)
+        r.raise_for_status()
+    except requests.Timeout:
+        logger.error("Ollama chat timeout after 180s (url=%s model=%s)", OLLAMA_URL, CHAT_MODEL)
+        raise HTTPException(status_code=503, detail="LLM service timed out")
+    except requests.ConnectionError:
+        logger.error("Ollama chat connection error (url=%s)", OLLAMA_URL)
+        raise HTTPException(status_code=503, detail="LLM service unavailable")
+    except requests.HTTPError as e:
+        logger.error("Ollama chat HTTP %s: %s", e.response.status_code, e.response.text[:200])
+        raise HTTPException(status_code=502, detail="LLM service error")
+    except requests.RequestException:
+        # Any other requests failure would otherwise escape as an unhandled 500.
+        logger.exception("Ollama chat request failed (url=%s)", OLLAMA_URL)
+        raise HTTPException(status_code=502, detail="LLM service error")
+    try:
+        data = r.json()
+    except ValueError:
+        # A 2xx answer with a non-JSON body (e.g. a proxy error page).
+        logger.error("Ollama chat returned non-JSON body: %s", r.text[:200])
+        raise HTTPException(status_code=502, detail="LLM service error")
+    if not isinstance(data, dict):
+        logger.error("Ollama chat unexpected response: %s", str(data)[:200])
+        raise HTTPException(status_code=502, detail="LLM service error")
+    return (data.get("response") or "").strip()
+
+def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):
+    """
+    Generator over every point of `collection`, fetched page by page through
+    Qdrant's scroll API (payload always included, vectors on request).
+    Used by /admin/export which needs vectors and supports arbitrary collections.
+    For payload-only scans over the default collection use _scan_points instead.
+    """
+    cursor = None
+    while True:
+        batch, cursor = qc.scroll(
+            collection_name=collection,
+            scroll_filter=qfilter,
+            limit=page_size,
+            offset=cursor,
+            with_payload=True,
+            with_vectors=include_vector,
+        )
+        if not batch:
+            return
+        yield from batch
+        # A None cursor means Qdrant has no further page.
+        if cursor is None:
+            return
+
+# ---------------------------------------------------------------------------
+# Health + utility endpoints
+# ---------------------------------------------------------------------------
+@app.get("/readyz")
+def readyz():
+    # Readiness probe: always answers {"ok": true}; no dependency is checked.
+    status = {"ok": True}
+    return status
+
+def _normalize(q: Optional[str]) -> str:
+    """Trim, lowercase and squeeze whitespace runs to one space (None → "") — the dedup key stored in ask_logs."""
+    lowered = (q or "").strip().lower()
+    return re.sub(r"\s+", " ", lowered)
+
+def _json_dumps(o) -> str:
+    """Serialise `o` as compact JSON (no spaces after separators), leaving non-ASCII characters unescaped."""
+    compact = (",", ":")
+    return json.dumps(o, separators=compact, ensure_ascii=False)
+
+def _trunc(s: str, limit: int, field: str) -> str:
+    """
+    Return `s` cut down to at most `limit` characters.
+
+    When characters are dropped a warning naming `field` is logged, so the
+    data loss shows up in the logs instead of passing unnoticed.
+    """
+    original_len = len(s)
+    if original_len <= limit:
+        return s
+    logger.warning("telemetry field %r truncated from %d to %d chars", field, original_len, limit)
+    return s[:limit]
+
+# ---- Councils list (prefers payload 'council', falls back to filename token) ----
+@app.get("/councils")
+def councils():
+    # Sorted list of distinct council tokens found in a sample of the collection.
+    # The payload's `council` field is preferred; otherwise the token is the
+    # leading part of `source_file` (before the first "_" or "-").
+    found = set()
+    cursor = None
+    # sample up to ~5k points (50 * 100)
+    for _page in range(50):
+        batch, cursor = qc.scroll(
+            collection_name=COLLECTION,
+            limit=100,
+            offset=cursor,
+            with_payload=True,
+        )
+        for point in batch:
+            payload = point.payload or {}
+            name = (payload.get("council") or "").strip().lower()
+            if not name:
+                filename = (payload.get("source_file") or "").lower()
+                if filename:
+                    stem = filename.replace(".pdf", "")
+                    name = stem.split("_")[0].split("-")[0]
+            if name:
+                found.add(name)
+        if cursor is None:
+            break
+    return sorted(found)
+
+# ---------------------------------------------------------------------------
+# Qdrant filter builders
+# _mv  — exact MatchValue (keyword field, case-sensitive)
+# _mt  — MatchText (full-text / substring match)
+# ---------------------------------------------------------------------------
+def _mv(key: str, value: str) -> qmodels.FieldCondition:
+    """Build an exact-match (MatchValue) condition on payload field `key`."""
+    exact = qmodels.MatchValue(value=value)
+    return qmodels.FieldCondition(key=key, match=exact)
+
+def _mt(key: str, text: str) -> qmodels.FieldCondition:
+    """Build a text-match (MatchText) condition on payload field `key`."""
+    textual = qmodels.MatchText(text=text)
+    return qmodels.FieldCondition(key=key, match=textual)
+
+def filter_tps() -> qmodels.Filter:
+    """Filter matching only points whose corpus is exactly "tps"."""
+    conditions = [_mv("corpus", "tps")]
+    return qmodels.Filter(must=conditions)
+
+def filter_lps(council: str) -> qmodels.Filter:
+    """
+    Filter matching LPS points of one council: exact match on corpus "lps"
+    and on the council slug (the lower-cased name if slugging yields nothing).
+    """
+    council_key = slug(council)
+    if not council_key:
+        council_key = council.lower()
+    conditions = [_mv("corpus", "lps"), _mv("council", council_key)]
+    return qmodels.Filter(must=conditions)
+
+def filter_ncc() -> qmodels.Filter:
+    """Filter matching only points whose corpus is exactly "ncc"."""
+    conditions = [_mv("corpus", "ncc")]
+    return qmodels.Filter(must=conditions)
+
+def filter_as() -> qmodels.Filter:
+    """Filter matching only points whose corpus is exactly "as"."""
+    conditions = [_mv("corpus", "as")]
+    return qmodels.Filter(must=conditions)
+
+def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> Optional[qmodels.Filter]:
+    """
+    AND an additional source_file substring condition onto an existing filter.
+
+    Returns `flt` unchanged (which may be None) when `source_contains` is
+    empty. Otherwise returns a new Filter; `flt` itself is not modified.
+    """
+    if not source_contains:
+        return flt
+    add = _mt("source_file", source_contains)
+    if flt is None:
+        return qmodels.Filter(must=[add])
+    # preserve existing must/should/must_not and AND the filename condition
+    existing = getattr(flt, "must", None)
+    if existing is None:
+        must = []
+    elif isinstance(existing, list):
+        must = list(existing)
+    else:
+        # A single condition object rather than a list: wrap it so it is kept
+        # as one condition instead of being iterated.
+        must = [existing]
+    must.append(add)
+    return qmodels.Filter(
+        must=must,
+        should=getattr(flt, "should", None),
+        must_not=getattr(flt, "must_not", None),
+    )
+
+def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):
+    """Vector search in the default collection; returns the scored points with payloads, asking for at least one."""
+    wanted = max(1, limit)
+    response = qc.query_points(
+        collection_name=COLLECTION,
+        query=vec,
+        query_filter=flt,
+        limit=wanted,
+        with_payload=True,
+    )
+    return response.points
+
+def render_blocks(hits) -> Tuple[List[str], List[dict]]:
+    """
+    Turn search hits into two parallel lists: a plain-text context block per
+    hit ("Source: ... / Text: ...") and a source dict per hit carrying
+    source_file, page, chunk_index and score.
+    """
+    blocks: List[str] = []
+    sources: List[dict] = []
+    for hit in hits:
+        payload = hit.payload or {}
+        source_file = payload.get("source_file")
+        page = payload.get("page")
+        chunk_index = payload.get("chunk_index")
+        text = payload.get("text", "")
+        blocks.append(f"Source: {source_file} (p.{page} chunk {chunk_index})\nText: {text}")
+        sources.append({
+            "source_file": source_file,
+            "page": page,
+            "chunk_index": chunk_index,
+            "score": hit.score,
+        })
+    return blocks, sources
+
+def combine_context(sections: List[Tuple[str, List[str]]]) -> str:
+    """
+    Build the prompt's context string: each non-empty section contributes a
+    "=== heading ===" line followed by its blocks, all separated by blank
+    lines. Returns "No context found." when every section is empty.
+    """
+    parts: List[str] = []
+    for heading, blocks in sections:
+        if blocks:
+            parts += [f"=== {heading} ===", *blocks]
+    if not parts:
+        return "No context found."
+    return "\n\n".join(parts)
+
+def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):
+    """
+    Generator over the points of the default collection (payload only, no
+    vectors), scrolling at most `max_pages` pages of `page_size` points.
+    Used by /admin/stats, /admin/files, /admin/sample.
+
+    For the current dataset size this is fine. If the collection grows very large,
+    switch to a pre-aggregated summary stored in a separate Qdrant collection or
+    a background job that writes counts to SQLite.
+    """
+    cursor = None
+    fetched_pages = 0
+    while fetched_pages < max_pages:
+        batch, cursor = qc.scroll(
+            collection_name=COLLECTION,
+            scroll_filter=qfilter,
+            limit=page_size,
+            offset=cursor,
+            with_payload=True,
+        )
+        if not batch:
+            return
+        yield from batch
+        fetched_pages += 1
+        # A None cursor means Qdrant has no further page.
+        if cursor is None:
+            return
+
+# ---------------------------------------------------------------------------
+# Admin endpoints — require DEMO_TOKEN when DEMO_REQUIRE_TOKEN=1
+# All endpoints are rate-limited; /export is tighter (streams full DB).
+# ---------------------------------------------------------------------------
+@app.get("/admin/stats")
+@limiter.limit("30/minute")
+def admin_stats(request: Request, council: Optional[str] = None, corpus: Optional[str] = None):
+    # Point counts for the default collection, in total and broken down by
+    # corpus and by council, optionally narrowed by text-match filters.
+    _verify_demo_token_if_needed(request)
+    conditions = []
+    if council:
+        conditions.append(_mt("council", council.lower()))
+    if corpus:
+        conditions.append(_mt("corpus", corpus.lower()))
+    qfilter = qmodels.Filter(must=conditions) if conditions else None
+
+    by_corpus = Counter()
+    by_council = Counter()
+    total = 0
+    for point in _scan_points(qfilter=qfilter):
+        payload = point.payload or {}
+        by_corpus[(payload.get("corpus") or "").lower()] += 1
+        council_name = payload.get("council")
+        # Points without a council are counted in the total and corpus only.
+        if council_name:
+            by_council[council_name.lower()] += 1
+        total += 1
+
+    return {
+        "collection": COLLECTION,
+        "total_points": total,
+        "by_corpus": dict(by_corpus),
+        "by_council": dict(by_council),
+        "note": "Counts are points (chunks), not documents.",
+    }
+
+@app.get("/admin/files")
+@limiter.limit("30/minute")
+def admin_files(request: Request, council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):
+    # Per-source-file summary (point count, corpus, council, distinct pages),
+    # largest files first, capped at `limit` rows (at least one).
+    _verify_demo_token_if_needed(request)
+    conditions = []
+    if council:
+        conditions.append(_mt("council", council.lower()))
+    if corpus:
+        conditions.append(_mt("corpus", corpus.lower()))
+    if contains:
+        conditions.append(_mt("source_file", contains))
+    qfilter = qmodels.Filter(must=conditions) if conditions else None
+
+    per_file = {}
+    for point in _scan_points(qfilter=qfilter):
+        payload = point.payload or {}
+        name = (payload.get("source_file") or "").strip()
+        if not name:
+            continue
+        if name not in per_file:
+            per_file[name] = {"points": 0, "corpus": None, "council": None, "pages": set()}
+        entry = per_file[name]
+        entry["points"] += 1
+        # Keep the first non-empty corpus / council seen for this file.
+        entry["corpus"] = entry["corpus"] or payload.get("corpus")
+        entry["council"] = entry["council"] or payload.get("council")
+        if payload.get("page") is not None:
+            entry["pages"].add(payload["page"])
+
+    # shape for output
+    rows = [
+        {
+            "source_file": name,
+            "corpus": entry["corpus"],
+            "council": entry["council"],
+            "points": entry["points"],
+            "page_count_est": len(entry["pages"]) if entry["pages"] else None,
+        }
+        for name, entry in per_file.items()
+    ]
+
+    # sort by points desc, limit
+    rows.sort(key=lambda row: row["points"], reverse=True)
+    return rows[:max(1, limit)]
+
+@app.get("/admin/sample")
+@limiter.limit("30/minute")
+def admin_sample(request: Request, council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):
+    # The first `n` (at least one) matching points that have text, each with
+    # a preview of up to 400 characters.
+    _verify_demo_token_if_needed(request)
+    conditions = []
+    if council:
+        conditions.append(_mt("council", council.lower()))
+    if corpus:
+        conditions.append(_mt("corpus", corpus.lower()))
+    qfilter = qmodels.Filter(must=conditions) if conditions else None
+
+    wanted = max(1, n)
+    previews = []
+    for point in _scan_points(qfilter=qfilter):
+        payload = point.payload or {}
+        text = (payload.get("text") or "").strip()
+        if not text:
+            continue
+        preview = text if len(text) <= 400 else text[:400] + "…"
+        previews.append({
+            "source_file": payload.get("source_file"),
+            "corpus": payload.get("corpus"),
+            "council": payload.get("council"),
+            "page": payload.get("page"),
+            "chunk_index": payload.get("chunk_index"),
+            "preview": preview,
+        })
+        if len(previews) >= wanted:
+            break
+    return previews
+
+@app.get("/admin/export")
+@limiter.limit("5/minute")
+def admin_export(
+    request: Request,
+    collection: str = COLLECTION,
+    council: Optional[str] = None,
+    corpus: Optional[str] = None,
+    source_contains: Optional[str] = None,
+    include_vector: bool = False,
+    limit: Optional[int] = None
+):
+    # Stream the matching points of `collection` as NDJSON (one JSON object per
+    # line: id, payload and, on request, vector) as a file download.
+    # A `limit` of None or 0 means no cap.
+    _verify_demo_token_if_needed(request)
+    must = []
+    if council:
+        must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
+    if corpus:
+        must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
+    if source_contains:
+        must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))
+    qfilter = qmodels.Filter(must=must) if must else None
+
+    def gen():
+        count = 0
+        for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):
+            obj = {
+                "id": str(getattr(pt, "id", None)),
+                "payload": pt.payload or {},
+            }
+            if include_vector:
+                obj["vector"] = pt.vector
+            yield json.dumps(obj, ensure_ascii=False) + "\n"
+            count += 1
+            if limit and count >= limit:
+                break
+
+    # The filename is assembled from caller-supplied query parameters. Reduce
+    # it to a conservative ASCII set so quotes, control characters or
+    # non-latin-1 text cannot break the Content-Disposition header.
+    raw_name = f'{collection}-{corpus or "all"}-{council or "all"}'
+    safe_name = re.sub(r"[^A-Za-z0-9._-]+", "_", raw_name)
+    filename = f"{safe_name}.ndjson"
+    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
+    return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)
+
+
+# ---------------------------------------------------------------------------
+# Section-specific format guides
+# Each section_id maps to a tightly-scoped formatting instruction injected at
+# the end of the prompt. This steers the LLM output for structured report
+# sections without changing the core RAG prompt.
+# ---------------------------------------------------------------------------
+def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:
+    """
+    Return strict, section-specific formatting guidance for the LLM.
+    Keep these short, prescriptive, and impossible to ignore.
+
+    Args:
+        section_id: report section identifier, compared case-insensitively;
+            None or an unrecognised value selects the default guide.
+        section_title: accepted but not read anywhere in this function.
+        ctx: optional hints; the keys read are "planning_zones" (list of
+            zone names) and "council".
+
+    Returns:
+        The guide text with leading/trailing whitespace stripped. Interior
+        lines keep the indentation of the literals below.
+    """
+    sid = (section_id or "").lower()
+
+    # Utility bits from context
+    zones = ctx.get("planning_zones") or []
+    zone_label = ", ".join(zones) if zones else "the applicable zone"
+    council_label = ctx.get("council") or ""
+
+    # ---- ZONING (tables of clauses like your sample) ----
+    if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:
+        return f"""
+            FORMAT REQUIREMENTS (MANDATORY):
+            - Produce a concise preface (≤ 2 sentences) naming {zone_label}.
+            - Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.
+            - One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.
+            - Columns (exact):
+              | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |
+            - "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").
+            - "Topic": short label extracted from the clause heading.
+            - "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.
+            - "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".
+            - "Source": filename + page (from CONTEXT).
+            - Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.
+            - After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.
+            """.strip()
+
+    # ---- Codes overview list/table (optional future) ----
+    if sid.startswith("code-"):
+        return """
+            FORMAT REQUIREMENTS:
+            - Start with one sentence stating which Code and why it is triggered.
+            - Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.
+            - Keep to 150–250 words + table.
+            """.strip()
+
+    # ---- Permit Overview (concise triggers) ----
+    if sid == "permit-overview":
+        return """
+            FORMAT REQUIREMENTS:
+            - Produce 3 blocks with headings:
+              1) "Project Context" – 3–5 bullet points (site, proposal, zone).
+              2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.
+              3) "Assessment Path" – bullet list of key clauses to assess next.
+            - Cite specific clause numbers ONLY if present in CONTEXT (include Source).
+            """.strip()
+
+    # ---- Default (no special formatting) ----
+    return """
+        FORMAT REQUIREMENTS:
+        - Use concise Markdown with short paragraphs and bullets as needed.
+        - Cite briefly (filename + page) when quoting a control.
+        """.strip()
+
+
+# ---------------------------------------------------------------------------
+# /ask — core RAG endpoint
+# ---------------------------------------------------------------------------
+class AskBody(BaseModel):
+    # Request body for POST /ask.
+    # accept multiple keys from different frontends
+    # (ask_post uses the first non-empty of query, question, q, prompt)
+    query: Optional[str] = None
+    question: Optional[str] = None
+    q: Optional[str] = None
+    prompt: Optional[str] = None
+
+    # Retrieval options, passed through to do_ask unchanged.
+    top_k: int = 10
+    council: Optional[str] = None
+    include_ncc: bool = False
+    include_standards: bool = False
+    source_contains: Optional[str] = None
+    scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'
+    section_id: Optional[str] = None
+
+    # BYOK mode: return context blocks without calling Ollama.
+    # The browser then calls its own LLM with the returned context + prompt.
+    context_only: bool = False
+
+def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:
+    """
+    Secondary guardrail applied after the Qdrant vector search.
+    Qdrant filters are the primary gate; this catches any edge-case leakage
+    (e.g. MatchText returning a partial match across corpora).
+
+    Args:
+        p: point payload; only "corpus" and "council" are read (case-insensitively).
+        scope: "local_only", "state_only" or "state_plus_local"; any other
+            value allows every payload.
+        cslug: slug of the selected council, or None when none is selected.
+
+    Returns:
+        A strict bool: True when the payload may be used under `scope`.
+    """
+    corp = (p.get("corpus") or "").lower()
+    council = (p.get("council") or "").lower()
+    # bool(cslug) keeps the result a real bool; `a and cslug and b` would leak
+    # None or "" to callers when no council is selected.
+    is_local = corp == "lps" and bool(cslug) and council == cslug
+    if scope == "local_only":
+        return is_local
+    if scope == "state_only":
+        return corp == "tps"
+    if scope == "state_plus_local":
+        return corp == "tps" or is_local
+    return True
+
+def do_ask(
+    query: str,
+    top_k: int = 10,
+    council: Optional[str] = None,
+    include_ncc: bool = False,
+    include_standards: bool = False,
+    source_contains: Optional[str] = None,
+    scope: str = "state_plus_local",
+    section_id: Optional[str] = None,
+    context_only: bool = False,
+):
+    """
+    Answer one question with retrieval-augmented generation.
+
+    Embeds `query`, runs one vector search per selected corpus (SPP, the
+    council's LPS, and optionally NCC / Australian Standards), assembles the
+    hits into a prompt and sends it to the chat model.
+
+    Args:
+        query: the user question.
+        top_k: overall chunk budget, clamped to 1..30 and divided across corpora.
+        council: council name; slugged before it is used for the LPS filter.
+        include_ncc: also search the NCC corpus.
+        include_standards: also search the Australian Standards corpus.
+        source_contains: extra source_file text filter ANDed onto every search.
+        scope: "state_plus_local", "local_only", "state_only" or "any".
+        section_id: selects a section-specific format guide for the prompt.
+        context_only: when True, skip the chat model and return the context.
+
+    Returns:
+        {"answer", "sources"}, or when `context_only` is set
+        {"context_only", "context", "prompt", "sources", "sections"}.
+
+    Raises:
+        HTTPException: propagated from ollama_embed / ollama_chat.
+    """
+    top_k = max(1, min(top_k, 30))  # clamp: at least 1, at most 30
+    vec = ollama_embed(query)
+    cslug = slug(council) if council else None
+
+    # Build the list of (section_heading, qdrant_filter) pairs based on scope.
+    # Each pair is searched independently so we can control the chunk budget
+    # per corpus — avoids TPS drowning out LPS results or vice versa.
+    scopes: List[Tuple[str, qmodels.Filter]] = []
+    if scope in ("state_only", "state_plus_local", "any"):
+        scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))
+    if scope in ("local_only", "state_plus_local", "any") and cslug:
+        scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))
+    if include_ncc:
+        scopes.append(("National Construction Code (NCC)", filter_ncc()))
+    if include_standards:
+        scopes.append(("Australian Standards (AS)", filter_as()))
+
+    # Apply additional filename filter if requested (AND)
+    scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]
+
+    # Divide top_k across scopes: SPP and LPS each get ~1/3, the remainder
+    # is split evenly across any extra corpora (NCC, AS).
+    per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0
+    per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0
+    remaining = max(1, top_k - (per_spp + per_lps))
+    extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))
+    per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0
+
+    limits: List[int] = []
+    for name, _ in scopes:
+        if name.startswith("Tasmanian Planning Scheme"):
+            limits.append(per_spp)
+        elif name.startswith("Local Provisions Schedule"):
+            limits.append(per_lps)
+        else:
+            limits.append(per_extra)
+
+    # Sections whose hits are governed by `scope`. NCC / AS are opt-in extras
+    # selected by their own flags and exact corpus filters.
+    scoped_sections = ("Tasmanian Planning Scheme", "Local Provisions Schedule")
+
+    sections: List[Tuple[str, List[str]]] = []
+    all_sources: List[dict] = []
+
+    for (name, flt), lim in zip(scopes, limits):
+        if lim <= 0:
+            continue
+        hits = q_search(vec, flt, lim)
+
+        # Guardrail: drop any hit that violates scope/council. Applied to the
+        # SPP/LPS sections only — _allowed() accepts just "tps"/"lps" corpora
+        # for every scope except "any", so running it on NCC/AS sections
+        # discarded all of their hits and made the include_* flags useless.
+        if name.startswith(scoped_sections):
+            hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]
+
+        blocks, sources = render_blocks(hits)
+        sections.append((name, blocks))
+        all_sources.extend(sources)
+
+    context = combine_context(sections)
+
+    format_guide = _section_format_guide(
+        section_id,
+        section_title="(auto)",
+        ctx={
+            "council": council,           # from do_ask parameter
+            "planning_zones": [],         # populate if you have zone detection
+        }
+    )
+
+    prompt = f"""
+You are an expert Tasmanian planning and building compliance assistant with deep knowledge of the Tasmanian Planning Scheme structure.
+
+## AUTHORITY ORDER — always apply in this sequence:
+1. State Planning Provisions (SPP) — the statewide baseline. Cite clause numbers exactly.
+2. Local Provisions Schedule (LPS) for the selected council — overrides SPP where it differs.
+3. National Construction Code (NCC) — building controls only, keep separate from planning.
+4. Australian Standards — only when directly referenced by a clause in CONTEXT.
+
+## STRICT RULES:
+- Use ONLY information present in CONTEXT below. Never invent clause numbers, standards, or measurements.
+- If CONTEXT does not contain enough information to answer, say: "The provided context does not cover this — check the TPSO viewer directly at tpso.planning.tas.gov.au"
+- Every specific standard or requirement you state MUST include its source: (filename, p.N)
+- Quote clause text briefly (1–2 lines max) then explain in plain English.
+- Distinguish clearly between Acceptable Solutions (A) and Performance Criteria (P).
+
+## OUTPUT FORMAT:
+- Use Markdown: ## for main headings, ### for sub-headings, **bold** for clause numbers.
+- For setbacks, parking rates, or multiple standards: use a Markdown table with columns: Clause | Requirement | A or P | Source
+- End every response with a ## Sources section listing each cited document and page.
+- Keep answers concise but complete — do not pad or repeat information.
+- Professional planning language; avoid informal phrasing.
+
+## CONTEXT (retrieved from Tasmanian Planning Scheme documents):
+{context}
+
+{format_guide}
+
+## QUESTION:
+{query}
+
+## ANSWER:
+""".strip()
+
+    # BYOK mode: skip Ollama and return the context + prompt so the
+    # browser can call its own LLM provider (Claude, GPT, Grok, etc.)
+    if context_only:
+        return {
+            "context_only": True,
+            "context": context,
+            "prompt": prompt,
+            "sources": all_sources,
+            # Include the raw section blocks so the browser can inspect them
+            "sections": [
+                {"heading": name, "blocks": blocks}
+                for name, blocks in sections
+            ]
+        }
+
+    answer = ollama_chat(prompt)
+    return {"answer": answer, "sources": all_sources}
+
+
+@app.get("/ask")
+@limiter.limit("20/minute")
+def ask_get(
+    request: Request,
+    query: str = Query(..., description="User question"),
+    top_k: int = 10,
+    council: Optional[str] = None,
+    include_ncc: bool = False,
+    include_standards: bool = False,
+    source_contains: Optional[str] = None,
+    scope: str = "state_plus_local",
+    section_id: Optional[str] = None,
+    context_only: bool = False,
+):
+    # Query-string variant of /ask: runs do_ask, logs one ask_logs row and
+    # returns do_ask's result unchanged.
+    # NOTE(review): `scope` is a free-form str here, while AskBody (POST)
+    # restricts it to a Literal — confirm whether GET should validate too.
+    _verify_demo_token_if_needed(request)
+
+    # latency_ms covers the whole do_ask call.
+    started = time.perf_counter()
+    out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
+    latency_ms = int((time.perf_counter() - started) * 1000)
+
+    # Telemetry insert — never allowed to break the response
+    try:
+        ip = request.client.host if request.client else "0.0.0.0"
+        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
+        # NOTE(review): scope "any" also searches the TPS corpus in do_ask but
+        # is logged as allow_tps=0 — confirm this is intended.
+        allow_tps = scope in ("state_only", "state_plus_local")
+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
+
+        with db() as conn:
+            conn.execute("""
+                INSERT INTO ask_logs
+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
+                     model, ok, topk_json, tokens_in, tokens_out, answer)
+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+            """, (
+                datetime.utcnow().isoformat(),
+                sid, ip_hash(ip), query, _normalize(query),
+                scope, int(allow_tps),
+                # ok is always 1 and both token counts are always 0 here.
+                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
+                # context_only results have no "answer" key, so "" is stored.
+                _trunc(out.get("answer") or "", 8000, "ask_get.answer"),
+            ))
+            conn.commit()
+    except Exception as e:
+        logger.exception("[telemetry] ask_get insert failed")
+
+    return out
+
+
+@app.post("/ask")
+@limiter.limit("20/minute")
+def ask_post(request: Request, body: AskBody):
+    # JSON-body variant of /ask: runs do_ask, logs one ask_logs row and
+    # returns do_ask's result unchanged. Responds 422 when no question text
+    # is supplied.
+    _verify_demo_token_if_needed(request)
+    # First non-empty of the accepted question keys.
+    qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
+    if not qtxt:
+        raise HTTPException(status_code=422, detail="Missing query/question")
+
+    # latency_ms covers the whole do_ask call.
+    started = time.perf_counter()
+    out = do_ask(
+        query=qtxt,
+        top_k=body.top_k,
+        council=body.council,
+        include_ncc=body.include_ncc,
+        include_standards=body.include_standards,
+        source_contains=body.source_contains,
+        scope=body.scope,
+        section_id=body.section_id,
+        context_only=body.context_only,
+    )
+    latency_ms = int((time.perf_counter() - started) * 1000)
+
+    # Telemetry insert — never allowed to break the response
+    try:
+        ip = request.client.host if request.client else "0.0.0.0"
+        sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
+        # NOTE(review): scope "any" also searches the TPS corpus in do_ask but
+        # is logged as allow_tps=0 — confirm this is intended.
+        allow_tps = body.scope in ("state_only", "state_plus_local")
+        topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
+
+        with db() as conn:
+            conn.execute("""
+                INSERT INTO ask_logs
+                    (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
+                     model, ok, topk_json, tokens_in, tokens_out, answer)
+                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+            """, (
+                datetime.utcnow().isoformat(),
+                sid, ip_hash(ip), qtxt, _normalize(qtxt),
+                body.scope, int(allow_tps),
+                # ok is always 1 and both token counts are always 0 here.
+                latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
+                # context_only results have no "answer" key, so "" is stored.
+                _trunc(out.get("answer") or "", 8000, "ask_post.answer"),
+            ))
+            conn.commit()
+    except Exception as e:
+        logger.exception("[telemetry] ask_post insert failed")
+
+    return out

+ 4 - 0
backend/limiter.py

@@ -0,0 +1,4 @@
+from slowapi import Limiter
+from slowapi.util import get_remote_address
+
+# Shared slowapi Limiter instance. Rate-limit buckets are keyed by the value
+# get_remote_address returns for each request.
+limiter = Limiter(key_func=get_remote_address)

+ 0 - 116
backend/telemetry(1).py

@@ -1,116 +0,0 @@
-# backend/telemetry.py
-from fastapi import APIRouter, Request, Depends
-from pydantic import BaseModel
-import time, hashlib, hmac, os, sqlite3, json, hashlib
-from datetime import datetime
-from typing import Any, Dict, List, Optional
-
-router = APIRouter()
-DB_PATH = os.getenv("TPR_DB", "/data/telemetry.db")
-IP_SECRET = os.getenv("TPR_IP_SECRET", "change-me")
-
-def db():
-    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
-    conn = sqlite3.connect(DB_PATH)
-    try:
-        conn.execute("PRAGMA journal_mode=WAL;")
-    except sqlite3.OperationalError:
-        pass  # read-only FS or WAL not supported — continue anyway
-    return conn
-
-def ip_hash(ip: str) -> str:
-    dig = hmac.new(IP_SECRET.encode(), ip.encode(), 'sha256').hexdigest()
-    return dig[:12]
-
-class TelemetryEvent(BaseModel):
-    type: str
-    ts: str
-    sid: str
-    ua: Optional[str] = None
-    data: Dict[str, Any] = {}
-
-@router.on_event("startup")
-def init():
-    try:
-        with db() as conn:
-            conn.executescript("""
-            CREATE TABLE IF NOT EXISTS events (
-              id INTEGER PRIMARY KEY AUTOINCREMENT,
-              ts TEXT NOT NULL,
-              type TEXT NOT NULL,
-              sid TEXT,
-              ua TEXT,
-              ip_hash TEXT,
-              data_json TEXT
-            );
-            CREATE TABLE IF NOT EXISTS ask_logs (
-              id INTEGER PRIMARY KEY AUTOINCREMENT,
-              ts TEXT NOT NULL,
-              sid TEXT,
-              ip_hash TEXT,
-              query TEXT,
-              normalized TEXT,
-              allow_tps INTEGER,
-              latency_ms INTEGER,
-              model TEXT,
-              ok INTEGER,
-              topk_json TEXT,
-              tokens_in INTEGER,
-              tokens_out INTEGER
-            );
-            """)
-            conn.commit()
-    except Exception as e:
-        print(f"[telemetry] DB init failed: {e} — telemetry will be disabled")
-        # Do NOT re-raise — let the app start without telemetry
-
-@router.post("/telemetry")
-async def telemetry(ev: TelemetryEvent, request: Request):
-    ip = request.client.host if request.client else "0.0.0.0"
-    with db() as conn:
-        conn.execute(
-          "INSERT INTO events (ts,type,sid,ua,ip_hash,data_json) VALUES (?,?,?,?,?,json(?))",
-          (ev.ts, ev.type, ev.sid, ev.ua, ip_hash(ip), json_dumps(ev.data))
-        )
-        conn.commit()
-    return {"ok": True}
-
-# Wrap your /ask handler to also log authoritative facts:
-from fastapi import APIRouter
-import json, re
-ask_router = APIRouter()
-
-def normalize(q: str) -> str:
-    return re.sub(r"\s+", " ", q.strip().lower())
-
-def json_dumps(o): return json.dumps(o, ensure_ascii=False, separators=(",",":"))
-
-@ask_router.post("/ask")
-async def ask(req: Dict[str, Any], request: Request):
-    t0 = time.perf_counter()
-    ip = request.client.host if request.client else "0.0.0.0"
-    sid = request.headers.get("x-tpr-sid") or request.cookies.get("sid") or ""
-    query = (req.get("query") or "").strip()
-    allow_tps = bool(req.get("allow_tps"))
-    # ... run retrieval/LLM as you already do ...
-    # mock result placeholders:
-    model = "localai/llama-3.1"
-    topk = [{"id": "doc123", "score": 0.83}]
-    tokens_in, tokens_out = 250, 380
-    ok = True
-    answer = {"text": "…", "citations": topk, "model": model, "usage": {"input_tokens": tokens_in, "output_tokens": tokens_out}}
-
-    latency = int((time.perf_counter() - t0) * 1000)
-
-    with db() as conn:
-        conn.execute("""
-        INSERT INTO ask_logs (ts,sid,ip_hash,query,normalized,allow_tps,latency_ms,model,ok,topk_json,tokens_in,tokens_out)
-        VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
-        """, (
-            datetime.utcnow().isoformat(),
-            sid, ip_hash(ip), query, normalize(query), int(allow_tps),
-            latency, model, int(ok), json_dumps(topk), tokens_in, tokens_out
-        ))
-        conn.commit()
-
-    return answer

+ 138 - 129
backend/telemetry.py

@@ -1,129 +1,138 @@
-# backend/telemetry.py
-from fastapi import APIRouter, Request, Depends
-from pydantic import BaseModel
-import time, hashlib, hmac, os
-from datetime import datetime
-from typing import Any, Dict, List, Optional
-import os, sqlite3, json, hmac, hashlib
-
-router = APIRouter()
-DB_PATH = os.getenv("TPR_DB", "/data/telemetry.db")
-IP_SECRET = os.getenv("TPR_IP_SECRET", "change-me")
-
-def db():
-    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
-    conn = sqlite3.connect(DB_PATH)
-    try:
-        conn.execute("PRAGMA journal_mode=WAL;")
-    except sqlite3.OperationalError:
-        # fallback if FS forbids WAL
-        conn.execute("PRAGMA journal_mode=DELETE;")
-    return conn
-
-def ip_hash(ip: str) -> str:
-    dig = hmac.new(IP_SECRET.encode(), ip.encode(), 'sha256').hexdigest()
-    return dig[:12]
-
-class TelemetryEvent(BaseModel):
-    type: str
-    ts: str
-    sid: str
-    ua: Optional[str] = None
-    data: Dict[str, Any] = {}
-
-@router.on_event("startup")
-def init():
-    with db() as conn:
-        conn.executescript("""
-        CREATE TABLE IF NOT EXISTS events (
-          id INTEGER PRIMARY KEY AUTOINCREMENT,
-          ts TEXT NOT NULL,
-          type TEXT NOT NULL,
-          sid TEXT,
-          ua TEXT,
-          ip_hash TEXT,
-          data_json TEXT
-        );
-        CREATE TABLE IF NOT EXISTS ask_logs (
-          id INTEGER PRIMARY KEY AUTOINCREMENT,
-          ts TEXT NOT NULL,
-          sid TEXT,
-          ip_hash TEXT,
-          query TEXT,
-          normalized TEXT,
-          scope TEXT,
-          allow_tps INTEGER,
-          latency_ms INTEGER,
-          model TEXT,
-          ok INTEGER,
-          topk_json TEXT,
-          tokens_in INTEGER,
-          tokens_out INTEGER,
-          answer TEXT
-        );
-        CREATE TABLE IF NOT EXISTS feedback (
-          id INTEGER PRIMARY KEY AUTOINCREMENT,
-          ts TEXT NOT NULL,
-          sid TEXT,
-          ip_hash TEXT,
-          verdict TEXT NOT NULL,        -- 'up' or 'down'
-          query TEXT,                   -- the question asked
-          answer TEXT,                  -- the answer rated
-          note TEXT,                    -- optional free-text note (thumbs down)
-          model TEXT,                   -- which LLM answered
-          scope TEXT,                   -- retrieval scope used
-          sources_json TEXT             -- JSON array of cited sources
-        );
-        """)
-        conn.commit()
-
-@router.post("/telemetry")
-async def telemetry(ev: TelemetryEvent, request: Request):
-    ip = request.client.host if request.client else "0.0.0.0"
-    with db() as conn:
-        conn.execute(
-          "INSERT INTO events (ts,type,sid,ua,ip_hash,data_json) VALUES (?,?,?,?,?,json(?))",
-          (ev.ts, ev.type, ev.sid, ev.ua, ip_hash(ip), json_dumps(ev.data))
-        )
-        conn.commit()
-    return {"ok": True}
-
-# Wrap your /ask handler to also log authoritative facts:
-from fastapi import APIRouter
-import json, re
-ask_router = APIRouter()
-
-def normalize(q: str) -> str:
-    return re.sub(r"\s+", " ", q.strip().lower())
-
-def json_dumps(o): return json.dumps(o, ensure_ascii=False, separators=(",",":"))
-
-@ask_router.post("/ask")
-async def ask(req: Dict[str, Any], request: Request):
-    t0 = time.perf_counter()
-    ip = request.client.host if request.client else "0.0.0.0"
-    sid = request.headers.get("x-tpr-sid") or request.cookies.get("sid") or ""
-    query = (req.get("query") or "").strip()
-    allow_tps = bool(req.get("allow_tps"))
-    # ... run retrieval/LLM as you already do ...
-    # mock result placeholders:
-    model = "localai/llama-3.1"
-    topk = [{"id": "doc123", "score": 0.83}]
-    tokens_in, tokens_out = 250, 380
-    ok = True
-    answer = {"text": "…", "citations": topk, "model": model, "usage": {"input_tokens": tokens_in, "output_tokens": tokens_out}}
-
-    latency = int((time.perf_counter() - t0) * 1000)
-
-    with db() as conn:
-        conn.execute("""
-        INSERT INTO ask_logs (ts,sid,ip_hash,query,normalized,allow_tps,latency_ms,model,ok,topk_json,tokens_in,tokens_out)
-        VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
-        """, (
-            datetime.utcnow().isoformat(),
-            sid, ip_hash(ip), query, normalize(query), int(allow_tps),
-            latency, model, int(ok), json_dumps(topk), tokens_in, tokens_out
-        ))
-        conn.commit()
-
-    return answer
+"""
+telemetry.py — SQLite telemetry store + /telemetry event endpoint
+
+Shared between app.py (ask_logs, feedback inserts) and the /telemetry
+route (browser-side events: page_view, byok_call, etc.).
+
+Database layout
+---------------
+  ask_logs  — one row per /ask call (query, latency, model, answer, …)
+  feedback  — thumbs up/down ratings from users
+  events    — generic browser-side events (page views, BYOK calls, errors)
+
+The DB file path comes from the TPR_DB env var (default /data/telemetry.db).
+It is bind-mounted from the host so both the backend and web containers can
+read from the same file.
+
+IP hashing
+----------
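+Client IPs are never stored in plain text. ip_hash() produces a 12-char
+HMAC-SHA256 prefix keyed on TPR_IP_SECRET. The truncation to 12 hex chars
+(48 bits) is intentional — it keeps enough entropy to distinguish users for
+analytics. Resistance to reversal comes from the secret key, not from the
+truncation: if the hash list is leaked, the hashes stay opaque only while
+TPR_IP_SECRET is private, so never deploy with the "change-me" default.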
+"""
+import hashlib
+import hmac
+import json
+import os
+import sqlite3
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from fastapi import APIRouter, Request
+from pydantic import BaseModel
+
+from limiter import limiter
+
+# Router carrying the /telemetry route and the startup table-creation hook.
+router = APIRouter()
+# SQLite file location; override with the TPR_DB environment variable.
+DB_PATH = os.getenv("TPR_DB", "/data/telemetry.db")
+# HMAC key used by ip_hash(). NOTE(review): the "change-me" fallback is a
+# publicly known value — presumably every deployment sets TPR_IP_SECRET; confirm.
+IP_SECRET = os.getenv("TPR_IP_SECRET", "change-me")
+
+
+def db():
+    """
+    Open a SQLite connection with WAL journal mode for better concurrency.
+    Falls back to DELETE mode on filesystems that don't support WAL (e.g. some
+    NFS or tmpfs mounts) — this can happen in certain Docker volume configs.
+    Returns a context-manager-compatible connection (use `with db() as conn`).
+    """
+    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        conn.execute("PRAGMA journal_mode=WAL;")
+    except sqlite3.OperationalError:
+        # fallback if FS forbids WAL
+        conn.execute("PRAGMA journal_mode=DELETE;")
+    return conn
+
+def ip_hash(ip: str) -> str:
+    """
+    Return a 12-char HMAC-SHA256 prefix of the IP address.
+    Keyed on TPR_IP_SECRET — rotate the secret to invalidate all stored hashes.
+    """
+    dig = hmac.new(IP_SECRET.encode(), ip.encode(), 'sha256').hexdigest()
+    return dig[:12]
+
+
+# Request body accepted by POST /telemetry. type, ts and sid are required;
+# ua and data are optional.
+class TelemetryEvent(BaseModel):
+    type: str               # e.g. "page_view", "byok_call", "error"
+    ts: str                 # ISO timestamp from the client (informational only)
+    sid: str                # browser session ID
+    ua: Optional[str] = None  # user-agent string as reported by the client, if any
+    data: Dict[str, Any] = {}  # free-form event payload, serialised into data_json
+
+
+@router.on_event("startup")
+def init():
+    """
+    Create the events, ask_logs and feedback tables if they don't exist.
+
+    Runs on application startup and is safe to repeat: every statement is
+    CREATE TABLE IF NOT EXISTS. Exceptions are not caught here, so a database
+    that cannot be opened or written propagates the error to the caller.
+    """
+    # NOTE(review): CREATE TABLE IF NOT EXISTS does not add columns to a table
+    # that already exists, so a database created with an older ask_logs schema
+    # (without scope/answer) keeps that schema — confirm whether a migration
+    # step is needed for existing deployments.
+    with db() as conn:
+        conn.executescript("""
+        CREATE TABLE IF NOT EXISTS events (
+          id INTEGER PRIMARY KEY AUTOINCREMENT,
+          ts TEXT NOT NULL,
+          type TEXT NOT NULL,
+          sid TEXT,
+          ua TEXT,
+          ip_hash TEXT,
+          data_json TEXT
+        );
+        CREATE TABLE IF NOT EXISTS ask_logs (
+          id INTEGER PRIMARY KEY AUTOINCREMENT,
+          ts TEXT NOT NULL,
+          sid TEXT,
+          ip_hash TEXT,
+          query TEXT,
+          normalized TEXT,
+          scope TEXT,
+          allow_tps INTEGER,
+          latency_ms INTEGER,
+          model TEXT,
+          ok INTEGER,
+          topk_json TEXT,
+          tokens_in INTEGER,
+          tokens_out INTEGER,
+          answer TEXT
+        );
+        CREATE TABLE IF NOT EXISTS feedback (
+          id INTEGER PRIMARY KEY AUTOINCREMENT,
+          ts TEXT NOT NULL,
+          sid TEXT,
+          ip_hash TEXT,
+          verdict TEXT NOT NULL,        -- 'up' or 'down'
+          query TEXT,                   -- the question asked
+          answer TEXT,                  -- the answer rated
+          note TEXT,                    -- optional free-text note (thumbs down)
+          model TEXT,                   -- which LLM answered
+          scope TEXT,                   -- retrieval scope used
+          sources_json TEXT             -- JSON array of cited sources
+        );
+        """)
+        conn.commit()
+
+
+@router.post("/telemetry")
+@limiter.limit("60/minute")
+async def telemetry(ev: TelemetryEvent, request: Request):
+    """
+    Record a generic browser-side event in the events table.
+
+    The server-side UTC timestamp is used for the row's ts column. The
+    client-supplied ev.ts is accepted in the request body but is not written
+    to the database; only ev.data is serialised into data_json.
+    NOTE(review): if dashboards need client-reported timing, ev.ts has to be
+    added to the stored payload — confirm the intended behaviour.
+
+    Returns {"ok": True} after the row has been committed.
+    """
+    # Fall back to a placeholder address when the client address is not
+    # available on the request.
+    ip = request.client.host if request.client else "0.0.0.0"
+    with db() as conn:
+        conn.execute(
+          "INSERT INTO events (ts,type,sid,ua,ip_hash,data_json) VALUES (?,?,?,?,?,json(?))",
+          (datetime.utcnow().isoformat(), ev.type, ev.sid, ev.ua, ip_hash(ip), json_dumps(ev.data))
+        )
+        conn.commit()
+    return {"ok": True}
+
+
+# ---------------------------------------------------------------------------
+# Utilities (used by app.py via `from telemetry import …`)
+# ---------------------------------------------------------------------------
+
+def json_dumps(o): return json.dumps(o, ensure_ascii=False, separators=(",",":"))

+ 3 - 1
docker-compose.yml

@@ -87,6 +87,8 @@ services:
         - SMTP_FROM_NAME=Tas Planning Assistant
         - NOTIFY_EMAIL=ben@modulos.com.au
         - APP_API_BASE=https://api.modulos.com.au/ask
+        - CORS_ORIGINS=${CORS_ORIGINS:-https://tasplanning.report,https://modulosdesign.com.au,https://llm.modulos.com.au}
+        - APP_ENV=${APP_ENV:-production}
       depends_on:
         - backend
       restart: unless-stopped
@@ -120,7 +122,7 @@ services:
             > /etc/apache2/conf-enabled/rewrite-https.conf
 
           printf "ServerName localhost\n" > /etc/apache2/conf-enabled/servername.conf
-          printf "PassEnv GMAPS_API_KEY\n" > /etc/apache2/conf-enabled/passenv.conf;
+          printf "PassEnv GMAPS_API_KEY\nPassEnv CORS_ORIGINS\nPassEnv APP_ENV\n" > /etc/apache2/conf-enabled/passenv.conf;
 
           chown -R www-data:www-data /var/www/html/cache || true;
 

+ 11 - 0
public/_bootstrap.php

@@ -0,0 +1,11 @@
+<?php
+/**
+ * _bootstrap.php — shared PHP initialisation included at the top of every page.
+ *
+ * Keep this file minimal. It must be safe to include before any output,
+ * including on pure-API endpoints like generate_planning_report.php.
+ */
+
+// Single source of truth for the application timezone.
+// Tasmania observes AEST (UTC+10) in winter and AEDT (UTC+11) during daylight saving.
+date_default_timezone_set('Australia/Hobart');

+ 1 - 24
public/byok-settings.php

@@ -12,31 +12,8 @@
   <link href="https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap" rel="stylesheet">
   <link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">
   <link rel="icon" href="/favicon.ico">
+  <link rel="stylesheet" href="/css/design-tokens.css">
   <style>
-    :root {
-      --bg:           #0b0f0e;
-      --bg-1:         #111614;
-      --bg-2:         #181e1b;
-      --bg-card:      #141a17;
-      --border:       rgba(255,255,255,0.07);
-      --border-hover: rgba(255,255,255,0.14);
-      --accent:       #2ddc8a;
-      --accent-dim:   rgba(45,220,138,0.10);
-      --accent-glow:  rgba(45,220,138,0.22);
-      --text-primary: #eaf0ec;
-      --text-secondary:#8fa899;
-      --text-muted:   #4f6459;
-      --danger:       #f08080;
-      --warn:         #f0c060;
-      --warn-dim:     rgba(240,192,96,0.10);
-      --serif:        'DM Serif Display', Georgia, serif;
-      --sans:         'DM Sans', system-ui, sans-serif;
-      --mono:         ui-monospace, 'Cascadia Code', Menlo, monospace;
-      --radius:       10px;
-      --radius-lg:    16px;
-      --radius-sm:    5px;
-      --transition:   0.16s cubic-bezier(0.4,0,0.2,1);
-    }
     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
     body {
       font-family: var(--sans); background: var(--bg); color: var(--text-primary);

+ 53 - 0
public/css/design-tokens.css

@@ -0,0 +1,53 @@
+/* ==========================================================================
+   design-tokens.css — shared CSS custom properties for tasplanning.report
+
+   Link this file from every page's <head> BEFORE any page-specific <style>:
+     <link rel="stylesheet" href="/css/design-tokens.css">
+
+   Pages that need different values for a specific token (e.g. --radius on
+   site-report) can override just that token in a subsequent <style> block.
+
+   NOTE: dashboard.php uses an abbreviated naming convention (--acc, --t1,
+   etc.) and is excluded from this file — its tokens live inline.
+   ========================================================================== */
+
+:root {
+  /* ── Backgrounds ────────────────────────────────────────────────────── */
+  --bg:             #0b0f0e;
+  --bg-1:           #111614;
+  --bg-2:           #181e1b;
+  --bg-card:        #141a17;
+
+  /* ── Borders ────────────────────────────────────────────────────────── */
+  --border:         rgba(255,255,255,0.07);
+  --border-hover:   rgba(255,255,255,0.14);
+
+  /* ── Accent (green) ─────────────────────────────────────────────────── */
+  --accent:         #2ddc8a;
+  --accent-dim:     rgba(45,220,138,0.10);
+  --accent-glow:    rgba(45,220,138,0.22);
+
+  /* ── Text ───────────────────────────────────────────────────────────── */
+  --text-primary:   #eaf0ec;
+  --text-secondary: #8fa899;
+  --text-muted:     #4f6459;
+
+  /* ── Status ─────────────────────────────────────────────────────────── */
+  --danger:         #f08080;
+  --warn:           #f0c060;
+  --warn-dim:       rgba(240,192,96,0.10);
+
+  /* ── Typography ─────────────────────────────────────────────────────── */
+  --serif:          'DM Serif Display', Georgia, serif;
+  --sans:           'DM Sans', system-ui, sans-serif;
+  --mono:           ui-monospace, 'Cascadia Code', Menlo, monospace;
+
+  /* ── Shape ──────────────────────────────────────────────────────────── */
+  --radius-sm:      5px;
+  --radius:         10px;
+  --radius-lg:      16px;
+  --radius-xl:      26px;
+
+  /* ── Motion ─────────────────────────────────────────────────────────── */
+  --transition:     0.16s cubic-bezier(0.4,0,0.2,1);
+}

+ 19 - 6
public/dashboard.php

@@ -1,4 +1,5 @@
 <?php
+require_once __DIR__ . '/_bootstrap.php';
 /**
  * dashboard.php — Query monitoring dashboard
  * Place in /home/modulos_llm/public/
@@ -18,10 +19,17 @@ if ($dashPass) {
 }
 
 // Copy to a temp file www-data can open — source DB is owned by uid 10001
+// Write to a staging path first, then rename atomically to avoid a window
+// where the file is world-readable before chmod tightens permissions.
 $tmpDb = sys_get_temp_dir() . '/tpr_dash_' . date('YmdH') . '.db';
 if (!file_exists($tmpDb) || (filemtime($tmpDb) < time() - 55)) {
-    @copy($dbPath, $tmpDb);
-    @chmod($tmpDb, 0600);
+    $stagingDb = $tmpDb . '.tmp';
+    $prevUmask = umask(0177); // 0600 effective permissions on created file
+    $copied = @copy($dbPath, $stagingDb);
+    umask($prevUmask);
+    if ($copied) {
+        @rename($stagingDb, $tmpDb); // atomic replace — never briefly world-readable
+    }
 }
 $readPath = file_exists($tmpDb) ? $tmpDb : $dbPath;
 
@@ -44,8 +52,9 @@ if ($db) {
         $stats['total'] = (int)($r['t']??0);
         $stats['avg_latency'] = (int)($r['a']??0);
 
-        $r = $db->query("SELECT COUNT(*) as c FROM ask_logs WHERE ts LIKE '{$today}%'")->fetch();
-        $stats['today'] = (int)($r['c']??0);
+        $stmt = $db->prepare("SELECT COUNT(*) as c FROM ask_logs WHERE ts LIKE ?");
+        $stmt->execute([$today . '%']);
+        $stats['today'] = (int)($stmt->fetch()['c'] ?? 0);
 
         foreach ($db->query("SELECT verdict, COUNT(*) as c FROM feedback GROUP BY verdict")->fetchAll() as $r) {
             if ($r['verdict']==='up')   $stats['thumbs_up']   = (int)$r['c'];
@@ -54,9 +63,13 @@ if ($db) {
 
         $stats['models'] = $db->query("SELECT model, COUNT(*) as c FROM ask_logs GROUP BY model ORDER BY c DESC LIMIT 5")->fetchAll();
 
-        $todayRows = $db->query("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts LIKE '{$today}%' ORDER BY ts DESC LIMIT 100")->fetchAll();
+        $stmt = $db->prepare("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts LIKE ? ORDER BY ts DESC LIMIT 100");
+        $stmt->execute([$today . '%']);
+        $todayRows = $stmt->fetchAll();
 
-        $recentRows = $db->query("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts NOT LIKE '{$today}%' ORDER BY ts DESC LIMIT 200")->fetchAll();
+        $stmt = $db->prepare("SELECT id,ts,query,answer,latency_ms,model,scope,ok,COALESCE(topk_json,'[]') as topk_json FROM ask_logs WHERE ts NOT LIKE ? ORDER BY ts DESC LIMIT 200");
+        $stmt->execute([$today . '%']);
+        $recentRows = $stmt->fetchAll();
 
         $feedbackRows = $db->query("SELECT id,ts,verdict,query,note,model,scope FROM feedback ORDER BY ts DESC LIMIT 50")->fetchAll();
 

+ 3 - 19
public/faq.php

@@ -14,6 +14,7 @@
   <link href="https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap" rel="stylesheet">
   <link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">
   <link rel="icon" href="/favicon.ico">
+  <link rel="stylesheet" href="/css/design-tokens.css">
   <link rel="apple-touch-icon" href="/image/apple-touch-icon.png">
 
   <!-- FAQ structured data -->
@@ -91,25 +92,8 @@
   </script>
 
   <style>
-    :root {
-      --bg:           #0b0f0e;
-      --bg-1:         #111614;
-      --bg-2:         #181e1b;
-      --bg-card:      #141a17;
-      --border:       rgba(255,255,255,0.07);
-      --border-hover: rgba(255,255,255,0.14);
-      --accent:       #2ddc8a;
-      --accent-dim:   rgba(45,220,138,0.10);
-      --accent-glow:  rgba(45,220,138,0.22);
-      --text-primary: #eaf0ec;
-      --text-secondary:#8fa899;
-      --text-muted:   #4f6459;
-      --serif:        'DM Serif Display', Georgia, serif;
-      --sans:         'DM Sans', system-ui, sans-serif;
-      --radius:       10px;
-      --radius-lg:    16px;
-      --transition:   0.18s cubic-bezier(0.4,0,0.2,1);
-    }
+    /* ── Page-specific token overrides ───────────────────────────────── */
+    :root { --transition: 0.18s cubic-bezier(0.4,0,0.2,1); }
 
     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
     html { scroll-behavior: smooth; }

+ 12 - 9
public/generate_planning_report.php

@@ -1,4 +1,5 @@
 <?php declare(strict_types=1);
+require_once __DIR__ . '/_bootstrap.php';
 
 ini_set('display_errors', '0');
 ini_set('log_errors', '1');
@@ -12,15 +13,8 @@ error_reporting(E_ALL);
  * Test:     curl -s -X POST -H "Content-Type: application/json" \
  *           --data @sample.json http://localhost/internal/classes/generate_planning_report.php | jq
  */
-date_default_timezone_set('Australia/Hobart');
-
-$allowedOrigins = [
-    'http://192.168.8.69:2380',   // your demo.html origin
-    'http://localhost',           // handy for local dev
-    'http://localhost:8080',
-    'https://modulosdesign.com.au',
-    'https://llm.modulos.com.au'
-];
+$corsEnv = getenv('CORS_ORIGINS') ?: 'https://tasplanning.report';
+$allowedOrigins = array_filter(array_map('trim', explode(',', $corsEnv)));
 $origin = $_SERVER['HTTP_ORIGIN'] ?? '';
 if ($origin && in_array($origin, $allowedOrigins, true)) {
     header("Access-Control-Allow-Origin: $origin");
@@ -50,6 +44,15 @@ try {
         exit;
     }
 
+    // Reject non-JSON content types early — prevents json_decode silently
+    // returning null on form-encoded or multipart bodies.
+    $ct = $_SERVER['CONTENT_TYPE'] ?? '';
+    if (strpos($ct, 'application/json') === false) {
+        http_response_code(415);
+        echo json_encode(['ok' => false, 'error' => 'Content-Type must be application/json']);
+        exit;
+    }
+
     $raw = file_get_contents('php://input') ?: '';
     $in  = json_decode($raw, true, 512, JSON_THROW_ON_ERROR);
 

+ 6 - 1
public/list_lookup.php

@@ -69,7 +69,12 @@ try {
     function ll_cache_set($key,$data){
         $f = ll_cache_path($key);
         if (!is_dir(dirname($f))) @mkdir(dirname($f), 0775, true);
-        @file_put_contents($f, json_encode($data, JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE));
+        // Write to a temp file then rename atomically so concurrent requests
+        // that miss the cache simultaneously don't both write a partial file.
+        $tmp = $f . '.tmp.' . getmypid();
+        if (@file_put_contents($tmp, json_encode($data, JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE)) !== false) {
+            @rename($tmp, $f);
+        }
     }
 
     // Pre-cache key by snapped lat/lng (so we can serve before we know PID)

+ 3 - 24
public/local_state-planning-scheme.php

@@ -26,32 +26,11 @@
   <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
   <link href="https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap" rel="stylesheet">
   <link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/design-tokens.css">
 
   <style>
-    /* ── Tokens ──────────────────────────────────────────────────────── */
-    :root {
-      --bg:           #0b0f0e;
-      --bg-1:         #111614;
-      --bg-2:         #181e1b;
-      --bg-card:      #141a17;
-      --border:       rgba(255,255,255,0.07);
-      --border-hover: rgba(255,255,255,0.14);
-      --accent:       #2ddc8a;
-      --accent-dim:   rgba(45,220,138,0.10);
-      --accent-glow:  rgba(45,220,138,0.22);
-      --text-primary: #eaf0ec;
-      --text-secondary:#8fa899;
-      --text-muted:   #4f6459;
-      --danger:       #f08080;
-      --user-bg:      #1a2420;
-      --serif:        'DM Serif Display', Georgia, serif;
-      --sans:         'DM Sans', system-ui, sans-serif;
-      --mono:         ui-monospace, 'Cascadia Code', Menlo, monospace;
-      --radius:       10px;
-      --radius-lg:    16px;
-      --radius-sm:    5px;
-      --transition:   0.16s cubic-bezier(0.4,0,0.2,1);
-    }
+    /* ── Page-specific token overrides ───────────────────────────────── */
+    :root { --user-bg: #1a2420; }
 
     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
     html, body { height: 100%; }

+ 14 - 27
public/site-report.php

@@ -1,4 +1,4 @@
-<?php
+<?php require_once __DIR__ . '/_bootstrap.php';
 /* =========================================================================
  * Site Report – Tasmanian Property Lookup (PHP 8.3+)
  * -------------------------------------------------------------------------
@@ -8,8 +8,6 @@
  * - Generate AI report via generate_planning_report.php
  * - Open Section Builder with BroadcastChannel / postMessage handoff
  * =======================================================================*/
-date_default_timezone_set('Australia/Hobart');
-
 // ── Security: API key from environment only, never hardcoded ──────────────
 // Set GMAPS_API_KEY in your .env / docker-compose environment block.
 // The key is passed to a PHP proxy endpoint (/gmaps-key.php) so it never
@@ -17,10 +15,13 @@ date_default_timezone_set('Australia/Hobart');
 $GMAPS_API_KEY   = getenv('GMAPS_API_KEY') ?: '';
 $LOOKUP_ENDPOINT = './list_lookup.php';
 $REPORT_ENDPOINT = './generate_planning_report.php';
-$IS_LOCAL        = in_array($_SERVER['REMOTE_ADDR'] ?? '', ['127.0.0.1', '::1']);
+// Use APP_ENV=local (set in .env) to enable the dev shortcut of inlining the
+// Maps key directly. Never derive this from REMOTE_ADDR — inside Docker the
+// client address is the container gateway (172.x.x.x), not 127.0.0.1, so the
+// old check always evaluated to false and the inline path was never reachable.
+$IS_LOCAL        = (getenv('APP_ENV') === 'local');
 
 // Proxy the key: expose it only if request comes from same origin
-// For production, create a gmaps-key.php that checks a session/nonce
 $KEY_ENDPOINT    = './gmaps-key.php';
 ?>
 <!doctype html>
@@ -42,32 +43,18 @@ $KEY_ENDPOINT    = './gmaps-key.php';
   <script defer src="https://unpkg.com/leaflet-image@latest/leaflet-image.js"></script>
   <script defer src="https://unpkg.com/html2pdf.js@0.10.1/dist/html2pdf.bundle.min.js"></script>
   <link rel="icon" href="/favicon.ico">
-  
+  <link rel="stylesheet" href="/css/design-tokens.css">
+
   <script src="/js/api-status.js" data-api="https://api.modulos.com.au"></script>
 
   <style>
-    /* ── Design tokens (matches index redesign) ─────────────────────── */
+    /* ── Page-specific token overrides ───────────────────────────────── */
     :root {
-      --bg:           #0b0f0e;
-      --bg-1:         #111614;
-      --bg-2:         #181e1b;
-      --bg-card:      #141a17;
-      --border:       rgba(255,255,255,0.07);
-      --border-hover: rgba(255,255,255,0.14);
-      --accent:       #2ddc8a;
-      --accent-dim:   rgba(45,220,138,0.10);
-      --accent-glow:  rgba(45,220,138,0.22);
-      --text-primary: #eaf0ec;
-      --text-secondary:#8fa899;
-      --text-muted:   #4f6459;
-      --danger:       #f08080;
-      --warn:         #f0b060;
-      --serif:        'DM Serif Display', Georgia, serif;
-      --sans:         'DM Sans', system-ui, sans-serif;
-      --radius:       12px;
-      --radius-lg:    18px;
-      --radius-sm:    6px;
-      --transition:   0.18s cubic-bezier(0.4,0,0.2,1);
+      --warn:       #f0b060;   /* slightly warmer than the shared default */
+      --radius:     12px;
+      --radius-lg:  18px;
+      --radius-sm:  6px;
+      --transition: 0.18s cubic-bezier(0.4,0,0.2,1);
     }
 
     *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }