# app.py — FastAPI service that answers planning questions from Qdrant-retrieved context via Ollama.
import hmac
import json
import os
import re
import time
from collections import Counter, defaultdict
from datetime import datetime
from typing import Optional, Literal, List, Tuple

import requests
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from qdrant_client import QdrantClient
from qdrant_client.http import models as qmodels
from slowapi import Limiter
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from slowapi.util import get_remote_address

from telemetry import router as telemetry_router, db, ip_hash
  20. # ---- Environment ----
  21. OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.8.73:11434")
  22. QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
  23. OLLAMA_KEEP_ALIVE = os.getenv("OLLAMA_KEEP_ALIVE", "-1") # -1 = keep loaded forever
  24. COLLECTION = os.getenv("QDRANT_COLLECTION", "planning_docs")
  25. EMBED_MODEL = os.getenv("EMBED_MODEL", "nomic-embed-text")
  26. CHAT_MODEL = os.getenv("CHAT_MODEL", "llama3.1:8b-instruct-q4_K_M")
  27. CORS_ORIGINS = [o.strip() for o in os.getenv("CORS_ORIGINS", "https://tasplanning.report").split(",") if o.strip()]
  28. # ---- DEMO TOKEN ----
  29. DEMO_REQUIRE_TOKEN = os.getenv("DEMO_REQUIRE_TOKEN", "0") == "1"
  30. DEMO_TOKEN = os.getenv("DEMO_TOKEN", "")
  31. def _verify_demo_token_if_needed(request):
  32. if not DEMO_REQUIRE_TOKEN:
  33. return
  34. auth = request.headers.get("Authorization", "")
  35. if not (auth.startswith("Bearer ") and auth.split(" ",1)[1] == DEMO_TOKEN):
  36. raise HTTPException(status_code=401, detail="Unauthorized")
  37. # ---- FAST API ----
  38. app = FastAPI()
  39. # Allowed origins — always include your frontend explicitly
  40. _origins = CORS_ORIGINS if CORS_ORIGINS else []
  41. _allow_all = len(_origins) == 0
  42. app.add_middleware(
  43. CORSMiddleware,
  44. allow_origins=_origins if not _allow_all else ["*"],
  45. allow_origin_regex=r"https://.*\.tasplanning\.report" if _allow_all else None,
  46. allow_credentials=not _allow_all, # credentials only when origins are explicit
  47. allow_methods=["GET", "POST", "OPTIONS"],
  48. allow_headers=["Content-Type", "Authorization", "X-TPR-SID"],
  49. expose_headers=["X-TPR-SID"],
  50. )
  51. qc = QdrantClient(url=QDRANT_URL)
  52. app.include_router(telemetry_router)
  53. # ---- SLOW API ----
  54. limiter = Limiter(key_func=get_remote_address)
  55. app.state.limiter = limiter # type: ignore
  56. app.add_middleware(SlowAPIMiddleware)
  57. @app.exception_handler(RateLimitExceeded)
  58. def ratelimit_handler(request, exc):
  59. return JSONResponse(status_code=429, content={"error":"rate_limited","detail":"Too many requests"})
  60. # ---- Feedback endpoint ----
  61. class FeedbackBody(BaseModel):
  62. verdict: str # "up" or "down"
  63. query: Optional[str] = None # the question that was asked
  64. answer: Optional[str] = None # the answer that was rated
  65. note: Optional[str] = None # optional free-text from thumbs-down
  66. sid: Optional[str] = None # session id from browser
  67. model: Optional[str] = None # which model answered
  68. scope: Optional[str] = None # which scope was used
  69. sources: Optional[list] = None # which sources were cited
  70. @app.post("/feedback")
  71. @limiter.limit("60/minute")
  72. def feedback(request: Request, body: FeedbackBody):
  73. if body.verdict not in ("up", "down"):
  74. raise HTTPException(status_code=422, detail="verdict must be 'up' or 'down'")
  75. ip = request.client.host if request.client else "0.0.0.0"
  76. sid = body.sid or request.headers.get("X-TPR-SID") or ""
  77. try:
  78. with db() as conn:
  79. conn.execute("""
  80. INSERT INTO feedback
  81. (ts, sid, ip_hash, verdict, query, answer, note, model, scope, sources_json)
  82. VALUES (?,?,?,?,?,?,?,?,?,?)
  83. """, (
  84. datetime.utcnow().isoformat(),
  85. sid, ip_hash(ip), body.verdict,
  86. (body.query or "")[:2000],
  87. (body.answer or "")[:8000],
  88. (body.note or "")[:1000],
  89. body.model or CHAT_MODEL,
  90. body.scope or "",
  91. _json_dumps(body.sources or []),
  92. ))
  93. conn.commit()
  94. except Exception as e:
  95. print("[feedback] insert failed:", e)
  96. # Still return ok — don't surface DB errors to users
  97. return {"ok": True}
  98. # ---- Ollama helpers ----
  99. def slug(s: Optional[str]) -> Optional[str]:
  100. if not s:
  101. return None
  102. return re.sub(r'[^a-z0-9]+', '-', s.strip().lower()).strip('-') or None
  103. def ollama_embed(text: str) -> List[float]:
  104. r = requests.post(
  105. f"{OLLAMA_URL}/api/embeddings",
  106. json={"model": EMBED_MODEL, "prompt": text},
  107. timeout=60
  108. )
  109. r.raise_for_status()
  110. data = r.json()
  111. if "embedding" not in data:
  112. raise RuntimeError(f"Ollama embeddings error: {data}")
  113. return data["embedding"]
  114. def ollama_chat(prompt: str) -> str:
  115. r = requests.post(
  116. f"{OLLAMA_URL}/api/generate",
  117. json={
  118. "model": CHAT_MODEL,
  119. "prompt": prompt,
  120. "stream": False,
  121. "options": {
  122. "num_ctx": 6144, # was 8192,
  123. "num_predict": 512,
  124. "temperature": 0.2,
  125. "top_p": 0.9,
  126. "repeat_penalty": 1.1,
  127. },
  128. #"keep_alive": int(OLLAMA_KEEP_ALIVE) if OLLAMA_KEEP_ALIVE.lstrip('-').isdigit() else OLLAMA_KEEP_ALIVE, # ← moved outside options, uses env var
  129. "keep_alive": -1,
  130. },
  131. timeout=180
  132. )
  133. r.raise_for_status()
  134. data = r.json()
  135. return data.get("response", "").strip()
  136. def _scroll_points(collection: str, qfilter=None, include_vector: bool=False, page_size: int=200):
  137. offset = None
  138. while True:
  139. points, offset = qc.scroll(
  140. collection_name=collection,
  141. limit=page_size,
  142. with_payload=True,
  143. with_vectors=include_vector,
  144. offset=offset,
  145. scroll_filter=qfilter
  146. )
  147. if not points:
  148. break
  149. for pt in points:
  150. yield pt
  151. if offset is None:
  152. break
  153. # ---- Health ----
  154. @app.get("/readyz")
  155. def readyz():
  156. return {"ok": True}
  157. def _normalize(q: Optional[str]) -> str:
  158. return re.sub(r"\s+", " ", (q or "").strip().lower())
  159. def _json_dumps(o) -> str:
  160. return json.dumps(o, ensure_ascii=False, separators=(",",":"))
  161. # ---- Councils list (prefers payload 'council', falls back to filename token) ----
  162. @app.get("/councils")
  163. def councils():
  164. councils = set()
  165. offset = None
  166. # sample up to ~5k points (50 * 100)
  167. for _ in range(50):
  168. points, offset = qc.scroll(
  169. collection_name=COLLECTION,
  170. limit=100,
  171. with_payload=True,
  172. offset=offset
  173. )
  174. for pt in points:
  175. p = pt.payload or {}
  176. token = (p.get("council") or "").strip().lower()
  177. if not token:
  178. sf = (p.get("source_file") or "").lower()
  179. if sf:
  180. token = sf.replace(".pdf", "").split("_")[0].split("-")[0]
  181. if token:
  182. councils.add(token)
  183. if offset is None:
  184. break
  185. return sorted(councils)
  186. # ---- Filter builders ----
  187. def _mv(key: str, value: str) -> qmodels.FieldCondition:
  188. return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))
  189. def _mt(key: str, text: str) -> qmodels.FieldCondition:
  190. return qmodels.FieldCondition(key=key, match=qmodels.MatchText(text=text))
  191. def filter_tps() -> qmodels.Filter:
  192. """TPS only, exact match on corpus."""
  193. return qmodels.Filter(must=[_mv("corpus", "tps")])
  194. def filter_lps(council: str) -> qmodels.Filter:
  195. """
  196. LPS for a specific council (slug), exact match on both fields.
  197. """
  198. cslug = slug(council) or council.lower()
  199. return qmodels.Filter(must=[_mv("corpus", "lps"), _mv("council", cslug)])
  200. def filter_ncc() -> qmodels.Filter:
  201. return qmodels.Filter(must=[_mv("corpus", "ncc")])
  202. def filter_as() -> qmodels.Filter:
  203. return qmodels.Filter(must=[_mv("corpus", "as")])
  204. def with_source_contains(flt: Optional[qmodels.Filter], source_contains: Optional[str]) -> qmodels.Filter:
  205. if not source_contains:
  206. return flt
  207. add = _mt("source_file", source_contains)
  208. if flt:
  209. # preserve existing must/should/must_not and AND the filename condition
  210. must = list(getattr(flt, "must", []) or [])
  211. must.append(add)
  212. return qmodels.Filter(
  213. must=must,
  214. should=getattr(flt, "should", None),
  215. must_not=getattr(flt, "must_not", None),
  216. )
  217. return qmodels.Filter(must=[add])
  218. def q_search(vec: List[float], flt: Optional[qmodels.Filter], limit: int):
  219. results = qc.query_points(
  220. collection_name=COLLECTION,
  221. query=vec,
  222. limit=max(1, limit),
  223. query_filter=flt,
  224. with_payload=True,
  225. )
  226. return results.points
  227. def render_blocks(hits) -> Tuple[List[str], List[dict]]:
  228. blocks, sources = [], []
  229. for h in hits:
  230. p = h.payload or {}
  231. src = f"{p.get('source_file')} (p.{p.get('page')} chunk {p.get('chunk_index')})"
  232. snippet = p.get("text", "")
  233. blocks.append(f"Source: {src}\nText: {snippet}")
  234. sources.append({
  235. "source_file": p.get("source_file"),
  236. "page": p.get("page"),
  237. "chunk_index": p.get("chunk_index"),
  238. "score": h.score
  239. })
  240. return blocks, sources
  241. def combine_context(sections: List[Tuple[str, List[str]]]) -> str:
  242. out = []
  243. for heading, blocks in sections:
  244. if not blocks:
  245. continue
  246. out.append(f"=== {heading} ===")
  247. out.extend(blocks)
  248. return "\n\n".join(out) if out else "No context found."
  249. def _scan_points(qfilter: Optional[qmodels.Filter] = None, max_pages: int = 10000, page_size: int = 200):
  250. """
  251. Iterate through ALL points (filtered if qfilter given).
  252. For your current dataset this is fine; if it grows huge later we'll switch to a stored summary or a background job.
  253. """
  254. offset = None
  255. pages = 0
  256. while pages < max_pages:
  257. points, offset = qc.scroll(
  258. collection_name=COLLECTION,
  259. limit=page_size,
  260. with_payload=True,
  261. offset=offset,
  262. scroll_filter=qfilter
  263. )
  264. if not points:
  265. break
  266. for pt in points:
  267. yield pt
  268. pages += 1
  269. if offset is None:
  270. break
  271. @app.get("/admin/stats")
  272. def admin_stats(council: Optional[str] = None, corpus: Optional[str] = None):
  273. must = []
  274. if council:
  275. must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
  276. if corpus:
  277. must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
  278. qfilter = qmodels.Filter(must=must) if must else None
  279. corp = Counter()
  280. councils = Counter()
  281. total = 0
  282. for pt in _scan_points(qfilter=qfilter):
  283. p = pt.payload or {}
  284. corp[(p.get("corpus") or "").lower()] += 1
  285. if p.get("council"):
  286. councils[(p.get("council") or "").lower()] += 1
  287. total += 1
  288. return {
  289. "collection": COLLECTION,
  290. "total_points": total,
  291. "by_corpus": dict(corp),
  292. "by_council": dict(councils),
  293. "note": "Counts are points (chunks), not documents.",
  294. }
  295. @app.get("/admin/files")
  296. def admin_files(council: Optional[str] = None, corpus: Optional[str] = None, contains: Optional[str] = None, limit: int = 200):
  297. must = []
  298. if council:
  299. must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
  300. if corpus:
  301. must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
  302. if contains:
  303. must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=contains)))
  304. qfilter = qmodels.Filter(must=must) if must else None
  305. files = defaultdict(lambda: {"points": 0, "corpus": None, "council": None, "pages": set()})
  306. for pt in _scan_points(qfilter=qfilter):
  307. p = pt.payload or {}
  308. f = (p.get("source_file") or "").strip()
  309. if not f:
  310. continue
  311. rec = files[f]
  312. rec["points"] += 1
  313. rec["corpus"] = rec["corpus"] or p.get("corpus")
  314. rec["council"] = rec["council"] or p.get("council")
  315. if p.get("page") is not None:
  316. rec["pages"].add(p["page"])
  317. # shape for output
  318. out = []
  319. for f, rec in files.items():
  320. out.append({
  321. "source_file": f,
  322. "corpus": rec["corpus"],
  323. "council": rec["council"],
  324. "points": rec["points"],
  325. "page_count_est": len(rec["pages"]) if rec["pages"] else None,
  326. })
  327. # sort by points desc, limit
  328. out.sort(key=lambda x: x["points"], reverse=True)
  329. return out[:max(1, limit)]
  330. @app.get("/admin/sample")
  331. def admin_sample(council: Optional[str] = None, corpus: Optional[str] = None, n: int = 5):
  332. must = []
  333. if council:
  334. must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
  335. if corpus:
  336. must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
  337. qfilter = qmodels.Filter(must=must) if must else None
  338. samples = []
  339. for pt in _scan_points(qfilter=qfilter):
  340. p = pt.payload or {}
  341. txt = (p.get("text") or "").strip()
  342. if not txt:
  343. continue
  344. samples.append({
  345. "source_file": p.get("source_file"),
  346. "corpus": p.get("corpus"),
  347. "council": p.get("council"),
  348. "page": p.get("page"),
  349. "chunk_index": p.get("chunk_index"),
  350. "preview": (txt[:400] + "…") if len(txt) > 400 else txt
  351. })
  352. if len(samples) >= max(1, n):
  353. break
  354. return samples
  355. @app.get("/admin/export")
  356. def admin_export(
  357. collection: str = COLLECTION,
  358. council: Optional[str] = None,
  359. corpus: Optional[str] = None,
  360. source_contains: Optional[str] = None,
  361. include_vector: bool = False,
  362. limit: Optional[int] = None
  363. ):
  364. must = []
  365. if council:
  366. must.append(qmodels.FieldCondition(key="council", match=qmodels.MatchText(text=council.lower())))
  367. if corpus:
  368. must.append(qmodels.FieldCondition(key="corpus", match=qmodels.MatchText(text=corpus.lower())))
  369. if source_contains:
  370. must.append(qmodels.FieldCondition(key="source_file", match=qmodels.MatchText(text=source_contains)))
  371. qfilter = qmodels.Filter(must=must) if must else None
  372. def gen():
  373. count = 0
  374. for pt in _scroll_points(collection, qfilter=qfilter, include_vector=include_vector):
  375. obj = {
  376. "id": str(getattr(pt, "id", None)),
  377. "payload": pt.payload or {},
  378. }
  379. if include_vector:
  380. obj["vector"] = pt.vector
  381. yield json.dumps(obj, ensure_ascii=False) + "\n"
  382. count += 1
  383. if limit and count >= limit:
  384. break
  385. filename = f'{collection}-{corpus or "all"}-{council or "all"}.ndjson'
  386. headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
  387. return StreamingResponse(gen(), media_type="application/x-ndjson", headers=headers)
  388. def _section_format_guide(section_id: Optional[str], section_title: str, ctx: dict) -> str:
  389. """
  390. Return strict, section-specific formatting guidance for the LLM.
  391. Keep these short, prescriptive, and impossible to ignore.
  392. """
  393. sid = (section_id or "").lower()
  394. # Utility bits from context
  395. zones = ctx.get("planning_zones") or []
  396. zone_label = ", ".join(zones) if zones else "the applicable zone"
  397. council_label = ctx.get("council") or ""
  398. # ---- ZONING (tables of clauses like your sample) ----
  399. if sid in {"zoning", "zoning-41", "zoning-42", "zoning-43", "zoning-44", "zoning-441", "zoning-442"}:
  400. return f"""
  401. FORMAT REQUIREMENTS (MANDATORY):
  402. - Produce a concise preface (≤ 2 sentences) naming {zone_label}.
  403. - Then include a Markdown table listing EACH visible clause found in CONTEXT that applies to the zone or LPS for **{council_label or 'the selected council'}**.
  404. - One row per subclause. If an A/P pair exists (e.g., A1 / P1), include both in the same row.
  405. - Columns (exact):
  406. | Clause | Topic | Acceptable Solution (A) | Performance Criteria (P) | Assessment | Source |
  407. - "Clause": the clause number (e.g., "12.3.1 A1" or "DOR-S1.7.1").
  408. - "Topic": short label extracted from the clause heading.
  409. - "Acceptable Solution (A)" and "Performance Criteria (P)": quote briefly—no more than 1–2 lines each.
  410. - "Assessment": state clearly whether the proposal meets A, or relies on P. If unknown from CONTEXT, write "TBC".
  411. - "Source": filename + page (from CONTEXT).
  412. - Only include clauses actually present in CONTEXT; NEVER invent clause numbers or text.
  413. - After the table, add a one-paragraph summary noting any items assessed as TBC or non-compliant.
  414. """.strip()
  415. # ---- Codes overview list/table (optional future) ----
  416. if sid.startswith("code-"):
  417. return """
  418. FORMAT REQUIREMENTS:
  419. - Start with one sentence stating which Code and why it is triggered.
  420. - Then provide a short checklist or table of the relevant sub-clauses (A vs P), with Source for each.
  421. - Keep to 150–250 words + table.
  422. """.strip()
  423. # ---- Permit Overview (concise triggers) ----
  424. if sid == "permit-overview":
  425. return """
  426. FORMAT REQUIREMENTS:
  427. - Produce 3 blocks with headings:
  428. 1) "Project Context" – 3–5 bullet points (site, proposal, zone).
  429. 2) "Applicable Provisions" – bullets grouping TPS SPP, LPS (selected council), and triggered Codes.
  430. 3) "Assessment Path" – bullet list of key clauses to assess next.
  431. - Cite specific clause numbers ONLY if present in CONTEXT (include Source).
  432. """.strip()
  433. # ---- Default (no special formatting) ----
  434. return """
  435. FORMAT REQUIREMENTS:
  436. - Use concise Markdown with short paragraphs and bullets as needed.
  437. - Cite briefly (filename + page) when quoting a control.
  438. """.strip()
  439. # ---- Ask (GET + POST) ----
  440. class AskBody(BaseModel):
  441. # accept multiple keys from different frontends
  442. query: Optional[str] = None
  443. question: Optional[str] = None
  444. q: Optional[str] = None
  445. prompt: Optional[str] = None
  446. top_k: int = 10
  447. council: Optional[str] = None
  448. include_ncc: bool = False
  449. include_standards: bool = False
  450. source_contains: Optional[str] = None
  451. scope: Literal['state_plus_local','local_only','state_only','any'] = 'state_plus_local'
  452. section_id: Optional[str] = None
  453. # BYOK mode: return context blocks without calling Ollama.
  454. # The browser then calls its own LLM with the returned context + prompt.
  455. context_only: bool = False
  456. def _allowed(p: dict, scope: str, cslug: Optional[str]) -> bool:
  457. corp = (p.get("corpus") or "").lower()
  458. council = (p.get("council") or "").lower()
  459. if scope == "local_only":
  460. return corp == "lps" and cslug and council == cslug
  461. if scope == "state_only":
  462. return corp == "tps"
  463. if scope == "state_plus_local":
  464. return corp == "tps" or (corp == "lps" and cslug and council == cslug)
  465. return True
  466. def do_ask(
  467. query: str,
  468. top_k: int = 10,
  469. council: Optional[str] = None,
  470. include_ncc: bool = False,
  471. include_standards: bool = False,
  472. source_contains: Optional[str] = None,
  473. scope: str = "state_plus_local",
  474. section_id: Optional[str] = None,
  475. context_only: bool = False,
  476. ):
  477. vec = ollama_embed(query)
  478. cslug = slug(council) if council else None
  479. # Build allowed scopes based on scope param
  480. scopes: List[Tuple[str, qmodels.Filter]] = []
  481. if scope in ("state_only", "state_plus_local", "any"):
  482. scopes.append(("Tasmanian Planning Scheme (SPP)", filter_tps()))
  483. if scope in ("local_only", "state_plus_local", "any") and cslug:
  484. scopes.append((f"Local Provisions Schedule — {cslug}", filter_lps(cslug)))
  485. if include_ncc:
  486. scopes.append(("National Construction Code (NCC)", filter_ncc()))
  487. if include_standards:
  488. scopes.append(("Australian Standards (AS)", filter_as()))
  489. # Apply additional filename filter if requested (AND)
  490. scopes = [(name, with_source_contains(flt, source_contains)) for name, flt in scopes]
  491. # Allocate limits per scope
  492. per_spp = max(3, top_k // 3) if any(n.startswith("Tasmanian Planning Scheme") for n, _ in scopes) else 0
  493. per_lps = max(3, top_k // 3) if any(n.startswith("Local Provisions Schedule") for n, _ in scopes) else 0
  494. remaining = max(1, top_k - (per_spp + per_lps))
  495. extra_scopes = sum(1 for n, _ in scopes if not (n.startswith("Tasmanian Planning Scheme") or n.startswith("Local Provisions Schedule")))
  496. per_extra = max(1, remaining // max(1, extra_scopes)) if extra_scopes else 0
  497. limits: List[int] = []
  498. for name, _ in scopes:
  499. if name.startswith("Tasmanian Planning Scheme"):
  500. limits.append(per_spp)
  501. elif name.startswith("Local Provisions Schedule"):
  502. limits.append(per_lps)
  503. else:
  504. limits.append(per_extra)
  505. sections: List[Tuple[str, List[str]]] = []
  506. all_sources: List[dict] = []
  507. for (name, flt), lim in zip(scopes, limits):
  508. if lim <= 0:
  509. continue
  510. hits = q_search(vec, flt, lim)
  511. # Guardrail: drop any hit that violates scope/council
  512. hits = [h for h in hits if _allowed(h.payload or {}, scope, cslug)]
  513. blocks, sources = render_blocks(hits)
  514. sections.append((name, blocks))
  515. all_sources.extend(sources)
  516. context = combine_context(sections)
  517. #format_guide = _section_format_guide(section_id, section_title="(auto)", ctx={})
  518. format_guide = _section_format_guide(
  519. section_id,
  520. section_title="(auto)",
  521. ctx={
  522. "council": council, # from do_ask parameter
  523. "planning_zones": [], # populate if you have zone detection
  524. }
  525. )
  526. prompt = f"""
  527. You are an expert Tasmanian planning and building compliance assistant with deep knowledge of the Tasmanian Planning Scheme structure.
  528. ## AUTHORITY ORDER — always apply in this sequence:
  529. 1. State Planning Provisions (SPP) — the statewide baseline. Cite clause numbers exactly.
  530. 2. Local Provisions Schedule (LPS) for the selected council — overrides SPP where it differs.
  531. 3. National Construction Code (NCC) — building controls only, keep separate from planning.
  532. 4. Australian Standards — only when directly referenced by a clause in CONTEXT.
  533. ## STRICT RULES:
  534. - Use ONLY information present in CONTEXT below. Never invent clause numbers, standards, or measurements.
  535. - If CONTEXT does not contain enough information to answer, say: "The provided context does not cover this — check the TPSO viewer directly at tpso.planning.tas.gov.au"
  536. - Every specific standard or requirement you state MUST include its source: (filename, p.N)
  537. - Quote clause text briefly (1–2 lines max) then explain in plain English.
  538. - Distinguish clearly between Acceptable Solutions (A) and Performance Criteria (P).
  539. ## OUTPUT FORMAT:
  540. - Use Markdown: ## for main headings, ### for sub-headings, **bold** for clause numbers.
  541. - For setbacks, parking rates, or multiple standards: use a Markdown table with columns: Clause | Requirement | A or P | Source
  542. - End every response with a ## Sources section listing each cited document and page.
  543. - Keep answers concise but complete — do not pad or repeat information.
  544. - Professional planning language; avoid informal phrasing.
  545. ## CONTEXT (retrieved from Tasmanian Planning Scheme documents):
  546. {context}
  547. {format_guide}
  548. ## QUESTION:
  549. {query}
  550. ## ANSWER:
  551. """.strip()
  552. # BYOK mode: skip Ollama and return the context + prompt so the
  553. # browser can call its own LLM provider (Claude, GPT, Grok, etc.)
  554. if context_only:
  555. return {
  556. "context_only": True,
  557. "context": context,
  558. "prompt": prompt,
  559. "sources": all_sources,
  560. # Include the raw section blocks so the browser can inspect them
  561. "sections": [
  562. {"heading": name, "blocks": blocks}
  563. for name, blocks in sections
  564. ]
  565. }
  566. answer = ollama_chat(prompt)
  567. return {"answer": answer, "sources": all_sources}
  568. @app.get("/ask")
  569. @limiter.limit("20/minute")
  570. def ask_get(
  571. request: Request,
  572. query: str = Query(..., description="User question"),
  573. top_k: int = 10,
  574. council: Optional[str] = None,
  575. include_ncc: bool = False,
  576. include_standards: bool = False,
  577. source_contains: Optional[str] = None,
  578. scope: str = "state_plus_local",
  579. section_id: Optional[str] = None,
  580. context_only: bool = False,
  581. ):
  582. _verify_demo_token_if_needed(request)
  583. started = time.perf_counter()
  584. out = do_ask(query, top_k, council, include_ncc, include_standards, source_contains, scope, section_id, context_only)
  585. latency_ms = int((time.perf_counter() - started) * 1000)
  586. # Telemetry insert
  587. try:
  588. ip = request.client.host if request.client else "0.0.0.0"
  589. sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
  590. allow_tps = scope in ("state_only", "state_plus_local")
  591. topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
  592. with db() as conn:
  593. conn.execute("""
  594. INSERT INTO ask_logs
  595. (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
  596. model, ok, topk_json, tokens_in, tokens_out, answer)
  597. VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
  598. """, (
  599. datetime.utcnow().isoformat(),
  600. sid, ip_hash(ip), query, _normalize(query),
  601. scope, int(allow_tps),
  602. latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
  603. (out.get("answer") or "")[:8000], # ← truncate to 8KB max
  604. ))
  605. conn.commit()
  606. except Exception as e:
  607. # Don't break the request if logging fails
  608. print("[telemetry] ask_get insert failed:", e)
  609. return out
  610. @app.post("/ask")
  611. @limiter.limit("20/minute")
  612. def ask_post(request: Request, body: AskBody):
  613. _verify_demo_token_if_needed(request)
  614. qtxt = (body.query or body.question or body.q or body.prompt or "").strip()
  615. if not qtxt:
  616. raise HTTPException(status_code=422, detail="Missing query/question")
  617. started = time.perf_counter()
  618. out = do_ask(
  619. query=qtxt,
  620. top_k=body.top_k,
  621. council=body.council,
  622. include_ncc=body.include_ncc,
  623. include_standards=body.include_standards,
  624. source_contains=body.source_contains,
  625. scope=body.scope,
  626. section_id=body.section_id,
  627. context_only=body.context_only,
  628. )
  629. latency_ms = int((time.perf_counter() - started) * 1000)
  630. # Telemetry insert
  631. try:
  632. ip = request.client.host if request.client else "0.0.0.0"
  633. sid = request.headers.get("X-TPR-SID") or request.cookies.get("sid") or ""
  634. allow_tps = body.scope in ("state_only", "state_plus_local")
  635. topk = [{"id": f"{s.get('source_file')}#p{s.get('page')}", "score": s.get("score")} for s in (out.get("sources") or [])]
  636. with db() as conn:
  637. conn.execute("""
  638. INSERT INTO ask_logs
  639. (ts, sid, ip_hash, query, normalized, scope, allow_tps, latency_ms,
  640. model, ok, topk_json, tokens_in, tokens_out, answer)
  641. VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
  642. """, (
  643. datetime.utcnow().isoformat(),
  644. sid, ip_hash(ip), qtxt, _normalize(qtxt),
  645. body.scope, int(allow_tps),
  646. latency_ms, CHAT_MODEL, 1, _json_dumps(topk), 0, 0,
  647. (out.get("answer") or "")[:8000], # ← truncate to 8KB max
  648. ))
  649. conn.commit()
  650. except Exception as e:
  651. print("[telemetry] ask_post insert failed:", e)
  652. return out