councils.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. <?php
  2. // web/councils.php
  3. declare(strict_types=1);
  4. // ---- DB config from env ----
  5. $DB_HOST = getenv('MYSQL_HOST') ?: 'db';
  6. $DB_NAME = getenv('MYSQL_DATABASE') ?: 'councils';
  7. $DB_USER = getenv('MYSQL_USER') ?: 'root';
  8. $DB_PASS = getenv('MYSQL_PASSWORD') ?: getenv('MYSQL_ROOT_PASSWORD');
  9. $DB_PORT = (int)(getenv('MYSQL_PORT') ?: 3306);
  10. // ---- Connect ----
  11. $dsn = "mysql:host={$DB_HOST};port={$DB_PORT};dbname={$DB_NAME};charset=utf8mb4";
  12. $pdo = new PDO($dsn, $DB_USER, $DB_PASS, [
  13. PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
  14. PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
  15. ]);
  16. function h($s): string { return htmlspecialchars((string)$s, ENT_QUOTES, 'UTF-8'); }
  17. // ---- Council config (name + source URL) ----
  18. $councilConfig = [];
  19. $cfgFile = '/var/www/config/councils.json';
  20. if (is_readable($cfgFile)) {
  21. $decoded = json_decode(file_get_contents($cfgFile), true);
  22. if (is_array($decoded)) $councilConfig = $decoded;
  23. }
  24. function council_name_from_key(string $k): string {
  25. global $councilConfig;
  26. if (!empty($councilConfig[$k]['name'])) return $councilConfig[$k]['name'];
  27. $base = preg_replace('/^da_/', '', $k);
  28. $base = str_replace('_', ' ', $base);
  29. $map = [
  30. 'flinders_council' => 'Flinders',
  31. 'southernmidlands' => 'Southern Midlands',
  32. ];
  33. if (isset($map[$base])) return $map[$base];
  34. return ucwords($base);
  35. }
  36. function council_source_url(string $k): string {
  37. global $councilConfig;
  38. return $councilConfig[$k]['source_url'] ?? '';
  39. }
  40. function tableExists(PDO $pdo, string $table): bool {
  41. $st = $pdo->prepare("SHOW TABLES LIKE ?");
  42. $st->execute([$table]);
  43. return (bool)$st->fetch(PDO::FETCH_NUM);
  44. }
  45. function tableHasColumn(PDO $pdo, string $table, string $col): bool {
  46. $q = $pdo->prepare("SHOW COLUMNS FROM `{$table}` LIKE ?");
  47. $q->execute([$col]);
  48. return (bool)$q->fetch();
  49. }
  50. function columnMeta(PDO $pdo, string $table, string $col): ?array {
  51. $q = $pdo->prepare("SHOW COLUMNS FROM `{$table}` LIKE ?");
  52. $q->execute([$col]);
  53. $r = $q->fetch();
  54. return $r ?: null;
  55. }
  56. function firstExistingCol(PDO $pdo, string $table, array $candidates): ?string {
  57. foreach ($candidates as $c) {
  58. if (tableHasColumn($pdo, $table, $c)) return $c;
  59. }
  60. return null;
  61. }
  62. function fmt_dt(?string $dt): string {
  63. if (!$dt) return '';
  64. try {
  65. $d = new DateTime($dt);
  66. return $d->format('Y-m-d H:i');
  67. } catch (Throwable $e) {
  68. return (string)$dt;
  69. }
  70. }
  71. function rel_age(?string $dt): string {
  72. if (!$dt) return '';
  73. try {
  74. $d = new DateTime($dt);
  75. $now = new DateTime('now');
  76. $diff = $now->getTimestamp() - $d->getTimestamp();
  77. if ($diff < 0) return 'in future';
  78. $mins = (int)floor($diff / 60);
  79. if ($mins < 60) return $mins . ' min';
  80. $hrs = (int)floor($mins / 60);
  81. if ($hrs < 48) return $hrs . ' hr';
  82. $days = (int)floor($hrs / 24);
  83. return $days . ' d';
  84. } catch (Throwable $e) {
  85. return '';
  86. }
  87. }
  88. // ---- Discover da_* tables ----
  89. $allTables = [];
  90. $st = $pdo->query("SHOW TABLES");
  91. while ($row = $st->fetch(PDO::FETCH_NUM)) {
  92. $t = $row[0];
  93. if (strpos($t, 'da_') === 0) $allTables[] = $t;
  94. }
  95. // Exclude tables you don't want
  96. $exclude = ['geo_cache', 'da_plandata', 'da_plan_data', 'da_dorset_stages'];
  97. $allTables = array_values(array_filter($allTables, fn($t) => !in_array($t, $exclude, true)));
  98. if (!$allTables) {
  99. http_response_code(200);
  100. echo "<h1>No da_* tables found</h1>";
  101. exit;
  102. }
  103. sort($allTables);
  104. // Optional sort controls
  105. $sort = (string)($_GET['sort'] ?? 'name'); // name|last|count|open
  106. $dir = strtolower((string)($_GET['dir'] ?? 'asc')) === 'desc' ? 'desc' : 'asc';
  107. // Optional external scrape log table support
  108. // If you have a runs table already, this will attempt to use it.
  109. // Supported names: scrape_runs, scrape_log, runs
  110. $runsTable = null;
  111. foreach (['scrape_runs','scrape_log','runs'] as $cand) {
  112. if (tableExists($pdo, $cand)) { $runsTable = $cand; break; }
  113. }
  114. $runsCouncilCol = $runsTable ? firstExistingCol($pdo, $runsTable, ['council_key','council','table_name','source']) : null;
  115. $runsStatusCol = $runsTable ? firstExistingCol($pdo, $runsTable, ['status','state','result','ok']) : null;
  116. $runsTimeCol = $runsTable ? firstExistingCol($pdo, $runsTable, ['finished_at','ended_at','completed_at','run_finished_at','created_at','started_at','run_at','timestamp']) : null;
  117. $summary = [];
  118. foreach ($allTables as $t) {
  119. // counts
  120. $total = (int)$pdo->query("SELECT COUNT(*) AS c FROM `{$t}`")->fetch()['c'];
  121. $open = null;
  122. if (tableHasColumn($pdo, $t, 'on_notice_to')) {
  123. $open = (int)$pdo->query("SELECT COUNT(*) AS c FROM `{$t}` WHERE (`on_notice_to` IS NULL OR `on_notice_to` >= CURDATE())")->fetch()['c'];
  124. }
  125. // last successful scrape
  126. $lastScrape = null;
  127. // 1) Try a runs/log table (if present)
  128. if ($runsTable && $runsCouncilCol && $runsTimeCol) {
  129. $where = "WHERE `{$runsCouncilCol}` = ?";
  130. $params = [$t];
  131. if ($runsStatusCol) {
  132. $statusMeta = columnMeta($pdo, $runsTable, $runsStatusCol);
  133. $type = strtolower((string)($statusMeta['Type'] ?? ''));
  134. if (strpos($type, 'tinyint') !== false || strpos($type, 'int') !== false || $runsStatusCol === 'ok') {
  135. $where .= " AND `{$runsStatusCol}` = 1";
  136. } else {
  137. $where .= " AND `{$runsStatusCol}` IN ('ok','OK','success','successful','done','completed')";
  138. }
  139. }
  140. $sql = "SELECT `{$runsTimeCol}` AS ts FROM `{$runsTable}` {$where} ORDER BY `{$runsTimeCol}` DESC LIMIT 1";
  141. $q = $pdo->prepare($sql);
  142. $q->execute($params);
  143. $r = $q->fetch();
  144. if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
  145. }
  146. // 2) Fallback to per-table timestamp columns
  147. if (!$lastScrape) {
  148. $tsCol = firstExistingCol($pdo, $t, [
  149. 'dev_timestamp',
  150. 'scraped_at',
  151. 'scrape_ts',
  152. 'last_scraped_at',
  153. 'fetched_at',
  154. 'updated_at',
  155. 'created_at',
  156. 'inserted_at',
  157. ]);
  158. if ($tsCol) {
  159. $meta = columnMeta($pdo, $t, $tsCol);
  160. $type = strtolower((string)($meta['Type'] ?? ''));
  161. if (strpos($type, 'int') !== false) {
  162. $r = $pdo->query("SELECT FROM_UNIXTIME(MAX(`{$tsCol}`)) AS ts FROM `{$t}`")->fetch();
  163. if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
  164. } else {
  165. $r = $pdo->query("SELECT MAX(`{$tsCol}`) AS ts FROM `{$t}`")->fetch();
  166. if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
  167. }
  168. }
  169. }
  170. // 3) Final fallback: latest date_received
  171. if (!$lastScrape && tableHasColumn($pdo, $t, 'date_received')) {
  172. $r = $pdo->query("SELECT MAX(`date_received`) AS ts FROM `{$t}`")->fetch();
  173. if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
  174. }
  175. // classification stats (only if column exists)
  176. $classified = null;
  177. $withPdf = null;
  178. $typeBreakdown = [];
  179. if (tableHasColumn($pdo, $t, 'application_type')) {
  180. $row = $pdo->query("SELECT COUNT(local_document_url) AS with_pdf, COUNT(application_type) AS classified FROM `{$t}`")->fetch();
  181. $withPdf = (int)$row['with_pdf'];
  182. $classified = (int)$row['classified'];
  183. $bt = $pdo->query("SELECT application_type, COUNT(*) AS n FROM `{$t}` WHERE application_type IS NOT NULL GROUP BY application_type ORDER BY n DESC");
  184. while ($br = $bt->fetch()) {
  185. $typeBreakdown[(string)$br['application_type']] = (int)$br['n'];
  186. }
  187. }
  188. $summary[] = [
  189. 'council_key' => $t,
  190. 'council' => council_name_from_key($t),
  191. 'total' => $total,
  192. 'open' => $open,
  193. 'last_scrape' => $lastScrape,
  194. 'classified' => $classified,
  195. 'with_pdf' => $withPdf,
  196. 'type_breakdown' => $typeBreakdown,
  197. ];
  198. }
  199. // Sorting in PHP (keeps it simple and avoids dynamic SQL)
  200. $cmp = function(array $a, array $b) use ($sort, $dir): int {
  201. $mul = $dir === 'desc' ? -1 : 1;
  202. if ($sort === 'count') {
  203. return $mul * (($a['total'] <=> $b['total']) ?: strcasecmp($a['council'], $b['council']));
  204. }
  205. if ($sort === 'open') {
  206. $ao = $a['open'] ?? -1;
  207. $bo = $b['open'] ?? -1;
  208. return $mul * (($ao <=> $bo) ?: strcasecmp($a['council'], $b['council']));
  209. }
  210. if ($sort === 'last') {
  211. $at = $a['last_scrape'] ? strtotime($a['last_scrape']) : 0;
  212. $bt = $b['last_scrape'] ? strtotime($b['last_scrape']) : 0;
  213. return $mul * (($at <=> $bt) ?: strcasecmp($a['council'], $b['council']));
  214. }
  215. if ($sort === 'classified') {
  216. $ac = ($a['with_pdf'] > 0) ? ($a['classified'] / $a['with_pdf']) : -1;
  217. $bc = ($b['with_pdf'] > 0) ? ($b['classified'] / $b['with_pdf']) : -1;
  218. return $mul * (($ac <=> $bc) ?: strcasecmp($a['council'], $b['council']));
  219. }
  220. return $mul * strcasecmp($a['council'], $b['council']);
  221. };
  222. usort($summary, $cmp);
  223. ?>
  224. <!doctype html>
  225. <html lang="en">
  226. <head>
  227. <meta charset="utf-8">
  228. <meta name="viewport" content="width=device-width, initial-scale=1">
  229. <title>Council scrape status</title>
  230. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.8/dist/css/bootstrap.min.css" rel="stylesheet">
  231. <style>
  232. body { padding: 24px; }
  233. .muted { color:#6c757d; }
  234. .nowrap { white-space: nowrap; }
  235. .type-pill { display:inline-block; font-size:.7rem; padding:1px 6px; border-radius:10px; background:#e9ecef; color:#495057; margin:1px; white-space:nowrap; }
  236. </style>
  237. </head>
  238. <body>
  239. <div class="container">
  240. <div class="d-flex align-items-center justify-content-between mb-3">
  241. <h1 class="h3 m-0">Council scrape status</h1>
  242. <div class="d-flex gap-2">
  243. <a class="btn btn-outline-secondary btn-sm" href="index.php">Open applications</a>
  244. </div>
  245. </div>
  246. <form method="get" class="row g-2 align-items-center mb-3">
  247. <div class="col-auto">
  248. <select name="sort" class="form-select form-select-sm">
  249. <option value="name" <?= $sort==='name'?'selected':'' ?>>sort: council</option>
  250. <option value="last" <?= $sort==='last'?'selected':'' ?>>sort: last scrape</option>
  251. <option value="count" <?= $sort==='count'?'selected':'' ?>>sort: total rows</option>
  252. <option value="open" <?= $sort==='open'?'selected':'' ?>>sort: open rows</option>
  253. <option value="classified" <?= $sort==='classified'?'selected':'' ?>>sort: classification %</option>
  254. </select>
  255. </div>
  256. <div class="col-auto">
  257. <select name="dir" class="form-select form-select-sm">
  258. <option value="asc" <?= $dir==='asc'?'selected':'' ?>>asc</option>
  259. <option value="desc" <?= $dir==='desc'?'selected':'' ?>>desc</option>
  260. </select>
  261. </div>
  262. <div class="col-auto">
  263. <button class="btn btn-primary btn-sm" type="submit">Apply</button>
  264. </div>
  265. <div class="col-auto muted">
  266. <?= count($summary) ?> council(s)
  267. <?php if ($runsTable): ?>
  268. , using run log: <?= h($runsTable) ?>
  269. <?php endif; ?>
  270. </div>
  271. </form>
  272. <div class="table-responsive">
  273. <table class="table table-sm table-bordered align-middle">
  274. <thead class="table-light">
  275. <tr>
  276. <th style="width: 18%">Council</th>
  277. <th style="width: 14%">Table</th>
  278. <th class="text-end" style="width: 8%">Total</th>
  279. <th class="text-end" style="width: 8%">Open</th>
  280. <th style="width: 18%">Last successful scrape</th>
  281. <th style="width: 26%">Classification</th>
  282. <th style="width: 8%"></th>
  283. </tr>
  284. </thead>
  285. <tbody>
  286. <?php foreach ($summary as $r): ?>
  287. <tr>
  288. <td>
  289. <?php $srcUrl = council_source_url($r['council_key']); ?>
  290. <?php if ($srcUrl !== ''): ?>
  291. <a href="<?= h($srcUrl) ?>" target="_blank" rel="noopener" class="text-decoration-none"><?= h($r['council']) ?> <span class="muted">↗</span></a>
  292. <?php else: ?>
  293. <?= h($r['council']) ?>
  294. <?php endif; ?>
  295. </td>
  296. <td class="nowrap"><code><?= h($r['council_key']) ?></code></td>
  297. <td class="text-end"><?= number_format((int)$r['total']) ?></td>
  298. <td class="text-end"><?= is_null($r['open']) ? '<span class="muted">n/a</span>' : number_format((int)$r['open']) ?></td>
  299. <td class="nowrap">
  300. <?php if ($r['last_scrape']): ?>
  301. <?= h(fmt_dt($r['last_scrape'])) ?>
  302. <span class="muted">(<?= h(rel_age($r['last_scrape'])) ?> ago)</span>
  303. <?php else: ?>
  304. <span class="muted">unknown</span>
  305. <?php endif; ?>
  306. </td>
  307. <td>
  308. <?php if (is_null($r['classified'])): ?>
  309. <span class="muted">—</span>
  310. <?php elseif ($r['with_pdf'] === 0): ?>
  311. <span class="muted">no PDFs</span>
  312. <?php else:
  313. $pct = (int)round(100 * $r['classified'] / $r['with_pdf']);
  314. ?>
  315. <div class="d-flex align-items-center gap-1 mb-1">
  316. <div class="progress flex-grow-1" style="height:6px">
  317. <div class="progress-bar" style="width:<?= $pct ?>%"></div>
  318. </div>
  319. <small class="muted nowrap"><?= $r['classified'] ?>/<?= $r['with_pdf'] ?></small>
  320. </div>
  321. <div>
  322. <?php foreach ($r['type_breakdown'] as $type => $n): ?>
  323. <span class="type-pill"><?= h($type) ?> <strong><?= $n ?></strong></span>
  324. <?php endforeach; ?>
  325. </div>
  326. <?php endif; ?>
  327. </td>
  328. <td class="text-center">
  329. <a class="btn btn-outline-secondary btn-sm"
  330. href="index.php?council_key=<?= urlencode($r['council_key']) ?>">
  331. View
  332. </a>
  333. </td>
  334. </tr>
  335. <?php endforeach; ?>
  336. </tbody>
  337. </table>
  338. </div>
  339. <div class="muted mt-3">
  340. Notes: “Last successful scrape” is taken from a scrape log table if present (scrape_runs, scrape_log, runs). Otherwise it uses the best available per-row timestamp column, otherwise date_received.
  341. </div>
  342. </div>
  343. </body>
  344. </html>