| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- <?php
- // web/councils.php
- declare(strict_types=1);
- // ---- DB config from env ----
- $DB_HOST = getenv('MYSQL_HOST') ?: 'db';
- $DB_NAME = getenv('MYSQL_DATABASE') ?: 'councils';
- $DB_USER = getenv('MYSQL_USER') ?: 'root';
- $DB_PASS = getenv('MYSQL_PASSWORD') ?: getenv('MYSQL_ROOT_PASSWORD');
- $DB_PORT = (int)(getenv('MYSQL_PORT') ?: 3306);
- // ---- Connect ----
- $dsn = "mysql:host={$DB_HOST};port={$DB_PORT};dbname={$DB_NAME};charset=utf8mb4";
- $pdo = new PDO($dsn, $DB_USER, $DB_PASS, [
- PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
- PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
- ]);
- function h($s): string { return htmlspecialchars((string)$s, ENT_QUOTES, 'UTF-8'); }
- function council_name_from_key(string $k): string {
- $base = preg_replace('/^da_/', '', $k);
- $base = str_replace('_', ' ', $base);
- $map = [
- 'flinders_council' => 'Flinders',
- 'southernmidlands' => 'Southern Midlands',
- ];
- if (isset($map[$base])) return $map[$base];
- return ucwords($base);
- }
- function tableExists(PDO $pdo, string $table): bool {
- $st = $pdo->prepare("SHOW TABLES LIKE ?");
- $st->execute([$table]);
- return (bool)$st->fetch(PDO::FETCH_NUM);
- }
- function tableHasColumn(PDO $pdo, string $table, string $col): bool {
- $q = $pdo->prepare("SHOW COLUMNS FROM `{$table}` LIKE ?");
- $q->execute([$col]);
- return (bool)$q->fetch();
- }
- function columnMeta(PDO $pdo, string $table, string $col): ?array {
- $q = $pdo->prepare("SHOW COLUMNS FROM `{$table}` LIKE ?");
- $q->execute([$col]);
- $r = $q->fetch();
- return $r ?: null;
- }
- function firstExistingCol(PDO $pdo, string $table, array $candidates): ?string {
- foreach ($candidates as $c) {
- if (tableHasColumn($pdo, $table, $c)) return $c;
- }
- return null;
- }
- function fmt_dt(?string $dt): string {
- if (!$dt) return '';
- try {
- $d = new DateTime($dt);
- return $d->format('Y-m-d H:i');
- } catch (Throwable $e) {
- return (string)$dt;
- }
- }
- function rel_age(?string $dt): string {
- if (!$dt) return '';
- try {
- $d = new DateTime($dt);
- $now = new DateTime('now');
- $diff = $now->getTimestamp() - $d->getTimestamp();
- if ($diff < 0) return 'in future';
- $mins = (int)floor($diff / 60);
- if ($mins < 60) return $mins . ' min';
- $hrs = (int)floor($mins / 60);
- if ($hrs < 48) return $hrs . ' hr';
- $days = (int)floor($hrs / 24);
- return $days . ' d';
- } catch (Throwable $e) {
- return '';
- }
- }
- // ---- Discover da_* tables ----
- $allTables = [];
- $st = $pdo->query("SHOW TABLES");
- while ($row = $st->fetch(PDO::FETCH_NUM)) {
- $t = $row[0];
- if (strpos($t, 'da_') === 0) $allTables[] = $t;
- }
- // Exclude tables you don't want
- $exclude = ['geo_cache', 'da_plandata', 'da_plan_data', 'da_dorset_stages'];
- $allTables = array_values(array_filter($allTables, fn($t) => !in_array($t, $exclude, true)));
- if (!$allTables) {
- http_response_code(200);
- echo "<h1>No da_* tables found</h1>";
- exit;
- }
- sort($allTables);
- // Optional sort controls
- $sort = (string)($_GET['sort'] ?? 'name'); // name|last|count|open
- $dir = strtolower((string)($_GET['dir'] ?? 'asc')) === 'desc' ? 'desc' : 'asc';
- // Optional external scrape log table support
- // If you have a runs table already, this will attempt to use it.
- // Supported names: scrape_runs, scrape_log, runs
- $runsTable = null;
- foreach (['scrape_runs','scrape_log','runs'] as $cand) {
- if (tableExists($pdo, $cand)) { $runsTable = $cand; break; }
- }
- $runsCouncilCol = $runsTable ? firstExistingCol($pdo, $runsTable, ['council_key','council','table_name','source']) : null;
- $runsStatusCol = $runsTable ? firstExistingCol($pdo, $runsTable, ['status','state','result','ok']) : null;
- $runsTimeCol = $runsTable ? firstExistingCol($pdo, $runsTable, ['finished_at','ended_at','completed_at','run_finished_at','created_at','started_at','run_at','timestamp']) : null;
- $summary = [];
- foreach ($allTables as $t) {
- // counts
- $total = (int)$pdo->query("SELECT COUNT(*) AS c FROM `{$t}`")->fetch()['c'];
- $open = null;
- if (tableHasColumn($pdo, $t, 'on_notice_to')) {
- $open = (int)$pdo->query("SELECT COUNT(*) AS c FROM `{$t}` WHERE (`on_notice_to` IS NULL OR `on_notice_to` >= CURDATE())")->fetch()['c'];
- }
- // last successful scrape
- $lastScrape = null;
- // 1) Try a runs/log table (if present)
- if ($runsTable && $runsCouncilCol && $runsTimeCol) {
- $where = "WHERE `{$runsCouncilCol}` = ?";
- $params = [$t];
- if ($runsStatusCol) {
- $statusMeta = columnMeta($pdo, $runsTable, $runsStatusCol);
- $type = strtolower((string)($statusMeta['Type'] ?? ''));
- if (strpos($type, 'tinyint') !== false || strpos($type, 'int') !== false || $runsStatusCol === 'ok') {
- $where .= " AND `{$runsStatusCol}` = 1";
- } else {
- $where .= " AND `{$runsStatusCol}` IN ('ok','OK','success','successful','done','completed')";
- }
- }
- $sql = "SELECT `{$runsTimeCol}` AS ts FROM `{$runsTable}` {$where} ORDER BY `{$runsTimeCol}` DESC LIMIT 1";
- $q = $pdo->prepare($sql);
- $q->execute($params);
- $r = $q->fetch();
- if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
- }
- // 2) Fallback to per-table timestamp columns
- if (!$lastScrape) {
- $tsCol = firstExistingCol($pdo, $t, [
- 'dev_timestamp',
- 'scraped_at',
- 'scrape_ts',
- 'last_scraped_at',
- 'fetched_at',
- 'updated_at',
- 'created_at',
- 'inserted_at',
- ]);
- if ($tsCol) {
- $meta = columnMeta($pdo, $t, $tsCol);
- $type = strtolower((string)($meta['Type'] ?? ''));
- if (strpos($type, 'int') !== false) {
- $r = $pdo->query("SELECT FROM_UNIXTIME(MAX(`{$tsCol}`)) AS ts FROM `{$t}`")->fetch();
- if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
- } else {
- $r = $pdo->query("SELECT MAX(`{$tsCol}`) AS ts FROM `{$t}`")->fetch();
- if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
- }
- }
- }
- // 3) Final fallback: latest date_received
- if (!$lastScrape && tableHasColumn($pdo, $t, 'date_received')) {
- $r = $pdo->query("SELECT MAX(`date_received`) AS ts FROM `{$t}`")->fetch();
- if ($r && $r['ts']) $lastScrape = (string)$r['ts'];
- }
- // classification stats (only if column exists)
- $classified = null;
- $withPdf = null;
- $typeBreakdown = [];
- if (tableHasColumn($pdo, $t, 'application_type')) {
- $row = $pdo->query("SELECT COUNT(local_document_url) AS with_pdf, COUNT(application_type) AS classified FROM `{$t}`")->fetch();
- $withPdf = (int)$row['with_pdf'];
- $classified = (int)$row['classified'];
- $bt = $pdo->query("SELECT application_type, COUNT(*) AS n FROM `{$t}` WHERE application_type IS NOT NULL GROUP BY application_type ORDER BY n DESC");
- while ($br = $bt->fetch()) {
- $typeBreakdown[(string)$br['application_type']] = (int)$br['n'];
- }
- }
- $summary[] = [
- 'council_key' => $t,
- 'council' => council_name_from_key($t),
- 'total' => $total,
- 'open' => $open,
- 'last_scrape' => $lastScrape,
- 'classified' => $classified,
- 'with_pdf' => $withPdf,
- 'type_breakdown' => $typeBreakdown,
- ];
- }
- // Sorting in PHP (keeps it simple and avoids dynamic SQL)
- $cmp = function(array $a, array $b) use ($sort, $dir): int {
- $mul = $dir === 'desc' ? -1 : 1;
- if ($sort === 'count') {
- return $mul * (($a['total'] <=> $b['total']) ?: strcasecmp($a['council'], $b['council']));
- }
- if ($sort === 'open') {
- $ao = $a['open'] ?? -1;
- $bo = $b['open'] ?? -1;
- return $mul * (($ao <=> $bo) ?: strcasecmp($a['council'], $b['council']));
- }
- if ($sort === 'last') {
- $at = $a['last_scrape'] ? strtotime($a['last_scrape']) : 0;
- $bt = $b['last_scrape'] ? strtotime($b['last_scrape']) : 0;
- return $mul * (($at <=> $bt) ?: strcasecmp($a['council'], $b['council']));
- }
- if ($sort === 'classified') {
- $ac = ($a['with_pdf'] > 0) ? ($a['classified'] / $a['with_pdf']) : -1;
- $bc = ($b['with_pdf'] > 0) ? ($b['classified'] / $b['with_pdf']) : -1;
- return $mul * (($ac <=> $bc) ?: strcasecmp($a['council'], $b['council']));
- }
- return $mul * strcasecmp($a['council'], $b['council']);
- };
- usort($summary, $cmp);
- ?>
- <!doctype html>
- <html lang="en">
- <head>
- <meta charset="utf-8">
- <meta name="viewport" content="width=device-width, initial-scale=1">
- <title>Council scrape status</title>
- <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.8/dist/css/bootstrap.min.css" rel="stylesheet">
- <style>
- body { padding: 24px; }
- .muted { color:#6c757d; }
- .nowrap { white-space: nowrap; }
- .type-pill { display:inline-block; font-size:.7rem; padding:1px 6px; border-radius:10px; background:#e9ecef; color:#495057; margin:1px; white-space:nowrap; }
- </style>
- </head>
- <body>
- <div class="container">
- <div class="d-flex align-items-center justify-content-between mb-3">
- <h1 class="h3 m-0">Council scrape status</h1>
- <div class="d-flex gap-2">
- <a class="btn btn-outline-secondary btn-sm" href="index.php">Open applications</a>
- </div>
- </div>
- <form method="get" class="row g-2 align-items-center mb-3">
- <div class="col-auto">
- <select name="sort" class="form-select form-select-sm">
- <option value="name" <?= $sort==='name'?'selected':'' ?>>sort: council</option>
- <option value="last" <?= $sort==='last'?'selected':'' ?>>sort: last scrape</option>
- <option value="count" <?= $sort==='count'?'selected':'' ?>>sort: total rows</option>
- <option value="open" <?= $sort==='open'?'selected':'' ?>>sort: open rows</option>
- <option value="classified" <?= $sort==='classified'?'selected':'' ?>>sort: classification %</option>
- </select>
- </div>
- <div class="col-auto">
- <select name="dir" class="form-select form-select-sm">
- <option value="asc" <?= $dir==='asc'?'selected':'' ?>>asc</option>
- <option value="desc" <?= $dir==='desc'?'selected':'' ?>>desc</option>
- </select>
- </div>
- <div class="col-auto">
- <button class="btn btn-primary btn-sm" type="submit">Apply</button>
- </div>
- <div class="col-auto muted">
- <?= count($summary) ?> council(s)
- <?php if ($runsTable): ?>
- , using run log: <?= h($runsTable) ?>
- <?php endif; ?>
- </div>
- </form>
- <div class="table-responsive">
- <table class="table table-sm table-bordered align-middle">
- <thead class="table-light">
- <tr>
- <th style="width: 18%">Council</th>
- <th style="width: 14%">Table</th>
- <th class="text-end" style="width: 8%">Total</th>
- <th class="text-end" style="width: 8%">Open</th>
- <th style="width: 18%">Last successful scrape</th>
- <th style="width: 26%">Classification</th>
- <th style="width: 8%"></th>
- </tr>
- </thead>
- <tbody>
- <?php foreach ($summary as $r): ?>
- <tr>
- <td><?= h($r['council']) ?></td>
- <td class="nowrap"><code><?= h($r['council_key']) ?></code></td>
- <td class="text-end"><?= number_format((int)$r['total']) ?></td>
- <td class="text-end"><?= is_null($r['open']) ? '<span class="muted">n/a</span>' : number_format((int)$r['open']) ?></td>
- <td class="nowrap">
- <?php if ($r['last_scrape']): ?>
- <?= h(fmt_dt($r['last_scrape'])) ?>
- <span class="muted">(<?= h(rel_age($r['last_scrape'])) ?> ago)</span>
- <?php else: ?>
- <span class="muted">unknown</span>
- <?php endif; ?>
- </td>
- <td>
- <?php if (is_null($r['classified'])): ?>
- <span class="muted">—</span>
- <?php elseif ($r['with_pdf'] === 0): ?>
- <span class="muted">no PDFs</span>
- <?php else:
- $pct = (int)round(100 * $r['classified'] / $r['with_pdf']);
- ?>
- <div class="d-flex align-items-center gap-1 mb-1">
- <div class="progress flex-grow-1" style="height:6px">
- <div class="progress-bar" style="width:<?= $pct ?>%"></div>
- </div>
- <small class="muted nowrap"><?= $r['classified'] ?>/<?= $r['with_pdf'] ?></small>
- </div>
- <div>
- <?php foreach ($r['type_breakdown'] as $type => $n): ?>
- <span class="type-pill"><?= h($type) ?> <strong><?= $n ?></strong></span>
- <?php endforeach; ?>
- </div>
- <?php endif; ?>
- </td>
- <td class="text-center">
- <a class="btn btn-outline-secondary btn-sm"
- href="index.php?council_key=<?= urlencode($r['council_key']) ?>">
- View
- </a>
- </td>
- </tr>
- <?php endforeach; ?>
- </tbody>
- </table>
- </div>
- <div class="muted mt-3">
- Notes: “Last successful scrape” is taken from a scrape log table if present (scrape_runs, scrape_log, runs). Otherwise it uses the best available per-row timestamp column, otherwise date_received.
- </div>
- </div>
- </body>
- </html>
|