Benjamin Harris 2 сар өмнө
parent
commit
195cec142a

+ 2 - 1
.claude/settings.local.json

@@ -11,7 +11,8 @@
       "Bash(python3)",
       "Bash(python3 -c ':*)",
       "Bash(grep -n \"FROM \\\\`{\" /f/GIT_REPO/tas_councils/web/index.php)",
-      "Bash(grep -n '\\\\$t\\\\|tableHasColumn\\\\|tableExists' /f/GIT_REPO/tas_councils/web/index.php)"
+      "Bash(grep -n '\\\\$t\\\\|tableHasColumn\\\\|tableExists' /f/GIT_REPO/tas_councils/web/index.php)",
+      "Bash(xargs sed:*)"
     ]
   }
 }

+ 1 - 1
CLAUDE.md

@@ -55,7 +55,7 @@ docker compose run --rm \
 
 ### Each scraper follows this pattern:
 1. `TABLE = ENV.fetch("TABLE_NAME")` — set by `run_all.sh` from the filename
-2. `DB.ensure_table!(TABLE)` + `ensure_extra_columns!(TABLE)` — idempotent schema setup
+2. `DB.ensure_table!(TABLE)` — idempotent schema setup; the table is created with every column, so no separate `ensure_extra_columns!` call is needed
 3. Fetch HTML via `Http.get(url)` (handles retries, cookies, WAF warmup)
 4. Parse with Nokogiri
 5. `DB.upsert(TABLE, row)` — upserts on `(council_reference, address)`, write-once for `date_received`

+ 1 - 1
README.md

@@ -198,7 +198,7 @@ docker compose run --rm \
 1. Create `scrapers/<councilname>.rb` — use an existing simple scraper (e.g. `glamorgan.rb`) as a template.
 2. At minimum the scraper must:
    - Read `TABLE = ENV.fetch("TABLE_NAME")`
-   - Call `DB.ensure_table!(TABLE)` and `ensure_extra_columns!(TABLE)`
+   - Call `DB.ensure_table!(TABLE)` — it creates the table with all schema columns, so no `ensure_extra_columns!` call is needed
    - Call `DB.upsert(TABLE, row)` with at least `council_reference` and `address`
    - Call `enrich_after_upsert!` after each upsert
 3. Add the council to `COUNCIL_MAP` in `lib/util.rb` if PlanBuild integration is needed.

+ 22 - 54
lib/enrich.rb

@@ -1,54 +1,20 @@
-# tools/enrich.rb
-# Enrich DA rows AFTER scrapers:
-#  - Geocode (address_std, street/locality/state/postcode, lat/lng)
-#  - PID + Title via list_lookup.php (property_id, title_reference, area_sqm/ha)
-#
-# Usage examples:
-#   docker compose run --rm \
-#     -e GOOGLE_MAPS_API_KEY="$GOOGLE_MAPS_API_KEY" \
-#     -e LOOKUP_URL="http://web/list_lookup.php" \
-#     scraper ruby /app/tools/enrich.rb
+# lib/enrich.rb
+# Per-row enrichment called right after each DB.upsert:
+#  1. Geocode (address_std, street/locality/state/postcode, lat/lng) via Google Maps
+#  2. Property lookup (property_id, title_reference) via LOOKUP_URL service
 #
-#   # Single table, slower throttle, dry run:
-#   docker compose run --rm \
-#     -e GOOGLE_MAPS_API_KEY="$GOOGLE_MAPS_API_KEY" \
-#     -e LOOKUP_URL="http://web/list_lookup.php" \
-#     -e GEOCODE_LIMIT=200 -e GEOCODE_THROTTLE_MS=200 \
-#     -e LOOKUP_LIMIT=200  -e LOOKUP_THROTTLE_MS=250 \
-#     -e DRY_RUN=1 \
-#     scraper ruby /app/tools/enrich.rb --table=da_dorset
+# Schema is owned by DB.ensure_table! (new tables) and lib/migrate.rb (existing tables).
+# Scrapers only need to call DB.ensure_table! — no separate ensure_extra_columns! required.
 
-# lib/enrich.rb
 require "json"
 require "net/http"
 require "uri"
 require_relative "./db"
-require_relative "./util"
 require_relative "./geocode"
 require_relative "./log"
 
 LOOKUP_URL = ENV["LOOKUP_URL"] # e.g. http://web/list_lookup.php
 
-def ensure_extra_columns!(table)
-  DB.validate_table_name!(table)
-  esc = DB.client.escape(table)
-  {
-    "address_std"        => "VARCHAR(255) NULL",
-    "lat"                => "DOUBLE NULL",
-    "lng"                => "DOUBLE NULL",
-    "property_id"        => "VARCHAR(50) NULL",
-    "title_reference"    => "VARCHAR(80) NULL",
-    "document_url"       => "TEXT NULL",
-    "local_document_url" => "TEXT NULL",
-    "on_notice_to"       => "DATE NULL",
-    "on_notice_to_raw"   => "VARCHAR(80) NULL"
-  }.each do |col, defn|
-    DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `#{col}` #{defn}")
-  rescue Mysql2::Error => e
-    Log.warn "enrich", "schema migration skipped for #{table}.#{col}: #{e.message}"
-  end
-end
-
 def http_post_json(url, payload, timeout: 15)
   uri = URI.parse(url)
   http = Net::HTTP.new(uri.host, uri.port)
@@ -65,41 +31,43 @@ rescue JSON::ParserError
   {}
 end
 
-# Call this right after DB.upsert in each scraper
-#   enrich_after_upsert!(table: TABLE, council_reference: council_reference, address: address)
+# Call this right after DB.upsert in each scraper:
+#   enrich_after_upsert!(table: TABLE, council_reference: ref, address: addr)
 def enrich_after_upsert!(table:, council_reference:, address:)
   DB.validate_table_name!(table)
   esc = DB.client.escape(table)
-  sel = DB.client.prepare("SELECT id, address, address_std, lat, lng, property_id, title_reference FROM `#{esc}` WHERE council_reference = ? AND address = ? LIMIT 1")
+  sel = DB.client.prepare(
+    "SELECT id, address, address_std, lat, lng, property_id, title_reference " \
+    "FROM `#{esc}` WHERE council_reference = ? AND address = ? LIMIT 1"
+  )
   row = sel.execute(council_reference, address).first
   return unless row
 
-  # 1) Geocode if missing lat/lng or std address
-  if row["lat"].nil? || row["lng"].nil? || (row["address_std"].to_s.strip.empty?)
+  # 1) Geocode if missing lat/lng or normalised address
+  if row["lat"].nil? || row["lng"].nil? || row["address_std"].to_s.strip.empty?
     begin
       geo = Geocode.format_au(row["address"])
       Geocode.update_da_row!(
-        table: table,
+        table:             table,
         council_reference: council_reference,
-        orig_address: row["address"],
-        geo: geo
+        orig_address:      row["address"],
+        geo:               geo
       )
       Log.debug "enrich", "geocoded #{table} #{council_reference}"
-      # refresh row to fetch lat/lng for next step
       row = sel.execute(council_reference, address).first
-    rescue => e
-      warn "[enrich] geocode failed #{table} #{council_reference}: #{e.class} #{e.message}"
+    rescue StandardError => e
+      Log.warn "enrich", "geocode failed #{table} #{council_reference}: #{e.class} #{e.message}"
     end
   end
 
-  # 2) LIST lookup only if we have coords and something’s missing
+  # 2) Property lookup — only if coords exist and pid/title are missing
   need_pid   = row["property_id"].to_s.strip.empty?
   need_title = row["title_reference"].to_s.strip.empty?
   if LOOKUP_URL && row["lat"] && row["lng"] && (need_pid || need_title)
     begin
       resp = http_post_json(LOOKUP_URL, { lat: row["lat"], lng: row["lng"] })
       if resp["ok"]
-        pid   = (resp["pid"] || "").to_s
+        pid   = (resp["pid"]      || "").to_s
         title = (resp["title_id"] || "").to_s
         upd = DB.client.prepare(
           "UPDATE `#{esc}` SET " \
@@ -116,7 +84,7 @@ def enrich_after_upsert!(table:, council_reference:, address:)
       else
         Log.warn "enrich", "lookup error #{table} #{council_reference}: #{resp["error"]}"
       end
-    rescue => e
+    rescue StandardError => e
       Log.warn "enrich", "lookup failed #{table} #{council_reference}: #{e.class} #{e.message}"
     end
   end

+ 2 - 3
scrapers/break_oday.rb

@@ -17,7 +17,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 def abs_url(base, href)
     return "" if href.to_s.strip.empty?
@@ -77,7 +76,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
             puts "Saved PDF #{path}"
             # return web-accessible relative path if needed
             "/downloads/breakoday/#{safe_name(council_reference)}/#{fname}"
-        rescue => e
+        rescue StandardError => e
             warn "PDF download failed for #{url}: #{e.class} #{e.message}"
             nil
         end
@@ -149,7 +148,7 @@ LIMIT 1
         begin
             row = DB.client.prepare(sql).execute(council_reference, address).first
             puts "  enriched -> #{row ? row.inspect : 'nil'}"
-        rescue => e
+        rescue StandardError => e
             warn "  enriched probe failed: #{e.class} #{e.message}"
         end
 

+ 3 - 4
scrapers/brighton.rb

@@ -16,7 +16,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)   
 
 # --- helpers ---------------------------------------------------------------
 # DA/APP refs like “DA2025-130”, “DA 2024/174”, etc → “DA YYYY / NNN…”
@@ -68,7 +67,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
             File.binwrite(path, body)
             puts "Saved PDF #{path}"
             "/downloads/brighton/#{safe_name(council_reference)}/#{fname}"
-        rescue => e
+        rescue StandardError => e
             warn "PDF download failed for #{url}: #{e.class} #{e.message}"
             nil
         end
@@ -152,7 +151,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
         begin
             upd = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ? WHERE council_reference = ? AND address = ?")
             upd.execute(document_url, council_reference, address)
-        rescue => e
+        rescue StandardError => e
             warn "document_url update skipped for #{council_reference}: #{e.class} #{e.message}"
         end
 
@@ -162,7 +161,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
             begin
                 upd2 = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET local_document_url = ? WHERE council_reference = ? AND address = ?")
                 upd2.execute(local_doc_url, council_reference, address)
-            rescue => e
+            rescue StandardError => e
                 warn "local_document_url update skipped for #{council_reference}: #{e.class} #{e.message}"
             end
         end

+ 4 - 5
scrapers/burnie.rb

@@ -26,7 +26,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 # ----- HTTP helpers (browser-y headers + cookie jar + gzip/deflate) -----
 UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "\
@@ -179,7 +178,7 @@ def first_pdf_on_detail(detail_url, jar)
       doc.at_css("a[href$='.pdf'], a[href*='.pdf?']")
   return "" unless a
   URI.join(detail_url, a["href"].to_s).to_s
-rescue => e
+rescue StandardError => e
   warn "Detail fetch failed for #{detail_url}: #{e.class} #{e.message}"
   ""
 end
@@ -194,7 +193,7 @@ def decode_seamless_viewstate(doc)
     raw
   end
   Nokogiri::HTML(html)
-rescue => e
+rescue StandardError => e
   warn "Failed to decode __SEAMLESSVIEWSTATE: #{e.class} #{e.message}"
   nil
 end
@@ -237,7 +236,7 @@ def save_pdf(document_url, council_reference, jar, referer:)
   else
     warn "PDF fetch failed (#{code} #{msg}) for #{document_url}"
   end
-rescue => e
+rescue StandardError => e
   warn "PDF save error for #{document_url}: #{e.class} #{e.message}"
 end
 
@@ -363,7 +362,7 @@ nodes.each do |a|
     )
     title_reference = a.at_css(".list-item-title")&.text&.strip.to_s
     upd.execute(document_url, on_notice_to, on_notice_to_raw, title_reference, council_reference, address)
-  rescue => e
+  rescue StandardError => e
     warn "Extra fields update skipped for #{council_reference}: #{e.class} #{e.message}"
   end
 

+ 2 - 3
scrapers/centralcoast.rb

@@ -18,7 +18,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 def abs_url(base, href)
     return "" if href.to_s.strip.empty?
@@ -66,7 +65,7 @@ puts "  saved #{File.basename(path)} (#{body.to_s.bytesize} bytes)"
 
 # adjust if your web container mounts differently
 "/downloads/centralcoast/#{safe_name(council_reference)}/#{fname}"
-rescue => e
+rescue StandardError => e
 warn "Download failed for #{url}: #{e.class} #{e.message}"
 nil
 end
@@ -162,7 +161,7 @@ begin
             "WHERE council_reference = ? AND address = ?"
         )
     upd.execute(document_url, local_doc_url, title_reference, council_reference, address)
-rescue => e
+rescue StandardError => e
     warn "Extras update skipped for #{council_reference}: #{e.class} #{e.message}"
 end
 

+ 1 - 1
scrapers/circularhead.rb

@@ -15,7 +15,7 @@ DB.ensure_table!(TABLE)
 begin
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS document_url VARCHAR(1024) NULL")
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS title_reference TEXT NULL")
-rescue => e
+rescue StandardError => e
   warn "Optional column add skipped: #{e.class} #{e.message}"
 end
 

+ 2 - 3
scrapers/clarence.rb

@@ -19,7 +19,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 def abs_url(base, href)
     return "" if href.to_s.strip.empty?
@@ -86,7 +85,7 @@ def parse_date_token(s)
 
                 # Web-accessible path (served by your web container)
                 "/downloads/clarence/#{safe_name(council_reference)}/#{fname}"
-            rescue => e
+            rescue StandardError => e
                 warn "PDF download failed for #{url}: #{e.class} #{e.message}"
                 nil
             end
@@ -192,7 +191,7 @@ def parse_date_token(s)
                         "WHERE council_reference = ? AND address = ?"
                     )
                 upd.execute(r[:pdf], local_doc_url, r[:on_notice], r[:on_notice_raw], r[:title_reference], cr, addr)
-            rescue => e
+            rescue StandardError => e
                 warn "Extras update skipped for #{cr}: #{e.class} #{e.message}"
             end
 

+ 3 - 3
scrapers/derwentvalley.rb

@@ -113,14 +113,14 @@ end
 links = []
 begin
   links = detail_links_from_list(LIST_URL)
-rescue => e
+rescue StandardError => e
   warn "List fetch failed, will try news listing: #{e.class} #{e.message}"
 end
 
 if links.empty?
   begin
     links = detail_links_from_news(NEWS_URL)
-  rescue => e
+  rescue StandardError => e
     warn "News fetch failed: #{e.class} #{e.message}"
   end
 end
@@ -134,7 +134,7 @@ saved = 0
 links.each do |u|
   begin
     item = parse_detail(u)
-  rescue => e
+  rescue StandardError => e
     warn "Skip #{u}: #{e.class} #{e.message}"
     next
   end

+ 1 - 2
scrapers/devonportcity.rb

@@ -18,7 +18,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 def abs_url(base, href)
     return "" if href.to_s.strip.empty?
@@ -97,7 +96,7 @@ def extract_on_notice_to_from_title(title)
                     end
                 end
             end
-        rescue => e
+        rescue StandardError => e
             warn "PDF save error for #{url}: #{e.class} #{e.message}"
         end
 

+ 5 - 6
scrapers/dorset.rb

@@ -21,7 +21,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)   
 
 def abs_url(href)
   return "" if href.to_s.strip.empty?
@@ -231,7 +230,7 @@ def download_all(urls, jar, council_reference)
       puts "  saved #{path} (#{bytes} bytes)"
       saved << path
       first_web_rel ||= "/files/dorset/#{safe_name(council_reference)}/#{File.basename(path)}"
-    rescue => e
+    rescue StandardError => e
       warn "Download failed for #{u}: #{e.class} #{e.message}"
     end
   end
@@ -240,7 +239,7 @@ def download_all(urls, jar, council_reference)
     begin
       DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET local_document_url = ? WHERE council_reference = ?")
                .execute(first_web_rel, council_reference)
-    rescue => e
+    rescue StandardError => e
       warn "Failed to set local_document_url for #{council_reference}: #{e.class} #{e.message}"
     end
   end
@@ -285,7 +284,7 @@ list_items.each do |r|
           r[:on_notice_to_raw] = adv[:completed_raw]  || adv[:target_raw]
         end
       end
-    rescue => e
+    rescue StandardError => e
       warn "Detail fetch failed for #{detail_url}: #{e.class} #{e.message}"
     end
   end
@@ -296,7 +295,7 @@ list_items.each do |r|
   geo = nil
   begin
     geo = Geocode.format_au(r[:address])
-  rescue => e
+  rescue StandardError => e
     warn "Geocode error for #{r[:council_reference]}: #{e.class} #{e.message}"
   end
   
@@ -332,7 +331,7 @@ sql = %Q{
 begin
   row = DB.client.prepare(sql).execute(council_reference, address).first
   puts "  enriched -> #{row ? row.inspect : 'nil'}"
-rescue => e
+rescue StandardError => e
   warn "  enriched probe failed: #{e.class} #{e.message}"
 end
 

+ 1 - 1
scrapers/flinders_council.rb

@@ -15,7 +15,7 @@ DB.ensure_table!(TABLE)
 # Optional column to keep the PDF link
 begin
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS document_url VARCHAR(1024) NULL")
-rescue => e
+rescue StandardError => e
   warn "document_url add skipped: #{e.class} #{e.message}"
 end
 

+ 5 - 6
scrapers/glenorchy.rb

@@ -15,12 +15,11 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 # Optional: keep the document link with each row (adds a column if missing)
 begin
     DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS document_url VARCHAR(1024) NULL")
-rescue => e
+rescue StandardError => e
     warn "Could not add document_url column: #{e.class} #{e.message}"
 end
 
@@ -67,7 +66,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
 
             # web-facing relative path (match your nginx/apache mapping)
             "/downloads/glenorchy/#{safe_name(council_reference)}/#{fname}"
-        rescue => e
+        rescue StandardError => e
             warn "Download failed for #{doc_url}: #{e.class} #{e.message}"
             ""
         end
@@ -129,7 +128,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
         begin
             DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ? WHERE council_reference = ? AND address = ?")
             .execute(document_url, council_reference, address)
-        rescue => e
+        rescue StandardError => e
             warn "document_url update failed: #{e.class} #{e.message}"
         end
 
@@ -139,7 +138,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
             begin
                 DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET local_document_url = ? WHERE council_reference = ? AND address = ?")
                 .execute(local_rel, council_reference, address)
-            rescue => e
+            rescue StandardError => e
                 warn "local_document_url update failed: #{e.class} #{e.message}"
             end
         end
@@ -154,7 +153,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
         begin
             upd = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ? WHERE council_reference = ? AND address = ?")
             upd.execute(document_url, council_reference, address)
-        rescue => e
+        rescue StandardError => e
             # ignore if column not present
         end
 

+ 2 - 2
scrapers/huonvalley.rb

@@ -16,7 +16,7 @@ DB.ensure_table!(TABLE)
 # Optional: keep the SharePoint link
 begin
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS document_url TEXT NULL")
-rescue => e
+rescue StandardError => e
   warn "document_url add skipped: #{e.class} #{e.message}"
 end
 
@@ -102,7 +102,7 @@ seen_refs = {}
 loop do
   begin
     html = Http.get(url)
-  rescue => e
+  rescue StandardError => e
     warn "Failed to fetch #{url}: #{e.class} #{e.message}"
     break
   end

+ 2 - 2
scrapers/kentish.rb

@@ -130,7 +130,7 @@ end
 
 begin
   html = Http.get(URL)
-rescue => e
+rescue StandardError => e
   warn "Failed to fetch #{URL}: #{e.class} #{e.message}"
   exit 1
 end
@@ -160,7 +160,7 @@ items.each do |r|
   begin
     upd = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ?, on_notice_to = ?, on_notice_to_raw = ? WHERE council_reference = ? AND address = ?")
     upd.execute(r[:document_url], r[:date_received], r[:date_received_raw], r[:council_reference], r[:address])
-  rescue => e
+  rescue StandardError => e
     warn "Extras update skipped for #{r[:council_reference]}: #{e.class} #{e.message}"
   end
 

+ 7 - 7
scrapers/launcestoncity.rb

@@ -227,7 +227,7 @@ def probe_common_docs(base_url:, key:, danum:, referer:)
           begin
             saved = download_doc(pdf_url, referer: referer, council_reference: danum_raw, jar: SESSION_JAR)
             local_rel = "/files/launceston/#{safe_name(danum_raw)}/#{File.basename(saved)}"
-          rescue => e
+          rescue StandardError => e
             warn "DOC download failed (probe) for #{danum_raw} #{File.basename(pdf_url)}: #{e.class} #{e.message}"
           end
         end
@@ -370,7 +370,7 @@ tables.each do |t|
 					  begin
 						saved = download_doc(href, referer: candidate_url, council_reference: council_reference, jar: SESSION_JAR)
 						local_rel = "/files/launceston/#{safe_name(council_reference)}/#{File.basename(saved)}"
-					  rescue => e
+					  rescue StandardError => e
 						warn "DOC download failed for #{council_reference} #{name}: #{e.class} #{e.message}"
 					  end
 					end
@@ -397,7 +397,7 @@ tables.each do |t|
 					  documents.concat(probed)
 					  anchors_added = probed.size if probed.any?
 					end
-				  rescue => e
+				  rescue StandardError => e
 					warn "Probe fallback failed for #{council_reference}: #{e.class} #{e.message}"
 				  end
 				  end
@@ -412,12 +412,12 @@ tables.each do |t|
 					  dump_dir = "/app/tmp/launceston_doclist_dumps"
 					  FileUtils.mkdir_p(dump_dir)
 					  File.write(File.join(dump_dir, "#{safe_name(council_reference)}.html"), list_html[0, 5000])
-					rescue => e
+					rescue StandardError => e
 					  warn "Failed to write dump for #{council_reference}: #{e.class} #{e.message}"
 					end
 				  end
 
-				rescue => e
+				rescue StandardError => e
 				  warn "Doc list fetch failed for #{council_reference} at #{candidate_url} (referer: #{ref}): #{e.class} #{e.message}"
 				end
 			  end
@@ -428,7 +428,7 @@ tables.each do |t|
 			end
 
 
-		  rescue => e
+		  rescue StandardError => e
 			warn "Doc list fetch failed for #{council_reference}: #{e.class} #{e.message}"
 		  end
 		end
@@ -468,7 +468,7 @@ tables.each do |t|
 		  documents_json:             JSON.generate(documents) # full set
 		})
 
-    rescue => e
+    rescue StandardError => e
       warn "Enrich failed for #{council_reference}: #{e.class} #{e.message}"
     end
   end

+ 2 - 3
scrapers/meandervalley.rb

@@ -22,7 +22,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)
 
 # Pull nearest text around an anchor for parsing
 def host_block_for(a)
@@ -101,7 +100,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
             File.binwrite(path, body)
             puts "Saved PDF #{path}"
             "/downloads/meandervalley/#{safe_name(council_reference)}/#{fname}"
-        rescue => e
+        rescue StandardError => e
             warn "PDF download failed for #{url}: #{e.class} #{e.message}"
             nil
         end
@@ -224,7 +223,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
                 r[:council_reference],
                 r[:address]
                 )
-        rescue => e
+        rescue StandardError => e
             warn "Extras update skipped for #{r[:council_reference]}: #{e.class} #{e.message}"
         end
 

+ 1 - 1
scrapers/northernmidlands.rb

@@ -152,7 +152,7 @@ begin
   else
     Http.get(URL)
   end
-rescue => e
+rescue StandardError => e
   warn "Failed to fetch #{URL}: #{e.class} #{e.message}"
   exit 1
 end

+ 2 - 2
scrapers/planbuild.rb

@@ -110,7 +110,7 @@ items.each do |r|
     detail = {}
     begin
         detail = fetch_detail(uuid, jar, token, hdr) if uuid
-    rescue => e
+    rescue StandardError => e
         warn "Detail fetch failed for #{ref}: #{e.class} #{e.message}"
     end
 
@@ -160,7 +160,7 @@ items.each do |r|
     geo = nil
     begin
         geo = Geocode.format_au(addr)
-    rescue => e
+    rescue StandardError => e
         warn "Geocode error for #{ref}: #{e.class} #{e.message}"
     end
 

+ 3 - 3
scrapers/southernmidlands.rb

@@ -19,7 +19,7 @@ begin
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS on_notice_to DATE NULL")
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS on_notice_to_raw VARCHAR(80) NULL")
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS title_reference TEXT NULL")
-rescue => e
+rescue StandardError => e
   warn "Optional column add skipped: #{e.class} #{e.message}"
 end
 
@@ -96,7 +96,7 @@ saved = 0
 detail_links.each do |url|
   begin
     html = Http.get(url)
-  rescue => e
+  rescue StandardError => e
     warn "Skip #{url}: #{e.class} #{e.message}"
     next
   end
@@ -173,7 +173,7 @@ detail_links.each do |url|
   begin
     upd = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ?, on_notice_to = ?, on_notice_to_raw = ?, title_reference = ? WHERE council_reference = ? AND address = ?")
     upd.execute(document_url, on_notice, on_notice_raw.to_s, title_reference, council_reference, address)
-  rescue => e
+  rescue StandardError => e
     warn "Extras update skipped for #{council_reference}: #{e.class} #{e.message}"
   end
 

+ 4 - 4
scrapers/waratah_wynyard.rb

@@ -19,7 +19,7 @@ begin
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS on_notice_to DATE NULL")
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS on_notice_to_raw VARCHAR(80) NULL")
   DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS title_reference TEXT NULL")
-rescue => e
+rescue StandardError => e
   warn "Optional column add skipped: #{e.class} #{e.message}"
 end
 
@@ -179,7 +179,7 @@ end
 
 begin
   html = URL.include?("/eservice/") ? Http.dorset_session_get(URL) : Http.get(URL)
-rescue => e
+rescue StandardError => e
   warn "Failed to fetch #{URL}: #{e.class} #{e.message}"
   exit 1
 end
@@ -226,7 +226,7 @@ anchors.each do |u|
     begin
       item = parse_detail_page(u)
       rows << item if item
-    rescue => e
+    rescue StandardError => e
       warn "Skip detail #{u}: #{e.class} #{e.message}"
     end
   end
@@ -270,7 +270,7 @@ rows.each do |r|
       "WHERE council_reference = ? AND address = ?"
     )
     upd.execute(r[:document_url], r[:date_received], r[:date_received_raw], r[:title_reference], cr, addr)
-  rescue => e
+  rescue StandardError => e
     warn "Extras update skipped for #{cr}: #{e.class} #{e.message}"
   end
 

+ 2 - 3
scrapers/westcoast.rb

@@ -16,7 +16,6 @@ DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
 DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 
 DB.ensure_table!(TABLE)
-ensure_extra_columns!(TABLE)  
 
 def abs_url(base, href)
   return "" if href.to_s.strip.empty?
@@ -148,7 +147,7 @@ date_received	 = Date.today
 detail_links.each do |u|
   begin
     item = parse_detail(u)
-  rescue => e
+  rescue StandardError => e
     warn "Skip #{u}: #{e.class} #{e.message}"
     next
   end
@@ -175,7 +174,7 @@ detail_links.each do |u|
   begin
     upd = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ?, on_notice_to = ?, on_notice_to_raw = ?, title_reference = ? WHERE council_reference = ? AND address = ?")
     upd.execute(item[:document_url], item[:date_received], item[:date_received_raw], item[:title_reference], item[:council_reference], item[:address])
-  rescue => e
+  rescue StandardError => e
     warn "Extras update skipped for #{item[:council_reference]}: #{e.class} #{e.message}"
   end
 

+ 1 - 1
scrapers/westtamar.rb

@@ -114,7 +114,7 @@ saved = 0
 detail_links.each do |u|
   begin
     item = parse_detail(u)
-  rescue => e
+  rescue StandardError => e
     warn "Skip #{u}: #{e.class} #{e.message}"
     next
   end

+ 5 - 5
tools/backfill_dorset_docs.rb

@@ -134,7 +134,7 @@ def download_all(urls, jar, council_reference)
       # map to web path. Your web exposes downloads at /files
       saved_web << path.sub(DOWNLOAD_DIR, "/files")
       puts "  saved #{File.basename(path)} (#{body.bytesize} bytes)"
-    rescue => e
+    rescue StandardError => e
       warn "Download failed for #{u}: #{e.class} #{e.message}"
     end
   end
@@ -160,7 +160,7 @@ LISTS.each do |url|
       index_by_ref[it[:council_reference]] << abs_url(it[:detail_href])
     end
     puts "  #{url} -> #{items.length} items"
-  rescue => e
+  rescue StandardError => e
     warn "List fetch failed #{url}: #{e.class} #{e.message}"
   end
 end
@@ -177,7 +177,7 @@ begin
     DB.client.query("ALTER TABLE `#{te}` ADD COLUMN local_document_url TEXT NULL")
     needs_local_col = false
   end
-rescue => e
+rescue StandardError => e
   warn "Could not add local_document_url column: #{e.class} #{e.message}"
 end
 
@@ -211,7 +211,7 @@ todo.each_with_index do |r, i|
       res = dorset_get(jar, detail_url)
       doc_urls = extract_doc_links(res.body)
       break unless doc_urls.empty?
-    rescue => e
+    rescue StandardError => e
       warn "Detail fetch failed #{detail_url}: #{e.class} #{e.message}"
       next
     end
@@ -241,7 +241,7 @@ todo.each_with_index do |r, i|
       stmt.execute(rep_web, ref, add)
     end
     puts "[#{i+1}/#{todo.length}] #{ref} -> saved #{saved_abs.length} file(s)"
-  rescue => e
+  rescue StandardError => e
     warn "Update failed for #{ref}: #{e.class} #{e.message}"
   end
 end

+ 1 - 1
tools/backfill_geocode.rb

@@ -121,7 +121,7 @@ def normalize_one!(row, table, upd_stmt, over_stmt, overwrite:)
   end
 
   :ok
-rescue => e
+rescue StandardError => e
   warnx "  [#{table}] ##{id} #{ref} — update error: #{e.class} #{e.message}"
   :err
 end

+ 2 - 2
tools/import_sqlites.rb

@@ -141,7 +141,7 @@ def import_file(path)
             "WHERE council_reference = ? AND address = ?"
           )
           upd.execute(on_notice_date, on_notice_to_raw, title_reference, document_url, ref, address)
-        rescue => e
+        rescue StandardError => e
           warnx "   extras update skipped for #{ref}: #{e.class} #{e.message}"
         end
       end
@@ -150,7 +150,7 @@ def import_file(path)
     end
     say "  Imported #{count} row(s)"
   end
-rescue => e
+rescue StandardError => e
   warnx "  ERROR importing #{File.basename(path)}: #{e.class} #{e.message}"
 end