# lib/scraper_helpers.rb
# Shared top-level helpers required by individual DA scrapers.
#
# Usage — at the top of a scraper, after other requires:
#
#   require_relative "../lib/scraper_helpers"
#
# This file requires db and enrich so scrapers don't need separate
# require lines for those two libs.

require "uri"
require_relative "./db"
require_relative "./enrich"

# Resolve a possibly-relative href against a base URL.
#
# base - URL the href is resolved against
# href - link target; may be relative, absolute, blank or nil
#
# Returns "" when href is nil or blank, the joined URL as a String when
# URI.join succeeds, and href.to_s unchanged when URI.join raises a URI::Error.
def abs_url(base, href)
  return "" if href.to_s.strip.empty?

  URI.join(base, href).to_s
rescue URI::Error
  # URI::Error is the common parent of URI::InvalidURIError (unparseable
  # input) and URI::BadURIError (e.g. "both URI are relative"). Rescuing the
  # parent keeps the documented "return href unchanged" fallback for both.
  href.to_s
end

# Return node.text.strip, or default when node is nil.
#
# node    - object responding to #text, or nil
# default - value returned when node is nil (default: "")
def text_or(node, default = "")
  return default unless node

  node.text.strip
end

# Upsert a DA row, run enrichment, and optionally UPDATE extra columns.
#
# table  - validated DA table name (e.g. "da_glamorgan")
# row    - hash passed to DB.upsert; must include :council_reference and :address
# extras - optional hash of { column_name => value } pairs to UPDATE after upsert
#          e.g. { document_url: "https://...", on_notice_to: Date.new(2025,6,1) }
#          nil is treated like an empty hash.
#
# The extras UPDATE is best-effort: a Mysql2::Error raised there is reported
# via warn and does not abort the call. Errors raised by DB.upsert or
# enrich_after_upsert! are not rescued here.
#
# Prints "Upserted <council_reference> -> <address>" on success (also printed
# when the extras UPDATE was skipped because of a Mysql2::Error).
def upsert_and_enrich!(table:, row:, extras: {})
  DB.upsert(table, row)
  enrich_after_upsert!(
    table: table,
    council_reference: row[:council_reference],
    address: row[:address]
  )

  if extras && !extras.empty?
    upd = nil
    begin
      esc = DB.client.escape(table)
      # NOTE(review): column names are interpolated into the SQL as
      # identifiers, so extras keys must come from scraper code, never from
      # scraped content. Values are bound as statement parameters.
      set_clause = extras.keys.map { |k| "`#{k}` = ?" }.join(", ")
      vals = extras.values + [row[:council_reference], row[:address]]
      upd = DB.client.prepare(
        "UPDATE `#{esc}` SET #{set_clause} WHERE council_reference = ? AND address = ?"
      )
      upd.execute(*vals)
    rescue Mysql2::Error => e
      warn "[scraper_helpers] extras update skipped for #{row[:council_reference]}: #{e.message}"
    ensure
      # Release the server-side prepared statement right away instead of
      # waiting for GC; one is prepared per call, so a long scraper run would
      # otherwise accumulate them. Assumes DB.client.prepare returns a
      # Mysql2::Statement — TODO confirm against lib/db.
      upd.close if upd
    end
  end

  puts "Upserted #{row[:council_reference]} -> #{row[:address]}"
end