| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- # lib/scraper_helpers.rb
- # Shared top-level helpers required by individual DA scrapers.
- #
- # Usage — at the top of a scraper, after other requires:
- #
- # require_relative "../lib/scraper_helpers"
- #
# This file requires db, enrich, and log so scrapers don't need separate
# require lines for those libs.
- require "uri"
- require_relative "./db"
- require_relative "./enrich"
- require_relative "./log"
# Resolve a possibly-relative href against a base URL.
#
# base - base URL the href is resolved against
# href - link target as scraped; nil or blank yields ""
#
# Surrounding whitespace on href is ignored when resolving. Returns the
# resolved URL as a String, or href.to_s unchanged when URI.join cannot
# resolve it (malformed href, or a base that is itself relative).
def abs_url(base, href)
  raw = href.to_s
  target = raw.strip
  return "" if target.empty?

  URI.join(base, target).to_s
rescue URI::Error
  # URI::Error covers both InvalidURIError (malformed input) and
  # BadURIError (e.g. base and href are both relative).
  raw
end
# Stripped text of +node+, falling back to +default+ when there is no node.
#
# node    - any object responding to #text, or nil
# default - value returned when node is nil (defaults to "")
def text_or(node, default = "")
  return default unless node

  node.text.strip
end
# Upsert a DA row, run enrichment, and optionally UPDATE extra columns.
#
# table  - validated DA table name (e.g. "da_glamorgan")
# row    - hash passed to DB.upsert; must include :council_reference and :address
# extras - optional hash of { column_name => value } pairs to UPDATE after upsert
#          e.g. { document_url: "https://...", on_notice_to: Date.new(2025,6,1) }
#          nil is treated the same as an empty hash.
#
# The extras UPDATE is best-effort: a Mysql2::Error is logged as a warning
# and does not abort the call. Logs "upserted <ref> -> <address>" at info
# level once the upsert and enrichment have run.
def upsert_and_enrich!(table:, row:, extras: {})
  DB.upsert(table, row)

  ref = row[:council_reference]
  address = row[:address]
  enrich_after_upsert!(table: table, council_reference: ref, address: address)

  unless extras.nil? || extras.empty?
    stmt = nil
    begin
      esc = DB.client.escape(table)
      # NOTE(review): column names are interpolated verbatim inside backticks;
      # callers are expected to pass trusted, literal column names.
      set_clause = extras.keys.map { |k| "`#{k}` = ?" }.join(", ")
      stmt = DB.client.prepare(
        "UPDATE `#{esc}` SET #{set_clause} WHERE council_reference = ? AND address = ?"
      )
      stmt.execute(*extras.values, ref, address)
    rescue Mysql2::Error => e
      Log.warn "scraper", "extras update skipped for #{ref}: #{e.message}"
    ensure
      # Release the prepared statement right away instead of waiting for GC,
      # so long scraper runs do not accumulate open statements.
      stmt&.close
    end
  end

  Log.info "scraper", "upserted #{ref} -> #{address}"
end
|