scraper_helpers.rb 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # lib/scraper_helpers.rb
  2. # Shared top-level helpers required by individual DA scrapers.
  3. #
  4. # Usage — at the top of a scraper, after other requires:
  5. #
  6. # require_relative "../lib/scraper_helpers"
  7. #
  8. # This file requires db and enrich so scrapers don't need separate
  9. # require lines for those two libs.
  10. require "uri"
  11. require_relative "./db"
  12. require_relative "./enrich"
  # Resolve a possibly-relative href against a base URL.
  #
  # base - URL (String or URI) that href is resolved against
  # href - link target as found in the page; may be nil, blank, relative or
  #        absolute. Surrounding whitespace is ignored when resolving.
  #
  # Returns the resolved URL as a String, "" when href is nil or blank, or
  # href.to_s unchanged when the pair cannot be resolved.
  def abs_url(base, href)
    link = href.to_s.strip
    return "" if link.empty?

    URI.join(base, link).to_s
  rescue URI::Error
    # URI::Error is the parent of both InvalidURIError (malformed input) and
    # BadURIError (e.g. a relative base), so every URI-level failure falls
    # back to the raw href instead of aborting the scrape.
    href.to_s
  end
  # Stripped text of a node, with a fallback for missing nodes.
  #
  # node    - object responding to #text, or nil/false when nothing matched
  # default - value handed back when node is nil/false (defaults to "")
  #
  # Returns node.text.strip when node is present, otherwise default.
  def text_or(node, default = "")
    return default unless node

    node.text.strip
  end
  # Upsert a DA row, run enrichment, and optionally UPDATE extra columns.
  #
  # table  - validated DA table name (e.g. "da_glamorgan")
  # row    - hash passed to DB.upsert; must include :council_reference and :address
  # extras - optional hash of { column_name => value } pairs to UPDATE after upsert
  #          e.g. { document_url: "https://...", on_notice_to: Date.new(2025,6,1) }
  #          nil is treated the same as an empty hash.
  #
  # The extras UPDATE is best-effort: a Mysql2::Error is reported with warn
  # and does not abort the scrape. Errors from DB.upsert and
  # enrich_after_upsert! are not rescued here.
  #
  # Prints "Upserted <ref> -> <address>" on success.
  def upsert_and_enrich!(table:, row:, extras: {})
    ref = row[:council_reference]
    address = row[:address]

    DB.upsert(table, row)
    enrich_after_upsert!(table: table, council_reference: ref, address: address)

    extras = extras.to_h # tolerate extras: nil
    unless extras.empty?
      stmt = nil
      begin
        esc = DB.client.escape(table)
        # Double any embedded backtick so a column name can never terminate
        # its own quoted identifier.
        set_clause = extras.keys.map { |col| "`#{col.to_s.gsub('`', '``')}` = ?" }.join(", ")
        stmt = DB.client.prepare(
          "UPDATE `#{esc}` SET #{set_clause} WHERE council_reference = ? AND address = ?"
        )
        stmt.execute(*extras.values, ref, address)
      rescue Mysql2::Error => e
        warn "[scraper_helpers] extras update skipped for #{ref}: #{e.message}"
      ensure
        # Release the prepared statement; otherwise every call leaves one
        # open for the lifetime of the connection.
        stmt&.close
      end
    end

    puts "Upserted #{ref} -> #{address}"
  end