scraper_helpers.rb 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. # lib/scraper_helpers.rb
  2. # Shared top-level helpers required by individual DA scrapers.
  3. #
  4. # Usage — at the top of a scraper, after other requires:
  5. #
  6. # require_relative "../lib/scraper_helpers"
  7. #
  8. # This file requires db, enrich, and log so scrapers don't need separate
  9. # require lines for those libs.
  10. require "uri"
  11. require_relative "./db"
  12. require_relative "./enrich"
  13. require_relative "./log"
  # Resolve a possibly-relative href against a base URL.
  #
  # base - absolute URL (String or URI) that href is resolved against
  # href - link target as scraped; surrounding whitespace is ignored when joining
  #
  # Returns the absolute URL as a String, "" when href is nil or blank, or
  # href.to_s unchanged when the pair cannot be joined (malformed URI,
  # relative base, or a base that is neither a String nor a URI).
  def abs_url(base, href)
    target = href.to_s.strip
    return "" if target.empty?

    URI.join(base, target).to_s
  rescue URI::Error, ArgumentError
    # URI::Error covers both InvalidURIError (unparseable input) and
    # BadURIError (both parts relative); ArgumentError is raised for a
    # base that is not a String/URI (e.g. nil).
    href.to_s
  end
  # Extract the whitespace-trimmed text of a node, with a fallback.
  #
  # node    - object responding to #text, or nil
  # default - value handed back when node is nil (or false)
  #
  # Returns node.text.strip when a node is given, otherwise default.
  def text_or(node, default = "")
    return default unless node

    node.text.strip
  end
  # Upsert a DA row, run enrichment, and optionally UPDATE extra columns.
  #
  # table  - validated DA table name (e.g. "da_glamorgan")
  # row    - hash passed to DB.upsert; must include :council_reference and :address
  # extras - optional hash of { column_name => value } pairs to UPDATE after upsert
  #          e.g. { document_url: "https://...", on_notice_to: Date.new(2025,6,1) }
  #          nil or an empty hash skips the UPDATE entirely.
  #
  # The extras UPDATE is best-effort: any StandardError raised while building,
  # preparing, executing or closing the statement is logged as a warning and
  # does not abort the call.
  #
  # Logs "upserted <ref> -> <address>" at info level once the steps have run.
  def upsert_and_enrich!(table:, row:, extras: {})
    reference = row[:council_reference]
    address = row[:address]

    DB.upsert(table, row)
    enrich_after_upsert!(
      table: table,
      council_reference: reference,
      address: address
    )

    if extras && !extras.empty?
      begin
        escaped_table = DB.client.escape(table)
        # Double any backtick inside a column name so it cannot terminate the
        # quoted identifier; values themselves are bound as parameters.
        assignments = extras.keys.map { |col| "`#{col.to_s.gsub('`', '``')}` = ?" }.join(", ")
        statement = DB.client.prepare(
          "UPDATE `#{escaped_table}` SET #{assignments} WHERE council_reference = ? AND address = ?"
        )
        begin
          statement.execute(*extras.values, reference, address)
        ensure
          # Release the prepared statement; otherwise one accumulates per row.
          statement.close if statement.respond_to?(:close)
        end
      rescue StandardError => e
        Log.warn "scraper", "extras update skipped for #{reference}: #{e.class} #{e.message}"
      end
    end

    Log.info "scraper", "upserted #{reference} -> #{address}"
  end