# lib/enrich.rb # Per-row enrichment called right after each DB.upsert: # 1. Geocode (address_std, street/locality/state/postcode, lat/lng) via Google Maps # 2. Property lookup (property_id, title_reference) via LOOKUP_URL service # # Schema is owned by DB.ensure_table! (new tables) and lib/migrate.rb (existing tables). # Scrapers only need to call DB.ensure_table! — no separate ensure_extra_columns! required. require "json" require "net/http" require "uri" require_relative "./db" require_relative "./geocode" require_relative "./log" LOOKUP_URL = ENV["LOOKUP_URL"] # e.g. http://web/list_lookup.php def http_post_json(url, payload, timeout: 15) uri = URI.parse(url) http = Net::HTTP.new(uri.host, uri.port) http.use_ssl = (uri.scheme == "https") http.read_timeout = timeout http.open_timeout = timeout req = Net::HTTP::Post.new(uri.request_uri) req["Content-Type"] = "application/json" req.body = JSON.generate(payload) res = http.request(req) raise "HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess) JSON.parse(res.body) rescue JSON::ParserError {} end # Call this right after DB.upsert in each scraper: # enrich_after_upsert!(table: TABLE, council_reference: ref, address: addr) def enrich_after_upsert!(table:, council_reference:, address:) DB.validate_table_name!(table) esc = DB.client.escape(table) sel = DB.client.prepare( "SELECT id, address, address_std, lat, lng, property_id, title_reference " \ "FROM `#{esc}` WHERE council_reference = ? AND address = ? LIMIT 1" ) row = sel.execute(council_reference, address).first return unless row # 1) Geocode if missing lat/lng or normalised address if row["lat"].nil? || row["lng"].nil? || row["address_std"].to_s.strip.empty? begin geo = Geocode.format_au(row["address"]) Geocode.update_da_row!( table: table, council_reference: council_reference, orig_address: row["address"], geo: geo ) Log.debug "enrich", "geocoded #{table} #{council_reference}" row = sel.execute(council_reference, address).first rescue StandardError => e Log.warn "enrich", "geocode failed #{table} #{council_reference}: #{e.class} #{e.message}" end end # 2) Property lookup — only if coords exist and pid/title are missing need_pid = row["property_id"].to_s.strip.empty? need_title = row["title_reference"].to_s.strip.empty? if LOOKUP_URL && row["lat"] && row["lng"] && (need_pid || need_title) begin resp = http_post_json(LOOKUP_URL, { lat: row["lat"], lng: row["lng"] }) if resp["ok"] pid = (resp["pid"] || "").to_s title = (resp["title_id"] || "").to_s upd = DB.client.prepare( "UPDATE `#{esc}` SET " \ "property_id = COALESCE(?, property_id), " \ "title_reference = COALESCE(?, title_reference) " \ "WHERE council_reference = ? AND address = ?" ) upd.execute( pid.empty? ? nil : pid, title.empty? ? nil : title, council_reference, address ) Log.debug "enrich", "lookup ok #{table} #{council_reference} pid=#{pid} title=#{title}" else Log.warn "enrich", "lookup error #{table} #{council_reference}: #{resp["error"]}" end rescue StandardError => e Log.warn "enrich", "lookup failed #{table} #{council_reference}: #{e.class} #{e.message}" end end end