| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- # lib/geocode.rb
- require "json"
- require "digest/sha1"
- require "cgi"
- require_relative "./db"
- require_relative "./http"
- module Geocode
- class Error < StandardError; end
- # Create the geo_cache table if it does not yet exist.
- # Guarded so the DDL statement only executes once per Ruby process —
- # Migrate.run! is the canonical place to create this table at startup,
- # but this guard keeps geocode.rb self-contained when run standalone.
- def self.ensure_cache!
- return if @cache_table_ready
- DB.client.query(<<~SQL)
- CREATE TABLE IF NOT EXISTS geo_cache (
- id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
- q_hash CHAR(40) NOT NULL UNIQUE,
- query_text VARCHAR(255) NOT NULL,
- formatted VARCHAR(255),
- street VARCHAR(255),
- locality VARCHAR(120),
- state VARCHAR(10),
- postcode VARCHAR(10),
- lat DECIMAL(10,7),
- lng DECIMAL(10,7),
- raw_json MEDIUMTEXT,
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
- SQL
- @cache_table_ready = true
- end
- # Add columns to any da_* table to store normalized fields
- def self.ensure_da_columns!(table)
- DB.validate_table_name!(table)
- esc = DB.client.escape(table)
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS address_std VARCHAR(255) NULL")
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS street VARCHAR(255) NULL")
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS locality VARCHAR(120) NULL")
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS state VARCHAR(10) NULL")
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS postcode VARCHAR(10) NULL")
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lat DECIMAL(10,7) NULL")
- DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lng DECIMAL(10,7) NULL")
- rescue Mysql2::Error => e
- warn "[geocode] ensure columns skipped for #{table}: #{e.message}"
- end
- # Public helper to geocode and return a hash of normalized components
- # Returns nil on hard failures
- def self.format_au(raw_address, council_hint: "TAS")
- key = ENV["GOOGLE_MAPS_API_KEY"]
- raise Error, "GOOGLE_MAPS_API_KEY is not set" if key.to_s.strip.empty?
- q = clean_for_geocode(raw_address.to_s)
- # Make the query unambiguous for AU TAS
- q += ", TAS, Australia" unless q =~ /\bTAS\b/i || q =~ /Tasmania/i || q =~ /Australia/i
- # Cache key
- h = Digest::SHA1.hexdigest(q.downcase.strip)
- # Try cache
- ensure_cache!
- if (row = fetch_cache(h))
- return row_to_result_hash(row)
- end
- url = URI("https://maps.googleapis.com/maps/api/geocode/json")
- params = {
- address: q,
- key: key,
- components: "country:AU|administrative_area:TAS",
- region: "au"
- }
- url.query = URI.encode_www_form(params)
- body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
- data = begin
- JSON.parse(body)
- rescue JSON::ParserError
- {}
- end
- status = data["status"].to_s
- unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
- # Cache a negative to avoid hammering the API
- upsert_cache(h, q, nil)
- return nil
- end
- best = pick_best_result(data["results"])
- res = parse_result(best)
- # Store in cache
- upsert_cache(h, q, { formatted: res[:display], **res })
- res
- rescue Error => e
- warn "[geocode] #{e.message}"
- nil
- rescue Net::HTTPError, Net::ReadTimeout, Net::OpenTimeout, OpenSSL::SSL::SSLError,
- Errno::ECONNRESET, EOFError, Mysql2::Error => e
- warn "[geocode] network/db error for #{raw_address.inspect}: #{e.class} #{e.message}"
- nil
- end
- # Update a DA row with the normalized fields. Pass the same keys from format_au.
- # Columns are guaranteed to exist — either created by DB.ensure_table! (new tables)
- # or added by Migrate.run! migration v1 (existing tables).
- def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
- return unless geo
- DB.validate_table_name!(table)
- esc = DB.client.escape(table)
- sql = <<~SQL
- UPDATE `#{esc}`
- SET address_std = ?,
- street = ?,
- locality = ?,
- state = ?,
- postcode = ?,
- lat = ?,
- lng = ?
- WHERE council_reference = ? AND address = ?
- SQL
- stmt = DB.client.prepare(sql)
- stmt.execute(
- geo[:display],
- geo[:street],
- geo[:locality],
- geo[:state],
- geo[:postcode],
- geo[:lat],
- geo[:lng],
- council_reference,
- orig_address
- )
- rescue Mysql2::Error => e
- warn "[geocode] failed to update normalized address for #{table}/#{council_reference}: #{e.message}"
- end
- # Helpers
- def self.clean_for_geocode(addr)
- s = addr.dup
- # Drop CT references and brackets
- s = s.gsub(/\(CT[:\s]*[^\)]+\)/i, "")
- s = s.gsub(/\s+/, " ").strip
- s
- end
- def self.fetch_cache(hash)
- stmt = DB.client.prepare("SELECT formatted, street, locality, state, postcode, lat, lng FROM geo_cache WHERE q_hash = ? LIMIT 1")
- rs = stmt.execute(hash)
- rs.first
- end
- def self.upsert_cache(hash, query, payload)
- if payload
- stmt = DB.client.prepare(<<~SQL)
- INSERT INTO geo_cache (q_hash, query_text, formatted, street, locality, state, postcode, lat, lng, raw_json)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NULL)
- ON DUPLICATE KEY UPDATE
- query_text = VALUES(query_text),
- formatted = VALUES(formatted),
- street = VALUES(street),
- locality = VALUES(locality),
- state = VALUES(state),
- postcode = VALUES(postcode),
- lat = VALUES(lat),
- lng = VALUES(lng)
- SQL
- stmt.execute(hash, query, payload[:display], payload[:street], payload[:locality], payload[:state], payload[:postcode], payload[:lat], payload[:lng])
- else
- # Negative cache
- stmt = DB.client.prepare("INSERT IGNORE INTO geo_cache (q_hash, query_text) VALUES (?, ?)")
- stmt.execute(hash, query)
- end
- end
- def self.row_to_result_hash(row)
- {
- display: row["formatted"],
- street: row["street"],
- locality: row["locality"],
- state: row["state"],
- postcode: row["postcode"],
- lat: row["lat"] && row["lat"].to_f,
- lng: row["lng"] && row["lng"].to_f
- }
- end
- # Prefer a rooftop or high precision result inside TAS
- def self.pick_best_result(results)
- tas = results.select { |r|
- comps = r["address_components"] || []
- comps.any? { |c| c["types"].include?("administrative_area_level_1") && c["short_name"].to_s.upcase == "TAS" }
- }
- arr = tas.empty? ? results : tas
- arr.sort_by { |r|
- types = r["types"] || []
- loc_type = (r["geometry"] || {})["location_type"].to_s
- [
- loc_type == "ROOFTOP" ? 0 : 1,
- types.include?("street_address") ? 0 : 1,
- types.include?("premise") ? 0 : 1
- ]
- }.first
- end
- def self.parse_result(r)
- comps = r["address_components"] || []
- h = {}
- comps.each do |c|
- types = c["types"]
- if types.include?("street_number")
- h[:street_number] = c["long_name"]
- elsif types.include?("route")
- h[:route] = c["long_name"]
- elsif types.include?("locality")
- h[:locality] = c["long_name"]
- elsif types.include?("postal_town") && h[:locality].to_s.empty?
- h[:locality] = c["long_name"]
- elsif types.include?("administrative_area_level_1")
- h[:state] = c["short_name"]
- elsif types.include?("postal_code")
- h[:postcode] = c["long_name"]
- end
- end
- street = [h[:street_number], h[:route]].compact.join(" ").strip
- display = [street, h[:locality], h[:state], h[:postcode]].compact.reject(&:empty?).join(", ").gsub(/\s+,/, ",")
- loc = (r["geometry"] || {})["location"] || {}
- {
- display: display,
- street: street,
- locality: h[:locality].to_s,
- state: h[:state].to_s,
- postcode: h[:postcode].to_s,
- lat: loc["lat"],
- lng: loc["lng"]
- }
- end
- end
|