geocode.rb 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. # lib/geocode.rb
  2. require "json"
  3. require "digest/sha1"
  4. require "cgi"
  5. require_relative "./db"
  6. require_relative "./http"
  7. require_relative "./log"
  8. module Geocode
  9. class Error < StandardError; end
  10. # Create the geo_cache table if it does not yet exist.
  11. # Guarded so the DDL statement only executes once per Ruby process —
  12. # Migrate.run! is the canonical place to create this table at startup,
  13. # but this guard keeps geocode.rb self-contained when run standalone.
  14. def self.ensure_cache!
  15. return if @cache_table_ready
  16. DB.client.query(<<~SQL)
  17. CREATE TABLE IF NOT EXISTS geo_cache (
  18. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  19. q_hash CHAR(40) NOT NULL UNIQUE,
  20. query_text VARCHAR(255) NOT NULL,
  21. formatted VARCHAR(255),
  22. street VARCHAR(255),
  23. locality VARCHAR(120),
  24. state VARCHAR(10),
  25. postcode VARCHAR(10),
  26. lat DECIMAL(10,7),
  27. lng DECIMAL(10,7),
  28. raw_json MEDIUMTEXT,
  29. created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  30. updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  31. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  32. SQL
  33. @cache_table_ready = true
  34. end
  35. # Add columns to any da_* table to store normalized fields
  36. def self.ensure_da_columns!(table)
  37. DB.validate_table_name!(table)
  38. esc = DB.client.escape(table)
  39. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS address_std VARCHAR(255) NULL")
  40. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS street VARCHAR(255) NULL")
  41. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS locality VARCHAR(120) NULL")
  42. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS state VARCHAR(10) NULL")
  43. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS postcode VARCHAR(10) NULL")
  44. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lat DECIMAL(10,7) NULL")
  45. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lng DECIMAL(10,7) NULL")
  46. rescue Mysql2::Error => e
  47. Log.warn "geocode", "ensure columns skipped for #{table}: #{e.message}"
  48. end
  49. # Public helper to geocode and return a hash of normalized components
  50. # Returns nil on hard failures
  51. def self.format_au(raw_address, council_hint: "TAS")
  52. key = ENV["GOOGLE_MAPS_API_KEY"]
  53. raise Error, "GOOGLE_MAPS_API_KEY is not set" if key.to_s.strip.empty?
  54. q = clean_for_geocode(raw_address.to_s)
  55. # Make the query unambiguous for AU TAS
  56. q += ", TAS, Australia" unless q =~ /\bTAS\b/i || q =~ /Tasmania/i || q =~ /Australia/i
  57. # Cache key
  58. h = Digest::SHA1.hexdigest(q.downcase.strip)
  59. # Try cache
  60. ensure_cache!
  61. if (row = fetch_cache(h))
  62. return row_to_result_hash(row)
  63. end
  64. url = URI("https://maps.googleapis.com/maps/api/geocode/json")
  65. params = {
  66. address: q,
  67. key: key,
  68. components: "country:AU|administrative_area:TAS",
  69. region: "au"
  70. }
  71. url.query = URI.encode_www_form(params)
  72. body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
  73. data = begin
  74. JSON.parse(body)
  75. rescue JSON::ParserError
  76. {}
  77. end
  78. status = data["status"].to_s
  79. unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
  80. # Cache a negative to avoid hammering the API
  81. upsert_cache(h, q, nil)
  82. return nil
  83. end
  84. best = pick_best_result(data["results"])
  85. res = parse_result(best)
  86. # Store in cache
  87. upsert_cache(h, q, { formatted: res[:display], **res })
  88. res
  89. rescue Error => e
  90. Log.error "geocode", e.message
  91. nil
  92. rescue Net::HTTPError, Net::ReadTimeout, Net::OpenTimeout, OpenSSL::SSL::SSLError,
  93. Errno::ECONNRESET, EOFError, Mysql2::Error => e
  94. Log.warn "geocode", "network/db error for #{raw_address.inspect}: #{e.class} #{e.message}"
  95. nil
  96. end
  97. # Update a DA row with the normalized fields. Pass the same keys from format_au.
  98. # Columns are guaranteed to exist — either created by DB.ensure_table! (new tables)
  99. # or added by Migrate.run! migration v1 (existing tables).
  100. def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
  101. return unless geo
  102. DB.validate_table_name!(table)
  103. esc = DB.client.escape(table)
  104. sql = <<~SQL
  105. UPDATE `#{esc}`
  106. SET address_std = ?,
  107. street = ?,
  108. locality = ?,
  109. state = ?,
  110. postcode = ?,
  111. lat = ?,
  112. lng = ?
  113. WHERE council_reference = ? AND address = ?
  114. SQL
  115. stmt = DB.client.prepare(sql)
  116. stmt.execute(
  117. geo[:display],
  118. geo[:street],
  119. geo[:locality],
  120. geo[:state],
  121. geo[:postcode],
  122. geo[:lat],
  123. geo[:lng],
  124. council_reference,
  125. orig_address
  126. )
  127. rescue Mysql2::Error => e
  128. Log.warn "geocode", "failed to update normalized address for #{table}/#{council_reference}: #{e.message}"
  129. end
  130. # Helpers
  131. def self.clean_for_geocode(addr)
  132. s = addr.dup
  133. # Drop CT references and brackets
  134. s = s.gsub(/\(CT[:\s]*[^\)]+\)/i, "")
  135. s = s.gsub(/\s+/, " ").strip
  136. s
  137. end
  138. def self.fetch_cache(hash)
  139. stmt = DB.client.prepare("SELECT formatted, street, locality, state, postcode, lat, lng FROM geo_cache WHERE q_hash = ? LIMIT 1")
  140. rs = stmt.execute(hash)
  141. rs.first
  142. end
  143. def self.upsert_cache(hash, query, payload)
  144. if payload
  145. stmt = DB.client.prepare(<<~SQL)
  146. INSERT INTO geo_cache (q_hash, query_text, formatted, street, locality, state, postcode, lat, lng, raw_json)
  147. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NULL)
  148. ON DUPLICATE KEY UPDATE
  149. query_text = VALUES(query_text),
  150. formatted = VALUES(formatted),
  151. street = VALUES(street),
  152. locality = VALUES(locality),
  153. state = VALUES(state),
  154. postcode = VALUES(postcode),
  155. lat = VALUES(lat),
  156. lng = VALUES(lng)
  157. SQL
  158. stmt.execute(hash, query, payload[:display], payload[:street], payload[:locality], payload[:state], payload[:postcode], payload[:lat], payload[:lng])
  159. else
  160. # Negative cache
  161. stmt = DB.client.prepare("INSERT IGNORE INTO geo_cache (q_hash, query_text) VALUES (?, ?)")
  162. stmt.execute(hash, query)
  163. end
  164. end
  165. def self.row_to_result_hash(row)
  166. {
  167. display: row["formatted"],
  168. street: row["street"],
  169. locality: row["locality"],
  170. state: row["state"],
  171. postcode: row["postcode"],
  172. lat: row["lat"] && row["lat"].to_f,
  173. lng: row["lng"] && row["lng"].to_f
  174. }
  175. end
  176. # Prefer a rooftop or high precision result inside TAS
  177. def self.pick_best_result(results)
  178. tas = results.select { |r|
  179. comps = r["address_components"] || []
  180. comps.any? { |c| c["types"].include?("administrative_area_level_1") && c["short_name"].to_s.upcase == "TAS" }
  181. }
  182. arr = tas.empty? ? results : tas
  183. arr.sort_by { |r|
  184. types = r["types"] || []
  185. loc_type = (r["geometry"] || {})["location_type"].to_s
  186. [
  187. loc_type == "ROOFTOP" ? 0 : 1,
  188. types.include?("street_address") ? 0 : 1,
  189. types.include?("premise") ? 0 : 1
  190. ]
  191. }.first
  192. end
  193. def self.parse_result(r)
  194. comps = r["address_components"] || []
  195. h = {}
  196. comps.each do |c|
  197. types = c["types"]
  198. if types.include?("street_number")
  199. h[:street_number] = c["long_name"]
  200. elsif types.include?("route")
  201. h[:route] = c["long_name"]
  202. elsif types.include?("locality")
  203. h[:locality] = c["long_name"]
  204. elsif types.include?("postal_town") && h[:locality].to_s.empty?
  205. h[:locality] = c["long_name"]
  206. elsif types.include?("administrative_area_level_1")
  207. h[:state] = c["short_name"]
  208. elsif types.include?("postal_code")
  209. h[:postcode] = c["long_name"]
  210. end
  211. end
  212. street = [h[:street_number], h[:route]].compact.join(" ").strip
  213. display = [street, h[:locality], h[:state], h[:postcode]].compact.reject(&:empty?).join(", ").gsub(/\s+,/, ",")
  214. loc = (r["geometry"] || {})["location"] || {}
  215. {
  216. display: display,
  217. street: street,
  218. locality: h[:locality].to_s,
  219. state: h[:state].to_s,
  220. postcode: h[:postcode].to_s,
  221. lat: loc["lat"],
  222. lng: loc["lng"]
  223. }
  224. end
  225. end