geocode.rb 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # lib/geocode.rb
  2. require "json"
  3. require "digest/sha1"
  4. require "cgi"
  5. require_relative "./db"
  6. require_relative "./http"
  7. module Geocode
  8. class Error < StandardError; end
  9. # Create the geo_cache table if it does not yet exist.
  10. # Guarded so the DDL statement only executes once per Ruby process —
  11. # Migrate.run! is the canonical place to create this table at startup,
  12. # but this guard keeps geocode.rb self-contained when run standalone.
  13. def self.ensure_cache!
  14. return if @cache_table_ready
  15. DB.client.query(<<~SQL)
  16. CREATE TABLE IF NOT EXISTS geo_cache (
  17. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  18. q_hash CHAR(40) NOT NULL UNIQUE,
  19. query_text VARCHAR(255) NOT NULL,
  20. formatted VARCHAR(255),
  21. street VARCHAR(255),
  22. locality VARCHAR(120),
  23. state VARCHAR(10),
  24. postcode VARCHAR(10),
  25. lat DECIMAL(10,7),
  26. lng DECIMAL(10,7),
  27. raw_json MEDIUMTEXT,
  28. created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  29. updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  30. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  31. SQL
  32. @cache_table_ready = true
  33. end
  34. # Add columns to any da_* table to store normalized fields
  35. def self.ensure_da_columns!(table)
  36. DB.validate_table_name!(table)
  37. esc = DB.client.escape(table)
  38. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS address_std VARCHAR(255) NULL")
  39. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS street VARCHAR(255) NULL")
  40. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS locality VARCHAR(120) NULL")
  41. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS state VARCHAR(10) NULL")
  42. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS postcode VARCHAR(10) NULL")
  43. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lat DECIMAL(10,7) NULL")
  44. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lng DECIMAL(10,7) NULL")
  45. rescue Mysql2::Error => e
  46. warn "[geocode] ensure columns skipped for #{table}: #{e.message}"
  47. end
  48. # Public helper to geocode and return a hash of normalized components
  49. # Returns nil on hard failures
  50. def self.format_au(raw_address, council_hint: "TAS")
  51. key = ENV["GOOGLE_MAPS_API_KEY"]
  52. raise Error, "GOOGLE_MAPS_API_KEY is not set" if key.to_s.strip.empty?
  53. q = clean_for_geocode(raw_address.to_s)
  54. # Make the query unambiguous for AU TAS
  55. q += ", TAS, Australia" unless q =~ /\bTAS\b/i || q =~ /Tasmania/i || q =~ /Australia/i
  56. # Cache key
  57. h = Digest::SHA1.hexdigest(q.downcase.strip)
  58. # Try cache
  59. ensure_cache!
  60. if (row = fetch_cache(h))
  61. return row_to_result_hash(row)
  62. end
  63. url = URI("https://maps.googleapis.com/maps/api/geocode/json")
  64. params = {
  65. address: q,
  66. key: key,
  67. components: "country:AU|administrative_area:TAS",
  68. region: "au"
  69. }
  70. url.query = URI.encode_www_form(params)
  71. body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
  72. data = begin
  73. JSON.parse(body)
  74. rescue JSON::ParserError
  75. {}
  76. end
  77. status = data["status"].to_s
  78. unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
  79. # Cache a negative to avoid hammering the API
  80. upsert_cache(h, q, nil)
  81. return nil
  82. end
  83. best = pick_best_result(data["results"])
  84. res = parse_result(best)
  85. # Store in cache
  86. upsert_cache(h, q, { formatted: res[:display], **res })
  87. res
  88. rescue Error => e
  89. warn "[geocode] #{e.message}"
  90. nil
  91. rescue Net::HTTPError, Net::ReadTimeout, Net::OpenTimeout, OpenSSL::SSL::SSLError,
  92. Errno::ECONNRESET, EOFError, Mysql2::Error => e
  93. warn "[geocode] network/db error for #{raw_address.inspect}: #{e.class} #{e.message}"
  94. nil
  95. end
  96. # Update a DA row with the normalized fields. Pass the same keys from format_au.
  97. # Columns are guaranteed to exist — either created by DB.ensure_table! (new tables)
  98. # or added by Migrate.run! migration v1 (existing tables).
  99. def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
  100. return unless geo
  101. DB.validate_table_name!(table)
  102. esc = DB.client.escape(table)
  103. sql = <<~SQL
  104. UPDATE `#{esc}`
  105. SET address_std = ?,
  106. street = ?,
  107. locality = ?,
  108. state = ?,
  109. postcode = ?,
  110. lat = ?,
  111. lng = ?
  112. WHERE council_reference = ? AND address = ?
  113. SQL
  114. stmt = DB.client.prepare(sql)
  115. stmt.execute(
  116. geo[:display],
  117. geo[:street],
  118. geo[:locality],
  119. geo[:state],
  120. geo[:postcode],
  121. geo[:lat],
  122. geo[:lng],
  123. council_reference,
  124. orig_address
  125. )
  126. rescue Mysql2::Error => e
  127. warn "[geocode] failed to update normalized address for #{table}/#{council_reference}: #{e.message}"
  128. end
  129. # Helpers
  130. def self.clean_for_geocode(addr)
  131. s = addr.dup
  132. # Drop CT references and brackets
  133. s = s.gsub(/\(CT[:\s]*[^\)]+\)/i, "")
  134. s = s.gsub(/\s+/, " ").strip
  135. s
  136. end
  137. def self.fetch_cache(hash)
  138. stmt = DB.client.prepare("SELECT formatted, street, locality, state, postcode, lat, lng FROM geo_cache WHERE q_hash = ? LIMIT 1")
  139. rs = stmt.execute(hash)
  140. rs.first
  141. end
  142. def self.upsert_cache(hash, query, payload)
  143. if payload
  144. stmt = DB.client.prepare(<<~SQL)
  145. INSERT INTO geo_cache (q_hash, query_text, formatted, street, locality, state, postcode, lat, lng, raw_json)
  146. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NULL)
  147. ON DUPLICATE KEY UPDATE
  148. query_text = VALUES(query_text),
  149. formatted = VALUES(formatted),
  150. street = VALUES(street),
  151. locality = VALUES(locality),
  152. state = VALUES(state),
  153. postcode = VALUES(postcode),
  154. lat = VALUES(lat),
  155. lng = VALUES(lng)
  156. SQL
  157. stmt.execute(hash, query, payload[:display], payload[:street], payload[:locality], payload[:state], payload[:postcode], payload[:lat], payload[:lng])
  158. else
  159. # Negative cache
  160. stmt = DB.client.prepare("INSERT IGNORE INTO geo_cache (q_hash, query_text) VALUES (?, ?)")
  161. stmt.execute(hash, query)
  162. end
  163. end
  164. def self.row_to_result_hash(row)
  165. {
  166. display: row["formatted"],
  167. street: row["street"],
  168. locality: row["locality"],
  169. state: row["state"],
  170. postcode: row["postcode"],
  171. lat: row["lat"] && row["lat"].to_f,
  172. lng: row["lng"] && row["lng"].to_f
  173. }
  174. end
  175. # Prefer a rooftop or high precision result inside TAS
  176. def self.pick_best_result(results)
  177. tas = results.select { |r|
  178. comps = r["address_components"] || []
  179. comps.any? { |c| c["types"].include?("administrative_area_level_1") && c["short_name"].to_s.upcase == "TAS" }
  180. }
  181. arr = tas.empty? ? results : tas
  182. arr.sort_by { |r|
  183. types = r["types"] || []
  184. loc_type = (r["geometry"] || {})["location_type"].to_s
  185. [
  186. loc_type == "ROOFTOP" ? 0 : 1,
  187. types.include?("street_address") ? 0 : 1,
  188. types.include?("premise") ? 0 : 1
  189. ]
  190. }.first
  191. end
  192. def self.parse_result(r)
  193. comps = r["address_components"] || []
  194. h = {}
  195. comps.each do |c|
  196. types = c["types"]
  197. if types.include?("street_number")
  198. h[:street_number] = c["long_name"]
  199. elsif types.include?("route")
  200. h[:route] = c["long_name"]
  201. elsif types.include?("locality")
  202. h[:locality] = c["long_name"]
  203. elsif types.include?("postal_town") && h[:locality].to_s.empty?
  204. h[:locality] = c["long_name"]
  205. elsif types.include?("administrative_area_level_1")
  206. h[:state] = c["short_name"]
  207. elsif types.include?("postal_code")
  208. h[:postcode] = c["long_name"]
  209. end
  210. end
  211. street = [h[:street_number], h[:route]].compact.join(" ").strip
  212. display = [street, h[:locality], h[:state], h[:postcode]].compact.reject(&:empty?).join(", ").gsub(/\s+,/, ",")
  213. loc = (r["geometry"] || {})["location"] || {}
  214. {
  215. display: display,
  216. street: street,
  217. locality: h[:locality].to_s,
  218. state: h[:state].to_s,
  219. postcode: h[:postcode].to_s,
  220. lat: loc["lat"],
  221. lng: loc["lng"]
  222. }
  223. end
  224. end