geocode.rb 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. # lib/geocode.rb
  2. require "json"
  3. require "digest/sha1"
  4. require "cgi"
  5. require_relative "./db"
  6. require_relative "./http"
  7. require_relative "./log"
  8. module Geocode
  9. class Error < StandardError; end
  10. # Create the geo_cache table if it does not yet exist.
  11. # Guarded so the DDL statement only executes once per Ruby process —
  12. # Migrate.run! is the canonical place to create this table at startup,
  13. # but this guard keeps geocode.rb self-contained when run standalone.
  14. def self.ensure_cache!
  15. return if @cache_table_ready
  16. DB.client.query(<<~SQL)
  17. CREATE TABLE IF NOT EXISTS geo_cache (
  18. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  19. q_hash CHAR(40) NOT NULL UNIQUE,
  20. query_text VARCHAR(255) NOT NULL,
  21. formatted VARCHAR(255),
  22. street VARCHAR(255),
  23. locality VARCHAR(120),
  24. state VARCHAR(10),
  25. postcode VARCHAR(10),
  26. lat DECIMAL(10,7),
  27. lng DECIMAL(10,7),
  28. raw_json MEDIUMTEXT,
  29. created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  30. updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  31. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  32. SQL
  33. @cache_table_ready = true
  34. end
  35. # Public helper to geocode and return a hash of normalized components
  36. # Returns nil on hard failures
  37. def self.format_au(raw_address, council_hint: "TAS")
  38. key = ENV["GOOGLE_MAPS_API_KEY"]
  39. raise Error, "GOOGLE_MAPS_API_KEY is not set" if key.to_s.strip.empty?
  40. q = clean_for_geocode(raw_address.to_s)
  41. # Make the query unambiguous for AU TAS
  42. q += ", TAS, Australia" unless q =~ /\bTAS\b/i || q =~ /Tasmania/i || q =~ /Australia/i
  43. # Cache key
  44. h = Digest::SHA1.hexdigest(q.downcase.strip)
  45. # Try cache
  46. ensure_cache!
  47. if (row = fetch_cache(h))
  48. return row_to_result_hash(row)
  49. end
  50. url = URI("https://maps.googleapis.com/maps/api/geocode/json")
  51. params = {
  52. address: q,
  53. key: key,
  54. components: "country:AU|administrative_area:TAS",
  55. region: "au"
  56. }
  57. url.query = URI.encode_www_form(params)
  58. body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
  59. data = begin
  60. JSON.parse(body)
  61. rescue JSON::ParserError
  62. {}
  63. end
  64. status = data["status"].to_s
  65. unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
  66. # Cache a negative to avoid hammering the API
  67. upsert_cache(h, q, nil)
  68. return nil
  69. end
  70. best = pick_best_result(data["results"])
  71. res = parse_result(best)
  72. # Store in cache
  73. upsert_cache(h, q, { formatted: res[:display], **res })
  74. res
  75. rescue Error => e
  76. Log.error "geocode", e.message
  77. nil
  78. rescue Net::HTTPError, Net::ReadTimeout, Net::OpenTimeout, OpenSSL::SSL::SSLError,
  79. Errno::ECONNRESET, EOFError, Mysql2::Error => e
  80. Log.warn "geocode", "network/db error for #{raw_address.inspect}: #{e.class} #{e.message}"
  81. nil
  82. end
  83. # Update a DA row with the normalized fields. Pass the same keys from format_au.
  84. # Columns are guaranteed to exist — either created by DB.ensure_table! (new tables)
  85. # or added by Migrate.run! migration v1 (existing tables).
  86. def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
  87. return unless geo
  88. DB.validate_table_name!(table)
  89. esc = DB.client.escape(table)
  90. sql = <<~SQL
  91. UPDATE `#{esc}`
  92. SET address_std = ?,
  93. street = ?,
  94. locality = ?,
  95. state = ?,
  96. postcode = ?,
  97. lat = ?,
  98. lng = ?
  99. WHERE council_reference = ? AND address = ?
  100. SQL
  101. stmt = DB.client.prepare(sql)
  102. stmt.execute(
  103. geo[:display],
  104. geo[:street],
  105. geo[:locality],
  106. geo[:state],
  107. geo[:postcode],
  108. geo[:lat],
  109. geo[:lng],
  110. council_reference,
  111. orig_address
  112. )
  113. rescue Mysql2::Error => e
  114. Log.warn "geocode", "failed to update normalized address for #{table}/#{council_reference}: #{e.message}"
  115. end
  116. # Helpers
  117. def self.clean_for_geocode(addr)
  118. s = addr.dup
  119. # Drop CT references and brackets
  120. s = s.gsub(/\(CT[:\s]*[^\)]+\)/i, "")
  121. s = s.gsub(/\s+/, " ").strip
  122. s
  123. end
  124. def self.fetch_cache(hash)
  125. stmt = DB.client.prepare("SELECT formatted, street, locality, state, postcode, lat, lng FROM geo_cache WHERE q_hash = ? LIMIT 1")
  126. rs = stmt.execute(hash)
  127. rs.first
  128. end
  129. def self.upsert_cache(hash, query, payload)
  130. if payload
  131. stmt = DB.client.prepare(<<~SQL)
  132. INSERT INTO geo_cache (q_hash, query_text, formatted, street, locality, state, postcode, lat, lng, raw_json)
  133. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NULL)
  134. ON DUPLICATE KEY UPDATE
  135. query_text = VALUES(query_text),
  136. formatted = VALUES(formatted),
  137. street = VALUES(street),
  138. locality = VALUES(locality),
  139. state = VALUES(state),
  140. postcode = VALUES(postcode),
  141. lat = VALUES(lat),
  142. lng = VALUES(lng)
  143. SQL
  144. stmt.execute(hash, query, payload[:display], payload[:street], payload[:locality], payload[:state], payload[:postcode], payload[:lat], payload[:lng])
  145. else
  146. # Negative cache
  147. stmt = DB.client.prepare("INSERT IGNORE INTO geo_cache (q_hash, query_text) VALUES (?, ?)")
  148. stmt.execute(hash, query)
  149. end
  150. end
  151. def self.row_to_result_hash(row)
  152. {
  153. display: row["formatted"],
  154. street: row["street"],
  155. locality: row["locality"],
  156. state: row["state"],
  157. postcode: row["postcode"],
  158. lat: row["lat"] && row["lat"].to_f,
  159. lng: row["lng"] && row["lng"].to_f
  160. }
  161. end
  162. # Prefer a rooftop or high precision result inside TAS
  163. def self.pick_best_result(results)
  164. tas = results.select { |r|
  165. comps = r["address_components"] || []
  166. comps.any? { |c| c["types"].include?("administrative_area_level_1") && c["short_name"].to_s.upcase == "TAS" }
  167. }
  168. arr = tas.empty? ? results : tas
  169. arr.sort_by { |r|
  170. types = r["types"] || []
  171. loc_type = (r["geometry"] || {})["location_type"].to_s
  172. [
  173. loc_type == "ROOFTOP" ? 0 : 1,
  174. types.include?("street_address") ? 0 : 1,
  175. types.include?("premise") ? 0 : 1
  176. ]
  177. }.first
  178. end
  179. def self.parse_result(r)
  180. comps = r["address_components"] || []
  181. h = {}
  182. comps.each do |c|
  183. types = c["types"]
  184. if types.include?("street_number")
  185. h[:street_number] = c["long_name"]
  186. elsif types.include?("route")
  187. h[:route] = c["long_name"]
  188. elsif types.include?("locality")
  189. h[:locality] = c["long_name"]
  190. elsif types.include?("postal_town") && h[:locality].to_s.empty?
  191. h[:locality] = c["long_name"]
  192. elsif types.include?("administrative_area_level_1")
  193. h[:state] = c["short_name"]
  194. elsif types.include?("postal_code")
  195. h[:postcode] = c["long_name"]
  196. end
  197. end
  198. street = [h[:street_number], h[:route]].compact.join(" ").strip
  199. display = [street, h[:locality], h[:state], h[:postcode]].compact.reject(&:empty?).join(", ").gsub(/\s+,/, ",")
  200. loc = (r["geometry"] || {})["location"] || {}
  201. {
  202. display: display,
  203. street: street,
  204. locality: h[:locality].to_s,
  205. state: h[:state].to_s,
  206. postcode: h[:postcode].to_s,
  207. lat: loc["lat"],
  208. lng: loc["lng"]
  209. }
  210. end
  211. end