geocode.rb 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. # lib/geocode.rb
  2. require "json"
  3. require "digest/sha1"
  4. require "cgi"
  5. require_relative "./db"
  6. require_relative "./http"
  7. module Geocode
  8. class Error < StandardError; end
  9. # Create a simple cache table once
  10. def self.ensure_cache!
  11. DB.client.query(<<~SQL)
  12. CREATE TABLE IF NOT EXISTS geo_cache (
  13. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  14. q_hash CHAR(40) NOT NULL UNIQUE,
  15. query_text VARCHAR(255) NOT NULL,
  16. formatted VARCHAR(255),
  17. street VARCHAR(255),
  18. locality VARCHAR(120),
  19. state VARCHAR(10),
  20. postcode VARCHAR(10),
  21. lat DECIMAL(10,7),
  22. lng DECIMAL(10,7),
  23. raw_json MEDIUMTEXT,
  24. created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  25. updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  26. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  27. SQL
  28. end
  29. # Add columns to any da_* table to store normalized fields
  30. def self.ensure_da_columns!(table)
  31. DB.validate_table_name!(table)
  32. esc = DB.client.escape(table)
  33. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS address_std VARCHAR(255) NULL")
  34. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS street VARCHAR(255) NULL")
  35. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS locality VARCHAR(120) NULL")
  36. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS state VARCHAR(10) NULL")
  37. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS postcode VARCHAR(10) NULL")
  38. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lat DECIMAL(10,7) NULL")
  39. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lng DECIMAL(10,7) NULL")
  40. rescue Mysql2::Error => e
  41. warn "[geocode] ensure columns skipped for #{table}: #{e.message}"
  42. end
  43. # Public helper to geocode and return a hash of normalized components
  44. # Returns nil on hard failures
  45. def self.format_au(raw_address, council_hint: "TAS")
  46. key = ENV["GOOGLE_MAPS_API_KEY"]
  47. raise Error, "GOOGLE_MAPS_API_KEY is not set" if key.to_s.strip.empty?
  48. q = clean_for_geocode(raw_address.to_s)
  49. # Make the query unambiguous for AU TAS
  50. q += ", TAS, Australia" unless q =~ /\bTAS\b/i || q =~ /Tasmania/i || q =~ /Australia/i
  51. # Cache key
  52. h = Digest::SHA1.hexdigest(q.downcase.strip)
  53. # Try cache
  54. ensure_cache!
  55. if (row = fetch_cache(h))
  56. return row_to_result_hash(row)
  57. end
  58. url = URI("https://maps.googleapis.com/maps/api/geocode/json")
  59. params = {
  60. address: q,
  61. key: key,
  62. components: "country:AU|administrative_area:TAS",
  63. region: "au"
  64. }
  65. url.query = URI.encode_www_form(params)
  66. body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
  67. data = begin
  68. JSON.parse(body)
  69. rescue JSON::ParserError
  70. {}
  71. end
  72. status = data["status"].to_s
  73. unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
  74. # Cache a negative to avoid hammering the API
  75. upsert_cache(h, q, nil)
  76. return nil
  77. end
  78. best = pick_best_result(data["results"])
  79. res = parse_result(best)
  80. # Store in cache
  81. upsert_cache(h, q, { formatted: res[:display], **res })
  82. res
  83. rescue Error => e
  84. warn "[geocode] #{e.message}"
  85. nil
  86. rescue Net::HTTPError, Net::ReadTimeout, Net::OpenTimeout, OpenSSL::SSL::SSLError,
  87. Errno::ECONNRESET, EOFError, Mysql2::Error => e
  88. warn "[geocode] network/db error for #{raw_address.inspect}: #{e.class} #{e.message}"
  89. nil
  90. end
  91. # Update a DA row with the normalized fields. Pass the same keys from format_au.
  92. def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
  93. return unless geo
  94. DB.validate_table_name!(table)
  95. ensure_da_columns!(table)
  96. esc = DB.client.escape(table)
  97. sql = <<~SQL
  98. UPDATE `#{esc}`
  99. SET address_std = ?,
  100. street = ?,
  101. locality = ?,
  102. state = ?,
  103. postcode = ?,
  104. lat = ?,
  105. lng = ?
  106. WHERE council_reference = ? AND address = ?
  107. SQL
  108. stmt = DB.client.prepare(sql)
  109. stmt.execute(
  110. geo[:display],
  111. geo[:street],
  112. geo[:locality],
  113. geo[:state],
  114. geo[:postcode],
  115. geo[:lat],
  116. geo[:lng],
  117. council_reference,
  118. orig_address
  119. )
  120. rescue Mysql2::Error => e
  121. warn "[geocode] failed to update normalized address for #{table}/#{council_reference}: #{e.message}"
  122. end
  123. # Helpers
  124. def self.clean_for_geocode(addr)
  125. s = addr.dup
  126. # Drop CT references and brackets
  127. s = s.gsub(/\(CT[:\s]*[^\)]+\)/i, "")
  128. s = s.gsub(/\s+/, " ").strip
  129. s
  130. end
  131. def self.fetch_cache(hash)
  132. stmt = DB.client.prepare("SELECT formatted, street, locality, state, postcode, lat, lng FROM geo_cache WHERE q_hash = ? LIMIT 1")
  133. rs = stmt.execute(hash)
  134. rs.first
  135. end
  136. def self.upsert_cache(hash, query, payload)
  137. if payload
  138. stmt = DB.client.prepare(<<~SQL)
  139. INSERT INTO geo_cache (q_hash, query_text, formatted, street, locality, state, postcode, lat, lng, raw_json)
  140. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NULL)
  141. ON DUPLICATE KEY UPDATE
  142. query_text = VALUES(query_text),
  143. formatted = VALUES(formatted),
  144. street = VALUES(street),
  145. locality = VALUES(locality),
  146. state = VALUES(state),
  147. postcode = VALUES(postcode),
  148. lat = VALUES(lat),
  149. lng = VALUES(lng)
  150. SQL
  151. stmt.execute(hash, query, payload[:display], payload[:street], payload[:locality], payload[:state], payload[:postcode], payload[:lat], payload[:lng])
  152. else
  153. # Negative cache
  154. stmt = DB.client.prepare("INSERT IGNORE INTO geo_cache (q_hash, query_text) VALUES (?, ?)")
  155. stmt.execute(hash, query)
  156. end
  157. end
  158. def self.row_to_result_hash(row)
  159. {
  160. display: row["formatted"],
  161. street: row["street"],
  162. locality: row["locality"],
  163. state: row["state"],
  164. postcode: row["postcode"],
  165. lat: row["lat"] && row["lat"].to_f,
  166. lng: row["lng"] && row["lng"].to_f
  167. }
  168. end
  169. # Prefer a rooftop or high precision result inside TAS
  170. def self.pick_best_result(results)
  171. tas = results.select { |r|
  172. comps = r["address_components"] || []
  173. comps.any? { |c| c["types"].include?("administrative_area_level_1") && c["short_name"].to_s.upcase == "TAS" }
  174. }
  175. arr = tas.empty? ? results : tas
  176. arr.sort_by { |r|
  177. types = r["types"] || []
  178. loc_type = (r["geometry"] || {})["location_type"].to_s
  179. [
  180. loc_type == "ROOFTOP" ? 0 : 1,
  181. types.include?("street_address") ? 0 : 1,
  182. types.include?("premise") ? 0 : 1
  183. ]
  184. }.first
  185. end
  186. def self.parse_result(r)
  187. comps = r["address_components"] || []
  188. h = {}
  189. comps.each do |c|
  190. types = c["types"]
  191. if types.include?("street_number")
  192. h[:street_number] = c["long_name"]
  193. elsif types.include?("route")
  194. h[:route] = c["long_name"]
  195. elsif types.include?("locality")
  196. h[:locality] = c["long_name"]
  197. elsif types.include?("postal_town") && h[:locality].to_s.empty?
  198. h[:locality] = c["long_name"]
  199. elsif types.include?("administrative_area_level_1")
  200. h[:state] = c["short_name"]
  201. elsif types.include?("postal_code")
  202. h[:postcode] = c["long_name"]
  203. end
  204. end
  205. street = [h[:street_number], h[:route]].compact.join(" ").strip
  206. display = [street, h[:locality], h[:state], h[:postcode]].compact.reject(&:empty?).join(", ").gsub(/\s+,/, ",")
  207. loc = (r["geometry"] || {})["location"] || {}
  208. {
  209. display: display,
  210. street: street,
  211. locality: h[:locality].to_s,
  212. state: h[:state].to_s,
  213. postcode: h[:postcode].to_s,
  214. lat: loc["lat"],
  215. lng: loc["lng"]
  216. }
  217. end
  218. end