geocode.rb 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. # lib/geocode.rb
  2. require "json"
  3. require "digest/sha1"
  4. require "cgi"
  5. require_relative "./db"
  6. require_relative "./http"
  7. module Geocode
  8. class Error < StandardError; end
  9. # Create a simple cache table once
  10. def self.ensure_cache!
  11. DB.client.query(<<~SQL)
  12. CREATE TABLE IF NOT EXISTS geo_cache (
  13. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  14. q_hash CHAR(40) NOT NULL UNIQUE,
  15. query_text VARCHAR(255) NOT NULL,
  16. formatted VARCHAR(255),
  17. street VARCHAR(255),
  18. locality VARCHAR(120),
  19. state VARCHAR(10),
  20. postcode VARCHAR(10),
  21. lat DECIMAL(10,7),
  22. lng DECIMAL(10,7),
  23. raw_json MEDIUMTEXT,
  24. created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  25. updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  26. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  27. SQL
  28. end
  29. # Add columns to any da_* table to store normalized fields
  30. def self.ensure_da_columns!(table)
  31. esc = DB.client.escape(table)
  32. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS address_std VARCHAR(255) NULL")
  33. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS street VARCHAR(255) NULL")
  34. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS locality VARCHAR(120) NULL")
  35. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS state VARCHAR(10) NULL")
  36. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS postcode VARCHAR(10) NULL")
  37. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lat DECIMAL(10,7) NULL")
  38. DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS lng DECIMAL(10,7) NULL")
  39. rescue Mysql2::Error => e
  40. warn "[geocode] ensure columns skipped for #{table}: #{e.message}"
  41. end
  42. # Public helper to geocode and return a hash of normalized components
  43. # Returns nil on hard failures
  44. def self.format_au(raw_address, council_hint: "TAS")
  45. key = ENV["GOOGLE_MAPS_API_KEY"]
  46. raise Error, "GOOGLE_MAPS_API_KEY is not set" if key.to_s.strip.empty?
  47. q = clean_for_geocode(raw_address.to_s)
  48. # Make the query unambiguous for AU TAS
  49. q += ", TAS, Australia" unless q =~ /\bTAS\b/i || q =~ /Tasmania/i || q =~ /Australia/i
  50. # Cache key
  51. h = Digest::SHA1.hexdigest(q.downcase.strip)
  52. # Try cache
  53. ensure_cache!
  54. if (row = fetch_cache(h))
  55. return row_to_result_hash(row)
  56. end
  57. url = URI("https://maps.googleapis.com/maps/api/geocode/json")
  58. params = {
  59. address: q,
  60. key: key,
  61. components: "country:AU|administrative_area:TAS",
  62. region: "au"
  63. }
  64. url.query = URI.encode_www_form(params)
  65. body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
  66. data = JSON.parse(body)
  67. rescue JSON::ParserError
  68. data = {}
  69. status = data["status"].to_s
  70. unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
  71. # Cache a negative to avoid hammering the API
  72. upsert_cache(h, q, nil)
  73. return nil
  74. end
  75. best = pick_best_result(data["results"])
  76. res = parse_result(best)
  77. # Store in cache
  78. upsert_cache(h, q, { formatted: res[:display], **res })
  79. res
  80. rescue Error => e
  81. warn "[geocode] #{e.message}"
  82. nil
  83. rescue Net::HTTPError, Net::ReadTimeout, Net::OpenTimeout, OpenSSL::SSL::SSLError,
  84. Errno::ECONNRESET, EOFError, Mysql2::Error => e
  85. warn "[geocode] network/db error for #{raw_address.inspect}: #{e.class} #{e.message}"
  86. nil
  87. end
  88. # Update a DA row with the normalized fields. Pass the same keys from format_au.
  89. def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
  90. return unless geo
  91. ensure_da_columns!(table)
  92. esc = DB.client.escape(table)
  93. sql = <<~SQL
  94. UPDATE `#{esc}`
  95. SET address_std = ?,
  96. street = ?,
  97. locality = ?,
  98. state = ?,
  99. postcode = ?,
  100. lat = ?,
  101. lng = ?
  102. WHERE council_reference = ? AND address = ?
  103. SQL
  104. stmt = DB.client.prepare(sql)
  105. stmt.execute(
  106. geo[:display],
  107. geo[:street],
  108. geo[:locality],
  109. geo[:state],
  110. geo[:postcode],
  111. geo[:lat],
  112. geo[:lng],
  113. council_reference,
  114. orig_address
  115. )
  116. rescue Mysql2::Error => e
  117. warn "[geocode] failed to update normalized address for #{table}/#{council_reference}: #{e.message}"
  118. end
  119. # Helpers
  120. def self.clean_for_geocode(addr)
  121. s = addr.dup
  122. # Drop CT references and brackets
  123. s = s.gsub(/\(CT[:\s]*[^\)]+\)/i, "")
  124. s = s.gsub(/\s+/, " ").strip
  125. s
  126. end
  127. def self.fetch_cache(hash)
  128. stmt = DB.client.prepare("SELECT formatted, street, locality, state, postcode, lat, lng FROM geo_cache WHERE q_hash = ? LIMIT 1")
  129. rs = stmt.execute(hash)
  130. rs.first
  131. end
  132. def self.upsert_cache(hash, query, payload)
  133. if payload
  134. stmt = DB.client.prepare(<<~SQL)
  135. INSERT INTO geo_cache (q_hash, query_text, formatted, street, locality, state, postcode, lat, lng, raw_json)
  136. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NULL)
  137. ON DUPLICATE KEY UPDATE
  138. query_text = VALUES(query_text),
  139. formatted = VALUES(formatted),
  140. street = VALUES(street),
  141. locality = VALUES(locality),
  142. state = VALUES(state),
  143. postcode = VALUES(postcode),
  144. lat = VALUES(lat),
  145. lng = VALUES(lng)
  146. SQL
  147. stmt.execute(hash, query, payload[:display], payload[:street], payload[:locality], payload[:state], payload[:postcode], payload[:lat], payload[:lng])
  148. else
  149. # Negative cache
  150. stmt = DB.client.prepare("INSERT IGNORE INTO geo_cache (q_hash, query_text) VALUES (?, ?)")
  151. stmt.execute(hash, query)
  152. end
  153. end
  154. def self.row_to_result_hash(row)
  155. {
  156. display: row["formatted"],
  157. street: row["street"],
  158. locality: row["locality"],
  159. state: row["state"],
  160. postcode: row["postcode"],
  161. lat: row["lat"] && row["lat"].to_f,
  162. lng: row["lng"] && row["lng"].to_f
  163. }
  164. end
  165. # Prefer a rooftop or high precision result inside TAS
  166. def self.pick_best_result(results)
  167. tas = results.select { |r|
  168. comps = r["address_components"] || []
  169. comps.any? { |c| c["types"].include?("administrative_area_level_1") && c["short_name"].to_s.upcase == "TAS" }
  170. }
  171. arr = tas.empty? ? results : tas
  172. arr.sort_by { |r|
  173. types = r["types"] || []
  174. loc_type = (r["geometry"] || {})["location_type"].to_s
  175. [
  176. loc_type == "ROOFTOP" ? 0 : 1,
  177. types.include?("street_address") ? 0 : 1,
  178. types.include?("premise") ? 0 : 1
  179. ]
  180. }.first
  181. end
  182. def self.parse_result(r)
  183. comps = r["address_components"] || []
  184. h = {}
  185. comps.each do |c|
  186. types = c["types"]
  187. if types.include?("street_number")
  188. h[:street_number] = c["long_name"]
  189. elsif types.include?("route")
  190. h[:route] = c["long_name"]
  191. elsif types.include?("locality")
  192. h[:locality] = c["long_name"]
  193. elsif types.include?("postal_town") && h[:locality].to_s.empty?
  194. h[:locality] = c["long_name"]
  195. elsif types.include?("administrative_area_level_1")
  196. h[:state] = c["short_name"]
  197. elsif types.include?("postal_code")
  198. h[:postcode] = c["long_name"]
  199. end
  200. end
  201. street = [h[:street_number], h[:route]].compact.join(" ").strip
  202. display = [street, h[:locality], h[:state], h[:postcode]].compact.reject(&:empty?).join(", ").gsub(/\s+,/, ",")
  203. loc = (r["geometry"] || {})["location"] || {}
  204. {
  205. display: display,
  206. street: street,
  207. locality: h[:locality].to_s,
  208. state: h[:state].to_s,
  209. postcode: h[:postcode].to_s,
  210. lat: loc["lat"],
  211. lng: loc["lng"]
  212. }
  213. end
  214. end