migrate.rb 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. # lib/migrate.rb
  2. # Lightweight sequential schema migration runner.
  3. #
  4. # Tracks applied migrations in a `schema_migrations` table (single source of
  5. # truth). Every migration is idempotent — it uses ADD COLUMN IF NOT EXISTS and
  6. # CREATE TABLE IF NOT EXISTS so re-running a partially-applied version is safe.
  7. #
  8. # Usage — call once at container/process startup, before any scrapers run:
  9. #
  10. # require_relative "lib/migrate"
  11. # Migrate.run!
  12. #
  13. # Or run as a standalone script:
  14. #
  15. # ruby /app/lib/migrate.rb
  16. require_relative "./db"
  17. require_relative "./log"
  18. module Migrate
  19. # -------------------------------------------------------------------------
  20. # Migration definitions — add new ones at the END of this array only.
  21. # Never edit or reorder existing entries; create a new version instead.
  22. # -------------------------------------------------------------------------
  23. MIGRATIONS = [
  24. {
  25. version: 1,
  26. description: "Add enrichment and geocode columns to all existing da_* tables",
  27. up: -> {
  28. # These are the columns every da_* table should have. New tables already
  29. # get all of them via DB.ensure_table!; this migration adds them to tables
  30. # that were created before these columns were introduced.
  31. cols = {
  32. "on_notice_to" => "DATE NULL",
  33. "on_notice_to_raw" => "VARCHAR(80) NULL",
  34. "title_reference" => "TEXT NULL",
  35. "property_id" => "TEXT NULL",
  36. "area_sqm" => "DOUBLE NULL",
  37. "area_ha" => "DOUBLE NULL",
  38. "address_std" => "VARCHAR(255) NULL",
  39. "street" => "VARCHAR(120) NULL",
  40. "locality" => "VARCHAR(120) NULL",
  41. "state" => "VARCHAR(10) NULL",
  42. "postcode" => "VARCHAR(10) NULL",
  43. "document_url" => "TEXT NULL",
  44. "local_document_url" => "TEXT NULL",
  45. "lat" => "DECIMAL(10,7) NULL",
  46. "lng" => "DECIMAL(10,7) NULL"
  47. }
  48. Migrate.da_tables.each do |table|
  49. esc = DB.client.escape(table)
  50. cols.each do |col, defn|
  51. DB.client.query(
  52. "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `#{col}` #{defn}"
  53. )
  54. rescue Mysql2::Error => e
  55. Log.warn "migrate", "skipped #{table}.#{col}: #{e.message}"
  56. end
  57. end
  58. }
  59. },
  60. {
  61. version: 2,
  62. description: "Create geo_cache table",
  63. up: -> {
  64. DB.client.query(<<~SQL)
  65. CREATE TABLE IF NOT EXISTS geo_cache (
  66. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  67. q_hash CHAR(40) NOT NULL UNIQUE,
  68. query_text VARCHAR(255) NOT NULL,
  69. formatted VARCHAR(255),
  70. street VARCHAR(255),
  71. locality VARCHAR(120),
  72. state VARCHAR(10),
  73. postcode VARCHAR(10),
  74. lat DECIMAL(10,7),
  75. lng DECIMAL(10,7),
  76. raw_json MEDIUMTEXT,
  77. created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  78. updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  79. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  80. SQL
  81. }
  82. },
  83. {
  84. version: 3,
  85. description: "Add documents_json column to all existing da_* tables",
  86. up: -> {
  87. Migrate.da_tables.each do |table|
  88. esc = DB.client.escape(table)
  89. DB.client.query(
  90. "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `documents_json` MEDIUMTEXT NULL"
  91. )
  92. rescue Mysql2::Error => e
  93. Log.warn "migrate", "skipped #{table}.documents_json: #{e.message}"
  94. end
  95. }
  96. },
  97. {
  98. version: 4,
  99. description: "Add application detail columns (status, assigned_officer, group, category, application_valid, advertised_on, property_legal_description)",
  100. up: -> {
  101. cols = {
  102. "status" => "VARCHAR(100) NULL",
  103. "assigned_officer" => "VARCHAR(255) NULL",
  104. "`group`" => "VARCHAR(100) NULL",
  105. "category" => "VARCHAR(100) NULL",
  106. "application_valid" => "DATE NULL",
  107. "application_valid_raw" => "VARCHAR(80) NULL",
  108. "advertised_on" => "DATE NULL",
  109. "advertised_on_raw" => "VARCHAR(80) NULL",
  110. "property_legal_description" => "TEXT NULL"
  111. }
  112. Migrate.da_tables.each do |table|
  113. esc = DB.client.escape(table)
  114. cols.each do |col, defn|
  115. DB.client.query(
  116. "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS #{col} #{defn}"
  117. )
  118. rescue Mysql2::Error => e
  119. Log.warn "migrate", "skipped #{table}.#{col}: #{e.message}"
  120. end
  121. end
  122. }
  123. },
  124. {
  125. version: 5,
  126. description: "Rewrite local_document_url paths from /downloads/ to /files/",
  127. up: -> {
  128. Migrate.da_tables.each do |table|
  129. esc = DB.client.escape(table)
  130. DB.client.query(<<~SQL)
  131. UPDATE `#{esc}`
  132. SET local_document_url = REPLACE(local_document_url, '/downloads/', '/files/')
  133. WHERE local_document_url LIKE '/downloads/%'
  134. SQL
  135. affected = DB.client.affected_rows
  136. Log.info "migrate", "#{table}: updated #{affected} row(s)" if affected > 0
  137. rescue Mysql2::Error => e
  138. Log.warn "migrate", "skipped #{table}: #{e.message}"
  139. end
  140. }
  141. }
  142. ].freeze
  143. # -------------------------------------------------------------------------
  144. # Public API
  145. # -------------------------------------------------------------------------
  146. # Apply all pending migrations and record them in schema_migrations.
  147. # Raises on unexpected errors so the caller (run_all.sh) aborts early.
  148. def self.run!
  149. ensure_migrations_table!
  150. applied = applied_versions
  151. pending = MIGRATIONS.reject { |m| applied.include?(m[:version]) }
  152. if pending.empty?
  153. Log.info "migrate", "schema up to date (#{MIGRATIONS.size} migration(s) applied)"
  154. return
  155. end
  156. pending.each do |m|
  157. Log.info "migrate", "applying v#{m[:version]}: #{m[:description]}"
  158. m[:up].call
  159. stmt = DB.client.prepare(
  160. "INSERT INTO schema_migrations (version, description) VALUES (?, ?)"
  161. )
  162. stmt.execute(m[:version], m[:description])
  163. Log.info "migrate", "v#{m[:version]} done"
  164. end
  165. end
  166. # -------------------------------------------------------------------------
  167. # Helpers (private to module)
  168. # -------------------------------------------------------------------------
  169. def self.ensure_migrations_table!
  170. DB.client.query(<<~SQL)
  171. CREATE TABLE IF NOT EXISTS schema_migrations (
  172. version INT UNSIGNED NOT NULL,
  173. description VARCHAR(255) NOT NULL,
  174. applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  175. PRIMARY KEY (version)
  176. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  177. SQL
  178. end
  179. def self.applied_versions
  180. rs = DB.client.query("SELECT version FROM schema_migrations ORDER BY version")
  181. rs.map { |r| r["version"] }.to_set
  182. end
  183. # All da_* tables currently in the database.
  184. def self.da_tables
  185. # The backslash escapes _ (a LIKE wildcard) so only genuine da_ prefixes match.
  186. rs = DB.client.query("SHOW TABLES LIKE 'da\\_%'")
  187. rs.map { |r| r.values.first }
  188. end
  189. private_class_method :ensure_migrations_table!, :applied_versions
  190. end
  191. # Allow running as a standalone script: ruby /app/lib/migrate.rb
  192. if __FILE__ == $0
  193. Migrate.run!
  194. end