# lib/migrate.rb # Lightweight sequential schema migration runner. # # Tracks applied migrations in a `schema_migrations` table (single source of # truth). Every migration is idempotent — it uses ADD COLUMN IF NOT EXISTS and # CREATE TABLE IF NOT EXISTS so re-running a partially-applied version is safe. # # Usage — call once at container/process startup, before any scrapers run: # # require_relative "lib/migrate" # Migrate.run! # # Or run as a standalone script: # # ruby /app/lib/migrate.rb require_relative "./db" require_relative "./log" module Migrate # ------------------------------------------------------------------------- # Migration definitions — add new ones at the END of this array only. # Never edit or reorder existing entries; create a new version instead. # ------------------------------------------------------------------------- MIGRATIONS = [ { version: 1, description: "Add enrichment and geocode columns to all existing da_* tables", up: -> { # These are the columns every da_* table should have. New tables already # get all of them via DB.ensure_table!; this migration adds them to tables # that were created before these columns were introduced. cols = { "on_notice_to" => "DATE NULL", "on_notice_to_raw" => "VARCHAR(80) NULL", "title_reference" => "TEXT NULL", "property_id" => "TEXT NULL", "area_sqm" => "DOUBLE NULL", "area_ha" => "DOUBLE NULL", "address_std" => "VARCHAR(255) NULL", "street" => "VARCHAR(120) NULL", "locality" => "VARCHAR(120) NULL", "state" => "VARCHAR(10) NULL", "postcode" => "VARCHAR(10) NULL", "document_url" => "TEXT NULL", "local_document_url" => "TEXT NULL", "lat" => "DECIMAL(10,7) NULL", "lng" => "DECIMAL(10,7) NULL" } Migrate.da_tables.each do |table| esc = DB.client.escape(table) cols.each do |col, defn| DB.client.query( "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `#{col}` #{defn}" ) rescue Mysql2::Error => e Log.warn "migrate", "skipped #{table}.#{col}: #{e.message}" end end } }, { version: 2, description: "Create geo_cache table", up: -> { DB.client.query(<<~SQL) CREATE TABLE IF NOT EXISTS geo_cache ( id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, q_hash CHAR(40) NOT NULL UNIQUE, query_text VARCHAR(255) NOT NULL, formatted VARCHAR(255), street VARCHAR(255), locality VARCHAR(120), state VARCHAR(10), postcode VARCHAR(10), lat DECIMAL(10,7), lng DECIMAL(10,7), raw_json MEDIUMTEXT, created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 SQL } }, { version: 3, description: "Add documents_json column to all existing da_* tables", up: -> { Migrate.da_tables.each do |table| esc = DB.client.escape(table) DB.client.query( "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `documents_json` MEDIUMTEXT NULL" ) rescue Mysql2::Error => e Log.warn "migrate", "skipped #{table}.documents_json: #{e.message}" end } }, { version: 4, description: "Add application detail columns (status, assigned_officer, group, category, application_valid, advertised_on, property_legal_description)", up: -> { cols = { "status" => "VARCHAR(100) NULL", "assigned_officer" => "VARCHAR(255) NULL", "`group`" => "VARCHAR(100) NULL", "category" => "VARCHAR(100) NULL", "application_valid" => "DATE NULL", "application_valid_raw" => "VARCHAR(80) NULL", "advertised_on" => "DATE NULL", "advertised_on_raw" => "VARCHAR(80) NULL", "property_legal_description" => "TEXT NULL" } Migrate.da_tables.each do |table| esc = DB.client.escape(table) cols.each do |col, defn| DB.client.query( "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS #{col} #{defn}" ) rescue Mysql2::Error => e Log.warn "migrate", "skipped #{table}.#{col}: #{e.message}" end end } }, { version: 5, description: "Rewrite local_document_url paths from /downloads/ to /files/", up: -> { Migrate.da_tables.each do |table| esc = DB.client.escape(table) DB.client.query(<<~SQL) UPDATE `#{esc}` SET local_document_url = REPLACE(local_document_url, '/downloads/', '/files/') WHERE local_document_url LIKE '/downloads/%' SQL affected = DB.client.affected_rows Log.info "migrate", "#{table}: updated #{affected} row(s)" if affected > 0 rescue Mysql2::Error => e Log.warn "migrate", "skipped #{table}: #{e.message}" end } }, { version: 6, description: "Add LLM classification columns (application_type, application_type_raw, application_type_at)", up: -> { cols = { "application_type" => "VARCHAR(60) NULL", "application_type_raw" => "TEXT NULL", "application_type_at" => "DATETIME NULL" } Migrate.da_tables.each do |table| esc = DB.client.escape(table) cols.each do |col, defn| DB.client.query( "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `#{col}` #{defn}" ) rescue Mysql2::Error => e Log.warn "migrate", "skipped #{table}.#{col}: #{e.message}" end end } } ].freeze # ------------------------------------------------------------------------- # Public API # ------------------------------------------------------------------------- # Apply all pending migrations and record them in schema_migrations. # Raises on unexpected errors so the caller (run_all.sh) aborts early. def self.run! ensure_migrations_table! applied = applied_versions pending = MIGRATIONS.reject { |m| applied.include?(m[:version]) } if pending.empty? Log.info "migrate", "schema up to date (#{MIGRATIONS.size} migration(s) applied)" return end pending.each do |m| Log.info "migrate", "applying v#{m[:version]}: #{m[:description]}" m[:up].call stmt = DB.client.prepare( "INSERT INTO schema_migrations (version, description) VALUES (?, ?)" ) stmt.execute(m[:version], m[:description]) Log.info "migrate", "v#{m[:version]} done" end end # ------------------------------------------------------------------------- # Helpers (private to module) # ------------------------------------------------------------------------- def self.ensure_migrations_table! DB.client.query(<<~SQL) CREATE TABLE IF NOT EXISTS schema_migrations ( version INT UNSIGNED NOT NULL, description VARCHAR(255) NOT NULL, applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (version) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 SQL end def self.applied_versions rs = DB.client.query("SELECT version FROM schema_migrations ORDER BY version") rs.map { |r| r["version"] }.to_set end # All da_* tables currently in the database. def self.da_tables # The backslash escapes _ (a LIKE wildcard) so only genuine da_ prefixes match. rs = DB.client.query("SHOW TABLES LIKE 'da\\_%'") rs.map { |r| r.values.first } end private_class_method :ensure_migrations_table!, :applied_versions end # Allow running as a standalone script: ruby /app/lib/migrate.rb if __FILE__ == $0 Migrate.run! end