Ver Fonte

Schema Migrations

Benjamin Harris há 2 meses atrás
pai
commit
0e4e035
5 ficheiros alterados com 163 adições e 6 exclusões
  1. 1 2
      lib/enrich.rb
  2. 8 2
      lib/geocode.rb
  3. 146 0
      lib/migrate.rb
  4. 3 0
      run_all.sh
  5. 5 2
      scrapers/enrich.rb

+ 1 - 2
lib/enrich.rb

@@ -72,8 +72,7 @@ end
 # Call this right after DB.upsert in each scraper
 #   enrich_after_upsert!(table: TABLE, council_reference: council_reference, address: address)
 def enrich_after_upsert!(table:, council_reference:, address:)
-  ensure_extra_columns!(table)
-
+  DB.validate_table_name!(table)
   esc = DB.client.escape(table)
   sel = DB.client.prepare("SELECT id, address, address_std, lat, lng, property_id, title_reference FROM `#{esc}` WHERE council_reference = ? AND address = ? LIMIT 1")
   row = sel.execute(council_reference, address).first

+ 8 - 2
lib/geocode.rb

@@ -8,8 +8,12 @@ require_relative "./http"
 module Geocode
   class Error < StandardError; end
 
-  # Create a simple cache table once
+  # Create the geo_cache table if it does not yet exist.
+  # Guarded so the DDL statement only executes once per Ruby process —
+  # Migrate.run! is the canonical place to create this table at startup,
+  # but this guard keeps geocode.rb self-contained when run standalone.
   def self.ensure_cache!
+    return if @cache_table_ready
     DB.client.query(<<~SQL)
       CREATE TABLE IF NOT EXISTS geo_cache (
         id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
@@ -27,6 +31,7 @@ module Geocode
         updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
       ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
     SQL
+    @cache_table_ready = true
   end
 
   # Add columns to any da_* table to store normalized fields
@@ -104,10 +109,11 @@ module Geocode
   end
 
   # Update a DA row with the normalized fields. Pass the same keys from format_au.
+  # Columns are guaranteed to exist — either created by DB.ensure_table! (new tables)
+  # or added by Migrate.run! migration v1 (existing tables).
   def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
     return unless geo
     DB.validate_table_name!(table)
-    ensure_da_columns!(table)
 
     esc = DB.client.escape(table)
     sql = <<~SQL

+ 146 - 0
lib/migrate.rb

@@ -0,0 +1,146 @@
+# lib/migrate.rb
+# Lightweight sequential schema migration runner.
+#
+# Tracks applied migrations in a `schema_migrations` table (single source of
+# truth). Every migration is idempotent — it uses ADD COLUMN IF NOT EXISTS and
+# CREATE TABLE IF NOT EXISTS so re-running a partially-applied version is safe.
+#
+# Usage — call once at container/process startup, before any scrapers run:
+#
+#   require_relative "lib/migrate"
+#   Migrate.run!
+#
+# Or run as a standalone script:
+#
+#   ruby /app/lib/migrate.rb
+
+require_relative "./db"
+
+module Migrate
+  # -------------------------------------------------------------------------
+  # Migration definitions — add new ones at the END of this array only.
+  # Never edit or reorder existing entries; create a new version instead.
+  # -------------------------------------------------------------------------
+  MIGRATIONS = [
+    {
+      version: 1,
+      description: "Add enrichment and geocode columns to all existing da_* tables",
+      up: -> {
+        # These are the columns every da_* table should have. New tables already
+        # get all of them via DB.ensure_table!; this migration adds them to tables
+        # that were created before these columns were introduced.
+        cols = {
+          "on_notice_to"       => "DATE NULL",
+          "on_notice_to_raw"   => "VARCHAR(80) NULL",
+          "title_reference"    => "TEXT NULL",
+          "property_id"        => "TEXT NULL",
+          "area_sqm"           => "DOUBLE NULL",
+          "area_ha"            => "DOUBLE NULL",
+          "address_std"        => "VARCHAR(255) NULL",
+          "street"             => "VARCHAR(120) NULL",
+          "locality"           => "VARCHAR(120) NULL",
+          "state"              => "VARCHAR(10) NULL",
+          "postcode"           => "VARCHAR(10) NULL",
+          "document_url"       => "TEXT NULL",
+          "local_document_url" => "TEXT NULL",
+          "lat"                => "DECIMAL(10,7) NULL",
+          "lng"                => "DECIMAL(10,7) NULL"
+        }
+        Migrate.da_tables.each do |table|
+          esc = DB.client.escape(table)
+          cols.each do |col, defn|
+            DB.client.query(
+              "ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS `#{col}` #{defn}"
+            )
+          rescue Mysql2::Error => e
+            warn "[migrate] skipped #{table}.#{col}: #{e.message}"
+          end
+        end
+      }
+    },
+    {
+      version: 2,
+      description: "Create geo_cache table",
+      up: -> {
+        DB.client.query(<<~SQL)
+          CREATE TABLE IF NOT EXISTS geo_cache (
+            id          BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
+            q_hash      CHAR(40)     NOT NULL UNIQUE,
+            query_text  VARCHAR(255) NOT NULL,
+            formatted   VARCHAR(255),
+            street      VARCHAR(255),
+            locality    VARCHAR(120),
+            state       VARCHAR(10),
+            postcode    VARCHAR(10),
+            lat         DECIMAL(10,7),
+            lng         DECIMAL(10,7),
+            raw_json    MEDIUMTEXT,
+            created_at  DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
+            updated_at  DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+          ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
+        SQL
+      }
+    }
+  ].freeze
+
+  # -------------------------------------------------------------------------
+  # Public API
+  # -------------------------------------------------------------------------
+
+  # Apply all pending migrations and record them in schema_migrations.
+  # Raises on unexpected errors so the caller (run_all.sh) aborts early.
+  def self.run!
+    ensure_migrations_table!
+    applied = applied_versions
+
+    pending = MIGRATIONS.reject { |m| applied.include?(m[:version]) }
+    if pending.empty?
+      puts "[migrate] schema up to date (#{MIGRATIONS.size} migration(s) applied)"
+      return
+    end
+
+    pending.each do |m|
+      puts "[migrate] applying v#{m[:version]}: #{m[:description]}"
+      m[:up].call
+      stmt = DB.client.prepare(
+        "INSERT INTO schema_migrations (version, description) VALUES (?, ?)"
+      )
+      stmt.execute(m[:version], m[:description])
+      puts "[migrate] v#{m[:version]} done"
+    end
+  end
+
+  # -------------------------------------------------------------------------
+  # Helpers (private to module)
+  # -------------------------------------------------------------------------
+
+  def self.ensure_migrations_table!
+    DB.client.query(<<~SQL)
+      CREATE TABLE IF NOT EXISTS schema_migrations (
+        version     INT UNSIGNED NOT NULL,
+        description VARCHAR(255) NOT NULL,
+        applied_at  DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
+        PRIMARY KEY (version)
+      ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
+    SQL
+  end
+
+  def self.applied_versions
+    rs = DB.client.query("SELECT version FROM schema_migrations ORDER BY version")
+    rs.map { |r| r["version"] }.to_set
+  end
+
+  # All da_* tables currently in the database.
+  def self.da_tables
+    # The backslash escapes _ (a LIKE wildcard) so only genuine da_ prefixes match.
+    rs = DB.client.query("SHOW TABLES LIKE 'da\\_%'")
+    rs.map { |r| r.values.first }
+  end
+
+  private_class_method :ensure_migrations_table!, :applied_versions, :da_tables
+end
+
+# Allow running as a standalone script: ruby /app/lib/migrate.rb
+if __FILE__ == $0
+  Migrate.run!
+end

+ 3 - 0
run_all.sh

@@ -3,6 +3,9 @@ set -euo pipefail
 
 echo "Starting run at $(date -u +"%Y-%m-%d %H:%M:%S")"
 
+echo "Running schema migrations…"
+ruby /app/lib/migrate.rb
+
 ONLY_LIST="${ONLY:-}"     # e.g. "meandervalley" or "kentish,break_oday"
 SKIP_LIST="${SKIP:-}"     # e.g. "hobartcity,latrobe"
 DEBUG_FLAG="${DEBUG:-}"   # pass through to scrapers

+ 5 - 2
scrapers/enrich.rb

@@ -25,6 +25,10 @@ require "uri"
 require_relative "./db"
 require_relative "./util"
 require_relative "./geocode"
+require_relative "./migrate"
+
+# Ensure all da_* tables have the expected columns before enriching.
+Migrate.run!
 
 LOOKUP_URL   = ENV["LOOKUP_URL"] # e.g. http://web/list_lookup.php
 ENRICH_DEBUG = ENV["ENRICH_DEBUG"] == "1"
@@ -72,8 +76,7 @@ end
 # Call this right after DB.upsert in each scraper
 #   enrich_after_upsert!(table: TABLE, council_reference: council_reference, address: address)
 def enrich_after_upsert!(table:, council_reference:, address:)
-  ensure_extra_columns!(table)
-
+  DB.validate_table_name!(table)
   esc = DB.client.escape(table)
   sel = DB.client.prepare("SELECT id, address, address_std, lat, lng, property_id, title_reference FROM `#{esc}` WHERE council_reference = ? AND address = ? LIMIT 1")
   row = sel.execute(council_reference, address).first