require "mysql2" module DB # MySQL/MariaDB DDL statements (CREATE TABLE, ALTER TABLE) don't support # parameterized identifiers — only data values can be bound with ?. Using # client.escape() is the standard workaround for identifiers, but as a # defence-in-depth measure we also enforce that every table name matches # the expected da_* pattern before it is ever interpolated into SQL. SAFE_TABLE_NAME_RE = /\Ada_[a-z0-9_]+\z/ def self.validate_table_name!(name) unless name.to_s.match?(SAFE_TABLE_NAME_RE) raise ArgumentError, "Unsafe table name: #{name.inspect} — must match #{SAFE_TABLE_NAME_RE}" end name end def self.client @client ||= Mysql2::Client.new( host: ENV.fetch("MYSQL_HOST", "127.0.0.1"), port: Integer(ENV.fetch("MYSQL_PORT", "3306")), database: ENV.fetch("MYSQL_DB", "planning_scrapes"), username: ENV.fetch("MYSQL_USER", "scraper"), password: ENV.fetch("MYSQL_PASSWORD", ""), encoding: "utf8mb4", reconnect: true ) end def self.ensure_table!(table) validate_table_name!(table) client.query(<<~SQL) CREATE TABLE IF NOT EXISTS `#{client.escape(table)}` ( id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, description TEXT, date_received DATE NULL, date_received_raw VARCHAR(100), address VARCHAR(255) NOT NULL, council_reference VARCHAR(100) NOT NULL, applicant VARCHAR(255), owner VARCHAR(255), created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL, on_notice_to DATE NULL, on_notice_to_raw VARCHAR(80) NULL, title_reference TEXT NULL, property_id TEXT NULL, area_sqm DOUBLE NULL, area_ha DOUBLE NULL, address_std VARCHAR(255) NULL, street VARCHAR(120) NULL, locality VARCHAR(120) NULL, state VARCHAR(10) NULL, postcode VARCHAR(10) NULL, document_url TEXT NULL, local_document_url TEXT NULL, lat DECIMAL(10,7) NULL, lng DECIMAL(10,7) NULL, PRIMARY KEY (id), UNIQUE KEY uniq_ref_addr (council_reference, address) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; SQL end def self.upsert(table, row) validate_table_name!(table) columns = [ :description, :date_received, :date_received_raw, :address, :council_reference, :applicant, :owner, :local_document_url, :document_url, :on_notice_to, :on_notice_to_raw, :title_reference, :property_id, :area_sqm, :area_ha, :address_std, :street, :locality, :state, :postcode, :lat, :lng ] esc_table = client.escape(table) col_names = columns.map { |c| "`#{c}`" }.join(", ") placeholders = (["?"] * columns.size).join(", ") updates = columns.map { |c| case c when :date_received # write-once: only set if currently NULL "`#{c}` = IFNULL(`#{c}`, VALUES(`#{c}`))" when :date_received_raw # write-once for strings: only set if NULL or '' "`#{c}` = CASE WHEN `#{c}` IS NULL OR `#{c}` = '' THEN VALUES(`#{c}`) ELSE `#{c}` END" when :document_url, :local_document_url # don't blank out existing value if new is NULL "`#{c}` = COALESCE(VALUES(`#{c}`), `#{c}`)" else "`#{c}` = VALUES(`#{c}`)" end }.join(", ") sql = <<~SQL INSERT INTO `#{esc_table}` (#{col_names}, created_at, updated_at) VALUES (#{placeholders}, NOW(), NOW()) ON DUPLICATE KEY UPDATE #{updates}, updated_at = NOW() SQL stmt = client.prepare(sql) values = columns.map { |c| row[c] } stmt.execute(*values) end end