#!/usr/bin/env ruby require "sqlite3" require "time" require_relative "../lib/db" require_relative "../lib/util" SRC_DIR = ARGV[0] || "/app/sqlite_in" ONLY_TABLE = ENV["ONLY_TABLE"].to_s.strip DRY_RUN = ENV["DRY_RUN"] == "1" def say(s) puts s end def warnx(s) $stderr.puts s end def mysql_table_for(filename) base = File.basename(filename, ".sqlite").downcase "da_" + base.gsub(/[^a-z0-9]+/, "_") end def columns_for(db, table) db.execute("PRAGMA table_info(#{table})").map { |row| row[1].to_s } end def sql_ident(c) # Quote identifiers that have spaces or caps c =~ /\A[a-z_][a-z0-9_]*\z/i ? c : %Q{"#{c.gsub('"','""')}"} end def detect_source_table(db) preferred = %w[scraped_data data applications] all = db.execute("SELECT name FROM sqlite_master WHERE type='table'").map { |r| r[0].to_s } preferred.each { |t| return t if all.include?(t) } all.each do |t| cols = columns_for(db, t) if cols.any? { |c| c =~ /\baddress\b/i } && cols.any? { |c| c =~ /\bcouncil[_ ]?reference\b/i } return t end end all.first end def pick(cols, *candidates) candidates.find { |c| c && cols.include?(c) } end def build_select(db, src_table) cols = columns_for(db, src_table) mapping = { desc_col: pick(cols, "description", "type_of_work", "Type of Work"), date_col: pick(cols, "date_received", "date_lodged", "Date Lodged", "date_scraped"), addr_col: pick(cols, "address", "Address"), ref_col: pick(cols, "council_reference", "application_no", "application", "Application No.", "Application"), appl_col: pick(cols, "applicant", "Applicant"), owner_col: pick(cols, "owner", "Owner"), notice_col: pick(cols, "on_notice_to", "on_notice", "close_date", "Closes"), notice_raw_col: pick(cols, "on_notice_to_raw", "on_notice_raw", "close_date_raw"), title_col: pick(cols, "title_reference", "title", "name"), doc_col: pick(cols, "document_url", "document", "url", "info_url") } unless mapping[:addr_col] && mapping[:ref_col] raise "Source table #{src_table} missing address or council_reference columns" end sel = [] sel << "#{mapping[:desc_col]} AS description" if mapping[:desc_col] #sel << "#{mapping[:date_col]} AS date_received" if mapping[:date_col] # Build a COALESCE(...) AS date_received from any present date columns date_candidates = %w[date_received date_lodged Date\ Lodged date_scraped].select { |c| cols.include?(c) } if date_candidates.any? # Treat empty strings as NULL so blanks don't block fallbacks expr = date_candidates.map { |c| "NULLIF(#{sql_ident(c)}, '')" }.join(", ") sel << "COALESCE(#{expr}) AS date_received" end sel << "#{mapping[:addr_col]} AS address" sel << "#{mapping[:ref_col]} AS council_reference" sel << "#{mapping[:appl_col]} AS applicant" if mapping[:appl_col] sel << "#{mapping[:owner_col]} AS owner" if mapping[:owner_col] sel << "#{mapping[:notice_col]} AS on_notice_to" if mapping[:notice_col] sel << "#{mapping[:notice_raw_col]} AS on_notice_to_raw" if mapping[:notice_raw_col] sel << "#{mapping[:title_col]} AS title_reference" if mapping[:title_col] sel << "#{mapping[:doc_col]} AS document_url" if mapping[:doc_col] ["SELECT #{sel.join(", ")} FROM #{src_table}", mapping] end def ensure_target_table!(table) DB.ensure_table!(table) end def normalize_date(v) s = v.to_s.strip return nil if s.empty? Util.parse_aus_date(s) || (Date.parse(s) rescue nil) end def import_file(path) target_table = mysql_table_for(path) return if ONLY_TABLE != "" && target_table != ONLY_TABLE say "Importing #{File.basename(path)} → #{target_table}" ensure_target_table!(target_table) SQLite3::Database.new(path) do |sdb| sdb.results_as_hash = true src_table = detect_source_table(sdb) sql, _mapping = build_select(sdb, src_table) count = 0 sdb.execute(sql) do |row| description = row["description"].to_s date_raw = row["date_received"].to_s address = row["address"].to_s[0,255] ref = row["council_reference"].to_s[0,100] applicant = row["applicant"].to_s owner = row["owner"].to_s on_notice_to_raw = row["on_notice_to_raw"].to_s on_notice_to_val = row["on_notice_to"] on_notice_date = normalize_date(on_notice_to_val || on_notice_to_raw) title_reference = row["title_reference"].to_s document_url = row["document_url"].to_s description = "Development Application" if description.strip.empty? if DRY_RUN && count < 5 say " [dry] ref=#{ref.inspect} addr=#{address.inspect} date=#{date_raw.inspect}" end unless DRY_RUN DB.upsert(target_table, { description: description, date_received: normalize_date(date_raw), date_received_raw: date_raw, address: address, council_reference: ref, applicant: applicant, owner: owner }) begin upd = DB.client.prepare( "UPDATE `#{DB.client.escape(target_table)}` " \ "SET on_notice_to = ?, on_notice_to_raw = ?, title_reference = ?, document_url = ? " \ "WHERE council_reference = ? AND address = ?" ) upd.execute(on_notice_date, on_notice_to_raw, title_reference, document_url, ref, address) rescue StandardError => e warnx " extras update skipped for #{ref}: #{e.class} #{e.message}" end end count += 1 end say " Imported #{count} row(s)" end rescue StandardError => e warnx " ERROR importing #{File.basename(path)}: #{e.class} #{e.message}" end unless Dir.exist?(SRC_DIR) warnx "Source dir not found: #{SRC_DIR}" exit 1 end files = Dir.glob(File.join(SRC_DIR, "*.sqlite")).sort if files.empty? warnx "No .sqlite files in #{SRC_DIR}" exit 0 end files.each { |f| import_file(f) } say "Done."