db.rb 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. require "mysql2"
  2. module DB
  3. def self.client
  4. @client ||= Mysql2::Client.new(
  5. host: ENV.fetch("MYSQL_HOST", "127.0.0.1"),
  6. port: Integer(ENV.fetch("MYSQL_PORT", "3306")),
  7. database: ENV.fetch("MYSQL_DB", "planning_scrapes"),
  8. username: ENV.fetch("MYSQL_USER", "scraper"),
  9. password: ENV.fetch("MYSQL_PASSWORD", ""),
  10. encoding: "utf8mb4",
  11. reconnect: true
  12. )
  13. end
  14. def self.ensure_table!(table)
  15. client.query(<<~SQL)
  16. CREATE TABLE IF NOT EXISTS `#{client.escape(table)}` (
  17. id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
  18. description TEXT,
  19. date_received DATE NULL,
  20. date_received_raw VARCHAR(100),
  21. address VARCHAR(255) NOT NULL,
  22. council_reference VARCHAR(100) NOT NULL,
  23. applicant VARCHAR(255),
  24. owner VARCHAR(255),
  25. created_at DATETIME NOT NULL,
  26. updated_at DATETIME NOT NULL,
  27. on_notice_to DATE NULL,
  28. on_notice_to_raw VARCHAR(80) NULL,
  29. title_reference TEXT NULL,
  30. property_id TEXT NULL,
  31. area_sqm DOUBLE NULL,
  32. area_ha DOUBLE NULL,
  33. address_std VARCHAR(255) NULL,
  34. street VARCHAR(120) NULL,
  35. locality VARCHAR(120) NULL,
  36. state VARCHAR(10) NULL,
  37. postcode VARCHAR(10) NULL,
  38. document_url TEXT NULL,
  39. local_document_url TEXT NULL,
  40. lat DECIMAL(10,7) NULL,
  41. lng DECIMAL(10,7) NULL,
  42. PRIMARY KEY (id),
  43. UNIQUE KEY uniq_ref_addr (council_reference, address)
  44. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
  45. SQL
  46. end
  47. def self.upsert(table, row)
  48. columns = [
  49. :description,
  50. :date_received,
  51. :date_received_raw,
  52. :address,
  53. :council_reference,
  54. :applicant,
  55. :owner,
  56. :local_document_url,
  57. :document_url,
  58. :on_notice_to,
  59. :on_notice_to_raw,
  60. :title_reference,
  61. :property_id,
  62. :area_sqm,
  63. :area_ha,
  64. :address_std,
  65. :street,
  66. :locality,
  67. :state,
  68. :postcode,
  69. :lat,
  70. :lng
  71. ]
  72. esc_table = client.escape(table)
  73. col_names = columns.map { |c| "`#{c}`" }.join(", ")
  74. placeholders = (["?"] * columns.size).join(", ")
  75. updates = columns.map { |c|
  76. case c
  77. when :date_received
  78. # write-once: only set if currently NULL
  79. "`#{c}` = IFNULL(`#{c}`, VALUES(`#{c}`))"
  80. when :date_received_raw
  81. # write-once for strings: only set if NULL or ''
  82. "`#{c}` = CASE WHEN `#{c}` IS NULL OR `#{c}` = '' THEN VALUES(`#{c}`) ELSE `#{c}` END"
  83. when :document_url, :local_document_url
  84. # don't blank out existing value if new is NULL
  85. "`#{c}` = COALESCE(VALUES(`#{c}`), `#{c}`)"
  86. else
  87. "`#{c}` = VALUES(`#{c}`)"
  88. end
  89. }.join(", ")
  90. sql = <<~SQL
  91. INSERT INTO `#{esc_table}` (#{col_names}, created_at, updated_at)
  92. VALUES (#{placeholders}, NOW(), NOW())
  93. ON DUPLICATE KEY UPDATE
  94. #{updates},
  95. updated_at = NOW()
  96. SQL
  97. stmt = client.prepare(sql)
  98. values = columns.map { |c| row[c] }
  99. stmt.execute(*values)
  100. end
  101. end