Quellcode durchsuchen

SQL Injection Updates

Benjamin Harris vor 2 Monaten
Ursprung
Commit
f3c06ab7ea
10 geänderte Dateien mit 405 neuen und 5 gelöschten Zeilen
  1. 3 1
      .claude/settings.local.json
  2. 2 1
      .gitignore
  3. 16 0
      lib/db.rb
  4. 1 0
      lib/enrich.rb
  5. 7 3
      lib/geocode.rb
  6. 1 0
      scrapers/enrich.rb
  7. 40 0
      test/support/stubs.rb
  8. 72 0
      test/test_db_validation.rb
  9. 140 0
      test/test_geocode_helpers.rb
  10. 123 0
      test/test_util.rb

+ 3 - 1
.claude/settings.local.json

@@ -5,7 +5,9 @@
       "Bash(grep -c \"def \" /f/GIT_REPO/tas_councils/scrapers/*.rb)",
       "Bash(sort -t: -k2 -rn)",
       "Bash(grep -v \"next\\\\s*$\")",
-      "Bash(grep -n -B3 \"^\\\\s*rescue\\\\s*$\\\\|^rescue\\\\s*$\" \"f:/GIT_REPO/tas_councils/scrapers/centralcoast.rb\" \"f:/GIT_REPO/tas_councils/scrapers/circularhead.rb\" \"f:/GIT_REPO/tas_councils/scrapers/clarence.rb\" \"f:/GIT_REPO/tas_councils/scrapers/flinders_council.rb\" \"f:/GIT_REPO/tas_councils/scrapers/georgetown.rb\" \"f:/GIT_REPO/tas_councils/scrapers/glamorgan.rb\" \"f:/GIT_REPO/tas_councils/scrapers/glenorchy.rb\" \"f:/GIT_REPO/tas_councils/scrapers/huonvalley.rb\" \"f:/GIT_REPO/tas_councils/scrapers/tasman.rb\" \"f:/GIT_REPO/tas_councils/scrapers/westtamar.rb\" \"f:/GIT_REPO/tas_councils/scrapers/meandervalley.rb\" \"f:/GIT_REPO/tas_councils/scrapers/devonportcity.rb\" \"f:/GIT_REPO/tas_councils/scrapers/dorset.rb\" \"f:/GIT_REPO/tas_councils/scrapers/launcestoncity.rb\" \"f:/GIT_REPO/tas_councils/scrapers/waratah_wynyard.rb\")"
+      "Bash(grep -n -B3 \"^\\\\s*rescue\\\\s*$\\\\|^rescue\\\\s*$\" \"f:/GIT_REPO/tas_councils/scrapers/centralcoast.rb\" \"f:/GIT_REPO/tas_councils/scrapers/circularhead.rb\" \"f:/GIT_REPO/tas_councils/scrapers/clarence.rb\" \"f:/GIT_REPO/tas_councils/scrapers/flinders_council.rb\" \"f:/GIT_REPO/tas_councils/scrapers/georgetown.rb\" \"f:/GIT_REPO/tas_councils/scrapers/glamorgan.rb\" \"f:/GIT_REPO/tas_councils/scrapers/glenorchy.rb\" \"f:/GIT_REPO/tas_councils/scrapers/huonvalley.rb\" \"f:/GIT_REPO/tas_councils/scrapers/tasman.rb\" \"f:/GIT_REPO/tas_councils/scrapers/westtamar.rb\" \"f:/GIT_REPO/tas_councils/scrapers/meandervalley.rb\" \"f:/GIT_REPO/tas_councils/scrapers/devonportcity.rb\" \"f:/GIT_REPO/tas_councils/scrapers/dorset.rb\" \"f:/GIT_REPO/tas_councils/scrapers/launcestoncity.rb\" \"f:/GIT_REPO/tas_councils/scrapers/waratah_wynyard.rb\")",
+      "Bash(ruby test/test_util.rb)",
+      "Bash(docker compose:*)"
     ]
   }
 }

+ 2 - 1
.gitignore

@@ -1 +1,2 @@
-/downloads
+/downloads
+.env

+ 16 - 0
lib/db.rb

@@ -1,6 +1,20 @@
 require "mysql2"
 
 module DB
+    # MySQL/MariaDB DDL statements (CREATE TABLE, ALTER TABLE) don't support
+    # parameterized identifiers — only data values can be bound with ?. Using
+    # client.escape() is the standard workaround for identifiers, but as a
+    # defence-in-depth measure we also enforce that every table name matches
+    # the expected da_* pattern before it is ever interpolated into SQL.
+    SAFE_TABLE_NAME_RE = /\Ada_[a-z0-9_]+\z/
+
+    def self.validate_table_name!(name)
+        unless name.to_s.match?(SAFE_TABLE_NAME_RE)
+            raise ArgumentError, "Unsafe table name: #{name.inspect} — must match #{SAFE_TABLE_NAME_RE}"
+        end
+        name
+    end
+
     def self.client
         @client ||= Mysql2::Client.new(
             host: ENV.fetch("MYSQL_HOST", "127.0.0.1"),
@@ -14,6 +28,7 @@ module DB
     end
 
     def self.ensure_table!(table)
+        validate_table_name!(table)
         client.query(<<~SQL)
         CREATE TABLE IF NOT EXISTS `#{client.escape(table)}` (
             id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
@@ -48,6 +63,7 @@ module DB
     end
 
     def self.upsert(table, row)
+        validate_table_name!(table)
         columns = [
             :description,
             :date_received,

+ 1 - 0
lib/enrich.rb

@@ -34,6 +34,7 @@ def log_enrich(msg)
 end
 
 def ensure_extra_columns!(table)
+  DB.validate_table_name!(table)
   esc = DB.client.escape(table)
   {
     "address_std"        => "VARCHAR(255) NULL",

+ 7 - 3
lib/geocode.rb

@@ -31,6 +31,7 @@ module Geocode
 
   # Add columns to any da_* table to store normalized fields
   def self.ensure_da_columns!(table)
+    DB.validate_table_name!(table)
     esc = DB.client.escape(table)
     DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS address_std VARCHAR(255) NULL")
     DB.client.query("ALTER TABLE `#{esc}` ADD COLUMN IF NOT EXISTS street VARCHAR(255) NULL")
@@ -73,9 +74,11 @@ module Geocode
     url.query = URI.encode_www_form(params)
 
     body = Http.get(url.to_s, headers: { "Accept" => "application/json" })
-    data = JSON.parse(body)
-  rescue JSON::ParserError
-    data = {}
+    data = begin
+      JSON.parse(body)
+    rescue JSON::ParserError
+      {}
+    end
 
     status = data["status"].to_s
     unless status == "OK" && data["results"].is_a?(Array) && !data["results"].empty?
@@ -103,6 +106,7 @@ module Geocode
   # Update a DA row with the normalized fields. Pass the same keys from format_au.
   def self.update_da_row!(table:, council_reference:, orig_address:, geo:)
     return unless geo
+    DB.validate_table_name!(table)
     ensure_da_columns!(table)
 
     esc = DB.client.escape(table)

+ 1 - 0
scrapers/enrich.rb

@@ -34,6 +34,7 @@ def log_enrich(msg)
 end
 
 def ensure_extra_columns!(table)
+  DB.validate_table_name!(table)
   esc = DB.client.escape(table)
   {
     "address_std"        => "VARCHAR(255) NULL",

+ 40 - 0
test/support/stubs.rb

@@ -0,0 +1,40 @@
+# test/support/stubs.rb
+#
+# Lightweight stubs for DB and Http so that lib/geocode.rb can be loaded in
+# unit tests without a real database connection or network access.
+#
+# Usage — require this file BEFORE requiring any lib that depends on DB or Http:
+#
+#   require_relative "support/stubs"
+#   require_relative "../../lib/geocode"
+
+# Stub DB module — pure validation logic is implemented here so that
+# validate_table_name! can be tested directly.
+module DB
+  SAFE_TABLE_NAME_RE = /\Ada_[a-z0-9_]+\z/
+
+  def self.validate_table_name!(name)
+    unless name.to_s.match?(SAFE_TABLE_NAME_RE)
+      raise ArgumentError, "Unsafe table name: #{name.inspect} — must match #{SAFE_TABLE_NAME_RE}"
+    end
+    name
+  end
+
+  def self.client
+    raise NotImplementedError, "DB.client is not available in unit tests"
+  end
+end
+
+# Stub Http module — no network calls in unit tests
+module Http
+  def self.get(*)
+    raise NotImplementedError, "Http.get is not available in unit tests"
+  end
+end
+
+# Prevent require_relative inside lib files from loading the real db.rb / http.rb,
+# which would overwrite the stubs above and pull in the mysql2 gem.
+# Use File.dirname so expand_path treats the base as a directory, not a file.
+_test_support_dir = File.dirname(File.expand_path(__FILE__))
+$LOADED_FEATURES << File.expand_path("../../lib/db.rb",   _test_support_dir)
+$LOADED_FEATURES << File.expand_path("../../lib/http.rb",  _test_support_dir)

+ 72 - 0
test/test_db_validation.rb

@@ -0,0 +1,72 @@
+require "minitest/autorun"
+require_relative "support/stubs"
+
+# Tests for DB.validate_table_name!
+# The stub in support/stubs.rb implements the same regex as lib/db.rb,
+# so these tests validate the intended allowlist pattern.
+
+class TestDbValidateTableName < Minitest::Test
+  # --- Valid names ---
+
+  def test_simple_council_name
+    assert_equal "da_brighton", DB.validate_table_name!("da_brighton")
+  end
+
+  def test_hyphenated_council_name
+    assert_equal "da_break_oday", DB.validate_table_name!("da_break_oday")
+  end
+
+  def test_council_with_numbers
+    assert_equal "da_waratah_wynyard", DB.validate_table_name!("da_waratah_wynyard")
+  end
+
+  def test_all_known_tables
+    known = %w[
+      da_break_oday da_brighton da_burnie da_centralcoast da_centralhighlands
+      da_circularhead da_clarence da_derwentvalley da_devonportcity da_dorset
+      da_flinders_council da_georgetown da_glamorgan da_glenorchy da_hobartcity
+      da_huonvalley da_kentish da_kingborough da_latrobe da_launcestoncity
+      da_meandervalley da_northernmidlands da_southernmidlands da_tasman
+      da_waratah_wynyard da_westcoast da_westtamar
+    ]
+    known.each do |t|
+      assert_equal t, DB.validate_table_name!(t), "Expected #{t} to be valid"
+    end
+  end
+
+  # --- Invalid names ---
+
+  def test_no_da_prefix_raises
+    assert_raises(ArgumentError) { DB.validate_table_name!("users") }
+  end
+
+  def test_sql_injection_raises
+    assert_raises(ArgumentError) { DB.validate_table_name!("da_foo; DROP TABLE users--") }
+  end
+
+  def test_backtick_injection_raises
+    assert_raises(ArgumentError) { DB.validate_table_name!("da_foo`") }
+  end
+
+  def test_empty_string_raises
+    assert_raises(ArgumentError) { DB.validate_table_name!("") }
+  end
+
+  def test_nil_raises
+    assert_raises(ArgumentError) { DB.validate_table_name!(nil) }
+  end
+
+  def test_uppercase_raises
+    # Table names are lowercase by convention; uppercase would be unexpected
+    assert_raises(ArgumentError) { DB.validate_table_name!("DA_BRIGHTON") }
+  end
+
+  def test_whitespace_raises
+    assert_raises(ArgumentError) { DB.validate_table_name!("da_foo bar") }
+  end
+
+  def test_error_message_includes_name
+    err = assert_raises(ArgumentError) { DB.validate_table_name!("bad_name") }
+    assert_includes err.message, "bad_name"
+  end
+end

+ 140 - 0
test/test_geocode_helpers.rb

@@ -0,0 +1,140 @@
+require "minitest/autorun"
+require_relative "support/stubs"
+require_relative "../lib/geocode"
+
+# Tests for the pure helper methods in Geocode that do not require a DB
+# connection or network access. All DB/Http calls are stubbed out via
+# test/support/stubs.rb.
+
+class TestGeocodeCleanForGeocode < Minitest::Test
+  def test_removes_ct_reference_with_colon
+    input  = "123 Main St (CT: 12345/678)"
+    result = Geocode.clean_for_geocode(input)
+    assert_equal "123 Main St", result
+  end
+
+  def test_removes_ct_reference_with_space
+    input  = "45 High St (CT 99/100)"
+    result = Geocode.clean_for_geocode(input)
+    assert_equal "45 High St", result
+  end
+
+  def test_collapses_whitespace
+    result = Geocode.clean_for_geocode("12  George   St")
+    assert_equal "12 George St", result
+  end
+
+  def test_strips_leading_and_trailing_whitespace
+    result = Geocode.clean_for_geocode("  4 River Rd  ")
+    assert_equal "4 River Rd", result
+  end
+
+  def test_plain_address_unchanged
+    result = Geocode.clean_for_geocode("7 Beach Rd, Hobart")
+    assert_equal "7 Beach Rd, Hobart", result
+  end
+
+  def test_empty_string
+    assert_equal "", Geocode.clean_for_geocode("")
+  end
+end
+
+class TestGeocodeParseResult < Minitest::Test
+  # Build a minimal Google Maps geocode result hash
+  def result_fixture(components:, location: { "lat" => -42.8, "lng" => 147.3 }, location_type: "ROOFTOP", types: ["street_address"])
+    {
+      "types" => types,
+      "address_components" => components,
+      "geometry" => {
+        "location"      => location,
+        "location_type" => location_type
+      }
+    }
+  end
+
+  def test_full_address
+    components = [
+      { "types" => ["street_number"],            "long_name" => "42",        "short_name" => "42" },
+      { "types" => ["route"],                    "long_name" => "Main Street", "short_name" => "Main St" },
+      { "types" => ["locality"],                 "long_name" => "Hobart",    "short_name" => "Hobart" },
+      { "types" => ["administrative_area_level_1"], "long_name" => "Tasmania", "short_name" => "TAS" },
+      { "types" => ["postal_code"],              "long_name" => "7000",      "short_name" => "7000" }
+    ]
+    r = Geocode.parse_result(result_fixture(components: components))
+
+    assert_equal "42 Main Street, Hobart, TAS, 7000", r[:display]
+    assert_equal "42 Main Street",                    r[:street]
+    assert_equal "Hobart",                            r[:locality]
+    assert_equal "TAS",                               r[:state]
+    assert_equal "7000",                              r[:postcode]
+    assert_in_delta(-42.8, r[:lat],  0.001)
+    assert_in_delta(147.3, r[:lng], 0.001)
+  end
+
+  def test_missing_street_number
+    components = [
+      { "types" => ["route"],      "long_name" => "Collins Street", "short_name" => "Collins St" },
+      { "types" => ["locality"],   "long_name" => "Launceston",     "short_name" => "Launceston" },
+      { "types" => ["administrative_area_level_1"], "long_name" => "Tasmania", "short_name" => "TAS" },
+      { "types" => ["postal_code"], "long_name" => "7250",          "short_name" => "7250" }
+    ]
+    r = Geocode.parse_result(result_fixture(components: components))
+
+    assert_equal "Collins Street",                    r[:street]
+    assert_equal "Collins Street, Launceston, TAS, 7250", r[:display]
+  end
+
+  def test_postal_town_fallback_for_locality
+    # When no "locality" component, fall back to "postal_town"
+    components = [
+      { "types" => ["street_number"],  "long_name" => "5",       "short_name" => "5" },
+      { "types" => ["route"],          "long_name" => "Hill Rd", "short_name" => "Hill Rd" },
+      { "types" => ["postal_town"],    "long_name" => "Burnie",  "short_name" => "Burnie" },
+      { "types" => ["administrative_area_level_1"], "long_name" => "Tasmania", "short_name" => "TAS" },
+      { "types" => ["postal_code"],    "long_name" => "7320",    "short_name" => "7320" }
+    ]
+    r = Geocode.parse_result(result_fixture(components: components))
+    assert_equal "Burnie", r[:locality]
+  end
+
+  def test_empty_components_returns_safe_defaults
+    r = Geocode.parse_result(result_fixture(components: []))
+    assert_equal "", r[:street]
+    assert_equal "", r[:locality]
+    assert_equal "", r[:state]
+    assert_equal "", r[:postcode]
+    assert_equal "", r[:display]
+  end
+end
+
+class TestGeocodePickBestResult < Minitest::Test
+  def make_result(location_type:, types:, state: "TAS")
+    {
+      "types" => types,
+      "address_components" => [
+        { "types" => ["administrative_area_level_1"], "short_name" => state }
+      ],
+      "geometry" => { "location_type" => location_type }
+    }
+  end
+
+  def test_prefers_rooftop_over_approximate
+    rooftop    = make_result(location_type: "ROOFTOP",     types: ["street_address"])
+    approx     = make_result(location_type: "APPROXIMATE", types: ["locality"])
+    best = Geocode.pick_best_result([approx, rooftop])
+    assert_equal "ROOFTOP", best.dig("geometry", "location_type")
+  end
+
+  def test_prefers_tas_result_over_non_tas
+    tas_result = make_result(location_type: "APPROXIMATE", types: ["street_address"], state: "TAS")
+    vic_result = make_result(location_type: "ROOFTOP",     types: ["street_address"], state: "VIC")
+    best = Geocode.pick_best_result([vic_result, tas_result])
+    tas_state = best["address_components"].find { |c| c["types"].include?("administrative_area_level_1") }&.fetch("short_name")
+    assert_equal "TAS", tas_state
+  end
+
+  def test_single_result_returned_as_is
+    r = make_result(location_type: "ROOFTOP", types: ["street_address"])
+    assert_equal r, Geocode.pick_best_result([r])
+  end
+end

+ 123 - 0
test/test_util.rb

@@ -0,0 +1,123 @@
+require "minitest/autorun"
+require_relative "../lib/util"
+
+class TestUtilParseAusDate < Minitest::Test
+  # --- Standard formats ---
+
+  def test_dd_mm_yyyy
+    assert_equal Date.new(2025, 3, 15), Util.parse_aus_date("15/03/2025")
+  end
+
+  def test_dd_mm_yy
+    assert_equal Date.new(2025, 8, 1), Util.parse_aus_date("01/08/25")
+  end
+
+  def test_natural_language_date
+    assert_equal Date.new(2025, 6, 20), Util.parse_aus_date("20 June 2025")
+  end
+
+  def test_abbreviated_month
+    assert_equal Date.new(2025, 4, 5), Util.parse_aus_date("5 Apr 2025")
+  end
+
+  def test_iso_format
+    assert_equal Date.new(2025, 12, 1), Util.parse_aus_date("2025-12-01")
+  end
+
+  # --- Edge cases ---
+
+  def test_nil_returns_nil
+    assert_nil Util.parse_aus_date(nil)
+  end
+
+  def test_empty_string_returns_nil
+    assert_nil Util.parse_aus_date("")
+  end
+
+  def test_whitespace_only_returns_nil
+    assert_nil Util.parse_aus_date("   ")
+  end
+
+  def test_garbage_returns_nil
+    assert_nil Util.parse_aus_date("not a date")
+  end
+
+  def test_partial_date_returns_nil
+    assert_nil Util.parse_aus_date("15/03")
+  end
+
+  def test_strips_whitespace
+    assert_equal Date.new(2025, 3, 15), Util.parse_aus_date("  15/03/2025  ")
+  end
+
+  # --- Ambiguity: dd/mm not mm/dd ---
+  # 01/02/2025 should be February 1st, not January 2nd
+
+  def test_day_month_order
+    result = Util.parse_aus_date("01/02/2025")
+    assert_equal 2, result.month, "Expected month 2 (February), got #{result.month}"
+    assert_equal 1, result.day
+  end
+end
+
+class TestUtilParseEpochMs < Minitest::Test
+  def test_known_timestamp
+    # 2025-01-01 00:00:00 UTC in ms
+    ms = Date.new(2025, 1, 1).to_time.to_i * 1000
+    assert_equal Date.new(2025, 1, 1), Util.parse_epoch_ms(ms)
+  end
+
+  def test_string_input
+    ms = Date.new(2024, 6, 15).to_time.to_i * 1000
+    assert_equal Date.new(2024, 6, 15), Util.parse_epoch_ms(ms.to_s)
+  end
+
+  def test_nil_returns_nil
+    assert_nil Util.parse_epoch_ms(nil)
+  end
+
+  def test_empty_string_returns_nil
+    assert_nil Util.parse_epoch_ms("")
+  end
+
+  def test_zero_returns_a_date
+    # epoch 0 = 1970-01-01 (valid, should not raise)
+    result = Util.parse_epoch_ms(0)
+    refute_nil result
+  end
+end
+
+class TestUtilRefToTable < Minitest::Test
+  def test_known_planbuild_ref
+    # "DA-HOB-2025-1234" -> "da_hobartcity"
+    assert_equal "da_hobartcity", Util.ref_to_table("DA-HOB-2025-1234")
+  end
+
+  def test_launceston_ref
+    assert_equal "da_launceston", Util.ref_to_table("DA-LAU-2024-99")
+  end
+
+  def test_case_insensitive_code
+    assert_equal "da_hobartcity", Util.ref_to_table("DA-hob-2025-1")
+  end
+
+  def test_unknown_code_raises
+    assert_raises(RuntimeError) { Util.ref_to_table("DA-ZZZ-2025-1") }
+  end
+end
+
+class TestUtilCouncilMap < Minitest::Test
+  def test_all_council_map_values_start_with_da
+    Util::COUNCIL_MAP.each do |key, table|
+      assert table.start_with?("da_"),
+        "COUNCIL_MAP[#{key.inspect}] = #{table.inspect} does not start with 'da_'"
+    end
+  end
+
+  def test_all_planbuild_codes_map_to_known_councils
+    Util::PLANBUILD_CODE_MAP.each do |code, council_key|
+      assert Util::COUNCIL_MAP.key?(council_key),
+        "PLANBUILD_CODE_MAP[#{code.inspect}] = #{council_key.inspect} has no entry in COUNCIL_MAP"
+    end
+  end
+end