# Circular Head Council — Planning page list scraper
require "uri"

require "nokogiri"

require_relative "../lib/http"
require_relative "../lib/db"
require_relative "../lib/util"
require_relative "../lib/enrich"
# Destination table name, read from the TABLE_NAME environment variable
# (run_all.sh -> da_circularhead). ENV.fetch raises KeyError when it is unset.
TABLE = ENV.fetch("TABLE_NAME")
# Page that is fetched and scraped for the planning list.
URL = "https://www.circularhead.tas.gov.au/council-services/development/planning"

DB.ensure_table!(TABLE)

# Optional columns for extras. Each ALTER is attempted on its own so that a
# failure while adding one column does not skip the attempt to add the other.
# Failures are reported with a warning and the script carries on.
{
  "document_url" => "VARCHAR(1024) NULL",
  "title_reference" => "TEXT NULL"
}.each do |column, definition|
  begin
    DB.client.query("ALTER TABLE `#{DB.client.escape(TABLE)}` ADD COLUMN IF NOT EXISTS #{column} #{definition}")
  rescue => e
    warn "Optional column add skipped: #{e.class} #{e.message}"
  end
end
# Resolves +href+ against +base+ and returns the result as a String.
#
# @param base [String] absolute URL the link is relative to
# @param href [String, nil] raw href value; may be relative, blank or nil
# @return [String] "" when href is nil or blank; the joined absolute URL when
#   the link can be parsed; otherwise the whitespace-stripped href unchanged
def abs_url(base, href)
  link = href.to_s.strip
  return "" if link.empty?

  # Join the stripped value: leading/trailing whitespace makes URI.join raise,
  # which would otherwise leave a resolvable relative link unresolved.
  URI.join(base, link).to_s
rescue URI::Error, ArgumentError
  # Best effort: keep the link text when it cannot be parsed or joined.
  # Only URI/argument failures are handled so unrelated errors still surface.
  link
end
# Splits a listing title on " - " into its fields.
#
# The first segment is taken as the council reference, the last segment (cut
# at the first "(") as the description, and everything in between as the
# address. When there is no middle segment the address falls back to the
# first 140 characters of the whole title.
#
# @param title_reference [String] link text of one list item
# @return [Array<String>] [council_reference, address, description]
def circularhead_split_title(title_reference)
  parts = title_reference.split(" - ")
  council_reference = parts.first.to_s.strip
  description = parts.last.to_s.split("(").first.to_s.strip
  address = if parts.length > 2
    parts[1..-2].join(" - ").strip
  else
    # fallback to a trimmed title if no middle section
    title_reference[0, 140]
  end
  [council_reference, address, description]
end

html = Http.get(URL)
doc = Nokogiri::HTML(html)
items = doc.css("li.link-listing__no-icon")
puts "Found #{items.length} items for #{TABLE}"

# Loop-invariant SQL for storing the link and title next to each record.
update_sql = "UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ?, title_reference = ? WHERE council_reference = ? AND address = ?"

saved = 0
items.each do |li|
  a = li.at_css("a")
  next unless a

  title_reference = a.text.to_s.strip
  document_url = abs_url(URL, a["href"].to_s)
  council_reference, address, description = circularhead_split_title(title_reference)

  # Require the key fields
  next if council_reference.empty? || address.empty?

  # No dates on the list view, so the date fields are stored empty.
  DB.upsert(TABLE, {
    description: description,
    date_received: nil,
    date_received_raw: "",
    address: address,
    council_reference: council_reference,
    applicant: "",
    owner: ""
  })

  enrich_after_upsert!(
    table: TABLE,
    council_reference: council_reference,
    address: address
  )

  # Save link and title if the columns exist. A database error here is
  # reported and skipped; the item is still counted as saved.
  begin
    upd = DB.client.prepare(update_sql)
    upd.execute(document_url, title_reference, council_reference, address)
  rescue Mysql2::Error => e
    warn "[circularhead] db update skipped for #{council_reference}: #{e.message}"
  ensure
    # Release the prepared statement so one is not left open per list item.
    upd.close if upd
  end

  puts "Upserted #{council_reference} -> #{address}"
  saved += 1
end
puts "Done #{TABLE}. Saved #{saved} item(s)."