# Circular Head Council — Planning page list scraper
#
# Downloads the council planning page, reads every list entry matching
# `li.link-listing__no-icon`, splits each link title into reference / address /
# description, and upserts one row per entry into the table named by the
# TABLE_NAME environment variable.

require "uri"
require "nokogiri"
require_relative "../lib/enrich"
require_relative "../lib/log"

TABLE = ENV.fetch("TABLE_NAME") # run_all.sh -> da_circularhead
URL = "https://www.circularhead.tas.gov.au/council-services/development/planning"

# NOTE(review): presumably creates the table when it is missing — confirm in DB.
DB.ensure_table!(TABLE)

# Resolves +href+ against +base+.
#
# @param base [String] URL of the page the link was found on
# @param href [String, nil] raw value of the link's href attribute
# @return [String] "" when +href+ is blank, the joined absolute URL otherwise,
#   or +href+ unchanged (as a String) when joining raises
def abs_url(base, href)
  return "" if href.to_s.strip.empty?

  URI.join(base, href).to_s
rescue StandardError
  # Best effort: keep the raw link instead of failing the whole item.
  href.to_s
end

# Splits a listing title on " - " into its components.
#
# The first segment is the council reference and the last segment, cut at the
# first "(", is the description. When there are more than two segments the
# middle ones (re-joined with " - ") form the address; otherwise the address
# falls back to the first 140 characters of the whole title.
#
# @param title_reference [String] stripped link text
# @return [Array(String, String, String)] council_reference, address, description
def parse_listing_title(title_reference)
  parts = title_reference.split(" - ")

  council_reference = parts.first.to_s.strip
  description = parts.last.to_s.split("(").first.to_s.strip
  address =
    if parts.length > 2
      parts[1..-2].join(" - ").strip
    else
      # fallback to a trimmed title if no middle section
      title_reference[0, 140]
    end

  [council_reference, address, description]
end

html = Http.get(URL)
doc = Nokogiri::HTML(html)
items = doc.css("li.link-listing__no-icon")
puts "Found #{items.length} items for #{TABLE}"

saved = 0

items.each do |li|
  a = li.at_css("a")
  next unless a

  title_reference = a.text.to_s.strip
  document_url = abs_url(URL, a["href"].to_s)
  council_reference, address, description = parse_listing_title(title_reference)

  # No dates on the list view
  date_received_raw = ""
  date_received = nil

  # Require the key fields
  next if council_reference.empty? || address.empty?

  DB.upsert(TABLE, {
    description: description,
    date_received: date_received,
    date_received_raw: date_received_raw,
    address: address,
    council_reference: council_reference,
    applicant: "",
    owner: ""
  })

  # NOTE(review): enrichment behaviour is defined in ../lib/enrich — not visible here.
  enrich_after_upsert!(
    table: TABLE,
    council_reference: council_reference,
    address: address
  )

  # Save link and title if the columns exist. A Mysql2::Error here (for
  # example from missing columns) is logged and the item still counts as saved.
  upd = nil
  begin
    upd = DB.client.prepare(
      "UPDATE `#{DB.client.escape(TABLE)}` " \
      "SET document_url = ?, title_reference = ? " \
      "WHERE council_reference = ? AND address = ?"
    )
    upd.execute(document_url, title_reference, council_reference, address)
  rescue Mysql2::Error => e
    Log.warn "scraper", "[circularhead] db update skipped for #{council_reference}: #{e.message}"
  ensure
    # A statement is prepared per item; close it so server-side statement
    # handles do not accumulate over the run.
    upd&.close
  end

  puts "Upserted #{council_reference} -> #{address}"
  saved += 1
end

puts "Done #{TABLE}. Saved #{saved} item(s)."