# Huon Valley Council — Advertised Applications (site page, not PlanBuild)
# Source: https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/
#
# Page structure per application (flat siblings, no wrapper div):
#   <h2>DA-37/2026</h2>
#   <p>Description, Address (CT-land-title-ref)</p>
#   <h3>More Information</h3>
#   ...
#   <p>Available Documents:</p>
#   <a href="...">Copy of application for viewing</a>
#
# NOTE(review): the tags around "More Information" and "Available Documents:"
# are reconstructed; the parser below relies only on the <h2> heading, the
# first <p> after it, and the <a> document link. Confirm against the live page.

require "nokogiri"
require "uri"
require "cgi"
require_relative "../lib/http"
require_relative "../lib/db"
require_relative "../lib/util"
require_relative "../lib/enrich"
require_relative "../lib/log"

TABLE = ENV.fetch("TABLE_NAME") # run_all.sh -> da_huonvalley
START_URL = "https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/"

DB.ensure_table!(TABLE)

# DA-37/2026 or DA 37/2026 (number/year order)
REF_RX = /\bDA[-\s]?\d{1,4}\/20\d{2}\b/i

# Maximum number of following siblings inspected per application heading.
# Bounds the scan so a malformed page cannot make it run away.
SIBLING_SCAN_LIMIT = 15

# Resolves +href+ against +base+ and returns the absolute URL as a String.
#
# Returns nil when +href+ is nil/blank or cannot be parsed as a URI, so a
# single bad link never aborts parsing of the whole page.
def abs_url(base, href)
  target = href.to_s.strip
  return nil if target.empty?

  # Join the stripped value: surrounding whitespace in an href attribute
  # would otherwise make URI.join raise and the link would be lost.
  URI.join(base, target).to_s
rescue URI::Error
  # URI::Error is the parent of InvalidURIError and BadURIError.
  nil
end

# Walks the siblings that follow an application heading and collects the
# description/address text and the document link.
#
# Returns [desc_addr, document_url]; either element may be nil.
def scan_application_siblings(heading, base_url)
  desc_addr = nil
  document_url = nil
  sib = heading.next_element

  SIBLING_SCAN_LIMIT.times do
    break if sib.nil?

    # First <p> after the heading holds description + address
    if sib.name == "p" && desc_addr.nil?
      desc_addr = sib.text.strip.gsub(/\s+/, " ")
    end

    # Document link follows the "Available Documents:" element
    if sib.name == "a" && sib.text.strip.match?(/copy of application for viewing/i)
      document_url = abs_url(base_url, sib["href"])
      break
    end

    # Stop at the next application's <h2>
    break if sib.name == "h2" && sib.text.strip.match?(REF_RX)

    sib = sib.next_element
  end

  [desc_addr, document_url]
end

# Splits "Dwelling, outbuilding..., 100 Turners Road, Cradoc (CT-237651/1)"
# into [description, address] at the first ", <number> " pattern and strips a
# trailing cadastral reference such as "(CT-237651/1)" from the address.
#
# When no street number is found the whole text is used as the address and
# the description falls back to "Development Application".
def split_description_address(desc_addr)
  description, address =
    if (m = desc_addr.match(/\A(.+?),\s*(\d+\s+\S.+)\z/m))
      [m[1].strip, m[2].strip]
    else
      ["Development Application", desc_addr]
    end

  [description, address.sub(/\s*\(CT-[\d\/]+\)\s*\z/, "").strip]
end

# Parses one listing page.
#
# html     - page markup as a String
# base_url - URL the page was fetched from; used to resolve relative links
#
# Returns [rows, next_href]: rows is an Array of Hashes (one per application
# that has a non-empty address) and next_href is the absolute URL of the
# "Next" pagination link, or nil when there is none.
def parse_page(html, base_url)
  doc = Nokogiri::HTML(html)
  rows = []

  # Drive from each plain <h2> whose text matches the DA ref pattern
  doc.css("h2").each do |h2|
    ref = h2.text.strip
    next unless ref.match?(REF_RX)

    desc_addr, document_url = scan_application_siblings(h2, base_url)
    next if desc_addr.nil? || desc_addr.empty?

    description, address = split_description_address(desc_addr)
    next if address.empty?

    rows << {
      council_reference: ref,
      address: address[0, 255],
      description: description,
      date_received_raw: "",
      date_received: nil,
      document_url: document_url
    }
  end

  # Pagination: find a "Next" link
  next_a = doc.css("a").find { |a| a.text.strip.downcase == "next" }
  next_href = next_a && abs_url(base_url, next_a["href"])

  [rows, next_href]
end

saved = 0
url = START_URL
seen = {}    # [council_reference, address] pairs already upserted in this run
visited = {} # page URLs already fetched, to guard against pagination cycles

loop do
  visited[url] = true

  html =
    begin
      Http.get(url)
    rescue StandardError => e
      Log.warn "huonvalley", "Failed to fetch #{url}: #{e.class} #{e.message}"
      break
    end

  rows, next_url = parse_page(html, url)
  puts "Found #{rows.length} item(s) on #{url}"

  rows.each do |r|
    key = [r[:council_reference], r[:address]]
    next if seen[key]

    seen[key] = true

    begin
      DB.upsert(TABLE, {
        description: r[:description],
        date_received: r[:date_received],
        date_received_raw: r[:date_received_raw],
        address: r[:address],
        council_reference: r[:council_reference],
        document_url: r[:document_url],
        applicant: "",
        owner: ""
      })

      enrich_after_upsert!(
        table: TABLE,
        council_reference: r[:council_reference],
        address: r[:address]
      )

      Log.info "huonvalley", "Upserted #{r[:council_reference]} -> #{r[:address]}"
      saved += 1
    rescue StandardError => e
      # Best effort: one failing row must not stop the rest of the page.
      Log.warn "huonvalley", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
    end
  end

  # Stop when there is no further page, or when the "Next" link points at a
  # page already fetched (self-link or a longer cycle such as A -> B -> A).
  break if next_url.nil? || visited[next_url]

  url = next_url
end

puts "Done #{TABLE}. Saved #{saved} item(s)."