# Huon Valley Council — Advertised Applications (site page, not PlanBuild)
# Source: https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/
#
# Page structure per application:
#   <h3 class="application-heading">DA-37/2026</h3>
#   <h4>Description, Address (land title ref)</h4>
#   <div class="more-information">
#     Available Documents:
#     <a href="...">Copy of application for viewing</a>
#   </div>
require "nokogiri"
require "uri"
require "cgi"
require_relative "../lib/http"
require_relative "../lib/db"
require_relative "../lib/util"
require_relative "../lib/enrich"
require_relative "../lib/log"

TABLE = ENV.fetch("TABLE_NAME") # run_all.sh -> da_huonvalley
START_URL = "https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/"

DB.ensure_table!(TABLE)

# DA-37/2026 or DA 37/2026 (number/year order)
REF_RX = /\bDA[-\s]?\d{1,4}\/20\d{2}\b/i

# Resolves +href+ against +base+ and returns the absolute URL as a String.
# Returns nil when +href+ is nil/blank or when URI.join rejects it with
# URI::InvalidURIError.
def abs_url(base, href)
  return nil if href.to_s.strip.empty?

  URI.join(base, href).to_s
rescue URI::InvalidURIError
  nil
end

# Walks forward from an application heading (+h3+) through at most 10
# following sibling elements, collecting:
#   * the whitespace-collapsed text of the first h4 (description + address)
#   * the first a[href] inside the element whose class contains
#     "more-information", resolved against +base_url+
# The walk stops at the more-information element, at the next application
# heading, or when the siblings run out.
#
# Returns [desc_addr, document_url]; either element may be nil.
def application_details_after(h3, base_url)
  desc_addr = nil
  document_url = nil
  sib = h3.next_element

  10.times do
    break if sib.nil?

    desc_addr = sib.text.strip.gsub(/\s+/, " ") if sib.name == "h4" && desc_addr.nil?

    if sib["class"].to_s.include?("more-information")
      link = sib.at_css("a[href]")
      document_url = abs_url(base_url, link["href"]) if link
      break
    end

    # Stop if we hit the next application heading
    break if sib.name == "h3" && sib["class"].to_s.include?("application-heading")

    sib = sib.next_element
  end

  [desc_addr, document_url]
end

# Splits "Dwelling description, 100 Street Name, Suburb (CT-ref)" into
# [description, address]. The address starts at the first comma that is
# followed by a number and whitespace. When no such comma exists the whole
# text is treated as the address and the description falls back to
# "Development Application". A trailing land-title reference such as
# "(CT-237651/1)" is stripped from the address.
def split_description_and_address(desc_addr)
  description, address =
    if (m = desc_addr.match(/\A(.+?),\s*(\d+\s+\S.+)\z/m))
      [m[1].strip, m[2].strip]
    else
      ["Development Application", desc_addr]
    end

  [description, address.sub(/\s*\(CT-[\d\/]+\)\s*\z/, "").strip]
end

# Parses one advertised-applications page.
#
# html     - the page markup as a String
# base_url - URL the page was fetched from; used to resolve relative links
#
# Returns [rows, next_href]:
#   rows      - Array of Hashes with :council_reference, :address,
#               :description, :date_received_raw, :date_received and
#               :document_url, one per h3.application-heading whose text
#               matches REF_RX and that yields a non-empty address
#   next_href - absolute URL of the first link whose text is "next"
#               (case-insensitive), or nil when there is none
def parse_page(html, base_url)
  doc = Nokogiri::HTML(html)
  rows = []

  doc.css("h3.application-heading").each do |h3|
    ref = h3.text.strip
    next unless ref.match?(REF_RX)

    desc_addr, document_url = application_details_after(h3, base_url)
    next if desc_addr.nil? || desc_addr.empty?

    description, address = split_description_and_address(desc_addr)
    next if address.empty?

    rows << {
      council_reference: ref,
      address: address[0, 255],
      description: description,
      # No date is extracted from the page by this parser, so both date
      # fields are left blank.
      date_received_raw: "",
      date_received: nil,
      document_url: document_url
    }
  end

  # Pagination: find a "Next" link
  next_a = doc.css("a").find { |a| a.text.strip.downcase == "next" }
  next_href = next_a && abs_url(base_url, next_a["href"])

  [rows, next_href]
end

saved = 0
url = START_URL
seen = {}    # [council_reference, address] pairs already upserted this run
visited = {} # page URLs already fetched; guards against pagination cycles

loop do
  visited[url] = true

  html = begin
    Http.get(url)
  rescue StandardError => e
    Log.warn "huonvalley", "Failed to fetch #{url}: #{e.class} #{e.message}"
    break
  end

  rows, next_url = parse_page(html, url)
  puts "Found #{rows.length} item(s) on #{url}"

  rows.each do |r|
    key = [r[:council_reference], r[:address]]
    next if seen[key]

    seen[key] = true

    begin
      DB.upsert(TABLE, {
        description: r[:description],
        date_received: r[:date_received],
        date_received_raw: r[:date_received_raw],
        address: r[:address],
        council_reference: r[:council_reference],
        document_url: r[:document_url],
        applicant: "",
        owner: ""
      })
      enrich_after_upsert!(
        table: TABLE,
        council_reference: r[:council_reference],
        address: r[:address]
      )
      Log.info "huonvalley", "Upserted #{r[:council_reference]} -> #{r[:address]}"
      saved += 1
    rescue StandardError => e
      # A failure on one row is logged and skipped so the remaining rows
      # are still processed.
      Log.warn "huonvalley", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
    end
  end

  # Stop when there is no next page or the "Next" link points at a page
  # already fetched (including the current one); otherwise a cycle such as
  # A -> B -> A would never terminate.
  break if next_url.nil? || visited[next_url]

  url = next_url
end

puts "Done #{TABLE}. Saved #{saved} item(s)."