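# Huon Valley Council — Advertised Applications (site page, not PlanBuild)
# Source: https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/
#
# Page structure per application:
#   <div class="accordion-grid-item">
#     <h2 class="accordion-grid-item__title">DA-37/2026</h2>
#     <div class="accordion-grid-item__description">Description, Address (CT-ref)</div>
#     <a class="plan-file-list__item" href="...">Copy of application for viewing</a>
#   </div>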
require "nokogiri"
require "uri"
require "cgi"
require_relative "../lib/http"
require_relative "../lib/db"
require_relative "../lib/util"
require_relative "../lib/enrich"
require_relative "../lib/log"

# Destination table name; must be supplied via the environment
# (run_all.sh -> da_huonvalley). ENV.fetch raises KeyError when it is missing.
TABLE = ENV.fetch("TABLE_NAME")

# First page of the advertised-applications listing.
START_URL = "https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/".freeze

DB.ensure_table!(TABLE)

# Council reference: DA-37/2026 or DA 37/2026 (number/year order).
REF_RX = /\bDA[-\s]?\d{1,4}\/20\d{2}\b/i

# Splits "<description>, <street number> <rest of address>" at the first
# ", <digits> " occurrence.
DESC_ADDR_RX = /\A(.+?),\s*(\d+\s+\S.+)\z/m

# Trailing cadastral reference on an address, e.g. " (CT-237651/1)".
CT_SUFFIX_RX = /\s*\(CT-[\d\/]+\)\s*\z/

# Resolve +href+ against +base+.
#
# Returns nil for a blank href, the href unchanged when it already starts with
# http:// or https://, otherwise the joined absolute URL. If the href cannot be
# parsed as a URI, the stripped href is returned as-is rather than raising.
def abs_url(base, href)
  h = href.to_s.strip
  return nil if h.empty?
  return h if h.start_with?("http://", "https://")

  URI.join(base, h).to_s
rescue URI::InvalidURIError
  h
end

# Split the combined "description, address (CT-ref)" text of one listing item.
#
# Example: "Dwelling, outbuilding..., 100 Turners Road, Cradoc (CT-237651/1)"
# becomes ["Dwelling, outbuilding...", "100 Turners Road, Cradoc"].
# When no ", <digits> " boundary is found, the whole text is treated as the
# address and the description falls back to "Development Application".
#
# Returns [description, address]; the address has any trailing "(CT-...)"
# reference removed and may be empty.
def split_description_address(desc_addr)
  description, address =
    if (m = desc_addr.match(DESC_ADDR_RX))
      [m[1].strip, m[2].strip]
    else
      ["Development Application", desc_addr]
    end

  [description, address.sub(CT_SUFFIX_RX, "").strip]
end

# Parse one listing page.
#
# html     - page markup as a String
# base_url - URL the page was fetched from; used to absolutise links
#
# Returns [rows, next_href]: rows is an Array of Hashes (one per application
# that has a recognisable council reference and a non-empty address), and
# next_href is the absolute URL of the "Next" pagination link, or nil when the
# page has none.
def parse_page(html, base_url)
  doc = Nokogiri::HTML(html)
  rows = []

  doc.css("div.accordion-grid-item").each do |item|
    ref = item.at_css("h2.accordion-grid-item__title")&.text&.strip
    desc_addr = item.at_css("div.accordion-grid-item__description")&.text&.strip&.gsub(/\s+/, " ")
    doc_link = item.at_css("a.plan-file-list__item")&.[]("href")

    # Skip items that are not applications or carry no description/address text.
    next if ref.nil? || !ref.match?(REF_RX)
    next if desc_addr.nil? || desc_addr.empty?

    description, address = split_description_address(desc_addr)
    next if address.empty?

    rows << {
      council_reference: ref,
      address: address[0, 255], # truncate to at most 255 characters
      description: description,
      date_received_raw: "",    # this scraper does not extract a received date
      date_received: nil,
      document_url: abs_url(base_url, doc_link)
    }
  end

  # Pagination: find a link whose text is exactly "Next" (case-insensitive).
  next_a = doc.css("a").find { |a| a.text.strip.downcase == "next" }
  next_href = next_a ? abs_url(base_url, next_a["href"]) : nil

  [rows, next_href]
end

saved = 0
url = START_URL
seen = {}    # [council_reference, address] pairs already upserted in this run
visited = {} # page URLs already fetched; guards against pagination cycles

loop do
  visited[url] = true

  html = begin
    Http.get(url)
  rescue StandardError => e
    Log.warn "huonvalley", "Failed to fetch #{url}: #{e.class} #{e.message}"
    break
  end

  rows, next_url = parse_page(html, url)
  puts "Found #{rows.length} item(s) on #{url}"

  rows.each do |r|
    key = [r[:council_reference], r[:address]]
    next if seen[key]

    seen[key] = true

    begin
      DB.upsert(TABLE, {
        description: r[:description],
        date_received: r[:date_received],
        date_received_raw: r[:date_received_raw],
        address: r[:address],
        council_reference: r[:council_reference],
        document_url: r[:document_url],
        applicant: "",
        owner: ""
      })
      enrich_after_upsert!(
        table: TABLE,
        council_reference: r[:council_reference],
        address: r[:address]
      )
      Log.info "huonvalley", "Upserted #{r[:council_reference]} -> #{r[:address]}"
      saved += 1
    rescue StandardError => e
      # Best-effort: one failing row must not abort the rest of the page.
      Log.warn "huonvalley", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
    end
  end

  # Stop when there is no next page or the "Next" link points at a page that
  # was already fetched (self-link or a longer cycle such as A -> B -> A).
  break if next_url.nil? || visited[next_url]

  url = next_url
end

puts "Done #{TABLE}. Saved #{saved} item(s)."