# Huon Valley Council — Advertised Applications (site page, not PlanBuild)
# Source: https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/
#
# Page structure per application (flat siblings, no wrapper div):
#
#   <h2>DA-37/2026</h2>
#   <p>Description, Address (CT-land-title-ref)</p>
#   More Information
#   ...
#   Available Documents:
#   <a href="...">Copy of application for viewing</a>
require "nokogiri"
require "uri"
require "cgi"
require_relative "../lib/http"
require_relative "../lib/db"
require_relative "../lib/util"
require_relative "../lib/enrich"
require_relative "../lib/log"
# Listing page the crawl starts from.
START_URL = "https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/"

# Council reference written number/year, e.g. "DA-37/2026" or "DA 37/2026"
# (the separator after "DA" is optional).
REF_RX = /\bDA[-\s]?\d{1,4}\/20\d{2}\b/i

# Destination table name comes from the environment (run_all.sh -> da_huonvalley);
# ENV.fetch raises KeyError when TABLE_NAME is not set.
TABLE = ENV.fetch("TABLE_NAME")
DB.ensure_table!(TABLE)
# Resolves +href+ against +base+ and returns the absolute URL.
#
# @param base [String] URL that relative references are resolved against
# @param href [String, nil] raw href attribute value; may be nil, blank, or
#   padded with whitespace
# @return [String, nil] the absolute URL, or nil when +href+ is nil/blank or
#   the two values cannot be parsed and joined as URIs
def abs_url(base, href)
  target = href.to_s.strip
  return nil if target.empty?

  # Join the stripped value: whitespace around an href is not part of the
  # link, but URI.join would reject it as an invalid URI.
  URI.join(base, target).to_s
rescue URI::Error
  # URI::Error covers InvalidURIError (unparseable input) as well as
  # BadURIError (e.g. both base and href are relative).
  nil
end
# Extracts advertised applications from one listing page.
#
# Every <h2> whose text matches REF_RX starts an application. Up to 15
# following sibling elements are inspected for the first <p> (description and
# address in one string) and for the "Copy of application for viewing" link.
# Applications without a usable description/address paragraph are skipped.
#
# @param html [String] HTML of the listing page
# @param base_url [String] URL the page came from; relative hrefs are resolved
#   against it
# @return [Array] two elements: the Array of row Hashes (keys
#   :council_reference, :address, :description, :date_received_raw,
#   :date_received, :document_url) and the absolute URL of the "Next" page,
#   or nil when no such link is present
def parse_page(html, base_url)
  doc = Nokogiri::HTML(html)
  rows = []
  link_rx = /copy of application for viewing/i

  # Drive from each plain <h2> whose text matches the DA ref pattern.
  doc.css("h2").each do |h2|
    ref = h2.text.strip
    next unless ref.match?(REF_RX)

    desc_addr = nil
    document_url = nil
    sib = h2.next_element

    # Look at no more than 15 siblings after the heading.
    15.times do
      break if sib.nil?
      # Stop at the next application's <h2>.
      break if sib.name == "h2" && sib.text.strip.match?(REF_RX)

      # First <p> after the heading holds description + address.
      if sib.name == "p" && desc_addr.nil?
        desc_addr = sib.text.strip.gsub(/\s+/, " ")
      end

      # Document link follows "Available Documents:". It may be the sibling
      # itself or sit inside a wrapping sibling element.
      candidates = sib.name == "a" ? [sib] : sib.css("a")
      link = candidates.find { |a| a.text.strip.match?(link_rx) }
      if link
        document_url = abs_url(base_url, link["href"])
        break
      end

      sib = sib.next_element
    end

    next if desc_addr.nil? || desc_addr.empty?

    # Split "Dwelling, outbuilding..., 100 Turners Road, Cradoc (CT-237651/1)"
    # into description and address at the first ", <digits> " occurrence.
    m = desc_addr.match(/\A(.+?),\s*(\d+\s+\S.+)\z/m)
    description, address =
      if m
        [m[1].strip, m[2].strip]
      else
        # No street number found: treat the whole text as the address.
        ["Development Application", desc_addr]
      end

    # Strip cadastral reference from end of address: "(CT-237651/1)"
    address = address.sub(/\s*\(CT-[\d\/]+\)\s*\z/, "").strip
    next if address.empty?

    rows << {
      council_reference: ref,
      address: address[0, 255],
      description: description,
      # The listing carries no lodgement date, so both date fields stay blank.
      date_received_raw: "",
      date_received: nil,
      document_url: document_url
    }
  end

  # Pagination: find a "Next" link
  next_a = doc.css("a").find { |a| a.text.strip.downcase == "next" }
  next_href = next_a ? abs_url(base_url, next_a["href"]) : nil

  [rows, next_href]
end
# Crawl: start at START_URL, follow "Next" links, and upsert every application
# found. The loop ends on a fetch failure, when a page has no next link, or
# when the next link points at a page that was already fetched.
saved = 0
url = START_URL
seen = {}    # [council_reference, address] pairs already written in this run
visited = {} # page URLs already fetched; stops pagination cycles (A -> B -> A)

loop do
  visited[url] = true

  html = begin
    Http.get(url)
  rescue StandardError => e
    Log.warn "huonvalley", "Failed to fetch #{url}: #{e.class} #{e.message}"
    break
  end

  rows, next_url = parse_page(html, url)
  puts "Found #{rows.length} item(s) on #{url}"

  rows.each do |r|
    key = [r[:council_reference], r[:address]]
    next if seen[key]

    seen[key] = true

    begin
      DB.upsert(TABLE, {
        description: r[:description],
        date_received: r[:date_received],
        date_received_raw: r[:date_received_raw],
        address: r[:address],
        council_reference: r[:council_reference],
        document_url: r[:document_url],
        applicant: "",
        owner: ""
      })
      enrich_after_upsert!(
        table: TABLE,
        council_reference: r[:council_reference],
        address: r[:address]
      )
      Log.info "huonvalley", "Upserted #{r[:council_reference]} -> #{r[:address]}"
      saved += 1
    rescue StandardError => e
      # One failing row must not abort the rest of the page.
      Log.warn "huonvalley", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
    end
  end

  # The current URL is already in `visited`, so a "Next" link pointing back at
  # this page is covered by the same check.
  break if next_url.nil? || visited[next_url]

  url = next_url
end

puts "Done #{TABLE}. Saved #{saved} item(s)."