# West Tamar Council — Advertised Planning Applications
#
# Source: https://www.wtc.tas.gov.au/advertised-planning-applications/
#
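# Page structure — all entries on one page, grouped by h2 headings.
# (Tags below are reconstructed from the selectors this scraper uses; the
# live markup may differ in detail.)
#
#   <h2>92 Sunset Boulevard, Clarence Point</h2>
#   <p>
#     <strong>APPLICANT:</strong> J & E West<br>
#     <strong>PROPOSAL:</strong> Residential - Dwelling & Outbuilding<br>
#     <strong>LOCATION:</strong> 92 Sunset Boulevard, Clarence Point<br>
#     <strong>CLOSES:</strong> 5pm on 16 April 2026
#   </p>
#   <ul>
#     <li><a href="….pdf">…</a> - Application Number: PA NO: 2025065</li>
#     <li>… - Closes 16 April 2026</li>
#   </ul>
#   <p>Proposal description</p>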
require "nokogiri"
require "uri"
require "fileutils"
require_relative "../lib/scraper_helpers"
require_relative "../lib/util"
require_relative "../lib/log"
# Runtime configuration, read once from the environment when the script loads.
# TABLE_NAME is mandatory: ENV.fetch raises KeyError when it is unset.
TABLE = ENV.fetch("TABLE_NAME")
# Listing page to scrape; also sent as the Referer when downloading documents.
URL = "https://www.wtc.tas.gov.au/advertised-planning-applications/"
# Attachments are downloaded only when DOWNLOAD_ATTACHMENTS is exactly "1".
DOWNLOAD_ATTACHMENTS = ENV.fetch("DOWNLOAD_ATTACHMENTS", nil) == "1"
# Root directory for downloaded files; used when DOWNLOAD_DIR is unset.
DOWNLOAD_DIR = ENV.fetch("DOWNLOAD_DIR", "/app/downloads")
# DB is not defined in this file — presumably provided by the required helpers.
DB.ensure_table!(TABLE)
# Turn an arbitrary value into a filesystem-friendly token: every run of
# characters other than word characters, "-" and "." becomes a single "_".
# The argument is converted with to_s first, so nil yields "".
def safe_name(s)
  s.to_s.gsub(/[^\w\-.]+/, "_")
end
# Download the document at +url+ into
# DOWNLOAD_DIR/westtamar/<sanitised council_reference>/ and return the
# "/files/westtamar/..." path string built for it.
#
# Returns nil when +url+ is blank, or when any StandardError is raised along
# the way (the failure is reported through Log.warn instead of propagating).
def download_pdf(url, council_reference)
  return nil if url.to_s.strip.empty?

  ref_dir = safe_name(council_reference)
  target_dir = File.join(DOWNLOAD_DIR, "westtamar", ref_dir)
  FileUtils.mkdir_p(target_dir)

  # The file name is the sanitised basename of the URL path, with a fixed
  # fallback when that basename comes out empty.
  file_name = safe_name(File.basename(URI.parse(url).path))
  file_name = "document.pdf" if file_name.empty?
  target_path = File.join(target_dir, file_name)

  request_headers = { "Accept" => "application/pdf,*/*", "Referer" => URL }
  payload = Http.get(url, headers: request_headers)
  File.binwrite(target_path, payload)
  puts " saved #{file_name} (#{payload.bytesize} bytes)"

  "/files/westtamar/#{ref_dir}/#{file_name}"
rescue StandardError => e
  Log.warn "westtamar", "Download failed for #{url}: #{e.class} #{e.message}"
  nil
end
# Parse "<strong>KEY:</strong> VALUE<br>" pairs from a <p> node.
#
# @param p_node [#inner_html, nil] paragraph node holding the labelled lines
# @return [Hash{String => String}] upper-case label => value; empty when
#   +p_node+ is nil or no line looks like "LABEL: value"
def parse_strong_labels(p_node)
  return {} unless p_node

  # Replace <br>, <br/> and <br /> with newlines so each label ends up on its
  # own line once the markup is flattened to text.
  html = p_node.inner_html.gsub(%r{<br\s*/?>}i, "\n")

  Nokogiri::HTML.fragment(html).text.split("\n").each_with_object({}) do |line, kv|
    # Collapse every run of whitespace and non-breaking spaces into one space.
    line = line.gsub(/[\u00a0\s]+/, " ").strip
    next if line.empty?

    # A label is a capital letter followed by 1-20 capitals/whitespace, then ":".
    m = line.match(/\A([A-Z][A-Z\s]{1,20}):\s*(.+)\z/)
    kv[m[1].strip.upcase] = m[2].strip if m
  end
end
# Fetch the listing page and parse it; each h2 section becomes one item hash.
html = Http.get(URL)
doc = Nokogiri::HTML(html)
items = []

# Walk h2 elements; collect their following element siblings until the next h2.
doc.css("h2").each do |h2|
  sibling_nodes = []
  sib = h2.next_sibling
  while sib
    break if sib.element? && sib.name == "h2"

    sibling_nodes << sib if sib.element?
    sib = sib.next_sibling
  end
  next if sibling_nodes.empty?

  # Find the <p> containing the APPLICANT/PROPOSAL/LOCATION/CLOSES labels.
  label_p = sibling_nodes.find { |n| n.name == "p" && n.text.match?(/APPLICANT|PROPOSAL|LOCATION|CLOSES/i) }
  kv = parse_strong_labels(label_p)

  # Find the <ul> containing the application number.
  ul_node = sibling_nodes.find { |n| n.name == "ul" }
  ul_text = ul_node&.text.to_s.gsub(/\u00a0|\s+/, " ")

  # PDF link lives inside an <li> within the <ul>.
  pdf_link = ul_node&.css("li a[href]")&.find { |a| a["href"].to_s.match?(/\.pdf/i) }
  # Fallback: any <a> element in the section with a .pdf href.
  pdf_link ||= sibling_nodes.flat_map { |n| n.css("a[href]").to_a }
                            .find { |a| a["href"].to_s.match?(/\.pdf/i) }

  # --- Reference: "PA NO: 2025065" from the <ul> text, or from the PDF filename ---
  ref = nil
  if (m = ul_text.match(/PA\s*(?:NO:?)?\s*(\d{5,})/i))
    ref = "PA #{m[1]}"
  end
  if ref.nil? && pdf_link
    href = pdf_link["href"].to_s
    ref = href.match(/PA(\d{5,})/i)&.then { |mm| "PA #{mm[1]}" }
  end
  # Sections without a recognisable reference are skipped.
  next unless ref

  # --- Address from LOCATION label, fallback to h2 text ---
  address = kv["LOCATION"] || kv["ADDRESS"] || h2.text.gsub(/\u00a0|\s+/, " ").strip
  next if address.empty?

  # --- Other fields ---
  applicant = kv["APPLICANT"].to_s
  description = kv["PROPOSAL"].to_s
  description = "Development Application" if description.empty?

  closes_raw = kv["CLOSES"].to_s
  # Strip time prefix: "5pm on 16 April 2026" → "16 April 2026"
  closes_raw = closes_raw.sub(/\A.*?\bon\s+/i, "").strip
  # Also try list item: "Closes 16 April 2026"
  if closes_raw.empty? && (m = ul_text.match(/Closes?\s+(\d{1,2}\s+[A-Za-z]+\s+\d{4})/i))
    closes_raw = m[1]
  end
  on_notice_to = Util.parse_aus_date(closes_raw)

  # Resolve the PDF href against the listing URL; empty string when no PDF was found.
  document_url = pdf_link ? abs_url(URL, pdf_link["href"].to_s) : ""

  items << {
    council_reference: ref,
    address: address,
    description: description,
    applicant: applicant,
    on_notice_to: on_notice_to,
    on_notice_to_raw: closes_raw,
    document_url: document_url
  }
end
# Report how many applications were scraped, then persist each one.
puts "Found #{items.length} item(s) for #{TABLE}"

# Fields copied verbatim from each scraped item into the stored row.
row_fields = %i[council_reference address description applicant on_notice_to on_notice_to_raw]

items.each do |item|
  remote_url = item[:document_url]
  # The attachment is fetched only when downloads are enabled; otherwise
  # local_url stays nil.
  local_url = download_pdf(remote_url, item[:council_reference]) if DOWNLOAD_ATTACHMENTS

  upsert_and_enrich!(
    table: TABLE,
    row: item.slice(*row_fields).merge(owner: ""),
    extras: { document_url: remote_url, local_document_url: local_url }
  )
end

puts "Done #{TABLE}. Saved #{items.length} item(s)."