# West Tamar Council — Advertised Planning Applications
#
# Source: https://www.wtc.tas.gov.au/advertised-planning-applications/
#
# Page structure — all entries on one page, grouped by h2 headings.
# Each entry includes a paragraph of labelled lines, for example:
#
# APPLICANT: J & E West
# PROPOSAL: Residential - Dwelling & Outbuilding
# LOCATION: 92 Sunset Boulevard, Clarence Point
# CLOSES: 5pm on 16 April 2026
#
# Extracts "LABEL: value" pairs from a paragraph node whose lines are
# separated by <br> tags.
#
# Only lines shaped like "UPPER CASE LABEL: value" are kept; any other text in
# the paragraph is ignored. If a label appears twice, the later value wins.
#
# @param p_node [#inner_html, nil] paragraph element holding the labelled lines
# @return [Hash{String => String}] label => trimmed value; empty when p_node is nil
def parse_strong_labels(p_node)
  return {} unless p_node

  # Turn every <br> variant (<br>, <br/>, <br />, <BR ...>) into a newline so
  # each label ends up on its own line once the markup is stripped.
  html = p_node.inner_html.gsub(/<br\b[^>]*>/i, "\n")

  Nokogiri::HTML.fragment(html).text.lines.each_with_object({}) do |raw, kv|
    # Collapse non-breaking spaces and whitespace runs into single spaces.
    line = raw.gsub(/[\u00a0\s]+/, " ").strip
    next if line.empty?

    # The label must start with a capital and contain only capitals/spaces.
    m = line.match(/\A([A-Z][A-Z\s]{1,20}):\s*(.+)\z/)
    kv[m[1].strip] = m[2].strip if m
  end
end
# Fetch the listing page and parse it; entries are grouped under h2 headings.
html = Http.get(URL)
doc = Nokogiri::HTML(html)
items = []

# Walk h2 elements; collect their following element siblings until the next h2.
doc.css("h2").each do |h2|
  sibling_nodes = []
  sib = h2.next_sibling
  while sib
    break if sib.element? && sib.name == "h2"

    sibling_nodes << sib if sib.element?
    sib = sib.next_sibling
  end
  next if sibling_nodes.empty?

  # Combined text of any <ul> in this section. The reference ("PA NO: ...")
  # and a fallback closing date ("Closes ...") are looked up in it below.
  ul_text = sibling_nodes.select { |n| n.name == "ul" }.map(&:text).join(" ")

  # Find the <p> containing APPLICANT/PROPOSAL/LOCATION/CLOSES labels.
  label_p = sibling_nodes.find do |n|
    n.name == "p" && n.text.match?(/APPLICANT|PROPOSAL|LOCATION|CLOSES/i)
  end
  kv = parse_strong_labels(label_p)

  # Find the first <p> that contains a link (expected to be the PDF).
  pdf_p = sibling_nodes.find { |n| n.name == "p" && n.at_css("a[href]") }
  pdf_link = pdf_p&.at_css("a[href]")

  # --- Reference: "PA NO: 2025065" from the ul, else "PA<digits>" in the link href ---
  ref_digits = ul_text[/PA\s*(?:NO:?)?\s*(\d{5,})/i, 1]
  ref_digits ||= pdf_link["href"].to_s[/PA(\d{5,})/i, 1] if pdf_link
  next unless ref_digits

  ref = "PA #{ref_digits}"

  # --- Address from LOCATION label, fallback to h2 text ---
  address = kv["LOCATION"] || kv["ADDRESS"] || h2.text.gsub(/\u00a0|\s+/, " ").strip
  next if address.empty?

  # --- Other fields ---
  applicant = kv["APPLICANT"].to_s
  description = kv["PROPOSAL"].to_s
  description = "Development Application" if description.empty?

  # Strip time prefix: "5pm on 16 April 2026" → "16 April 2026"
  closes_raw = kv["CLOSES"].to_s.sub(/\A.*?\bon\s+/i, "").strip
  # Also try list item: "Closes 16 April 2026"
  if closes_raw.empty? && (m = ul_text.match(/Closes?\s+(\d{1,2}\s+[A-Za-z]+\s+\d{4})/i))
    closes_raw = m[1]
  end
  # NOTE(review): Util.parse_aus_date is defined elsewhere; presumably it
  # tolerates an empty string when no closing date was found — confirm.
  on_notice_to = Util.parse_aus_date(closes_raw)

  # abs_url is defined elsewhere; presumably resolves the href against URL.
  document_url = pdf_link ? abs_url(URL, pdf_link["href"].to_s) : ""

  items << {
    council_reference: ref,
    address: address,
    description: description,
    applicant: applicant,
    on_notice_to: on_notice_to,
    on_notice_to_raw: closes_raw,
    document_url: document_url
  }
end

puts "Found #{items.length} item(s) for #{TABLE}"

# Persist each collected entry, downloading the attachment first when enabled.
items.each do |r|
  local_url = DOWNLOAD_ATTACHMENTS ? download_pdf(r[:document_url], r[:council_reference]) : nil

  upsert_and_enrich!(
    table: TABLE,
    row: {
      council_reference: r[:council_reference],
      address: r[:address],
      description: r[:description],
      applicant: r[:applicant],
      on_notice_to: r[:on_notice_to],
      on_notice_to_raw: r[:on_notice_to_raw],
      owner: ""
    },
    extras: {
      document_url: r[:document_url],
      local_document_url: local_url
    }
  )
end

puts "Done #{TABLE}. Saved #{items.length} item(s)."