|
|
@@ -1,13 +1,12 @@
|
|
|
# Huon Valley Council — Advertised Applications (site page, not PlanBuild)
|
|
|
# Source: https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/
|
|
|
#
|
|
|
-# Page structure per application (flat siblings, no wrapper div):
|
|
|
-# <h2>DA-37/2026</h2>
|
|
|
-# <p>Description, Address (CT-land-title-ref)</p>
|
|
|
-# <h3>More Information</h3>
|
|
|
-# <a href="mapbox...">...</a>
|
|
|
-# <h3>Available Documents:</h3>
|
|
|
-# <a href="sharepoint...">Copy of application for viewing</a>
|
|
|
+# Page structure per application:
|
|
|
+# <div class="accordion-grid-item">
|
|
|
+# <h2 class="accordion-grid-item__title">DA-37/2026</h2>
|
|
|
+# <div class="accordion-grid-item__description">Description, Address (CT-ref)</div>
|
|
|
+# <a class="plan-file-list__item" href="sharepoint...">Copy of application for viewing</a>
|
|
|
+# </div>
|
|
|
|
|
|
require "nokogiri"
|
|
|
require "uri"
|
|
|
@@ -37,35 +36,18 @@ def parse_page(html, base_url)
|
|
|
doc = Nokogiri::HTML(html)
|
|
|
rows = []
|
|
|
|
|
|
- # Drive from each plain <h2> whose text matches the DA ref pattern
|
|
|
- doc.css("h2").each do |h2|
|
|
|
- ref = h2.text.strip
|
|
|
- next unless ref.match?(REF_RX)
|
|
|
-
|
|
|
- desc_addr = nil
|
|
|
- document_url = nil
|
|
|
-
|
|
|
- sib = h2.next_element
|
|
|
- 15.times do
|
|
|
- break if sib.nil?
|
|
|
- # First <p> after the heading holds description + address
|
|
|
- if sib.name == "p" && desc_addr.nil?
|
|
|
- desc_addr = sib.text.strip.gsub(/\s+/, " ")
|
|
|
- end
|
|
|
- # Document link follows <h3>Available Documents:</h3>
|
|
|
- if sib.name == "a" && sib.text.strip.match?(/copy of application for viewing/i)
|
|
|
- document_url = abs_url(base_url, sib["href"])
|
|
|
- break
|
|
|
- end
|
|
|
- # Stop at the next application's <h2>
|
|
|
- break if sib.name == "h2" && sib.text.strip.match?(REF_RX)
|
|
|
- sib = sib.next_element
|
|
|
- end
|
|
|
+ doc.css("div.accordion-grid-item").each do |item|
|
|
|
+ ref = item.at_css("h2.accordion-grid-item__title")&.text&.strip
|
|
|
+ desc_addr = item.at_css("div.accordion-grid-item__description")&.text&.strip&.gsub(/\s+/, " ")
|
|
|
+ doc_link = item.at_css("a.plan-file-list__item")&.[]("href")
|
|
|
|
|
|
+ next if ref.nil? || !ref.match?(REF_RX)
|
|
|
next if desc_addr.nil? || desc_addr.empty?
|
|
|
|
|
|
+ document_url = abs_url(base_url, doc_link)
|
|
|
+
|
|
|
# Split "Dwelling, outbuilding..., 100 Turners Road, Cradoc (CT-237651/1)"
|
|
|
- # into description and address at the first ", <number> " pattern
|
|
|
+ # into description + address at the first ", <digits> " pattern
|
|
|
description, address = if (m = desc_addr.match(/\A(.+?),\s*(\d+\s+\S.+)\z/m))
|
|
|
[m[1].strip, m[2].strip]
|
|
|
else
|