2 ay önce · 1ad06e1c48
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -29,7 +29,9 @@
 
				       "Bash(grep -l \"def abs_url\" *.rb)",
			
 
				       "WebFetch(domain:www.southernmidlands.tas.gov.au)",
			
 
				       "Bash(python3 -)",
			
 
				-      "WebFetch(domain:www.huonvalley.tas.gov.au)"
			
 
				+      "WebFetch(domain:www.huonvalley.tas.gov.au)",
			
 
				+      "Bash(curl -s -A \"Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36\" \"https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/\")",
			
 
				+      "Bash(python3 -c \" import sys from html.parser import HTMLParser class P\\(HTMLParser\\): def __init__\\(self\\): super\\(\\).__init__\\(\\) self.depth = 0 self.capture = False self.tag = None def handle_starttag\\(self, tag, attrs\\): d = dict\\(attrs\\) cls = d.get\\('class',''\\) if 'accordion' in cls or 'plan-file' in cls: print\\(f'<{tag} class=\\\\\"{cls}\\\\\">'\\) self.capture = True def handle_data\\(self, data\\): if self.capture and data.strip\\(\\): print\\(f' TEXT: {data.strip\\(\\)[:120]}'\\) def handle_endtag\\(self, tag\\): if tag in \\('h2','h3','a','p','div'\\) and self.capture: self.capture = False P\\(\\).feed\\(sys.stdin.read\\(\\)\\) \")"
			
 
				     ]
			
 
				   }
			
 
				 }
			
--- a/scrapers/huonvalley.rb
+++ b/scrapers/huonvalley.rb
@@ -1,13 +1,12 @@
 
				 # Huon Valley Council — Advertised Applications (site page, not PlanBuild)
			
 
				 # Source: https://www.huonvalley.tas.gov.au/development/planning/advertised-applications/
			
 
				 #
			
 
				-# Page structure per application (flat siblings, no wrapper div):
			
 
				-#   <h2>DA-37/2026</h2>
			
 
				-#   <p>Description, Address (CT-land-title-ref)</p>
			
 
				-#   <h3>More Information</h3>
			
 
				-#   <a href="mapbox...">...</a>
			
 
				-#   <h3>Available Documents:</h3>
			
 
				-#   <a href="sharepoint...">Copy of application for viewing</a>
			
 
				+# Page structure per application:
			
 
				+#   <div class="accordion-grid-item">
			
 
				+#     <h2 class="accordion-grid-item__title">DA-37/2026</h2>
			
 
				+#     <div class="accordion-grid-item__description">Description, Address (CT-ref)</div>
			
 
				+#     <a class="plan-file-list__item" href="sharepoint...">Copy of application for viewing</a>
			
 
				+#   </div>
			
 
				 
			
 
				 require "nokogiri"
			
 
				 require "uri"
			
@@ -37,35 +36,18 @@ def parse_page(html, base_url)
 
				     doc  = Nokogiri::HTML(html)
			
 
				     rows = []
			
 
				 
			
 
				-    # Drive from each plain <h2> whose text matches the DA ref pattern
			
 
				-    doc.css("h2").each do |h2|
			
 
				-        ref = h2.text.strip
			
 
				-        next unless ref.match?(REF_RX)
			
 
				-
			
 
				-        desc_addr    = nil
			
 
				-        document_url = nil
			
 
				-
			
 
				-        sib = h2.next_element
			
 
				-        15.times do
			
 
				-            break if sib.nil?
			
 
				-            # First <p> after the heading holds description + address
			
 
				-            if sib.name == "p" && desc_addr.nil?
			
 
				-                desc_addr = sib.text.strip.gsub(/\s+/, " ")
			
 
				-            end
			
 
				-            # Document link follows <h3>Available Documents:</h3>
			
 
				-            if sib.name == "a" && sib.text.strip.match?(/copy of application for viewing/i)
			
 
				-                document_url = abs_url(base_url, sib["href"])
			
 
				-                break
			
 
				-            end
			
 
				-            # Stop at the next application's <h2>
			
 
				-            break if sib.name == "h2" && sib.text.strip.match?(REF_RX)
			
 
				-            sib = sib.next_element
			
 
				-        end
			
 
				+    doc.css("div.accordion-grid-item").each do |item|
			
 
				+        ref       = item.at_css("h2.accordion-grid-item__title")&.text&.strip
			
 
				+        desc_addr = item.at_css("div.accordion-grid-item__description")&.text&.strip&.gsub(/\s+/, " ")
			
 
				+        doc_link  = item.at_css("a.plan-file-list__item")&.[]("href")
			
 
				 
			
 
				+        next if ref.nil? || !ref.match?(REF_RX)
			
 
				         next if desc_addr.nil? || desc_addr.empty?
			
 
				 
			
 
				+        document_url = abs_url(base_url, doc_link)
			
 
				+
			
 
				         # Split "Dwelling, outbuilding..., 100 Turners Road, Cradoc (CT-237651/1)"
			
 
				-        # into description and address at the first ", <number> " pattern
			
 
				+        # into description + address at the first ", <digits> " pattern
			
 
				         description, address = if (m = desc_addr.match(/\A(.+?),\s*(\d+\s+\S.+)\z/m))
			
 
				             [m[1].strip, m[2].strip]
			
 
				         else