2 ヶ月前 · 7bb466cba8
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -50,7 +50,11 @@
 
				       "Read(//c/Users/lumion/AppData/Local/Temp/**)",
			
 
				       "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' https://www.latrobe.tas.gov.au/services/building-and-planning-services/planningapp -o C:/Users/lumion/AppData/Local/Temp/latrobe.html)",
			
 
				       "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Upgrade-Insecure-Requests: 1' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' -c C:/Users/lumion/AppData/Local/Temp/latrobe_cookies.txt https://www.latrobe.tas.gov.au/ -o /dev/null)",
			
 
				-      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.latrobe.tas.gov.au/' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/latrobe_cookies.txt https://www.latrobe.tas.gov.au/services/building-and-planning-services/planningapp -o C:/Users/lumion/AppData/Local/Temp/latrobe2.html)"
			
 
				+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.latrobe.tas.gov.au/' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/latrobe_cookies.txt https://www.latrobe.tas.gov.au/services/building-and-planning-services/planningapp -o C:/Users/lumion/AppData/Local/Temp/latrobe2.html)",
			
 
				+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Upgrade-Insecure-Requests: 1' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' https://www.derwentvalley.tas.gov.au/ -c C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt -o /dev/null -w '%{http_code}')",
			
 
				+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.derwentvalley.tas.gov.au/' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt https://www.derwentvalley.tas.gov.au/home/card-listing/development-applications -o C:/Users/lumion/AppData/Local/Temp/dv_list.html -w '%{http_code} %{size_download} bytes')",
			
 
				+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.derwentvalley.tas.gov.au/' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt 'https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice' -o C:/Users/lumion/AppData/Local/Temp/dv_news.html -w '%{http_code} %{size_download} bytes')",
			
 
				+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.derwentvalley.tas.gov.au/home/latest-news' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt https://www.derwentvalley.tas.gov.au/home/latest-news/application-for-planning-approval-160-wyre-forest-road-molesworth -o C:/Users/lumion/AppData/Local/Temp/dv_detail1.html -w '%{http_code} %{size_download} bytes')"
			
 
				     ]
			
 
				   }
			
 
				 }
			
--- a/scrapers/derwentvalley.rb
+++ b/scrapers/derwentvalley.rb
@@ -1,185 +1,318 @@
 
				 # Derwent Valley Council — Development Applications being advertised
			
 
				-# Primary list: https://www.derwentvalley.tas.gov.au/home/card-listing/development-applications
			
 
				-# Fallback list (Public Notice posts): https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice
			
 
				+#
			
 
				+# Source: https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category...=Public+Notice
			
 
				+#
			
 
				+# The site is Cloudflare-protected — requires homepage warmup with browser-like
			
 
				+# headers (same technique as burnie.rb / kingisland.rb).
			
 
				+#
			
 
				+# The news listing links go through lgasa-search.lga.sa.gov.au → squiz.cloud.
			
 
				+# Rather than following that full redirect chain, we extract the `index_url`
			
 
				+# parameter from each lgasa href, which points at lgasa-web.squiz.cloud/?a=ID.
			
 
				+# A single (non-following) GET to that URL returns a Location header with the
			
 
				+# real derwentvalley.tas.gov.au detail page URL.
			
 
				+#
			
 
				+# Detail page structure:
			
 
				+#   <table><tbody>
			
 
				+#     <tr><td>APP No.</td><td>SITE</td><td>PROPOSAL</td></tr>
			
 
				+#     <tr><td>DA 2026/023</td><td>160 Wyre Forest Road, Molesworth</td>
			
 
				+#         <td>Dwelling and outbuilding</td></tr>
			
 
				+#   </tbody></table>
			
 
				+#   <p>...received no later than 5.00pm on 15 April 2026...</p>
			
 
				+#   <div class="content-container"><a href="...DA-2026-023.pdf">plans</a></div>
			
 
				 
			
 
				+require "date"
			
 
				 require "nokogiri"
			
 
				+require "net/http"
			
 
				+require "uri"
			
 
				 
			
 
				-require_relative "../lib/scraper_helpers"
			
 
				-require_relative "../lib/util"
			
 
				+require_relative "../lib/db"
			
 
				+require_relative "../lib/enrich"
			
 
				 require_relative "../lib/log"
			
 
				-TABLE        = ENV.fetch("TABLE_NAME")  # run_all.sh -> da_derwentvalley
			
 
				-LIST_URL     = "https://www.derwentvalley.tas.gov.au/home/card-listing/development-applications"
			
 
				-NEWS_URL     = "https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice"
			
 
				+require_relative "../lib/util"
			
 
				+
			
 
				+TABLE    = ENV.fetch("TABLE_NAME")  # run_all.sh sets from filename: da_derwentvalley
			
 
				+BASE_URL = "https://www.derwentvalley.tas.gov.au"
			
 
				+NEWS_URL = "#{BASE_URL}/home/latest-news?f.News+category%7CnewsCategory=Public+Notice"
			
 
				 
			
 
				 DB.ensure_table!(TABLE)
			
 
				 
			
 
				-# Common reference forms: "DA 2025/097"
			
 
				-REF_RX = %r{\bDA\s*(20\d{2})\s*/\s*([A-Za-z0-9\-_.]+)}i
			
 
				-def extract_ref(s)
			
 
				-  t = s.to_s
			
 
				-  if (m = t.match(REF_RX))
			
 
				-    return "DA #{m[1]} / #{m[2]}"
			
 
				-  end
			
 
				-  nil
			
 
				-end
			
 
				+# ----- Browser-like headers (WAF/Cloudflare warmup) -----
			
 
				+BASE_HEADERS = {
			
 
				+    "User-Agent"                => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
			
 
				+    "Accept"                    => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
			
 
				+    "Accept-Language"           => "en-AU,en;q=0.9",
			
 
				+    "Accept-Encoding"           => "identity",
			
 
				+    "Upgrade-Insecure-Requests" => "1",
			
 
				+    "Sec-Fetch-Dest"            => "document",
			
 
				+    "Sec-Fetch-Mode"            => "navigate",
			
 
				+    "Sec-Fetch-Site"            => "none",
			
 
				+    "Sec-Fetch-User"            => "?1",
			
 
				+    "sec-ch-ua"                 => '"Chromium";v="127", "Not)A;Brand";v="99", "Google Chrome";v="127"',
			
 
				+    "sec-ch-ua-mobile"          => "?0",
			
 
				+    "sec-ch-ua-platform"        => '"Windows"',
			
 
				+    "Connection"                => "close",
			
 
				+}.freeze
			
 
				 
			
 
				-def extract_date_token(s)
			
 
				-  text = s.to_s
			
 
				-  return $1 if text =~ /(\b\d{1,2}\/\d{1,2}\/\d{2,4}\b)/
			
 
				-  return $1 if text =~ /(\b\d{1,2}\s+[A-Za-z]{3,}\s+\d{4}\b)/
			
 
				-  return $1 if text =~ /(\b[A-Za-z]{3,}\s+\d{1,2},?\s+\d{4}\b)/
			
 
				-  ""
			
 
				-end
			
 
				+class CookieJar
			
 
				+    def initialize; @h = {}; end
			
 
				+
			
 
				+    def for(host)
			
 
				+        @h[host] || ""
			
 
				+    end
			
 
				 
			
 
				-def extract_on_notice_raw(text)
			
 
				-  s = text.to_s.gsub(/\s+/, " ")
			
 
				-  # Look for wording like "Submissions must be received by ...", "close on ...", "on notice until ..."
			
 
				-  if s =~ /(submissions?|representations?)\s+(must\s+be\s+)?(received|made|close|closing)\s+(by|on)\s*[:\-]?\s*([A-Za-z0-9\/ ,]+)/i
			
 
				-    d = extract_date_token($5)
			
 
				-    return d unless d.empty?
			
 
				-  end
			
 
				-  if s =~ /\bon\s*notice\s*(until|to)\s*[:\-]?\s*([A-Za-z0-9\/ ,]+)/i
			
 
				-    d = extract_date_token($2)
			
 
				-    return d unless d.empty?
			
 
				-  end
			
 
				-  extract_date_token(s)
			
 
				# Merge the Set-Cookie headers of +resp+ into the cookies stored for +host+.
				#
				# Only the leading name=value pair of each Set-Cookie header is kept;
				# attributes such as Path or Expires are discarded. A cookie sent under
				# an already-stored name replaces the stored value. Blank or nameless
				# headers are skipped rather than raising, so one malformed header
				# cannot abort the surrounding request.
				#
				# @param resp [#get_fields] response object; get_fields("Set-Cookie")
				#   must return an Array of header strings or nil
				# @param host [String] key under which the cookies are stored
				# @return [String, nil] the updated "k=v; k2=v2" string, or nil when
				#   the response carried no Set-Cookie header
				def merge_from(resp, host)
				    set_cookies = resp.get_fields("Set-Cookie") || []
				    return if set_cookies.empty?

				    stored = parse_header(@h[host])
				    set_cookies.each do |header|
				        # "".split(";", 2).first is nil, hence the to_s guards.
				        pair = header.to_s.split(";", 2).first.to_s
				        name, value = pair.split("=", 2)
				        name = name.to_s.strip
				        next if name.empty?

				        stored[name] = value.to_s.strip
				    end
				    @h[host] = stored.map { |name, value| "#{name}=#{value}" }.join("; ")
				end
			
 
				+
			
 
				+    private
			
 
				+
			
 
				+    def parse_header(s)
			
 
				+        s.to_s.split(";").map(&:strip).filter_map { |kv|
			
 
				+            k, v = kv.split("=", 2)
			
 
				+            [k, v] unless k.to_s.empty?
			
 
				+        }.to_h
			
 
				+    end
			
 
				 end
			
 
				 
			
 
				-def parse_detail(url)
			
 
				-  html = Http.get(url)
			
 
				-  doc  = Nokogiri::HTML(html)
			
 
				-
			
 
				-  title = doc.at_css("h1, .entry-title")&.text&.strip.to_s
			
 
				-  body_text = doc.at_css("main")&.text.to_s
			
 
				-  body_text = doc.text.to_s if body_text.strip.empty?
			
 
				-
			
 
				-  council_reference = extract_ref(title) || extract_ref(body_text)
			
 
				-  # Address often sits in the title after " - "
			
 
				-  address = if title.include?(" - ")
			
 
				-    title.split(" - ", 2)[1].to_s.strip
			
 
				-  else
			
 
				-    # Fallback: first line with a number and street
			
 
				-    line = body_text.split(/\n/).find { |l| l =~ /\d{1,4}\s+\S+/ }
			
 
				-    line.to_s.strip
			
 
				-  end
			
 
				-  address = title if address.to_s.strip.empty?
			
 
				-
			
 
				-  pdf_a = doc.at_css("a[href$='.pdf'], a[href*='.pdf?']")
			
 
				-  pdf   = pdf_a ? abs_url(url, pdf_a["href"].to_s) : ""
			
 
				-
			
 
				-  on_raw = extract_on_notice_raw(body_text)
			
 
				-  on_dt  = Util.parse_aus_date(on_raw)
			
 
				-
			
 
				-  return nil if council_reference.to_s.strip.empty? || address.to_s.strip.empty?
			
 
				-
			
 
				-  {
			
 
				-    council_reference: council_reference,
			
 
				-    address: address,
			
 
				-    description: "Development Application",
			
 
				-    date_received_raw: on_raw,
			
 
				-    date_received: on_dt,
			
 
				-    document_url: pdf,
			
 
				-    title_reference: title
			
 
				-  }
			
 
				# Set or clear the Cookie entry of +hdrs+ from what +jar+ holds for +host+.
				# Removing the key (instead of storing nil) keeps nil header values out of
				# the hash handed to Net::HTTP.
				def apply_cookie_header(hdrs, jar, host)
				    cookie = jar.for(host)
				    if cookie.empty?
				        hdrs.delete("Cookie")
				    else
				        hdrs["Cookie"] = cookie
				    end
				end

				# GET +url+ with the browser-like BASE_HEADERS, replaying and collecting
				# cookies through +jar+.
				#
				# Redirects (301/302/303/307/308 with a Location header) are followed when
				# +follow+ is true, for at most eight requests in total. On each hop:
				#   * Referer becomes the URL that issued the redirect (not the target),
				#   * Sec-Fetch-Site is "same-origin" when the host is unchanged and
				#     "cross-site" otherwise (an approximation based on host only),
				#   * the Cookie header is rebuilt for the new host.
				#
				# @param url [String] absolute URL to fetch
				# @param jar [#for, #merge_from] cookie store keyed by host
				# @param referer [String, nil] Referer for the first request
				# @param fetch_site [String] Sec-Fetch-Site for the first request
				# @param follow [Boolean] whether to follow redirects
				# @return [Array(String, String, Integer)] final URL, body, status code;
				#   [url, "", 0] when any StandardError is raised (it is logged, not re-raised)
				def http_get(url, jar:, referer: nil, fetch_site: "none", follow: true)
				    uri  = URI(url)
				    hdrs = BASE_HEADERS.merge("Sec-Fetch-Site" => fetch_site)
				    hdrs["Referer"] = referer if referer
				    apply_cookie_header(hdrs, jar, uri.host)

				    code = 0
				    body = ""

				    8.times do
				        redirect_to = nil

				        req = Net::HTTP::Get.new(uri, hdrs)
				        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
				                        read_timeout: 30, open_timeout: 15) do |http|
				            resp = http.request(req)
				            jar.merge_from(resp, uri.host)
				            code = resp.code.to_i

				            if follow && [301, 302, 303, 307, 308].include?(code) && resp["location"]
				                redirect_to = URI.join(uri, resp["location"])
				            else
				                body = resp.body.to_s
				            end
				        end

				        break unless redirect_to

				        # Prepare headers for the next hop before switching to the new URI.
				        hdrs["Referer"]        = uri.to_s
				        hdrs["Sec-Fetch-Site"] = redirect_to.host == uri.host ? "same-origin" : "cross-site"
				        uri = redirect_to
				        apply_cookie_header(hdrs, jar, uri.host)
				    end

				    [uri.to_s, body, code]
				rescue StandardError => e
				    Log.warn "derwentvalley", "HTTP error for #{url}: #{e.class} #{e.message}"
				    [url, "", 0]
				end
			
 
				 
			
 
				-def detail_links_from_list(list_url)
			
 
				-  html = Http.get(list_url)
			
 
				-  doc  = Nokogiri::HTML(html)
			
 
				-  # Cards or list items link to detail posts
			
 
				-  links = doc.css("a").map { |a|
			
 
				-    href = a["href"].to_s
			
 
				-    next if href.empty? || href.start_with?("#")
			
 
				-    abs_url(list_url, href)
			
 
				-  }.compact.uniq
			
 
				-
			
 
				-  # Keep obvious news or notice items
			
 
				-  links.select { |u|
			
 
				-    u.include?("/home/latest-news/") || u.include?("/news/") || u =~ /application-for-planning-approval/i
			
 
				-  }
			
 
				# Resolve an lgasa-search redirect href to the URL it ultimately points at.
				#
				# The href's query string carries an `index_url` parameter. A single GET
				# (redirects are not followed) is sent to that URL and the response's
				# Location header is returned. A relative Location is resolved against
				# `index_url`; an absolute one is returned unchanged.
				#
				# @param lgasa_href [String] href taken from the news listing
				# @return [String, nil] the redirect target, or nil when the href has no
				#   `index_url`, the response has no Location header, or any
				#   StandardError occurs (it is logged, not re-raised)
				def resolve_detail_url(lgasa_href)
				    index_url = URI.decode_www_form(URI(lgasa_href).query.to_s).to_h["index_url"].to_s
				    return nil if index_url.empty?

				    uri = URI(index_url)
				    req = Net::HTTP::Get.new(uri, "User-Agent" => BASE_HEADERS["User-Agent"])
				    location = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
				                               open_timeout: 10, read_timeout: 10) do |http|
				        http.request(req)["location"].to_s
				    end
				    return nil if location.empty?

				    # Absolute targets pass through untouched; only relative ones need a base.
				    return location if location.match?(%r{\A[a-z][a-z0-9+.\-]*://}i)

				    begin
				        URI.join(uri, location).to_s
				    rescue URI::Error
				        location
				    end
				rescue StandardError => e
				    # index_url is still nil when the href itself could not be parsed.
				    Log.warn "derwentvalley", "Could not resolve squiz redirect #{index_url || lgasa_href}: #{e.class} #{e.message}"
				    nil
				end
			
 
				 
			
 
				-def detail_links_from_news(news_url)
			
 
				-  html = Http.get(news_url)
			
 
				-  doc  = Nokogiri::HTML(html)
			
 
				-  doc.css("a").map { |a|
			
 
				-    href = a["href"].to_s
			
 
				-    next if href.empty? || href.start_with?("#")
			
 
				-    u = abs_url(news_url, href)
			
 
				-    u if u =~ /application-for-planning-approval/i
			
 
				-  }.compact.uniq
			
 
				# Collapse every run of ASCII whitespace and/or non-breaking spaces into a
				# single space and trim the ends.
				def detail_squish(text)
				    text.to_s.gsub(/[\u00a0\s]+/, " ").strip
				end

				# Collect the cell texts of every table row whose first <td> holds a
				# "DA yyyy/n" reference. The "APP No. | SITE | PROPOSAL" header row and
				# rows without <td> cells are skipped.
				def detail_data_rows(doc)
				    doc.css("table tr").filter_map do |tr|
				        cells = tr.css("td").map { |td| detail_squish(td.text) }
				        next if cells.empty?
				        next if cells.join.match?(/\AAPP\s*No\.?/i) # skip header row
				        next unless cells[0].match?(%r{\bDA\s*\d{4}/\d+}i)

				        cells
				    end
				end

				# Absolute URLs of the PDF links inside .content-container that point at a
				# derwentvalley host. Relative and protocol-relative hrefs are resolved
				# against +page_url+ first.
				def detail_pdf_urls(doc, page_url)
				    doc.css(".content-container a[href]").filter_map do |a|
				        href = a["href"].to_s.strip
				        next unless href.match?(/\.pdf/i)

				        unless href.match?(%r{\A[a-z][a-z0-9+.\-]*://}i)
				            href = begin
				                URI.join(page_url.to_s, href).to_s
				            rescue URI::Error
				                href
				            end
				        end
				        href if href.include?("derwentvalley")
				    end
				end

				# Pick the PDF whose file name contains the year and number of +ref+
				# (e.g. "DA 2026/023" matches "DA-2026-023.pdf"); fall back to the first
				# PDF on the page, or nil when there is none.
				def detail_pdf_for(pdf_urls, ref)
				    if (m = ref.match(%r{(\d{4})\s*/\s*0*(\d+)}))
				        by_name = /#{m[1]}\D{0,3}0*#{m[2]}(?!\d)/
				        hit = pdf_urls.find { |u| u.split("/").last.to_s.match?(by_name) }
				        return hit if hit
				    end
				    pdf_urls.first
				end

				# Parse a detail page into one record per advertised DA.
				#
				# @param html [String] detail page HTML
				# @param page_url [String] URL the HTML was fetched from; used as the base
				#   for relative PDF links
				# @return [Array<Hash>] records with keys :council_reference, :address,
				#   :description, :date_received, :date_received_raw, :on_notice_to,
				#   :on_notice_to_raw, :document_url; empty when no DA table row is found.
				#   The parsed date values are whatever Util.parse_aus_date returns
				#   (presumably a Date or nil — confirm against lib/util).
				def parse_detail(html, page_url)
				    doc = Nokogiri::HTML(html)

				    data_rows = detail_data_rows(doc)
				    return [] if data_rows.empty?

				    body_text = detail_squish(doc.css(".content-container, main").first&.text)

				    # Closing date from "received no later than ... DATE"
				    on_notice_to_raw = ""
				    on_notice_to     = nil
				    if (m = body_text.match(/no\s+later\s+than\b.{0,60}?(\d{1,2}\s+[A-Za-z]{3,}\s+\d{4})/i))
				        on_notice_to_raw = m[1].strip
				        on_notice_to     = Util.parse_aus_date(on_notice_to_raw)
				    end

				    # Commencing / start date -> date_received
				    date_received_raw = ""
				    date_received     = nil
				    if (m = body_text.match(/commencing\s+on\s+(\d{1,2}\s+[A-Za-z]{3,}\s+\d{4})/i)) ||
				       (m = body_text.match(/Start\s+Date\s+(\d{1,2}\/\d{1,2}\/\d{2,4})/i))
				        date_received_raw = m[1].strip
				        date_received     = Util.parse_aus_date(date_received_raw)
				    end

				    pdf_urls = detail_pdf_urls(doc, page_url)

				    data_rows.filter_map do |cells|
				        ref         = cells[0].to_s
				        address     = cells[1].to_s
				        description = cells[2].to_s
				        description = "Development Application" if description.empty?

				        next if ref.empty? || address.empty?

				        {
				            council_reference: ref,
				            address:           address,
				            description:       description,
				            date_received:     date_received,
				            date_received_raw: date_received_raw,
				            on_notice_to:      on_notice_to,
				            on_notice_to_raw:  on_notice_to_raw,
				            document_url:      detail_pdf_for(pdf_urls, ref)
				        }
				    end
				end
			
 
				 
			
 
				-def cloudflare_blocked?(html)
			
 
				-  html.to_s.include?("Just a moment") || html.to_s.include?("Enable JavaScript and cookies")
			
 
				+# ----- Warmup then fetch news listing -----
			
 
				+jar = CookieJar.new
			
 
				+
			
 
				+Log.info "derwentvalley", "Warming up via homepage..."
			
 
				+_url0, _body0, code0 = http_get("#{BASE_URL}/", jar: jar)
			
 
				+Log.info "derwentvalley", "Homepage: #{code0}"
			
 
				+
			
 
				+sleep(0.5)
			
 
				+
			
 
				+Log.info "derwentvalley", "Fetching news listing..."
			
 
				+_url1, html1, code1 = http_get(NEWS_URL, jar: jar, referer: "#{BASE_URL}/", fetch_site: "same-origin")
			
 
				+Log.info "derwentvalley", "News listing: #{code1} (#{html1.bytesize} bytes)"
			
 
				+
			
 
				+if code1 != 200 || html1.bytesize < 5_000
			
 
				+    Log.warn "derwentvalley", "Could not fetch news listing (status #{code1}). " \
			
 
				+        "DAs for this council are also available via planbuild.rb (council code DER)."
			
 
				+    puts "Done #{TABLE}. Saved 0 item(s)."
			
 
				+    exit 0
			
 
				 end
			
 
				 
			
 
				-links = []
			
 
				-begin
			
 
				-  links = detail_links_from_list(LIST_URL)
			
 
				-rescue StandardError => e
			
 
				-  Log.warn "derwentvalley", "List fetch failed, will try news listing: #{e.class} #{e.message}"
			
 
				+if html1.include?("Just a moment") || html1.include?("Enable JavaScript and cookies")
			
 
				+    Log.warn "derwentvalley", "Cloudflare challenge page returned. " \
			
 
				+        "DAs for this council are also available via planbuild.rb (council code DER)."
			
 
				+    puts "Done #{TABLE}. Saved 0 item(s)."
			
 
				+    exit 0
			
 
				 end
			
 
				 
			
 
				-if links.empty?
			
 
				-  begin
			
 
				-    links = detail_links_from_news(NEWS_URL)
			
 
				-  rescue StandardError => e
			
 
				-    Log.warn "derwentvalley", "News fetch failed: #{e.class} #{e.message}"
			
 
				-  end
			
 
				# ----- Extract detail page URLs from news listing -----
				list_doc = Nokogiri::HTML(html1)

				# Every resolve_detail_url call costs one HTTP round-trip, so de-duplicate
				# the lgasa hrefs first (a listing item may presumably link the same
				# article from more than one anchor — not verified against the live page).
				lgasa_hrefs = list_doc.css("li.news-listing__item a[href]")
				                      .map { |a| a["href"].to_s }
				                      .select { |href| href.include?("lgasa-search") }
				                      .uniq

				detail_urls = lgasa_hrefs.filter_map do |href|
				    resolved = resolve_detail_url(href)
				    resolved unless resolved.to_s.empty?
				end
			
 
				 
			
 
				-# Both URLs return a Cloudflare JS-challenge page (HTTP 200 with challenge HTML).
			
 
				-# We can't solve this without browser-level JS execution.
			
 
				-# Derwent Valley DAs are also published on PlanBuild (council code DER),
			
 
				-# so planbuild.rb covers this council independently.
			
 
				-if links.empty?
			
 
				-  begin
			
 
				-    probe = Http.get(LIST_URL)
			
 
				-    if cloudflare_blocked?(probe)
			
 
				-      Log.warn "derwentvalley", "Site is returning a Cloudflare challenge page — cannot scrape without browser-level JS execution. DAs for this council are available via planbuild.rb (council code DER)."
			
 
				-      puts "Done #{TABLE}. Saved 0 item(s) — site blocked by Cloudflare."
			
 
				-      exit 0
			
 
				+detail_urls.uniq!
			
 
				+Log.info "derwentvalley", "Found #{detail_urls.length} detail page(s)"
			
 
				+
			
 
				+saved = 0
			
 
				+
			
 
				+detail_urls.each do |detail_url|
			
 
				+    Log.info "derwentvalley", "Fetching #{detail_url}"
			
 
				+    sleep(0.4)
			
 
				+
			
 
				+    _final_url, html2, code2 = http_get(
			
 
				+        detail_url, jar: jar,
			
 
				+        referer:    NEWS_URL,
			
 
				+        fetch_site: "same-origin"
			
 
				+    )
			
 
				+
			
 
				+    if code2 != 200 || html2.bytesize < 5_000
			
 
				+        Log.warn "derwentvalley", "Detail page failed (#{code2}): #{detail_url}"
			
 
				+        next
			
 
				     end
			
 
				-  rescue StandardError => e
			
 
				-    Log.warn "derwentvalley", "Probe fetch failed: #{e.class} #{e.message}"
			
 
				-  end
			
 
				-end
			
 
				 
			
 
				-links.uniq!
			
 
				+    records = parse_detail(html2, detail_url)
			
 
				+    if records.empty?
			
 
				+        Log.warn "derwentvalley", "No DA records parsed from #{detail_url}"
			
 
				+        next
			
 
				+    end
			
 
				 
			
 
				-puts "Found #{links.length} candidate link(s) for #{TABLE}"
			
 
				+    records.each do |r|
			
 
				+        begin
			
 
				+            DB.upsert(TABLE, {
			
 
				+                council_reference: r[:council_reference],
			
 
				+                address:           r[:address][0, 255],
			
 
				+                description:       r[:description],
			
 
				+                date_received:     r[:date_received],
			
 
				+                date_received_raw: r[:date_received_raw],
			
 
				+                on_notice_to:      r[:on_notice_to],
			
 
				+                on_notice_to_raw:  r[:on_notice_to_raw],
			
 
				+                document_url:      r[:document_url],
			
 
				+                applicant:         "",
			
 
				+                owner:             ""
			
 
				+            })
			
 
				 
			
 
				-saved = 0
			
 
				+            enrich_after_upsert!(
			
 
				+                table:             TABLE,
			
 
				+                council_reference: r[:council_reference],
			
 
				+                address:           r[:address]
			
 
				+            )
			
 
				 
			
 
				-links.each do |u|
			
 
				-  begin
			
 
				-    item = parse_detail(u)
			
 
				-  rescue StandardError => e
			
 
				-    Log.warn "scraper", "Skip #{u}: #{e.class} #{e.message}"
			
 
				-    next
			
 
				-  end
			
 
				-  next unless item
			
 
				-
			
 
				-  upsert_and_enrich!(
			
 
				-    table: TABLE,
			
 
				-    row: {
			
 
				-      description: item[:description],
			
 
				-      date_received: item[:date_received],
			
 
				-      date_received_raw: item[:date_received_raw],
			
 
				-      address: item[:address],
			
 
				-      council_reference: item[:council_reference],
			
 
				-      applicant: "",
			
 
				-      owner: ""
			
 
				-    },
			
 
				-    extras: {
			
 
				-      document_url:     item[:document_url],
			
 
				-      on_notice_to:     item[:date_received],
			
 
				-      on_notice_to_raw: item[:date_received_raw],
			
 
				-      title_reference:  item[:title_reference]
			
 
				-    }
			
 
				-  )
			
 
				-  saved += 1
			
 
				+            Log.info "derwentvalley", "Upserted #{r[:council_reference]} -> #{r[:address]}"
			
 
				+            saved += 1
			
 
				+        rescue StandardError => e
			
 
				+            Log.warn "derwentvalley", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
			
 
				+        end
			
 
				+    end
			
 
				 end
			
 
				 
			
 
				 puts "Done #{TABLE}. Saved #{saved} item(s)."