# Derwent Valley Council — Development Applications being advertised
#
# Source: https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category...=Public+Notice
#
# The site is Cloudflare-protected — requires homepage warmup with browser-like
# headers (same technique as burnie.rb / kingisland.rb).
#
# The news listing links go through lgasa-search.lga.sa.gov.au → squiz.cloud.
# Rather than following that full redirect chain, we extract the `index_url`
# parameter from each lgasa href, which points at lgasa-web.squiz.cloud/?a=ID.
# A single (non-following) GET to that URL returns a Location header with the
# real derwentvalley.tas.gov.au detail page URL.
#
# Detail page structure (the HTML tags of this sample were lost; only the text
# content survives, and the reference/address cell boundary below is inferred):
#   table header row:  APP No. | SITE | PROPOSAL
#   table data row:    DA 2026/023 | 160 Wyre Forest Road, Molesworth | Dwelling and outbuilding
#   paragraph:         ...received no later than 5.00pm on 15 April 2026...
#   link text:         plans
require "date"
require "nokogiri"
require "net/http"
require "uri"
require_relative "../lib/db"
require_relative "../lib/enrich"
require_relative "../lib/log"
require_relative "../lib/util"

TABLE    = ENV.fetch("TABLE_NAME") # run_all.sh sets from filename: da_derwentvalley
BASE_URL = "https://www.derwentvalley.tas.gov.au"
NEWS_URL = "#{BASE_URL}/home/latest-news?f.News+category%7CnewsCategory=Public+Notice"

DB.ensure_table!(TABLE)

# ----- Browser-like headers (WAF/Cloudflare warmup) -----
BASE_HEADERS = {
  "User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
  "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language" => "en-AU,en;q=0.9",
  "Accept-Encoding" => "identity",
  "Upgrade-Insecure-Requests" => "1",
  "Sec-Fetch-Dest" => "document",
  "Sec-Fetch-Mode" => "navigate",
  "Sec-Fetch-Site" => "none",
  "Sec-Fetch-User" => "?1",
  "sec-ch-ua" => '"Chromium";v="127", "Not)A;Brand";v="99", "Google Chrome";v="127"',
  "sec-ch-ua-mobile" => "?0",
  "sec-ch-ua-platform" => '"Windows"',
  "Connection" => "close",
}.freeze

# Minimal per-host cookie store. It keeps only the name=value part of each
# Set-Cookie header (attributes such as Path or Expires are discarded) and
# hands the pairs back as a single Cookie header value.
class CookieJar
  def initialize
    @h = {}
  end

  # Returns the stored Cookie header value for +host+, or "" when there is none.
  def for(host)
    @h[host] || ""
  end

  # Merges every Set-Cookie header of +resp+ into the cookies held for +host+.
  # A cookie whose name is already stored is overwritten with the new value.
  def merge_from(resp, host)
    cookies = resp.get_fields("Set-Cookie") || []
    return if cookies.empty?

    existing = parse_header(@h[host])
    cookies.each do |sc|
      # Only the leading "name=value" segment matters; an empty header value
      # yields an empty name and is skipped instead of raising.
      kv = sc.to_s.split(";", 2).first.to_s
      k, v = kv.split("=", 2)
      name = k.to_s.strip
      existing[name] = v.to_s unless name.empty?
    end
    @h[host] = existing.map { |k, v| "#{k}=#{v}" }.join("; ")
  end

  private

  # Splits a "a=1; b=2" Cookie header value into a Hash of name => value.
  def parse_header(s)
    s.to_s.split(";").map(&:strip).filter_map { |kv|
      k, v = kv.split("=", 2)
      [k, v] unless k.to_s.empty?
    }.to_h
  end
end

# Returns +href+ as an absolute URL string. Inputs that already carry a scheme
# are returned untouched; anything else is resolved against +base+. A value
# that URI cannot parse is returned unchanged so callers keep the raw text.
def absolute_url(href, base)
  return href if href.match?(/\A[a-z][a-z0-9+.-]*:/i)

  URI.join(base.to_s, href).to_s
rescue URI::Error
  href
end

# GET url; follow redirects; return [final_url_string, body, http_code].
#
# jar        - CookieJar read before, and updated after, every request.
# referer    - optional Referer header for the first request.
# fetch_site - Sec-Fetch-Site header value for the first request.
# follow     - when false, a 3xx response is returned as-is.
#
# At most 8 requests are made. Any StandardError is logged and reported as
# [url, "", 0] rather than raised.
def http_get(url, jar:, referer: nil, fetch_site: "none", follow: true)
  uri = URI(url)
  hdrs = BASE_HEADERS.merge("Sec-Fetch-Site" => fetch_site)
  hdrs["Referer"] = referer if referer
  cookie = jar.for(uri.host)
  hdrs["Cookie"] = cookie unless cookie.empty?

  limit = 8
  code = 0
  body = ""

  while limit > 0
    limit -= 1
    redirect_to = nil

    req = Net::HTTP::Get.new(uri, hdrs)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
                    read_timeout: 30, open_timeout: 15) do |http|
      resp = http.request(req)
      jar.merge_from(resp, uri.host)
      code = resp.code.to_i
      if follow && [301, 302, 303, 307, 308].include?(code) && resp["location"]
        redirect_to = URI.join(uri, resp["location"])
      else
        body = resp.body.to_s
      end
    end

    break unless redirect_to

    uri = redirect_to
    # Update Referer and Sec-Fetch-Site for subsequent hops
    hdrs["Referer"] = uri.to_s
    hdrs["Sec-Fetch-Site"] = "same-origin"
    cookie = jar.for(uri.host)
    if cookie.empty?
      # Drop the header entirely: a nil value is not a valid header and makes
      # Net::HTTP emit a "nil HTTP header" warning in verbose mode.
      hdrs.delete("Cookie")
    else
      hdrs["Cookie"] = cookie
    end
  end

  [uri.to_s, body, code]
rescue StandardError => e
  Log.warn "derwentvalley", "HTTP error for #{url}: #{e.class} #{e.message}"
  [url, "", 0]
end

# Resolve lgasa redirect href -> real derwentvalley.tas.gov.au URL.
# Extracts index_url from the lgasa query string, then makes a non-following
# GET to squiz.cloud/?a=ID and reads the Location header.
#
# Returns the target as an absolute URL string, or nil when the href has no
# index_url, the response has no Location header, or the request fails (the
# failure is logged).
def resolve_detail_url(lgasa_href)
  query = URI.decode_www_form(URI(lgasa_href).query.to_s).to_h
  index_url = query["index_url"]
  return nil if index_url.to_s.empty?

  uri = URI(index_url)
  req = Net::HTTP::Get.new(uri, "User-Agent" => BASE_HEADERS["User-Agent"])
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
                  open_timeout: 10, read_timeout: 10) do |http|
    loc = http.request(req)["location"].to_s
    # Location may be a relative reference; anchor it to the URL just requested
    # so the caller always receives something it can fetch.
    return absolute_url(loc, uri) unless loc.empty?
  end
  nil
rescue StandardError => e
  Log.warn "derwentvalley", "Could not resolve squiz redirect #{index_url}: #{e.class} #{e.message}"
  nil
end

# Parse a detail page for DA data.
#
# html     - detail page markup.
# page_url - URL the markup came from; relative PDF links are resolved
#            against it.
#
# Returns an Array of Hashes, one per table row whose first cell looks like
# "DA yyyy/n". The dates and document URL are page-level values shared by
# every row. Returns [] when no such row exists.
def parse_detail(html, page_url)
  doc = Nokogiri::HTML(html)

  # Table: header row "APP No. | SITE | PROPOSAL", then data rows.
  # Selecting "table tr" once visits each row a single time even when tables
  # are nested.
  data_rows = doc.css("table tr").filter_map do |tr|
    cells = tr.css("td").map { |td| td.text.gsub(/\u00a0|\s+/, " ").strip }
    next if cells.empty?
    next if cells.join.match?(/\AAPP\s*No\.?/i) # skip header row
    next unless cells[0].match?(/\bDA\s*\d{4}\/\d+/i)

    cells
  end
  return [] if data_rows.empty?

  body_text = doc.css(".content-container, main").first&.text.to_s
                 .gsub(/\u00a0/, " ").gsub(/\s+/, " ")

  # Closing date from "received no later than ... DATE"
  on_notice_to_raw = ""
  on_notice_to = nil
  if (m = body_text.match(/no\s+later\s+than\b.{0,60}?(\d{1,2}\s+[A-Za-z]{3,}\s+\d{4})/i))
    on_notice_to_raw = m[1].strip
    on_notice_to = Util.parse_aus_date(on_notice_to_raw)
  end

  # Commencing / start date → date_received
  date_received_raw = ""
  date_received = nil
  if (m = body_text.match(/commencing\s+on\s+(\d{1,2}\s+[A-Za-z]{3,}\s+\d{4})/i)) ||
     (m = body_text.match(/Start\s+Date\s+(\d{1,2}\/\d{1,2}\/\d{2,4})/i))
    date_received_raw = m[1].strip
    date_received = Util.parse_aus_date(date_received_raw)
  end

  # PDF link within the content area only. Hrefs are made absolute first so a
  # site-relative link passes the "derwentvalley" host filter as well.
  doc_url = doc.css(".content-container a[href]")
               .map { |a| absolute_url(a["href"].to_s, page_url) }
               .find { |href| href.match?(/\.pdf/i) && href.include?("derwentvalley") }

  data_rows.filter_map do |cells|
    ref = cells[0].to_s.strip
    address = cells[1].to_s.strip
    next if ref.empty? || address.empty?

    description = cells[2].to_s.strip
    description = "Development Application" if description.empty?

    {
      council_reference: ref,
      address: address,
      description: description,
      date_received: date_received,
      date_received_raw: date_received_raw,
      on_notice_to: on_notice_to,
      on_notice_to_raw: on_notice_to_raw,
      document_url: doc_url
    }
  end
end

# ----- Warmup then fetch news listing -----
jar = CookieJar.new

Log.info "derwentvalley", "Warming up via homepage..."
_url0, _body0, code0 = http_get("#{BASE_URL}/", jar: jar)
Log.info "derwentvalley", "Homepage: #{code0}"
sleep(0.5)

Log.info "derwentvalley", "Fetching news listing..."
_url1, html1, code1 = http_get(NEWS_URL, jar: jar, referer: "#{BASE_URL}/", fetch_site: "same-origin")
Log.info "derwentvalley", "News listing: #{code1} (#{html1.bytesize} bytes)"

# A non-200 status or a very small body is treated as a failed fetch; the run
# then ends cleanly with zero items instead of erroring.
if code1 != 200 || html1.bytesize < 5_000
  Log.warn "derwentvalley", "Could not fetch news listing (status #{code1}). " \
    "DAs for this council are also available via planbuild.rb (council code DER)."
  puts "Done #{TABLE}. Saved 0 item(s)."
  exit 0
end

if html1.include?("Just a moment") || html1.include?("Enable JavaScript and cookies")
  Log.warn "derwentvalley", "Cloudflare challenge page returned. " \
    "DAs for this council are also available via planbuild.rb (council code DER)."
  puts "Done #{TABLE}. Saved 0 item(s)."
  exit 0
end

# ----- Extract detail page URLs from news listing -----
list_doc = Nokogiri::HTML(html1)

detail_urls = list_doc.css("li.news-listing__item a[href]").filter_map do |a|
  href = a["href"].to_s
  next unless href.include?("lgasa-search")

  url = resolve_detail_url(href)
  url unless url.to_s.empty?
end
detail_urls.uniq!
# ----- Fetch every detail page and store the DA rows found on it -----
Log.info "derwentvalley", "Found #{detail_urls.length} detail page(s)"

saved = 0

detail_urls.each do |page|
  Log.info "derwentvalley", "Fetching #{page}"
  sleep(0.4)

  _landed_at, page_html, status = http_get(page, jar: jar, referer: NEWS_URL, fetch_site: "same-origin")

  # Same acceptance rule as the listing fetch: status 200 and at least 5,000 bytes.
  usable = status == 200 && page_html.bytesize >= 5_000
  unless usable
    Log.warn "derwentvalley", "Detail page failed (#{status}): #{page}"
    next
  end

  rows = parse_detail(page_html, page)
  if rows.empty?
    Log.warn "derwentvalley", "No DA records parsed from #{page}"
    next
  end

  rows.each do |row|
    reference = row[:council_reference]
    location = row[:address]

    DB.upsert(TABLE, {
      council_reference: reference,
      address: location[0, 255],
      description: row[:description],
      date_received: row[:date_received],
      date_received_raw: row[:date_received_raw],
      on_notice_to: row[:on_notice_to],
      on_notice_to_raw: row[:on_notice_to_raw],
      document_url: row[:document_url],
      applicant: "",
      owner: ""
    })
    enrich_after_upsert!(table: TABLE, council_reference: reference, address: location)

    Log.info "derwentvalley", "Upserted #{reference} -> #{location}"
    saved += 1
  rescue StandardError => e
    # A failure on one row is logged and the remaining rows are still processed.
    Log.warn "derwentvalley", "DB error for #{reference}: #{e.class} #{e.message}"
  end
end

puts "Done #{TABLE}. Saved #{saved} item(s)."