# King Island Council — Advertised Development Applications
#
# Source: https://kingisland.tas.gov.au/develop/planning/
#
# The site returns HTTP 403 on direct requests but succeeds after a homepage
# warmup using browser-like headers (same technique as burnie.rb).
# Accept-Encoding: identity is used to avoid gzip decompression complexity.
#
# Page structure (WordPress accordion, id="accordion-1-c4"):
#
...(preamble)...
#Notice of Planning Application – DA 2025/28 15 Kurrajong Street, # Grassy, TAS 7256 – Visitor/workers' Accommodation.
#...representations no later than 2 April 2026...
#

require "date"
require "nokogiri"
require "net/http"
require "uri"

require_relative "../lib/db"
require_relative "../lib/enrich"
require_relative "../lib/log"
require_relative "../lib/util"

TABLE    = ENV.fetch("TABLE_NAME") # run_all.sh sets from filename: da_kingisland
BASE_URL = "https://kingisland.tas.gov.au"
URL      = "#{BASE_URL}/develop/planning/"

DB.ensure_table!(TABLE)

# ----- Browser-like headers (WAF warmup technique from burnie.rb) -----

UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " \
     "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"

BASE_HEADERS = {
  "User-Agent"                => UA,
  "Accept"                    => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language"           => "en-AU,en;q=0.8",
  "Accept-Encoding"           => "identity",
  "Upgrade-Insecure-Requests" => "1",
  "Sec-Fetch-Dest"            => "document",
  "Sec-Fetch-Mode"            => "navigate",
  "Sec-Fetch-Site"            => "none",
  "Sec-Fetch-User"            => "?1",
  "sec-ch-ua"                 => '"Chromium";v="124", "Not.A/Brand";v="24", "Google Chrome";v="124"',
  "sec-ch-ua-platform"        => '"Windows"',
  "sec-ch-ua-mobile"          => "?0",
  "Connection"                => "close",
}.freeze

# Minimal per-host cookie store.
#
# Keeps only the name=value pair of each Set-Cookie header (attributes such as
# Path or Expires are discarded) and renders the stored pairs back as a single
# Cookie request-header value.
class CookieJar
  def initialize
    @h = {}
  end

  # Returns the Cookie header value stored for +host+, or "" when nothing
  # has been stored for it yet.
  def for(host)
    @h[host] || ""
  end

  # Merges every Set-Cookie header of +resp+ into the cookies kept for +host+.
  # A cookie with the same name as a stored one overwrites it.
  # Does nothing when the response carries no Set-Cookie header.
  def merge_from(resp, host)
    cookies = resp.get_fields("Set-Cookie") || []
    return if cookies.empty?

    existing = parse_header(@h[host])
    cookies.each do |sc|
      # Only the leading "name=value" part matters; `.to_s` guards against an
      # empty header value, for which split(...).first would be nil.
      kv = sc.to_s.split(";", 2).first.to_s
      k, v = kv.split("=", 2)
      name = k.to_s.strip
      existing[name] = v.to_s unless name.empty?
    end
    @h[host] = existing.map { |k, v| "#{k}=#{v}" }.join("; ")
  end

  private

  # Parses a "a=1; b=2" Cookie header value into a { "a" => "1", "b" => "2" } hash.
  def parse_header(s)
    s.to_s.split(";").map(&:strip).filter_map { |kv|
      k, v = kv.split("=", 2)
      [k, v] unless k.to_s.empty?
    }.to_h
  end
end

# Performs a GET with browser-like headers, following up to 5 hops of
# 301/302/303/307/308 redirects.
#
# url        - String URL to fetch.
# jar:       - CookieJar; cookies for the current host are sent, and any
#              Set-Cookie headers received are merged back into it.
# referer:   - optional Referer header value.
# fetch_site: - value for the Sec-Fetch-Site header.
#
# Returns [status_code, body]. If all 5 hops were redirects, the last
# redirect status is returned with an empty body. On any StandardError the
# error is logged and [0, ""] is returned.
def http_get(url, jar:, referer: nil, fetch_site: "none")
  uri  = URI(url)
  code = 0
  body = ""

  5.times do
    # Headers are rebuilt on every hop so that cookies set by a redirect
    # response are sent on the follow-up request, and so that only the
    # cookies belonging to the current host are sent.
    hdrs = BASE_HEADERS.merge("Sec-Fetch-Site" => fetch_site)
    hdrs["Referer"] = referer if referer
    cookie = jar.for(uri.host)
    hdrs["Cookie"] = cookie unless cookie.empty?

    redirect_to = nil
    req = Net::HTTP::Get.new(uri, hdrs)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
      resp = http.request(req)
      jar.merge_from(resp, uri.host)
      code = resp.code.to_i
      if [301, 302, 303, 307, 308].include?(code) && resp["location"]
        # Location may be relative; resolve it against the current URI.
        redirect_to = URI.join(uri, resp["location"])
      else
        body = resp.body.to_s
      end
    end

    break unless redirect_to

    uri = redirect_to
  end

  [code, body]
rescue StandardError => e
  Log.warn "kingisland", "HTTP error for #{url}: #{e.class} #{e.message}"
  [0, ""]
end

# ----- Warmup: hit homepage first to get cookies, then fetch planning page -----

jar = CookieJar.new

Log.info "kingisland", "Warming up via homepage..."
code0, _body0 = http_get("#{BASE_URL}/", jar: jar)
Log.info "kingisland", "Homepage: #{code0}"

sleep(0.5)

Log.info "kingisland", "Fetching planning page..."
code1, html = http_get(URL, jar: jar, referer: "#{BASE_URL}/", fetch_site: "same-origin")
Log.info "kingisland", "Planning page: #{code1} (#{html.bytesize} bytes)"

# A non-200 status or a very small body is treated as a failed fetch.
if code1 != 200 || html.bytesize < 5_000
  Log.warn "kingisland", "Could not fetch planning page (status #{code1}). " \
    "King Island DAs are also available via planbuild.rb (council code KIS -> da_kingisland)."
  exit 0
end

if html.include?("Just a moment") || html.include?("Enable JavaScript and cookies")
  Log.warn "kingisland", "Cloudflare challenge returned. " \
    "King Island DAs are also available via planbuild.rb (council code KIS -> da_kingisland)."
  exit 0
end

# ----- Parse -----

# Ref format: DA 2025/28 (year/sequential)
REF_RX = /\bDA\s*\d{4}\/\d{1,4}\b/i

doc = Nokogiri::HTML(html)

# The advertised applications are inside div#accordion-1-c4.
# If the div id ever changes, fall back to finding the h2 by text.
# Locate the container holding the advertised applications: the known
# accordion div, or else the parent of the first <h2> whose text contains
# "advertised development" (case-insensitive via translate()).
# at_xpath returns a single node or nil, so `&.parent` is safe; xpath would
# return a NodeSet, which is never nil and has no #parent.
section = doc.at_css("div#accordion-1-c4") ||
          doc.at_xpath(
            '//h2[contains(translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", ' \
            '"abcdefghijklmnopqrstuvwxyz"), "advertised development")]'
          )&.parent

unless section
  Log.warn "kingisland", "Could not find 'Advertised development applications' section on page."
  puts "Done #{TABLE}. Saved 0 item(s)."
  exit 0
end

paragraphs = section.css("p").to_a
saved = 0

paragraphs.each_with_index do |para, idx|
  # Collapse every whitespace run (including non-breaking spaces) to one space.
  text = para.text.gsub(/[[:space:]]+/, " ").strip
  next unless (m = text.match(REF_RX))

  ref = m[0].strip.gsub(/\s+/, " ")

  # Strip any "Notice of Planning Application" prefix and the ref itself,
  # leaving "ADDRESS – DESCRIPTION."
  rest = text
    .sub(/Notice\s+of\s+Planning\s+Application\s*[-\u2013\u2014]?\s*/i, "")
    .sub(ref, "")
    .gsub(/\A[\s\-\u2013\u2014]+/, "")
    .gsub(/[.\s]+\z/, "")

  # Split at last " – " (en-dash) or " - " to separate address from description
  if (split_idx = rest.rindex(/\s[\-\u2013\u2014]\s/))
    address     = rest[0, split_idx].strip
    description = rest[(split_idx + 1)..]&.gsub(/\A[\s\-\u2013\u2014]+/, "")&.strip
  else
    address     = rest.strip
    description = "Development Application"
  end
  # Never store a blank description.
  description = "Development Application" if description.to_s.empty?

  next if address.empty?

  # Scan forward up to 5 paragraphs for closing date and PDF link
  on_notice_to_raw = ""
  on_notice_to     = nil
  doc_url          = nil

  (1..5).each do |offset|
    break if idx + offset >= paragraphs.length

    fwd      = paragraphs[idx + offset]
    fwd_text = fwd.text.gsub(/[[:space:]]+/, " ").strip

    # A paragraph carrying a different DA reference starts the next
    # application; anything from here on belongs to that one, not to `ref`.
    fwd_ref = fwd_text[REF_RX]
    break if fwd_ref && !fwd_ref.strip.gsub(/\s+/, " ").casecmp?(ref)

    if on_notice_to_raw.empty? && fwd_text.match?(/no\s+later\s+than|representations|closing/i)
      if (dm = fwd_text.match(/\b(\d{1,2})\s+([A-Za-z]{3,})\s+(\d{4})\b/))
        on_notice_to_raw = "#{dm[1]} #{dm[2]} #{dm[3]}"
        # NOTE(review): Util.parse_aus_date is defined in lib/util; presumably
        # it turns "2 April 2026" into a date value — confirm.
        on_notice_to = Util.parse_aus_date(on_notice_to_raw)
      end
    end

    next unless doc_url.nil?

    # Take the first anchor in this paragraph whose href looks like a PDF
    # (not just the paragraph's first anchor).
    href = fwd.css("a[href]").map { |a| a["href"].to_s.strip }.find { |h| h.match?(/\.pdf/i) }
    next unless href

    doc_url =
      if href.match?(%r{\Ahttps?://}i)
        href
      else
        # Resolve relative links against the page URL; keep the raw href if
        # it cannot be parsed as a URI (e.g. it contains spaces).
        begin
          URI.join(URL, href).to_s
        rescue URI::Error
          href
        end
      end
  end

  begin
    DB.upsert(TABLE, {
      council_reference: ref,
      address:           address[0, 255],
      description:       description.to_s,
      date_received:     nil,
      date_received_raw: "",
      on_notice_to:      on_notice_to,
      on_notice_to_raw:  on_notice_to_raw,
      document_url:      doc_url,
      applicant:         "",
      owner:             ""
    })

    enrich_after_upsert!(
      table:             TABLE,
      council_reference: ref,
      address:           address
    )

    Log.info "kingisland", "Upserted #{ref} -> #{address}"
    saved += 1
  rescue StandardError => e
    # A failure on one record is logged and the loop moves on to the next.
    Log.warn "kingisland", "DB error for #{ref}: #{e.class} #{e.message}"
  end
end

puts "Done #{TABLE}. Saved #{saved} item(s)."