# Kentish Council — Advertised / Planning Applications (site page, not PlanBuild)
#
# Fetches the council's advertised-applications page, extracts one record per
# listed development application (DA) and upserts each record into TABLE.
require "nokogiri"
require "uri"
require "cgi"
require_relative "../lib/enrich"
require_relative "../lib/log"
require_relative "../lib/util"

# Destination table name; ENV.fetch raises KeyError when TABLE_NAME is unset.
TABLE = ENV.fetch("TABLE_NAME") # run_all.sh -> da_kentish

# Set this to the exact page you use for Kentish (from your original file)
URL = "https://www.kentish.tas.gov.au/services/building-and-planning-services/planningapp"

# NOTE(review): DB and Http are not required directly in this file; presumably
# they are loaded by one of the ../lib requires above — confirm.
DB.ensure_table!(TABLE)

# Resolve +href+ against +base+ and return an absolute URL string.
#
# @param base [String] URL of the page the link was found on
# @param href [#to_s] raw href attribute value (may be nil or blank)
# @return [String, nil] nil when +href+ is blank; +href+ unchanged when it
#   already starts with http:// or https://; otherwise the joined URL. If the
#   URI library still rejects the input, the stripped +href+ is returned as-is.
def abs_url(base, href)
  h = href.to_s.strip
  return nil if h.empty?
  return h if h.start_with?("http://", "https://")

  # URI.join rejects raw whitespace and non-ASCII characters, both of which are
  # common in uploaded PDF file names. Percent-encode those bytes first so the
  # link is resolved instead of falling through to the rescue below.
  safe = h.gsub(/[^\x21-\x7E]/) { |ch| ch.bytes.map { |b| format("%%%02X", b) }.join }
  URI.join(base, safe).to_s
rescue URI::Error
  # Best effort: keep the raw href rather than dropping the document link.
  h
end

# Kentish uses K-DA{number}/{year} format, e.g. K-DA016/2026
REF_RX = /\bK-DA\d+\/20\d{2}\b/i

# Extract DA records from the parsed listing page.
#
# @param doc [Nokogiri::HTML::Document] parsed listing page
# @param base_url [String] URL used to resolve relative document links
# @return [Array<Hash>] one hash per DA with the keys :council_reference,
#   :address, :description, :on_notice_to, :on_notice_to_raw and :document_url
def parse_items(doc, base_url)
  rows = []

  # Each DA is a <li> with a PDF link in the title
  # Link text: "K-DA016/2026 41 George Road, Nook - proposed 2 Lot Subdivision (submissions by 21/04/2026)"
  doc.css("li.generic-list__item").each do |li|
    link = li.at_css("h3.generic-list__title a, a[href$='.pdf']")
    next unless link

    # Drop the "(PDF File ...)" suffix and collapse runs of whitespace.
    raw_text = link.text.gsub(/\(PDF File[^)]*\)/i, "").gsub(/\s+/, " ").strip
    pdf_href = link["href"].to_s

    # Items without a recognisable council reference are skipped.
    ref_match = raw_text.match(REF_RX)
    next unless ref_match

    ref = ref_match[0]
    rest = raw_text.sub(ref, "").strip

    # Extract on-notice date: "(submissions by 21/04/2026)"
    on_raw = rest[/\(submissions\s+by\s+([^)]+)\)/i, 1]&.strip || ""
    # NOTE(review): Util.parse_aus_date is defined elsewhere; presumably it
    # parses dd/mm/yyyy and tolerates an empty string — confirm.
    on_dt = Util.parse_aus_date(on_raw)

    # Strip the on-notice clause and split "address - description"
    body = rest.sub(/\s*\(submissions\s+by\s+[^)]+\)/i, "").strip
    if (m = body.match(/\A(.+?)\s+-\s+(.+)\z/))
      address = m[1].strip
      description = m[2].strip
    else
      # No " - " separator: treat the whole text as the address.
      address = body
      description = "Development Application"
    end
    next if address.empty?

    rows << {
      council_reference: ref,
      address: address[0, 255], # keep at most the first 255 characters
      description: description,
      on_notice_to: on_dt,
      on_notice_to_raw: on_raw,
      document_url: abs_url(base_url, pdf_href)
    }
  end

  rows
end

# Fetch the listing page; a failed fetch ends the run with a non-zero status.
begin
  html = Http.get(URL)
rescue StandardError => e
  Log.warn "kentish", "Failed to fetch #{URL}: #{e.class} #{e.message}"
  exit 1
end

# Kentish Council's site is protected by Cloudflare JS challenge.
# When blocked, the page title is "Just a moment..." and contains no DA data.
# Note: Kentish DAs are also published on PlanBuild (council code KEN),
# so planbuild.rb covers this council independently.
if html.include?("Just a moment") || html.include?("Enable JavaScript and cookies")
  Log.warn "kentish", "Site is returning a Cloudflare challenge page — cannot scrape without browser-level JS execution. DAs for this council are available via planbuild.rb (council code KEN)."
  puts "Done #{TABLE}. Saved 0 item(s) — site blocked by Cloudflare."
  # Exit status 0: a blocked page is reported but not treated as a failure.
  exit 0
end

doc = Nokogiri::HTML(html)
items = parse_items(doc, URL)
puts "Found #{items.length} item(s) for #{TABLE}"

saved = 0
items.each do |r|
  begin
    DB.upsert(TABLE, {
      description: r[:description],
      on_notice_to: r[:on_notice_to],
      on_notice_to_raw: r[:on_notice_to_raw],
      address: r[:address],
      council_reference: r[:council_reference],
      document_url: r[:document_url],
      applicant: "",
      owner: ""
    })
    enrich_after_upsert!(
      table: TABLE,
      council_reference: r[:council_reference],
      address: r[:address]
    )
    Log.info "kentish", "Upserted #{r[:council_reference]} -> #{r[:address]}"
    saved += 1
  rescue StandardError => e
    # A failure on one record (upsert or enrichment) is logged and the loop
    # carries on with the next record.
    Log.warn "kentish", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
  end
end

puts "Done #{TABLE}. Saved #{saved} item(s)."