# Kentish Council — Advertised / Planning Applications (site page, not PlanBuild)
require "nokogiri"
require "uri"
require "cgi"
require_relative "../lib/enrich"
require_relative "../lib/log"
require_relative "../lib/util"
# Destination table name. ENV.fetch raises KeyError when TABLE_NAME is unset,
# so the script aborts early instead of writing to an unnamed table.
TABLE = ENV.fetch("TABLE_NAME") # run_all.sh -> da_kentish
# Council listing page: fetched once below, and also used as the base when
# resolving relative document links.
URL = "https://www.kentish.tas.gov.au/services/building-and-planning-services/planningapp"
# NOTE(review): DB is not required directly in this file; presumably it is
# loaded by one of the lib/ requires above, and ensure_table! presumably
# creates the table when it is missing — confirm against the DB helper.
DB.ensure_table!(TABLE)
# Resolves a link target against the page it was found on.
#
# @param base [String] absolute URL of the page containing the link
# @param href [String, nil] raw href attribute value
# @return [String, nil] nil for a blank href; the href unchanged when it is
#   already an http(s) URL or still cannot be parsed; otherwise the absolute URL
def abs_url(base, href)
  h = href.to_s.strip
  return nil if h.empty?
  return h if h.start_with?("http://", "https://")

  # A literal space makes URI.join raise InvalidURIError, which previously left
  # such links (common for uploaded PDFs) as unresolved relative paths.
  # Percent-encode spaces first; hrefs without spaces are unaffected.
  URI.join(base, h.gsub(" ", "%20")).to_s
rescue URI::InvalidURIError
  # Best effort: hand back the cleaned-up href rather than failing the row.
  h
end
# Matches a Kentish council reference of the form K-DA{number}/{year},
# e.g. K-DA016/2026. Matching is case-insensitive, the year must be 20xx,
# and the reference must sit on word boundaries.
REF_RX = /\bK-DA\d+\/20\d{2}\b/i
# Extracts advertised development applications from the listing page.
#
# @param doc [#css] parsed listing page (a Nokogiri document in this script)
# @param base_url [String] page URL used to resolve relative document links
# @return [Array<Hash>] one hash per application with the keys
#   :council_reference, :address, :description, :on_notice_to,
#   :on_notice_to_raw and :document_url. Items without a link, without a
#   council reference, or with an empty address are skipped.
def parse_items(doc, base_url)
  # Each DA is a <li> with a PDF link in the title.
  # Link text: "K-DA016/2026 41 George Road, Nook - proposed 2 Lot Subdivision (submissions by 21/04/2026)"
  parsed = doc.css("li.generic-list__item").map do |li|
    link = li.at_css("h3.generic-list__title a, a[href$='.pdf']")
    next unless link

    # Drop the "(PDF File, ...)" suffix, then collapse whitespace.
    # [[:space:]] is used instead of \s because \s does not match the
    # non-breaking spaces that &nbsp; produces, which would otherwise defeat
    # the "address - description" split below.
    raw_text = link.text.gsub(/\(PDF File[^)]*\)/i, "").gsub(/[[:space:]]+/, " ").strip
    pdf_href = link["href"].to_s

    ref = raw_text[REF_RX]
    next unless ref

    rest = raw_text.sub(ref, "").strip

    # Extract on-notice date: "(submissions by 21/04/2026)"
    on_raw = rest[/\(submissions\s+by\s+([^)]+)\)/i, 1]&.strip || ""
    on_dt = Util.parse_aus_date(on_raw)

    # Strip the on-notice clause and split "address - description".
    # The lazy first group means the split happens at the first " - ".
    body = rest.sub(/\s*\(submissions\s+by\s+[^)]+\)/i, "").strip
    if (m = body.match(/\A(.+?)\s+-\s+(.+)\z/))
      address = m[1].strip
      description = m[2].strip
    else
      # No separator: treat everything as the address.
      address = body
      description = "Development Application"
    end
    next if address.empty?

    {
      council_reference: ref,
      address: address[0, 255],
      description: description,
      on_notice_to: on_dt,
      on_notice_to_raw: on_raw,
      document_url: abs_url(base_url, pdf_href)
    }
  end
  # `next` leaves nil placeholders for skipped items.
  parsed.compact
end
# Download the listing page; a failed fetch is logged and ends the run with a
# non-zero status.
html =
  begin
    Http.get(URL)
  rescue StandardError => e
    Log.warn "kentish", "Failed to fetch #{URL}: #{e.class} #{e.message}"
    exit 1
  end

# Kentish Council's site is protected by a Cloudflare JS challenge.
# When blocked, the page title is "Just a moment..." and contains no DA data.
# Note: Kentish DAs are also published on PlanBuild (council code KEN),
# so planbuild.rb covers this council independently. A blocked page is
# therefore reported as a clean exit rather than a failure.
challenge_markers = ["Just a moment", "Enable JavaScript and cookies"]
if challenge_markers.any? { |marker| html.include?(marker) }
  Log.warn "kentish", "Site is returning a Cloudflare challenge page — cannot scrape without browser-level JS execution. DAs for this council are available via planbuild.rb (council code KEN)."
  puts "Done #{TABLE}. Saved 0 item(s) — site blocked by Cloudflare."
  exit 0
end

items = parse_items(Nokogiri::HTML(html), URL)
puts "Found #{items.length} item(s) for #{TABLE}"

# Store each application independently: a failure on one row is logged and
# does not stop the remaining rows. Only fully processed rows are counted.
saved = items.count do |item|
  ref = item[:council_reference]
  begin
    record = {
      description: item[:description],
      on_notice_to: item[:on_notice_to],
      on_notice_to_raw: item[:on_notice_to_raw],
      address: item[:address],
      council_reference: ref,
      document_url: item[:document_url],
      applicant: "",
      owner: ""
    }
    DB.upsert(TABLE, record)
    enrich_after_upsert!(
      table: TABLE,
      council_reference: ref,
      address: item[:address]
    )
    Log.info "kentish", "Upserted #{ref} -> #{item[:address]}"
    true
  rescue StandardError => e
    Log.warn "kentish", "DB error for #{ref}: #{e.class} #{e.message}"
    false
  end
end

puts "Done #{TABLE}. Saved #{saved} item(s)."