# Dorset Council — Advertised Development Applications
#
# Source: https://www.dorset.tas.gov.au/online-development-application-enquiry
#
# Page structure — each application is a link (<a href>) inside a <p>, with
# link text such as:
#
#   PLA/2026/22: Residential dwelling and carport addition - Chris Triebe
#   and Associates Town Planning Services - 13 Gladstone Road
#   Herrick - Closes 18.04.2026
#
# Text format: REF: DESCRIPTION - APPLICANT - ADDRESS - Closes DD.MM.YYYY
#
# Note: the old eServices portal (eservices.dorset.tas.gov.au) is still live
# and was the previous data source. The council now publishes the advertised
# list on their main website with direct PDF links, which is simpler to scrape.

require "date"
require "nokogiri"
require "uri"
require "fileutils"
require_relative "../lib/scraper_helpers"
require_relative "../lib/util"
require_relative "../lib/log"

TABLE = ENV.fetch("TABLE_NAME")
URL = "https://www.dorset.tas.gov.au/online-development-application-enquiry"
DOWNLOAD_ATTACHMENTS = ENV["DOWNLOAD_ATTACHMENTS"] == "1"
DOWNLOAD_DIR = ENV["DOWNLOAD_DIR"] || "/app/downloads"

DB.ensure_table!(TABLE)

# Council reference, e.g. "PLA/2026/22".
REF_RX = /\bPLA\/\d{4}\/\d+\b/i
# Closing date, e.g. "Closes 18.04.2026"; "." and "-" separators are both accepted.
CLOSE_RX = /\bCloses\s+(\d{1,2}[.\-]\d{1,2}[.\-]\d{4})\b/i

# Replaces every run of characters other than word characters, "-" and "."
# with a single underscore so the value can be used as a path component.
def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")

# Downloads the document at +url+ into
# DOWNLOAD_DIR/dorset/<sanitised council_reference>/.
#
# Returns the "/files/dorset/..." path of the saved file, or nil when +url+
# is blank or anything fails. Failures are logged as warnings rather than
# raised, so one bad attachment does not abort the run.
def download_pdf(url, council_reference)
  return nil if url.to_s.strip.empty?

  dir = File.join(DOWNLOAD_DIR, "dorset", safe_name(council_reference))
  FileUtils.mkdir_p(dir)

  fname = safe_name(File.basename(URI.parse(url).path))
  fname = "document.pdf" if fname.empty?
  path = File.join(dir, fname)

  body = Http.get(url, headers: { "Accept" => "application/pdf,*/*", "Referer" => URL })
  File.binwrite(path, body)
  puts "  saved #{fname} (#{body.bytesize} bytes)"

  "/files/dorset/#{safe_name(council_reference)}/#{fname}"
rescue StandardError => e
  Log.warn "dorset", "Download failed for #{url}: #{e.class} #{e.message}"
  nil
end

# Parses a closing date captured by CLOSE_RX into a Date.
#
# CLOSE_RX accepts both "." and "-" as separators, so the separators are
# normalised before parsing; otherwise "18-04-2026" would match the regex
# but never parse. Returns nil when the digits are not a real calendar date.
def parse_close_date(raw)
  Date.strptime(raw.tr("-", "."), "%d.%m.%Y")
rescue Date::Error
  nil
end

# Parses the whitespace-normalised text of one application link
# ("REF: DESCRIPTION - APPLICANT - ADDRESS - Closes DD.MM.YYYY").
#
# Returns a Hash with :council_reference, :address, :description, :applicant,
# :on_notice_to (Date or nil) and :on_notice_to_raw (String, "" when absent),
# or nil when the text has no council reference or no address.
def parse_listing(text)
  ref = text[REF_RX]
  return nil unless ref

  # Strip the "PLA/YYYY/NNN: " prefix.
  remainder = text.sub(/\A#{Regexp.escape(ref)}:\s*/i, "")

  # Extract the closing date and strip the trailing " - Closes ..." segment.
  close_raw = ""
  on_notice_to = nil
  if (close_m = remainder.match(CLOSE_RX))
    close_raw = close_m[1]
    on_notice_to = parse_close_date(close_raw)
    remainder = remainder.sub(/\s*-\s*#{Regexp.escape(close_m[0])}\s*\z/i, "").strip
  end

  # Remaining text: "Description - Applicant - Address".
  # Last part = address, second-to-last = applicant, everything before that
  # = description (re-joined, since a description may itself contain " - ").
  parts = remainder.split(/\s+-\s+/)
  if parts.length >= 3
    *description_parts, applicant, address = parts.map(&:strip)
    description = description_parts.join(" - ").strip
  elsif parts.length == 2
    description, address = parts.map(&:strip)
    applicant = ""
  else
    address = remainder.strip
    applicant = ""
    description = "Development Application"
  end

  return nil if address.empty?

  description = "Development Application" if description.empty?

  {
    council_reference: ref,
    address: address,
    description: description,
    applicant: applicant,
    on_notice_to: on_notice_to,
    on_notice_to_raw: close_raw
  }
end

html = Http.get(URL)
doc = Nokogiri::HTML(html)

# Every link inside a paragraph is a candidate; links whose text does not
# parse as an application are dropped.
items = doc.css("p a[href]").filter_map do |a|
  text = a.text.gsub(/[[:space:]]+/, " ").strip
  listing = parse_listing(text)
  next unless listing

  listing.merge(document_url: abs_url(URL, a["href"].to_s.strip))
end

puts "Found #{items.length} item(s) for #{TABLE}"

items.each do |r|
  local_url = DOWNLOAD_ATTACHMENTS ? download_pdf(r[:document_url], r[:council_reference]) : nil

  upsert_and_enrich!(
    table: TABLE,
    row: {
      council_reference: r[:council_reference],
      address: r[:address],
      description: r[:description],
      applicant: r[:applicant],
      on_notice_to: r[:on_notice_to],
      on_notice_to_raw: r[:on_notice_to_raw],
      owner: ""
    },
    extras: {
      document_url: r[:document_url],
      local_document_url: local_url
    }
  )
end

puts "Done #{TABLE}. Saved #{items.length} item(s)."