| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
# Northern Midlands Council — Advertised Planning Applications
#
# Source: https://northernmidlands.tas.gov.au/planning/development-in-the-northern-midlands/development-applications-2
#
# Page structure:
#   <h2>Closing 17 April 2026</h2>
#   <p>
#     <a href="...pdf"><strong>PLN-26-0030 - 13 Murray Street, Evandale:</strong></a>
#     (CT 21/1332) - Subdivision (2 Lot)
#   </p>
require "nokogiri"
require "uri"
require "fileutils"
require_relative "../lib/scraper_helpers"
require_relative "../lib/util"
require_relative "../lib/log"

# Destination table name; the script aborts with KeyError when TABLE_NAME is unset.
TABLE = ENV.fetch("TABLE_NAME")

# Listing page that is scraped for advertised applications.
URL = "https://northernmidlands.tas.gov.au/planning/development-in-the-northern-midlands/development-applications-2"

# Attachments are only downloaded when DOWNLOAD_ATTACHMENTS is exactly "1".
DOWNLOAD_ATTACHMENTS = ENV.fetch("DOWNLOAD_ATTACHMENTS", nil) == "1"

# Root directory for downloaded attachments.
DOWNLOAD_DIR = ENV.fetch("DOWNLOAD_DIR", "/app/downloads")

# DB is provided by the project libs; presumably this creates the table when
# it is missing — confirm against lib/scraper_helpers.
DB.ensure_table!(TABLE)
# Turns an arbitrary value into a string that is safe to use as a single
# path component.
#
# Every run of characters other than ASCII word characters, "-" and "."
# is replaced by one "_". A result of exactly "." or ".." would refer to the
# current or parent directory once joined into a path, so it becomes "_".
#
# @param s [#to_s] raw name (nil is treated as "")
# @return [String] sanitised name; "" when the input is empty
def safe_name(s)
  cleaned = s.to_s.gsub(/[^\w\-.]+/, "_")
  %w[. ..].include?(cleaned) ? "_" : cleaned
end
# Fetches the document at +url+ and writes it to
# DOWNLOAD_DIR/northernmidlands/<sanitised council_reference>/<file name>.
#
# The file name is the sanitised basename of the URL path; "document.pdf" is
# used when that leaves nothing alphanumeric (empty path, "/", ".", ...).
#
# Best-effort: any StandardError raised while parsing, fetching or writing is
# reported through Log.warn and turned into a nil result.
#
# @param url [String, nil] document URL; surrounding whitespace is ignored
# @param council_reference [String] reference used as the sub-directory name
# @return [String, nil] "/files/northernmidlands/<ref>/<file>" on success,
#   nil when +url+ is blank or the download failed
def download_pdf(url, council_reference)
  # Scraped hrefs can carry stray whitespace, which URI.parse rejects.
  url = url.to_s.strip
  return nil if url.empty?

  ref_dir = safe_name(council_reference)
  dir = File.join(DOWNLOAD_DIR, "northernmidlands", ref_dir)
  FileUtils.mkdir_p(dir)

  fname = safe_name(File.basename(URI.parse(url).path))
  # A name without any letter or digit ("", "_", ".") is not a usable file name.
  fname = "document.pdf" unless fname.match?(/[A-Za-z0-9]/)

  body = Http.get(url)
  File.binwrite(File.join(dir, fname), body)
  puts " saved #{fname} (#{body.bytesize} bytes)"
  "/files/northernmidlands/#{ref_dir}/#{fname}"
rescue StandardError => e
  Log.warn "northernmidlands", "Download failed for #{url}: #{e.class} #{e.message}"
  nil
end
# Council reference as it appears in a link label, e.g. "PLN-26-0030".
REF_RX = /\bPLN-\d{2}-\d{4}\b/i

# Collapses every run of whitespace and/or non-breaking spaces into a single
# space and trims the ends. One character class is used so that mixed runs
# such as "\u00a0 " do not leave double spaces behind.
squish = ->(text) { text.to_s.gsub(/[\u00a0\s]+/, " ").strip }

html = Http.get(URL)
doc = Nokogiri::HTML(html)

items = []
closing_date = nil
closing_date_raw = ""

# Walk <h2> and <p> nodes in document order so that each "Closing ..." heading
# sets the closing date for the <p> entries that follow it.
doc.css("h2, p").each do |node|
  if node.name == "h2"
    heading = squish.call(node.text)
    if (m = heading.match(/Closing\s+(.+)/i))
      closing_date_raw = m[1].strip
      closing_date = Util.parse_aus_date(closing_date_raw)
    end
    next
  end

  # <p> — only paragraphs holding a link whose label carries a PLN reference.
  link = node.at_css("a[href]")
  next unless link

  strong = node.at_css("strong")
  label = squish.call((strong || link).text)

  # e.g. "PLN-26-0030 - 13 Murray Street, Evandale:"
  ref = label.match(REF_RX)&.[](0)
  next unless ref

  # Address: everything after "PLN-XX-XXXX - " (hyphen, en dash or em dash)
  # with the trailing colon stripped.
  address = label
            .sub(/\APLN-\d{2}-\d{4}\s*[-\u2013\u2014]\s*/i, "")
            .sub(/:?\s*\z/, "")
            .strip
  next if address.empty?

  # Remainder of the <p> text (outside the link/strong) gives CT + description.
  # The paragraph text is normalised BEFORE the label is removed: the label is
  # already normalised, so it would not be found in raw text that still
  # contains non-breaking spaces or line breaks.
  remainder = squish.call(node.text).sub(label, "").strip

  # e.g. "(CT 189429/1) - Multiple Dwelling (1 existing 1 new manager's residence)"
  # \b keeps "CT" from matching the tail of a word such as "Precinct 2".
  title_reference = remainder.match(%r{\bCT\s+([\d/]+)}i)&.[](1).to_s
  description = remainder.sub(/\A\s*\(CT[^)]*\)\s*[-\u2013\u2014]?\s*/i, "").strip
  description = "Development Application" if description.empty?

  document_url = abs_url(URL, link["href"].to_s)

  items << {
    council_reference: ref,
    address: address,
    description: description,
    on_notice_to: closing_date,
    on_notice_to_raw: closing_date_raw,
    title_reference: title_reference,
    document_url: document_url
  }
end
puts "Found #{items.length} item(s) for #{TABLE}"

# Columns copied verbatim from each scraped item into the stored row.
row_fields = %i[
  council_reference address description
  on_notice_to on_notice_to_raw title_reference
]

items.each do |item|
  remote_url = item[:document_url]

  # Stays nil unless attachment downloading is switched on.
  local_url = download_pdf(remote_url, item[:council_reference]) if DOWNLOAD_ATTACHMENTS

  # The listing page does not provide applicant/owner, so they are stored blank.
  row = row_fields.to_h { |field| [field, item[field]] }
  row[:applicant] = ""
  row[:owner] = ""

  upsert_and_enrich!(
    table: TABLE,
    row: row,
    extras: { document_url: remote_url, local_document_url: local_url }
  )
end

puts "Done #{TABLE}. Saved #{items.length} item(s)."
|