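# Northern Midlands Council — Advertised Planning Applications
#
# Source: https://northernmidlands.tas.gov.au/planning/development-in-the-northern-midlands/development-applications-2
#
# Page structure (as read by the parser in this file):
#   - <h2> headings containing "Closing <date>" give the closing date for
#     the entries that follow them.
#   - Each entry is a <p> holding a link; its <strong> text (or, failing
#     that, the link text) is the label, e.g.
#       PLN-26-0030 - 13 Murray Street, Evandale:
#     and the rest of the paragraph text carries the title and description:
#       (CT 21/1332) - Subdivision (2 Lot)
#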
require "nokogiri"
require "uri"
require_relative "../lib/scraper_helpers"
require_relative "../lib/util"
require_relative "../lib/log"

# Destination table name; ENV.fetch raises KeyError when TABLE_NAME is unset.
TABLE = ENV.fetch("TABLE_NAME")
URL = "https://northernmidlands.tas.gov.au/planning/development-in-the-northern-midlands/development-applications-2"

DB.ensure_table!(TABLE)

# Council reference, e.g. "PLN-26-0030".
REF_RX = /\bPLN-\d{2}-\d{4}\b/i

# Collapses every run of whitespace and/or non-breaking spaces into a single
# space and trims the ends. NBSP is listed explicitly because Ruby's \s only
# matches ASCII whitespace. Handling both in one character class avoids the
# double spaces that replacing each NBSP separately would leave behind.
squish = ->(text) { text.gsub(/[\u00a0\s]+/, " ").strip }

html = Http.get(URL)
doc = Nokogiri::HTML(html)

items = []
closing_date = nil
closing_date_raw = ""

# Walk nodes in document order so h2 headings set the closing date for
# the entries that follow them.
doc.css("h2, p").each do |node|
  if node.name == "h2"
    heading = squish.call(node.text)
    # A heading without "Closing ..." leaves the previous closing date in
    # place for the entries beneath it.
    if (m = heading.match(/Closing\s+(.+)/i))
      closing_date_raw = m[1].strip
      # NOTE(review): presumably parses an Australian-format date; the result
      # for unparseable input depends on Util — confirm.
      closing_date = Util.parse_aus_date(closing_date_raw)
    end
    next
  end

  # Paragraph — look for a PLN reference inside a link
  link = node.at_css("a[href]")
  next unless link

  strong = node.at_css("strong")
  # e.g. "PLN-26-0030 - 13 Murray Street, Evandale:"
  label = squish.call((strong || link).text)
  ref = label[REF_RX]
  next unless ref

  # Address: everything after "PLN-XX-XXXX - " with trailing colon stripped
  address = label.sub(/\APLN-\d{2}-\d{4}\s*-\s*/i, "").sub(/:?\s*\z/, "").strip
  next if address.empty?

  # Remainder of the paragraph text (outside the link/strong) gives
  # description + CT. The paragraph text is normalised *before* the label is
  # removed: the label is already normalised, so it would not be found in raw
  # text containing NBSPs, newlines or doubled spaces, and the whole paragraph
  # would end up as the description.
  paragraph = squish.call(node.text)
  remainder = squish.call(paragraph.sub(label, ""))

  # e.g. "(CT 189429/1) - Multiple Dwelling (1 existing 1 new manager's residence)"
  title_reference = remainder.match(/CT\s+([\d\/]+)/i)&.[](1).to_s
  description = remainder.sub(/\A\s*\(CT[^)]*\)\s*-?\s*/i, "").strip
  description = "Development Application" if description.empty?

  # NOTE(review): abs_url comes from the project helpers; presumably it
  # resolves the href against URL — confirm.
  document_url = abs_url(URL, link["href"].to_s)

  items << {
    council_reference: ref,
    address: address,
    description: description,
    on_notice_to: closing_date,
    on_notice_to_raw: closing_date_raw,
    title_reference: title_reference,
    document_url: document_url
  }
end

puts "Found #{items.length} item(s) for #{TABLE}"

# The page does not supply applicant/owner, so they are stored as empty
# strings; the document link is passed separately as an extra.
items.each do |r|
  upsert_and_enrich!(
    table: TABLE,
    row: {
      council_reference: r[:council_reference],
      address: r[:address],
      description: r[:description],
      on_notice_to: r[:on_notice_to],
      on_notice_to_raw: r[:on_notice_to_raw],
      title_reference: r[:title_reference],
      applicant: "",
      owner: ""
    },
    extras: { document_url: r[:document_url] }
  )
end

puts "Done #{TABLE}. Saved #{items.length} item(s)."