# Repository: benjamin.harris/tas_councils
#
# Northern Midlands Council — Advertised Planning Applications
#
# Source: https://northernmidlands.tas.gov.au/planning/development-in-the-northern-midlands/development-applications-2
#
# Page structure:
#   <h2>Closing 17 April 2026</h2>
#   <p>
#     <a href="...pdf"><strong>PLN-26-0030 - 13 Murray Street, Evandale:</strong></a>
#     (CT 21/1332) - Subdivision (2 Lot)
#   </p>

require "nokogiri"
require "uri"

require_relative "../lib/scraper_helpers"
require_relative "../lib/util"
require_relative "../lib/log"

# Destination table name is mandatory: ENV.fetch raises KeyError when
# TABLE_NAME is not set, so the script stops before doing any work.
TABLE = ENV.fetch("TABLE_NAME")

# Listing page that carries the currently advertised applications.
URL = "https://northernmidlands.tas.gov.au/planning/development-in-the-northern-midlands/development-applications-2"

# Council reference as it appears in entry labels, e.g. "PLN-26-0030".
REF_RX = /\bPLN-\d{2}-\d{4}\b/i

# Table preparation is delegated to the project's DB helper.
DB.ensure_table!(TABLE)

# Download the listing page and parse it into a Nokogiri document.
html = Http.get(URL)
doc  = Nokogiri::HTML(html)

# State shared with the parse loop: collected rows, plus the closing date
# (parsed and raw) taken from the most recent "Closing ..." heading.
items = []
closing_date, closing_date_raw = nil, ""

# Collapse every run of ASCII whitespace and/or non-breaking spaces into a
# single space and trim both ends. Ruby's \s does not match U+00A0, so it is
# listed explicitly; one character class (rather than an alternation) keeps
# "nbsp + space" from becoming two spaces.
squish = ->(text) { text.gsub(/[\u00a0\s]+/, " ").strip }

# Walk nodes in document order so h2 headings set the closing date for
# the <p> entries that follow them. An h2 that is not a "Closing ..."
# heading leaves the previously seen closing date in effect.
doc.css("h2, p").each do |node|
  if node.name == "h2"
    if (m = squish.call(node.text).match(/Closing\s+(.+)/i))
      closing_date_raw = m[1].strip
      closing_date     = Util.parse_aus_date(closing_date_raw)
    end
    next
  end

  # <p> — only paragraphs that contain a link are candidate entries
  link = node.at_css("a[href]")
  next unless link

  # Prefer the <strong> text as the label; fall back to the link text.
  label = squish.call((node.at_css("strong") || link).text)
  # e.g. "PLN-26-0030 - 13 Murray Street, Evandale:"

  ref = label[REF_RX]
  next unless ref

  # Address: everything after "PLN-XX-XXXX - " with trailing colon stripped.
  # The separator may be a hyphen, en dash or em dash.
  address = label.sub(/\APLN-\d{2}-\d{4}\s*[-\u2013\u2014]\s*/i, "")
                 .sub(/:?\s*\z/, "")
                 .strip
  next if address.empty?

  # Remainder of the <p> text (outside the link/strong) gives description + CT.
  # The paragraph text is normalised BEFORE the label is removed: the label
  # is already normalised, so removing it from the raw text would silently
  # fail whenever the markup contains nbsp, newlines or repeated spaces.
  paragraph = squish.call(node.text)
  remainder = squish.call(paragraph.sub(label, ""))
  # e.g. "(CT 189429/1) - Multiple Dwelling (1 existing 1 new manager's residence)"

  # nil.to_s yields "" when no CT reference is present.
  title_reference = remainder.match(/CT\s+([\d\/]+)/i)&.[](1).to_s

  # Drop the leading "(CT ...)" group and its optional dash separator.
  description = remainder.sub(/\A\s*\(CT[^)]*\)\s*[-\u2013\u2014]?\s*/i, "").strip
  description = "Development Application" if description.empty?

  document_url = abs_url(URL, link["href"].to_s)

  items << {
    council_reference: ref,
    address:           address,
    description:       description,
    on_notice_to:      closing_date,
    on_notice_to_raw:  closing_date_raw,
    title_reference:   title_reference,
    document_url:      document_url
  }
end

# Report how many entries were scraped before writing anything.
puts "Found #{items.length} item(s) for #{TABLE}"

# Columns copied straight from each scraped item into the stored row,
# in the order they are written.
scraped_columns = %i[
  council_reference
  address
  description
  on_notice_to
  on_notice_to_raw
  title_reference
]

items.each do |item|
  row = scraped_columns.each_with_object({}) do |column, acc|
    acc[column] = item[column]
  end
  # The listing page does not expose these, so they are stored blank.
  row[:applicant] = ""
  row[:owner]     = ""

  # The PDF link travels separately from the row, as an extra.
  extras = { document_url: item[:document_url] }

  upsert_and_enrich!(table: TABLE, row: row, extras: extras)
end

puts "Done #{TABLE}. Saved #{items.length} item(s)."