# Tasman Council — Advertised Applications (site page, not PlanBuild)
#
# Fetches the council's advertised-applications page, reads one record per
# ".wpfilebase-file-default" element and upserts it into TABLE.
require "nokogiri"
require "date"
require_relative "../lib/http"
require_relative "../lib/util"
require_relative "../lib/scraper_helpers"

TABLE = ENV.fetch("TABLE_NAME") # run_all.sh sets from filename: da_tasman
URL = "https://tasman.tas.gov.au/advertised-applications/"

DB.ensure_table!(TABLE)

# Nokogiri CSS :contains(...) is a bit special. Use a safer find for the Date row.
#
# Looks inside the item's ".details" element for the first table row that has
# at least two cells and whose first cell contains the word "Date"
# (case-insensitive), and returns the stripped text of its second cell.
#
# @param row [Nokogiri::XML::Node] one listing item from the page
# @return [String] the raw date text, or "" when there is no details element
#   or no matching row
def find_date_from_details(row)
  details = row.at_css(".details")
  return "" unless details

  details.css("tr").each do |tr|
    cells = tr.css("td")
    next if cells.length < 2

    return cells[1].text.strip if cells[0].text.match?(/\bDate\b/i)
  end

  ""
end

html = Http.get(URL)
doc = Nokogiri::HTML(html)
items = doc.css(".wpfilebase-file-default")
puts "Found #{items.length} items for #{TABLE}"

saved = 0

items.each do |row|
  link = row.at_css(".filetitle a")
  next unless link

  title_text = link.text.strip
  document_url = abs_url(URL, link["href"])

  # Common pattern on this page is "REF - Address - On notice date".
  # Fall back to (at most) the first 120 characters of the title when the
  # leading segment is blank.
  council_reference = title_text.split(" - ").first.to_s.strip
  council_reference = title_text[0, 120] if council_reference.empty?

  # Use title as address if nothing cleaner is available
  address = title_text

  # NOTE(review): the on-notice date often appears inside the title
  # (e.g. "12 March 2024"), but no on-notice field is passed to
  # upsert_and_enrich! below, so it is not extracted here. Confirm whether
  # the table should store it before adding it to the row.

  # Application date is shown inside the details table under "Date".
  # Try the shared Australian-date parser first, then the long form
  # "Weekday, DD Month, YYYY"; an unparseable value is stored as nil.
  date_received_raw = find_date_from_details(row)
  date_received = Util.parse_aus_date(date_received_raw) || begin
    raw = date_received_raw.to_s
    raw.empty? ? nil : Date.strptime(raw, "%A, %d %B, %Y")
  rescue StandardError
    nil
  end

  # Require core fields
  next if council_reference.empty? || address.empty?

  upsert_and_enrich!(
    table: TABLE,
    row: {
      description: "Development Application",
      date_received: date_received,
      date_received_raw: date_received_raw,
      address: address,
      council_reference: council_reference,
      applicant: "",
      owner: ""
    },
    extras: { document_url: document_url }
  )
  saved += 1
end

puts "Done #{TABLE}. Saved #{saved} item(s)."