# Hobart City Council – PlanBuild "Currently Advertised" scraper
# Table name is injected by run_all.sh as TABLE_NAME=da_hobartcity
#
# Flow: fetch the PlanBuild advertisement search page, walk every candidate
# result block, pull address / reference / description / lodged date out of it
# (structured columns first, label-value text as a fallback), upsert one row
# per item into TABLE, then hand the row to enrich_after_upsert!.

require "nokogiri"
require "open-uri"
require_relative "../lib/db"
require_relative "../lib/util"
require_relative "../lib/http"
require_relative "../lib/enrich"

# Raises KeyError when TABLE_NAME is not set in the environment.
TABLE = ENV.fetch("TABLE_NAME")
URL = "https://portal.planbuild.tas.gov.au/external/advertisement/search"

# Optional: restrict results to one LGA (substring match).
# An empty value disables the filter.
COUNCIL_FILTER = ENV.fetch("COUNCIL_FILTER", "Hobart City Council").strip

# PlanBuild markup shifts occasionally. We try a few result wrappers.
RESULT_SELECTOR =
  ".advertisement-result, .panel.panel-default, .panel.panel-info, .result-row, .row".freeze
ADDRESS_SELECTOR = ".col-xs-8, .col-sm-8, .address, [data-field='address']".freeze
REFERENCE_SELECTOR = ".col-xs-4, .col-sm-4, .reference, [data-field='reference']".freeze

# All patterns below run against block text whose whitespace runs have already
# been collapsed to single spaces, so none of them needs a "two or more
# spaces" terminator.
# They are defined once here rather than inside the loop: assigning a constant
# inside the per-block iteration re-assigns it on every pass and triggers
# "already initialized constant" warnings.
ITEM_HINT_RX   = /Application|Reference|Council/i
ADDRESS_RX     = /Address:\s*(.+?)(?:Reference:|$)/i
REF_RX         = %r{(Application|Reference)\s*(No\.?|Number)?:\s*([A-Za-z0-9\-._/]+)}i
COUNCIL_RX     = /Council:\s*([A-Za-z \-]+Council)/i
DESCRIPTION_RX = /(Type of Work|Proposal|Description):\s*(.+?)(?:Address:|Application|Reference|$)/i
DATE_RX        = /(Date Lodged|Date Received|Lodged):\s*([0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4})/i

DB.ensure_table!(TABLE)

html = Http.get(URL)
doc = Nokogiri::HTML(html)

# NOTE(review): RESULT_SELECTOR is broad (e.g. `.row`), so nested wrappers may
# yield the same advert more than once. Presumably DB.upsert de-duplicates on
# council_reference — confirm.
result_blocks = doc.css(RESULT_SELECTOR)

found = 0

result_blocks.each do |blk|
  # Normalise the block to a single line of single-spaced text.
  text = blk.text.strip.gsub(/\s+/, " ")

  # Skip blocks that do not look like a single advertised item
  next unless text.match?(ITEM_HINT_RX)

  # Extract fields using common column patterns first
  address = blk.at_css(ADDRESS_SELECTOR)&.text&.strip.to_s
  council_reference = blk.at_css(REFERENCE_SELECTOR)&.text&.strip.to_s

  # Fallbacks from label-value pairs (e.g., "Address: …", "Reference: …")
  if address.empty? && (m = text.match(ADDRESS_RX))
    address = m[1].strip
  end

  if council_reference.empty? && (m = text.match(REF_RX))
    # Group 3 is mandatory in REF_RX, so it is always present on a match.
    council_reference = m[3].strip
  end

  # Try to find the LGA/council name in the block text
  # Common patterns: "Council: Hobart City Council" or a badge/label nearby
  council_name = nil
  if (m = text.match(COUNCIL_RX))
    council_name = m[1].strip
  end

  # Light filter: when a filter is set, test it against the explicit council
  # field if one was found, otherwise against the whole block text.
  unless COUNCIL_FILTER.empty?
    haystack = council_name || text
    next unless haystack.include?(COUNCIL_FILTER)
  end

  # Optional extras if present in the block
  # Patterns seen across councils vary, so treat all as best-effort
  description = ""
  if (m = text.match(DESCRIPTION_RX))
    description = m[2].strip
  end

  date_received_raw = ""
  if (m = text.match(DATE_RX))
    date_received_raw = m[2].strip
  end
  # Called with "" when no date label was found in the block.
  date_received = Util.parse_aus_date(date_received_raw)

  # If we still don't have key fields, skip
  next if address.empty? || council_reference.empty?

  DB.upsert(TABLE, {
    description: description,
    date_received: date_received,
    date_received_raw: date_received_raw,
    address: address,
    council_reference: council_reference,
    applicant: "", # PlanBuild usually doesn't expose these in the list
    owner: ""
  })

  enrich_after_upsert!(
    table: TABLE,
    council_reference: council_reference,
    address: address
  )

  puts "Upserted #{council_reference} | #{address}"
  found += 1
end

puts "Done #{TABLE}. Saved #{found} item(s)."