# Circular Head Council — Planning page list scraper
- require "nokogiri"
- require_relative "../lib/enrich"
- require_relative "../lib/log"
# Destination table name, read from the TABLE_NAME environment variable
# (run_all.sh -> da_circularhead). ENV.fetch raises KeyError when it is unset.
TABLE = ENV.fetch("TABLE_NAME")

# Council planning page whose link list is scraped.
URL = "https://www.circularhead.tas.gov.au/council-services/development/planning"

# NOTE(review): presumably creates the table when it is missing — confirm in the DB helper.
DB.ensure_table!(TABLE)
# Resolve a link found on a page into an absolute URL.
#
# @param base [String] absolute URL of the page the link was found on
# @param href [String, nil] raw href attribute value (may be relative)
# @return [String] the absolute URL; "" when +href+ is nil or blank; the
#   whitespace-trimmed +href+ itself when it cannot be parsed or joined
def abs_url(base, href)
  # Join the trimmed value: surrounding whitespace makes URI.join fail, which
  # would otherwise leave an otherwise valid relative link unresolved.
  link = href.to_s.strip
  return "" if link.empty?

  begin
    URI.join(base, link).to_s
  rescue URI::Error, ArgumentError
    # Best effort: keep the malformed link as-is rather than dropping it.
    link
  end
end
# Scrape the planning page: each <li class="link-listing__no-icon"> that
# contains a link is upserted into TABLE as one row.
html = Http.get(URL)
doc = Nokogiri::HTML(html)
items = doc.css("li.link-listing__no-icon")
puts "Found #{items.length} items for #{TABLE}"

saved = 0
items.each do |li|
  a = li.at_css("a")
  next unless a

  title_reference = a.text.to_s.strip
  document_url = abs_url(URL, a["href"].to_s)

  # The link title is split on " - ": the first segment is taken as the council
  # reference, the last segment (up to its first "(") as the description, and
  # anything in between as the address.
  parts = title_reference.split(" - ")
  council_reference = parts.first.to_s.strip
  description = parts.last.to_s.split("(").first.to_s.strip
  address = if parts.length > 2
              parts[1..-2].join(" - ").strip
            else
              # No middle section: fall back to the title, capped at 140 characters.
              title_reference[0, 140]
            end

  # Both key fields are required; skip entries that lack either.
  next if council_reference.empty? || address.empty?

  DB.upsert(TABLE, {
    description: description,
    # The list view carries no dates.
    date_received: nil,
    date_received_raw: "",
    address: address,
    council_reference: council_reference,
    applicant: "",
    owner: ""
  })

  enrich_after_upsert!(
    table: TABLE,
    council_reference: council_reference,
    address: address
  )

  # Save the link and title as a best-effort extra: a Mysql2::Error (e.g. the
  # columns do not exist) is logged and the row still counts as saved.
  upd = nil
  begin
    upd = DB.client.prepare("UPDATE `#{DB.client.escape(TABLE)}` SET document_url = ?, title_reference = ? WHERE council_reference = ? AND address = ?")
    upd.execute(document_url, title_reference, council_reference, address)
  rescue Mysql2::Error => e
    Log.warn "scraper", "[circularhead] db update skipped for #{council_reference}: #{e.message}"
  ensure
    # A statement is prepared per item; close it so server-side statement
    # handles do not pile up until garbage collection.
    # NOTE(review): assumes DB.client is a Mysql2::Client — confirm.
    upd&.close
  end

  puts "Upserted #{council_reference} -> #{address}"
  saved += 1
end
puts "Done #{TABLE}. Saved #{saved} item(s)."
|