Browse Source

Ruby Issues

Benjamin Harris 2 tháng trước
mục cha
commit
c1b754848b

+ 1 - 1
scrapers/brighton.rb

@@ -16,7 +16,7 @@ DOWNLOAD_DIR         = ENV["DOWNLOAD_DIR"] || "/app/downloads"
 DB.ensure_table!(TABLE)
 
 # --- helpers ---------------------------------------------------------------
-# DA/APP refs like “DA2025-130”, “DA 2024/174”, etc → “DA YYYY / NNN…”
+# DA/APP refs like "DA2025-130", "DA 2024/174", etc → "DA YYYY / NNN…"
 REF_RX = %r{
 \b(?:DA|APP|APPLICATION)\s*
 (20\d{2})\s*[/\-]?\s*

+ 1 - 1
scrapers/burnie.rb

@@ -37,7 +37,7 @@ BASE_HEADERS = {
   "User-Agent"                => UA,
   "Accept"                    => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
   "Accept-Language"           => "en-AU,en;q=0.8",
-  # Avoid Brotli (Ruby stdlib wont auto-decode it)
+  # Avoid Brotli (Ruby stdlib won't auto-decode it)
   "Accept-Encoding"           => "gzip,deflate",
   "Upgrade-Insecure-Requests" => "1",
   "Sec-Fetch-Dest"            => "document",

+ 1 - 1
scrapers/centralhighlands.rb

@@ -42,7 +42,7 @@ end
 
 def extract_close_raw(text)
   s = text.gsub(/\s+/, " ")
-  # “… until 20 August 2025”
+  # "… until 20 August 2025"
   if (m = s.match(/\buntil\s+([0-9]{1,2}\s+[A-Za-z]{3,}\s+[0-9]{4})\b/i))
     return m[1]
   end

+ 3 - 3
scrapers/clarence.rb

@@ -95,7 +95,7 @@ def parse_date_token(s)
         items = []
 
         # Headings tend to be h2/h3, followed by blocks that contain
-        # “Closes:” and “Application Number:” and a PDF link.
+        # "Closes:" and "Application Number:" and a PDF link.
         doc.css("h2, h3").each do |h|
             title = h.text.to_s.strip
             next if title.empty?
@@ -151,10 +151,10 @@ def parse_date_token(s)
             cr   = r[:council_reference].to_s
             addr = r[:address].to_s
 
-            # Skip site promo / competitions that occasionally appear as a “heading”
+            # Skip site promo / competitions that occasionally appear as a "heading"
             next if cr =~ /turn your two cents/i || r[:title_reference].to_s =~ /two cents/i
 
-            # Skip if we didnt get a sensible address
+            # Skip if we didn't get a sensible address
             next if addr.strip.empty? || addr == cr
 
             # Clarence app numbers look like PDPLANPMTD-2025/054004 etc

+ 1 - 1
scrapers/georgetown.rb

@@ -27,7 +27,7 @@ end
 html = Http.get(URL)
 doc  = Nokogiri::HTML(html)
 
-# Most items on this page are shown as “cards” with a small details table inside
+# Most items on this page are shown as "cards" with a small details table inside
 cards = doc.css(".card, .entry-content .wp-block-group, .entry-content .content-block, .entry-content .notice, .entry-content")
 items = []
 

+ 1 - 1
scrapers/glenorchy.rb

@@ -74,7 +74,7 @@ def safe_name(s) = s.to_s.gsub(/[^\w\-.]+/, "_")
     html = Http.get(URL)
     doc  = Nokogiri::HTML(html)
 
-    # Cards on this page use “content-block” classes (WordPress pattern).
+    # Cards on this page use "content-block" classes (WordPress pattern).
     cards = doc.css(".content-block, .content-block--featured")
 
     puts "Found #{cards.length} items for #{TABLE}"

+ 6 - 6
scrapers/hobartcity.rb

@@ -1,4 +1,4 @@
-# Hobart City Council – PlanBuild “Currently Advertised” scraper
+# Hobart City Council – PlanBuild "Currently Advertised" scraper
 # Table name is injected by run_all.sh as TABLE_NAME=da_hobartcity
 
 require "nokogiri"
@@ -36,7 +36,7 @@ result_blocks.each do |blk|
   address = address_el&.text&.strip.to_s
   council_reference = ref_el&.text&.strip.to_s
 
-  # Fallbacks from label-value pairs (e.g., “Address: …”, “Reference: …”)
+  # Fallbacks from label-value pairs (e.g., "Address: …", "Reference: …")
   if address.empty?
     m = text.match(/Address:\s*(.+?)(?:\s{2,}|Reference:|$)/i)
     address = m[1].strip if m
@@ -49,12 +49,12 @@ result_blocks.each do |blk|
   end
 
   # Try to find the LGA/council name in the block text
-  # Common patterns: “Council: Hobart City Council” or a badge/label nearby
+  # Common patterns: "Council: Hobart City Council" or a badge/label nearby
   council_name = nil
   if (m = text.match(/Council:\s*([A-Za-z \-]+Council)/i))
     council_name = m[1].strip
   end
-  # Light filter: if a filter is set and we cant see Hobart in this block, skip it
+  # Light filter: if a filter is set and we can't see Hobart in this block, skip it
   if COUNCIL_FILTER != "" && council_name && !council_name.include?(COUNCIL_FILTER)
     next
   elsif COUNCIL_FILTER != "" && council_name.nil?
@@ -75,7 +75,7 @@ result_blocks.each do |blk|
   end
   date_received = Util.parse_aus_date(date_received_raw)
 
-  # If we still dont have key fields, skip
+  # If we still don't have key fields, skip
   next if address.empty? || council_reference.empty?
 
   DB.upsert(TABLE, {
@@ -84,7 +84,7 @@ result_blocks.each do |blk|
     date_received_raw: date_received_raw,
     address: address,
     council_reference: council_reference,
-    applicant: "",  # PlanBuild usually doesnt expose these in the list
+    applicant: "",  # PlanBuild usually doesn't expose these in the list
     owner: ""
   })
   

+ 13 - 13
scrapers/kentish.rb

@@ -16,7 +16,7 @@ DB.ensure_table!(TABLE)
 def abs_url(base, href)
   h = href.to_s.strip
   return nil if h.empty?
-  return h if h.start_with?(“http://”, “https://”)
+  return h if h.start_with?("http://", "https://")
   URI.join(base, h).to_s
 rescue URI::InvalidURIError
   h
@@ -28,33 +28,33 @@ REF_RX = /\bK-DA\d+\/20\d{2}\b/i
 def parse_items(doc, base_url)
   rows = []
 
-  # Each DA is a <li class=”generic-list__item”> with a PDF link in the title
-  # Link text: “K-DA016/2026 41 George Road, Nook - proposed 2 Lot Subdivision (submissions by 21/04/2026)”
-  doc.css(“li.generic-list__item”).each do |li|
-    link = li.at_css(“h3.generic-list__title a, a[href$='.pdf']”)
+  # Each DA is a <li class="generic-list__item"> with a PDF link in the title
+  # Link text: "K-DA016/2026 41 George Road, Nook - proposed 2 Lot Subdivision (submissions by 21/04/2026)"
+  doc.css("li.generic-list__item").each do |li|
+    link = li.at_css("h3.generic-list__title a, a[href$='.pdf']")
     next unless link
 
-    raw_text = link.text.gsub(/\(PDF File[^)]*\)/i, “”).gsub(/\s+/, “ “).strip
-    pdf_href = link[“href”].to_s
+    raw_text = link.text.gsub(/\(PDF File[^)]*\)/i, "").gsub(/\s+/, " ").strip
+    pdf_href = link["href"].to_s
 
     ref_match = raw_text.match(REF_RX)
     next unless ref_match
 
     ref  = ref_match[0]
-    rest = raw_text.sub(ref, “”).strip
+    rest = raw_text.sub(ref, "").strip
 
-    # Extract on-notice date: “(submissions by 21/04/2026)”
-    on_raw = rest[/\(submissions\s+by\s+([^)]+)\)/i, 1]&.strip || “”
+    # Extract on-notice date: "(submissions by 21/04/2026)"
+    on_raw = rest[/\(submissions\s+by\s+([^)]+)\)/i, 1]&.strip || ""
     on_dt  = Util.parse_aus_date(on_raw)
 
-    # Strip the on-notice clause and split “address - description”
-    body = rest.sub(/\s*\(submissions\s+by\s+[^)]+\)/i, “”).strip
+    # Strip the on-notice clause and split "address - description"
+    body = rest.sub(/\s*\(submissions\s+by\s+[^)]+\)/i, "").strip
     if (m = body.match(/\A(.+?)\s+-\s+(.+)\z/))
       address     = m[1].strip
       description = m[2].strip
     else
       address     = body
-      description = “Development Application”
+      description = "Development Application"
     end
 
     next if address.empty?

+ 1 - 1
scrapers/latrobe.rb

@@ -1,4 +1,4 @@
-# Latrobe Council – PlanBuild “Currently Advertised” scraper
+# Latrobe Council – PlanBuild "Currently Advertised" scraper
 
 require "nokogiri"
 require_relative "../lib/http"

+ 1 - 1
scrapers/northernmidlands.rb

@@ -12,7 +12,7 @@ URL   = "https://northernmidlands.tas.gov.au/planning/development-in-the-norther
 
 DB.ensure_table!(TABLE)
 
-# “DA 2025/00123”, “DA2025/00123”, “Application No. DA 2025/123”
+# "DA 2025/00123", "DA2025/00123", "Application No. DA 2025/123"
 REF_RX1 = %r{\bDA\s*(20\d{2})\s*/\s*([A-Za-z0-9\-_.]+)}i
 REF_RX2 = %r{\bDA(20\d{2})\s*[-\/]?\s*([0-9]{3,})\b}i
 

+ 1 - 1
tools/import_sqlites.rb

@@ -65,7 +65,7 @@ def build_select(db, src_table)
   # Build a COALESCE(...) AS date_received from any present date columns
 date_candidates = %w[date_received date_lodged Date\ Lodged date_scraped].select { |c| cols.include?(c) }
 if date_candidates.any?
-  # Treat empty strings as NULL so blanks dont block fallbacks
+  # Treat empty strings as NULL so blanks don't block fallbacks
   expr = date_candidates.map { |c| "NULLIF(#{sql_ident(c)}, '')" }.join(", ")
   sel << "COALESCE(#{expr}) AS date_received"
 end