Kaynağa Gözat

Updates after run

Benjamin Harris 2 ay önce
ebeveyn
işleme
f28377983e
4 değiştirilmiş dosya ile 11 ekleme ve 14 silme
  1. + 7 - 2
      run_all.sh
  2. + 1 - 1
      scrapers/hobartcity.rb
  3. + 3 - 3
      scrapers/launcestoncity.rb
  4. + 0 - 8
      scrapers/westcoast.rb

+ 7 - 2
run_all.sh

@@ -79,8 +79,13 @@ for f in "${SCRIPTS[@]}"; do
 
   # --- Parse summary fields from captured output ---
 
-  # "Saved N item(s)" — take the last occurrence in case there are multiple
-  saved=$(grep -oE 'Saved [0-9]+' "$tmpfile" | tail -1 | grep -oE '[0-9]+' || true)
+  # "Saved N item(s)" or "saved N" — case-insensitive, last occurrence wins.
+  # Fallback: count "Upserted" lines (every scraper prints one per DB write).
+  saved=$(grep -oiE 'saved [0-9]+' "$tmpfile" | tail -1 | grep -oE '[0-9]+' || true)
+  if [[ -z "$saved" || "$saved" == "0" ]]; then
+    upsert_count=$(grep -cE '^(Upserted|  Upserted)' "$tmpfile" 2>/dev/null || true)
+    [[ "${upsert_count:-0}" -gt 0 ]] && saved="$upsert_count"
+  fi
   saved="${saved:-0}"
 
   # Count WARN lines (from Log.warn)

+ 1 - 1
scrapers/hobartcity.rb

@@ -98,4 +98,4 @@ result_blocks.each do |blk|
   found += 1
 end
 
-puts "Done #{TABLE}. Found #{found} item(s)."
+puts "Done #{TABLE}. Saved #{found} item(s)."

+ 3 - 3
scrapers/launcestoncity.rb

@@ -266,8 +266,8 @@ tables.each do |t|
     council_reference:   council_reference,
     description:         description,
     address:             address,
-    closing_date:        closing_date,
-    closing_date_raw:    closing_raw,
+    on_notice_to:        closing_date,
+    on_notice_to_raw:    closing_raw,
     info_url:            info_url,
     applicant:           "",
     owner:               ""
@@ -483,4 +483,4 @@ tables.each do |t|
   kept += 1
 end
 
-puts "Done #{TABLE}. Found #{kept}, saved #{kept}."
+puts "Done #{TABLE}. Saved #{kept} item(s)."

+ 0 - 8
scrapers/westcoast.rb

@@ -122,14 +122,6 @@ end
 list_html = Http.get(URL)
 list_doc  = Nokogiri::HTML(list_html)
 
-detail_links = list_doc.css("a").map { |a|
-  href = a["href"].to_s
-  next if href.empty? || href.start_with?("#")
-  u = abs_url(URL, href)
-  u.include?("/development-application/")
-}.compact
-
-# Convert booleans from map to urls properly
 detail_links = list_doc.css("a").map { |a|
   href = a["href"].to_s
   u = abs_url(URL, href)