Ver Fonte

Summary on Run All

Benjamin Harris há 2 meses atrás
pai
commit
87c98d93d1
1 ficheiros alterados com 80 adições e 2 exclusões
  1. 80 2
      run_all.sh

+ 80 - 2
run_all.sh

@@ -47,6 +47,12 @@ should_run() {
 }
 
 count=0
+total_saved=0
+total_warns=0
+
+# Each entry: "name|saved|warns|status"
+SUMMARY=()
+
 for f in "${SCRIPTS[@]}"; do
   name="$(basename "$f" .rb)"
   table="da_${name}"
@@ -55,9 +61,81 @@ for f in "${SCRIPTS[@]}"; do
     continue
   fi
 
+  echo ""
   echo "Running ${name} -> table ${table}"
-  TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" || echo "Error in $f"
+
+  # X's must be trailing: BSD/macOS mktemp does not fill a template with a suffix.
+  tmpfile=$(mktemp /tmp/scraper_XXXXXX)
+
+  # Run scraper; merge stderr into stdout so tee captures both.
+  # Suspend errexit so a failing pipeline (e.g. under pipefail) doesn't abort
+  # the loop; the scraper's own exit code is read from PIPESTATUS instead.
+  set +e
+  TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" 2>&1 | tee "$tmpfile"
+  ruby_exit=${PIPESTATUS[0]}
+  set -e
+
+  if [[ $ruby_exit -ne 0 ]]; then
+    echo "  [run_all] scraper exited with code ${ruby_exit}"
+  fi
+
+  # --- Parse summary fields from captured output ---
+  # "Saved N item(s)" — take the last occurrence in case there are multiple
+  saved=$(grep -oE 'Saved [0-9]+' "$tmpfile" | tail -1 | grep -oE '[0-9]+' || true)
+  saved="${saved:-0}"
+
+  # Count WARN lines (from Log.warn); POSIX class instead of the GNU-only \s
+  warns=$(grep -cE '^[[:space:]]*WARN' "$tmpfile" || true)
+  warns="${warns:-0}"
+
+  # Determine status: a non-zero exit wins, then block markers, then warnings
+  if [[ $ruby_exit -ne 0 ]]; then
+    status="ERROR"
+  elif grep -qiE 'cloudflare|blocked by|challenge page' "$tmpfile" 2>/dev/null; then
+    status="blocked"
+  elif [[ $warns -gt 0 ]]; then
+    status="warn"
+  else
+    status="ok"
+  fi
+  rm -f "$tmpfile"
+  SUMMARY+=("${name}|${saved}|${warns}|${status}")
+  # 10# forces base 10 so a zero-padded count (e.g. "08") is not read as octal
+  total_saved=$((total_saved + 10#$saved))
+  total_warns=$((total_warns + warns))
   count=$((count+1))
 done
 
-echo "Finished run at $(date -u +"%Y-%m-%d %H:%M:%S"). Ran ${count} scraper(s)."
+finish_time=$(date -u +"%Y-%m-%d %H:%M:%S")
+
+# ---------------------------------------------------------------------------
+# Summary table: one row per SUMMARY entry ("name|saved|warns|status"), then totals
+# ---------------------------------------------------------------------------
+echo ""
+echo "========================================================================"
+printf "  SCRAPE SUMMARY — finished %s UTC\n" "$finish_time"
+echo "========================================================================"
+printf "  %-32s  %6s  %5s  %s\n" "Council" "Saved" "Warns" "Status"
+echo "  ------------------------------------------------------------------------"
+
+for entry in "${SUMMARY[@]}"; do
+  IFS='|' read -r n s w st <<< "$entry"
+  # Colour-code status when output is a terminal; plain text otherwise
+  colour="" reset=""
+  if [[ -t 1 ]]; then
+    case "$st" in
+      ok)      colour=$'\033[0;32m' ;;   # green
+      warn)    colour=$'\033[0;33m' ;;   # yellow
+      blocked) colour=$'\033[0;33m' ;;   # yellow
+      ERROR)   colour=$'\033[0;31m' ;;   # red
+    esac
+    reset=$'\033[0m'
+  fi
+  # Escape codes travel as %s arguments so no variable is ever part of the
+  # printf format string; both variables are empty when stdout is not a tty.
+  printf '  %-32s  %6s  %5s  %s%s%s\n' "$n" "$s" "$w" "$colour" "$st" "$reset"
+done
+
+echo "  ------------------------------------------------------------------------"
+printf "  %-32s  %6s  %5s\n" "TOTAL (${count} scrapers)" "$total_saved" "$total_warns"
+echo "========================================================================"