|
@@ -47,6 +47,12 @@ should_run() {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
count=0
|
|
count=0
|
|
|
|
|
+total_saved=0
|
|
|
|
|
+total_warns=0
|
|
|
|
|
+
|
|
|
|
|
+# Each entry: "name|saved|warns|status"
|
|
|
|
|
+SUMMARY=()
|
|
|
|
|
+
|
|
|
for f in "${SCRIPTS[@]}"; do
|
|
for f in "${SCRIPTS[@]}"; do
|
|
|
name="$(basename "$f" .rb)"
|
|
name="$(basename "$f" .rb)"
|
|
|
table="da_${name}"
|
|
table="da_${name}"
|
|
@@ -55,9 +61,81 @@ for f in "${SCRIPTS[@]}"; do
|
|
|
continue
|
|
continue
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
|
|
+ echo ""
|
|
|
echo "Running ${name} -> table ${table}"
|
|
echo "Running ${name} -> table ${table}"
|
|
|
- TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" || echo "Error in $f"
|
|
|
|
|
|
|
+
|
|
|
|
|
+ tmpfile=$(mktemp /tmp/scraper_XXXXXX.log)
|
|
|
|
|
+
|
|
|
|
|
+ # Run scraper; merge stderr into stdout so tee captures both.
|
|
|
|
|
+ # Disable errexit (set -e) temporarily so a non-zero ruby exit doesn't abort the loop.
|
|
|
|
|
+ set +e
|
|
|
|
|
+ TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" 2>&1 | tee "$tmpfile"
|
|
|
|
|
+ ruby_exit=${PIPESTATUS[0]}
|
|
|
|
|
+ set -e
|
|
|
|
|
+
|
|
|
|
|
+ if [[ $ruby_exit -ne 0 ]]; then
|
|
|
|
|
+ echo " [run_all] scraper exited with code ${ruby_exit}"
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ # --- Parse summary fields from captured output ---
|
|
|
|
|
+
|
|
|
|
|
+ # "Saved N item(s)" — take the last occurrence in case there are multiple
|
|
|
|
|
+ saved=$(grep -oE 'Saved [0-9]+' "$tmpfile" | tail -1 | grep -oE '[0-9]+' || true)
|
|
|
|
|
+ saved="${saved:-0}"
|
|
|
|
|
+
|
|
|
|
|
+ # Count WARN lines (from Log.warn)
|
|
|
|
|
+ warns=$(grep -c '^\s*WARN' "$tmpfile" || true)
|
|
|
|
|
+ warns="${warns:-0}"
|
|
|
|
|
+
|
|
|
|
|
+ # Determine status
|
|
|
|
|
+ if [[ $ruby_exit -ne 0 ]]; then
|
|
|
|
|
+ status="ERROR"
|
|
|
|
|
+ elif grep -qiE 'cloudflare|blocked by|challenge page' "$tmpfile" 2>/dev/null; then
|
|
|
|
|
+ status="blocked"
|
|
|
|
|
+ elif [[ $warns -gt 0 ]]; then
|
|
|
|
|
+ status="warn"
|
|
|
|
|
+ else
|
|
|
|
|
+ status="ok"
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ rm -f "$tmpfile"
|
|
|
|
|
+
|
|
|
|
|
+ SUMMARY+=("${name}|${saved}|${warns}|${status}")
|
|
|
|
|
+ total_saved=$((total_saved + saved))
|
|
|
|
|
+ total_warns=$((total_warns + warns))
|
|
|
count=$((count+1))
|
|
count=$((count+1))
|
|
|
done
|
|
done
|
|
|
|
|
|
|
|
-echo "Finished run at $(date -u +"%Y-%m-%d %H:%M:%S"). Ran ${count} scraper(s)."
|
|
|
|
|
|
|
+finish_time=$(date -u +"%Y-%m-%d %H:%M:%S")
|
|
|
|
|
+
|
|
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
|
|
+# Summary table
|
|
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
|
|
+echo ""
|
|
|
|
|
+echo "========================================================================"
|
|
|
|
|
+printf " SCRAPE SUMMARY — finished %s UTC\n" "$finish_time"
|
|
|
|
|
+echo "========================================================================"
|
|
|
|
|
+printf " %-32s %6s %5s %s\n" "Council" "Saved" "Warns" "Status"
|
|
|
|
|
+echo " ------------------------------------------------------------------------"
|
|
|
|
|
+
|
|
|
|
|
+for entry in "${SUMMARY[@]}"; do
|
|
|
|
|
+ IFS='|' read -r n s w st <<< "$entry"
|
|
|
|
|
+ # Colour-code status when output is a terminal; plain text otherwise
|
|
|
|
|
+ if [[ -t 1 ]]; then
|
|
|
|
|
+ case "$st" in
|
|
|
|
|
+ ok) colour="\033[0;32m" ;; # green
|
|
|
|
|
+ warn) colour="\033[0;33m" ;; # yellow
|
|
|
|
|
+ blocked) colour="\033[0;33m" ;; # yellow
|
|
|
|
|
+ ERROR) colour="\033[0;31m" ;; # red
|
|
|
|
|
+ *) colour="" ;;
|
|
|
|
|
+ esac
|
|
|
|
|
+ reset="\033[0m"
|
|
|
|
|
+ printf " %-32s %6s %5s ${colour}%s${reset}\n" "$n" "$s" "$w" "$st"
|
|
|
|
|
+ else
|
|
|
|
|
+ printf " %-32s %6s %5s %s\n" "$n" "$s" "$w" "$st"
|
|
|
|
|
+ fi
|
|
|
|
|
+done
|
|
|
|
|
+
|
|
|
|
|
+echo " ------------------------------------------------------------------------"
|
|
|
|
|
+printf " %-32s %6s %5s\n" "TOTAL (${count} scrapers)" "$total_saved" "$total_warns"
|
|
|
|
|
+echo "========================================================================"
|