#!/usr/bin/env bash
#
# Runs the schema migration, then every scraper in /app/scrapers/*.rb, and
# prints a per-scraper summary table (saved count, WARN count, status).
#
# Environment:
#   ONLY       comma-separated scraper names to run exclusively
#   SKIP       comma-separated scraper names to skip (ignored when ONLY is set)
#   DEBUG      passed through to each scraper
#   DRY_RUN    passed through to each scraper
#   SMTP_HOST  when non-empty and at least one scraper exited non-zero, the
#              summary is piped to /app/tools/send_summary_email.rb
#
# A failing scraper does not stop the run; it is recorded with status ERROR.
set -euo pipefail

echo "Starting run at $(date -u +"%Y-%m-%d %H:%M:%S")"
echo "Running schema migrations…"
ruby /app/lib/migrate.rb

ONLY_LIST="${ONLY:-}"   # e.g. "meandervalley" or "kentish,break_oday"
SKIP_LIST="${SKIP:-}"   # e.g. "hobartcity,latrobe"
DEBUG_FLAG="${DEBUG:-}" # pass through to scrapers
DRY_FLAG="${DRY_RUN:-}" # pass through to scrapers

shopt -s nullglob
SCRIPTS=(/app/scrapers/*.rb)

#######################################
# Tests whether a name appears in a comma-separated list.
# All spaces are stripped from each list entry before comparing.
# Arguments: $1 - comma-separated list
#            $2 - name to look for
# Returns:   0 if the name is listed, 1 otherwise
#######################################
list_contains() {
  local list="$1" wanted="$2" item
  local -a items
  IFS=',' read -ra items <<< "$list"
  for item in "${items[@]}"; do
    item="${item// /}"
    if [[ "$wanted" == "$item" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Decides whether a scraper should run, based on ONLY_LIST / SKIP_LIST.
# A non-empty ONLY_LIST takes precedence: SKIP_LIST is then not consulted.
# Globals:   ONLY_LIST (read), SKIP_LIST (read)
# Arguments: $1 - scraper name (script basename without .rb)
# Outputs:   "1" on stdout if the scraper should run, "0" otherwise
#######################################
should_run() {
  local name="$1"

  if [[ -n "$ONLY_LIST" ]]; then
    if list_contains "$ONLY_LIST" "$name"; then
      echo "1"
    else
      echo "0"
    fi
    return
  fi

  if [[ -n "$SKIP_LIST" ]] && list_contains "$SKIP_LIST" "$name"; then
    echo "0"
    return
  fi

  echo "1"
}

count=0
total_saved=0
total_warns=0
has_errors=0

# Each entry: "name|saved|warns|status"
SUMMARY=()

# Path of the capture file for the scraper currently running (empty between
# scrapers). The EXIT trap removes it so an interrupted or aborted run does
# not leave a log behind in /tmp.
tmpfile=""
cleanup() {
  if [[ -n "$tmpfile" ]]; then
    rm -f -- "$tmpfile"
  fi
}
trap cleanup EXIT

# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
# bash releases older than 4.4.
for f in "${SCRIPTS[@]+"${SCRIPTS[@]}"}"; do
  name="$(basename "$f" .rb)"
  table="da_${name}"

  if [[ "$(should_run "$name")" != "1" ]]; then
    continue
  fi

  echo ""
  echo "Running ${name} -> table ${table}"

  # Template ends in the X's: not every mktemp implementation accepts a
  # suffix after them.
  tmpfile=$(mktemp /tmp/scraper_log_XXXXXX)

  # Run scraper; merge stderr into stdout so tee captures both.
  # Disable errexit temporarily so a non-zero ruby exit (which pipefail turns
  # into a failed pipeline) doesn't abort the loop.
  set +e
  TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" 2>&1 | tee "$tmpfile"
  ruby_exit=${PIPESTATUS[0]}
  set -e

  if [[ $ruby_exit -ne 0 ]]; then
    echo " [run_all] scraper exited with code ${ruby_exit}"
  fi

  # --- Parse summary fields from captured output ---
  # "Saved N item(s)" or "saved N" — case-insensitive, last occurrence wins.
  # Fallback: count "Upserted" lines (every scraper prints one per DB write).
  saved=$(grep -oiE 'saved [0-9]+' "$tmpfile" | tail -1 | grep -oE '[0-9]+' || true)
  # Force base 10: a value such as "08" would otherwise be read as invalid
  # octal and abort the script in the arithmetic below.
  saved=$((10#${saved:-0}))
  if [[ $saved -eq 0 ]]; then
    upsert_count=$(grep -cE '^(Upserted| Upserted)' "$tmpfile" 2>/dev/null || true)
    if [[ "${upsert_count:-0}" -gt 0 ]]; then
      saved="$upsert_count"
    fi
  fi

  # Count WARN lines (from Log.warn)
  warns=$(grep -c '^\s*WARN' "$tmpfile" || true)
  warns="${warns:-0}"

  # Determine status: a non-zero exit outranks a block marker, which in turn
  # outranks plain warnings.
  if [[ $ruby_exit -ne 0 ]]; then
    status="ERROR"
    has_errors=1
  elif grep -qiE 'cloudflare|blocked by|challenge page' "$tmpfile" 2>/dev/null; then
    status="blocked"
  elif [[ $warns -gt 0 ]]; then
    status="warn"
  else
    status="ok"
  fi

  rm -f -- "$tmpfile"
  tmpfile=""

  SUMMARY+=("${name}|${saved}|${warns}|${status}")
  total_saved=$((total_saved + saved))
  total_warns=$((total_warns + warns))
  count=$((count + 1))
done

finish_time=$(date -u +"%Y-%m-%d %H:%M:%S")

# ---------------------------------------------------------------------------
# Summary table
# ---------------------------------------------------------------------------
echo ""
echo "========================================================================"
printf " SCRAPE SUMMARY — finished %s UTC\n" "$finish_time"
echo "========================================================================"
printf " %-32s %6s %5s %s\n" "Council" "Saved" "Warns" "Status"
echo " ------------------------------------------------------------------------"

# Literal ESC sequences, passed to printf as data rather than spliced into
# the format string.
reset=$'\033[0m'

for entry in "${SUMMARY[@]+"${SUMMARY[@]}"}"; do
  IFS='|' read -r n s w st <<< "$entry"
  # Colour-code status when output is a terminal; plain text otherwise
  if [[ -t 1 ]]; then
    case "$st" in
      ok) colour=$'\033[0;32m' ;;             # green
      warn | blocked) colour=$'\033[0;33m' ;; # yellow
      ERROR) colour=$'\033[0;31m' ;;          # red
      *) colour="" ;;
    esac
    printf ' %-32s %6s %5s %s%s%s\n' "$n" "$s" "$w" "$colour" "$st" "$reset"
  else
    printf ' %-32s %6s %5s %s\n' "$n" "$s" "$w" "$st"
  fi
done

echo " ------------------------------------------------------------------------"
printf " %-32s %6s %5s\n" "TOTAL (${count} scrapers)" "$total_saved" "$total_warns"
echo "========================================================================"

# ---------------------------------------------------------------------------
# Email summary if any scraper errored (requires SMTP_HOST to be configured)
# ---------------------------------------------------------------------------
if [[ $has_errors -eq 1 ]] && [[ -n "${SMTP_HOST:-}" ]]; then
  echo ""
  echo "Sending error summary email..."
  # Payload on stdin, one value per line: finish time, scraper count, total
  # saved, total warns, then one "name|saved|warns|status" line per scraper.
  # SUMMARY cannot be empty here because has_errors is only set after a run.
  printf '%s\n' "$finish_time" "$count" "$total_saved" "$total_warns" "${SUMMARY[@]}" \
    | ruby /app/tools/send_summary_email.rb
fi