run_all.sh 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  #!/usr/bin/env bash
  # Runs the schema migration once, then prepares the inputs used by the rest of
  # the script: the optional ONLY/SKIP filters, the DEBUG/DRY_RUN pass-through
  # values, and the list of scraper scripts found under /app/scrapers.

  # Strict mode: stop on unhandled errors, unset variables and failed pipelines.
  set -o errexit -o nounset -o pipefail

  printf 'Starting run at %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S')"

  printf '%s\n' "Running schema migrations…"
  ruby /app/lib/migrate.rb

  # With nullglob an unmatched pattern expands to nothing, so SCRIPTS is an
  # empty array (not the literal pattern) when no scraper files exist.
  shopt -s nullglob
  SCRIPTS=(/app/scrapers/*.rb)

  # Filters, read from the environment; each defaults to the empty string.
  #   ONLY - comma-separated names, e.g. "meandervalley" or "kentish,break_oday"
  #   SKIP - comma-separated names, e.g. "hobartcity,latrobe"
  ONLY_LIST="${ONLY:-}"
  SKIP_LIST="${SKIP:-}"

  # Values handed on to each scraper's environment as DEBUG / DRY_RUN.
  DEBUG_FLAG="${DEBUG:-}"
  DRY_FLAG="${DRY_RUN:-}"
  # _csv_contains NEEDLE CSV
  #   Returns 0 when NEEDLE is exactly equal to one of the comma-separated
  #   entries of CSV, after all whitespace has been removed from each entry;
  #   returns 1 otherwise. The comparison is literal (no glob matching).
  _csv_contains() {
    local needle="$1" csv="$2"
    local -a entries=()
    local entry
    # IFS is overridden for this one `read` only; the shell's IFS is untouched.
    IFS=',' read -ra entries <<< "$csv"
    # The ${arr[@]+...} form keeps an empty list safe under `set -u` on
    # bash older than 4.4.
    for entry in ${entries[@]+"${entries[@]}"}; do
      entry="${entry//[[:space:]]/}"
      if [[ "$needle" == "$entry" ]]; then
        return 0
      fi
    done
    return 1
  }

  # should_run NAME
  #   Prints "1" when the scraper called NAME should be run and "0" when not.
  #   Globals read: ONLY_LIST, SKIP_LIST (comma-separated names; may be empty).
  #     - ONLY_LIST non-empty: it acts as an allow-list and SKIP_LIST is not
  #       consulted at all.
  #     - otherwise a non-empty SKIP_LIST acts as a deny-list.
  #     - both empty: every name is accepted.
  #   Always returns status 0; the decision is carried on stdout only.
  should_run() {
    local name="$1"

    if [[ -n "${ONLY_LIST:-}" ]]; then
      if _csv_contains "$name" "$ONLY_LIST"; then
        echo "1"
      else
        echo "0"
      fi
      return 0
    fi

    if [[ -n "${SKIP_LIST:-}" ]] && _csv_contains "$name" "$SKIP_LIST"; then
      echo "0"
      return 0
    fi

    echo "1"
  }
  # ---------------------------------------------------------------------------
  # Run every selected scraper, capture its output and collect per-scraper
  # figures for the summary table.
  # Reads:  SCRIPTS, DEBUG_FLAG, DRY_FLAG, should_run (defined earlier).
  # Writes: count, total_saved, total_warns, SUMMARY.
  # ---------------------------------------------------------------------------

  # _log_saved_count LOGFILE
  #   Prints the number from the last "Saved N" occurrence in LOGFILE, or 0 when
  #   there is none.
  _log_saved_count() {
    local log="$1" n
    # `|| true`: a log without any "Saved N" line is not an error.
    n=$(grep -oE 'Saved [0-9]+' "$log" | tail -1 | grep -oE '[0-9]+' || true)
    # 10# forces base 10 so a zero-padded value such as "08" is not parsed as
    # (invalid) octal by shell arithmetic.
    printf '%s\n' "$((10#${n:-0}))"
  }

  # _log_warn_count LOGFILE
  #   Prints how many lines of LOGFILE start with optional whitespace followed
  #   by "WARN".
  _log_warn_count() {
    local log="$1" n
    # grep -c prints 0 but exits 1 when nothing matches, hence `|| true`.
    # [[:space:]] is the POSIX spelling; `\s` is a GNU-only extension.
    n=$(grep -c '^[[:space:]]*WARN' "$log" || true)
    printf '%s\n' "${n:-0}"
  }

  # _scrape_status EXIT_CODE LOGFILE WARN_COUNT
  #   Prints the status label for one run, first match wins:
  #   ERROR (non-zero exit) > blocked (block markers in the log) > warn > ok.
  _scrape_status() {
    local exit_code="$1" log="$2" warn_count="$3"
    if [[ "$exit_code" -ne 0 ]]; then
      printf 'ERROR\n'
    elif grep -qiE 'cloudflare|blocked by|challenge page' "$log" 2>/dev/null; then
      printf 'blocked\n'
    elif [[ "$warn_count" -gt 0 ]]; then
      printf 'warn\n'
    else
      printf 'ok\n'
    fi
  }

  count=0
  total_saved=0
  total_warns=0
  # Each entry: "name|saved|warns|status"
  SUMMARY=()

  # Remove the capture file even if the script is interrupted or aborts while a
  # scraper is running.
  # NOTE(review): this installs the script's EXIT trap; no other trap is visible
  # in this file — confirm none is expected elsewhere.
  tmpfile=""
  _remove_tmpfile() {
    if [[ -n "${tmpfile:-}" ]]; then
      rm -f -- "$tmpfile"
    fi
  }
  trap _remove_tmpfile EXIT

  # ${arr[@]+...} keeps an empty SCRIPTS array from tripping `set -u` on bash
  # versions older than 4.4.
  for f in ${SCRIPTS[@]+"${SCRIPTS[@]}"}; do
    name="${f##*/}"
    name="${name%.rb}"
    table="da_${name}"

    if [[ "$(should_run "$name")" != "1" ]]; then
      continue
    fi

    echo ""
    echo "Running ${name} -> table ${table}"

    # Template ends in the Xs: a suffix after them is accepted by GNU mktemp
    # but not by every mktemp implementation.
    tmpfile=$(mktemp /tmp/scraper_log.XXXXXX)

    # Run scraper; merge stderr into stdout so tee captures both.
    # errexit is switched off around the pipeline so that a non-zero ruby exit
    # (which pipefail turns into a failed pipeline) doesn't abort the loop.
    # NOTE(review): DEBUG and DRY_RUN are always exported to the scraper, as
    # empty strings when unset here — confirm the scrapers treat "" as off.
    set +e
    TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" 2>&1 | tee "$tmpfile"
    ruby_exit=${PIPESTATUS[0]}
    set -e

    if [[ $ruby_exit -ne 0 ]]; then
      echo " [run_all] scraper exited with code ${ruby_exit}"
    fi

    # --- Parse summary fields from captured output ---
    # "Saved N item(s)" — the last occurrence wins in case there are several.
    saved=$(_log_saved_count "$tmpfile")
    # WARN lines (per the original note, these come from Log.warn).
    warns=$(_log_warn_count "$tmpfile")
    status=$(_scrape_status "$ruby_exit" "$tmpfile" "$warns")

    rm -f -- "$tmpfile"
    tmpfile=""

    SUMMARY+=("${name}|${saved}|${warns}|${status}")
    total_saved=$((total_saved + saved))
    total_warns=$((total_warns + warns))
    count=$((count + 1))
  done
  # ---------------------------------------------------------------------------
  # Summary table
  # ---------------------------------------------------------------------------

  # print_summary FINISH_TIME
  #   Writes the scrape summary table to stdout.
  #   Arguments: $1 - finish timestamp, printed verbatim before " UTC".
  #   Globals read:
  #     SUMMARY      array of "name|saved|warns|status" entries (may be empty)
  #     count        value shown in the TOTAL label
  #     total_saved  value shown in the Saved column of the TOTAL row
  #     total_warns  value shown in the Warns column of the TOTAL row
  #   The status column is colour-coded only when stdout is a terminal.
  print_summary() {
    local finished="$1"
    local heavy="========================================================================"
    local light=" ------------------------------------------------------------------------"
    # Escape sequences are kept as data and printed through %s, so nothing
    # variable ever becomes part of a printf format string.
    local green=$'\033[0;32m' yellow=$'\033[0;33m' red=$'\033[0;31m'
    local reset=$'\033[0m'
    local use_colour=0
    local entry n s w st colour

    if [[ -t 1 ]]; then
      use_colour=1
    fi

    echo ""
    echo "$heavy"
    printf " SCRAPE SUMMARY — finished %s UTC\n" "$finished"
    echo "$heavy"
    printf " %-32s %6s %5s %s\n" "Council" "Saved" "Warns" "Status"
    echo "$light"

    # ${arr[@]+...} keeps an empty SUMMARY (nothing selected) from tripping
    # `set -u` on bash versions older than 4.4.
    for entry in ${SUMMARY[@]+"${SUMMARY[@]}"}; do
      IFS='|' read -r n s w st <<< "$entry"
      if (( use_colour )); then
        case "$st" in
          ok) colour="$green" ;;
          warn) colour="$yellow" ;;
          blocked) colour="$yellow" ;;
          ERROR) colour="$red" ;;
          *) colour="" ;;
        esac
        printf " %-32s %6s %5s %s%s%s\n" "$n" "$s" "$w" "$colour" "$st" "$reset"
      else
        printf " %-32s %6s %5s %s\n" "$n" "$s" "$w" "$st"
      fi
    done

    echo "$light"
    printf " %-32s %6s %5s\n" "TOTAL (${count:-0} scrapers)" "${total_saved:-0}" "${total_warns:-0}"
    echo "$heavy"
  }

  finish_time=$(date -u +"%Y-%m-%d %H:%M:%S")
  print_summary "$finish_time"