run_all.sh 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  #!/usr/bin/env bash
  # Applies schema migrations, then prepares the settings and the list of
  # scraper scripts used by the rest of this file.
  set -o errexit -o nounset -o pipefail

  printf 'Starting run at %s\n' "$(date -u +'%Y-%m-%d %H:%M:%S')"

  # Migrations run first; with errexit on, a failing migration stops the run.
  printf '%s\n' "Running schema migrations…"
  ruby /app/lib/migrate.rb

  # Optional controls read from the environment; each is empty when unset.
  ONLY_LIST="${ONLY:-}"     # e.g. "meandervalley" or "kentish,break_oday"
  SKIP_LIST="${SKIP:-}"     # e.g. "hobartcity,latrobe"
  DEBUG_FLAG="${DEBUG:-}"   # pass through to scrapers
  DRY_FLAG="${DRY_RUN:-}"   # pass through to scrapers

  # nullglob makes an unmatched pattern expand to nothing, so SCRIPTS is an
  # empty array (not the literal pattern) when no scraper files exist.
  shopt -s nullglob
  SCRIPTS=(/app/scrapers/*.rb)
  #######################################
  # Test whether a name appears in a comma-separated list.
  # Whitespace inside each list entry is ignored.
  # Arguments: $1 - name to look for
  #            $2 - comma-separated list
  # Returns:   0 if the name is listed, 1 otherwise
  #######################################
  _should_run_listed() {
    local needle="$1"
    local csv="$2"
    local -a entries=()
    local entry

    IFS=',' read -ra entries <<< "$csv"
    # Nothing parsed (e.g. the list was only a newline): not listed.
    (( ${#entries[@]} > 0 )) || return 1

    for entry in "${entries[@]}"; do
      if [[ "$needle" == "${entry//[[:space:]]/}" ]]; then
        return 0
      fi
    done
    return 1
  }

  #######################################
  # Decide whether the scraper named $1 should run.
  # A non-empty ONLY_LIST is an allow-list and takes precedence: SKIP_LIST is
  # not consulted at all in that case. Otherwise SKIP_LIST is a deny-list.
  # Globals:   ONLY_LIST (read), SKIP_LIST (read) - comma-separated names
  # Arguments: $1 - scraper name
  # Outputs:   "1" on stdout if the scraper should run, "0" otherwise
  # Returns:   always 0; callers compare the printed value
  #######################################
  should_run() {
    local name="$1"

    if [[ -n "$ONLY_LIST" ]]; then
      if _should_run_listed "$name" "$ONLY_LIST"; then
        echo "1"
      else
        echo "0"
      fi
      return
    fi

    if [[ -n "$SKIP_LIST" ]] && _should_run_listed "$name" "$SKIP_LIST"; then
      echo "0"
      return
    fi

    echo "1"
  }
  # ---------------------------------------------------------------------------
  # Run each selected scraper and collect per-scraper results
  # ---------------------------------------------------------------------------
  count=0
  total_saved=0
  total_warns=0
  # Each entry: "name|saved|warns|status"
  SUMMARY=()

  # Log file of the scraper currently running; empty between runs.
  tmpfile=""

  #######################################
  # Remove the in-flight scraper log, if there is one. Installed as the EXIT
  # trap so the file is not left behind when the script aborts mid-scraper.
  # Globals: tmpfile (read)
  #######################################
  cleanup_tmpfile() {
    if [[ -n "${tmpfile:-}" ]]; then
      rm -f -- "$tmpfile"
    fi
  }
  trap cleanup_tmpfile EXIT

  #######################################
  # Extract the number of saved items from a scraper log.
  # Uses the last "saved N" match (case-insensitive). When that is missing or
  # zero, falls back to counting lines that start with "Upserted".
  # Arguments: $1 - path to the captured scraper output
  # Outputs:   the count as a plain base-10 integer on stdout
  #######################################
  parse_saved_count() {
    local log="$1"
    local saved upserts

    # grep exits non-zero when nothing matches; that is expected here.
    saved=$(grep -oiE 'saved [0-9]+' "$log" | tail -1 | grep -oE '[0-9]+' || true)
    # Force base 10: a value such as "08" is an invalid octal literal in $(( )).
    saved=$((10#${saved:-0}))

    if (( saved == 0 )); then
      upserts=$(grep -cE '^(Upserted| Upserted)' "$log" 2>/dev/null || true)
      if (( ${upserts:-0} > 0 )); then
        saved=$upserts
      fi
    fi

    printf '%s\n' "$saved"
  }

  #######################################
  # Count WARN lines (optionally preceded by whitespace) in a scraper log.
  # Arguments: $1 - path to the captured scraper output
  # Outputs:   the count on stdout ("0" when there are none)
  #######################################
  count_warn_lines() {
    local log="$1"
    local warns

    # grep -c prints 0 but exits 1 when nothing matches; that is expected.
    warns=$(grep -c '^[[:space:]]*WARN' "$log" || true)
    printf '%s\n' "${warns:-0}"
  }

  #######################################
  # Classify a scraper run. Precedence: ERROR, blocked, warn, ok.
  # Arguments: $1 - scraper exit code
  #            $2 - number of WARN lines
  #            $3 - path to the captured scraper output
  # Outputs:   one of ERROR, blocked, warn, ok on stdout
  #######################################
  scraper_status() {
    local exit_code="$1"
    local warns="$2"
    local log="$3"

    if (( exit_code != 0 )); then
      echo "ERROR"
    elif grep -qiE 'cloudflare|blocked by|challenge page' "$log" 2>/dev/null; then
      echo "blocked"
    elif (( warns > 0 )); then
      echo "warn"
    else
      echo "ok"
    fi
  }

  # The ${arr[@]+...} form keeps an empty SCRIPTS array from tripping
  # `set -u` on bash releases older than 4.4.
  for f in ${SCRIPTS[@]+"${SCRIPTS[@]}"}; do
    name="$(basename "$f" .rb)"
    table="da_${name}"

    if [[ "$(should_run "$name")" != "1" ]]; then
      continue
    fi

    echo ""
    echo "Running ${name} -> table ${table}"

    # The template ends in X's: BSD and BusyBox mktemp do not accept a suffix
    # after the X's the way GNU mktemp does.
    tmpfile=$(mktemp "${TMPDIR:-/tmp}/scraper_XXXXXX")

    # Run scraper; merge stderr into stdout so tee captures both.
    # Disable errexit temporarily so a non-zero ruby exit (which pipefail
    # turns into a failed pipeline) doesn't abort the loop.
    set +e
    TABLE_NAME="$table" DEBUG="$DEBUG_FLAG" DRY_RUN="$DRY_FLAG" ruby "$f" 2>&1 | tee "$tmpfile"
    ruby_exit=${PIPESTATUS[0]}
    set -e

    if [[ $ruby_exit -ne 0 ]]; then
      echo " [run_all] scraper exited with code ${ruby_exit}"
    fi

    # --- Parse summary fields from captured output ---
    saved=$(parse_saved_count "$tmpfile")
    warns=$(count_warn_lines "$tmpfile")
    status=$(scraper_status "$ruby_exit" "$warns" "$tmpfile")

    rm -f -- "$tmpfile"
    tmpfile=""

    SUMMARY+=("${name}|${saved}|${warns}|${status}")
    total_saved=$((total_saved + saved))
    total_warns=$((total_warns + warns))
    count=$((count + 1))
  done
  # ---------------------------------------------------------------------------
  # Summary table
  # ---------------------------------------------------------------------------

  #######################################
  # Print the end-of-run summary table.
  # Globals:   SUMMARY (read) - entries of the form "name|saved|warns|status"
  #            count, total_saved, total_warns (read)
  # Arguments: $1 - finish timestamp shown in the banner
  # Outputs:   the table on stdout; the status column is colour-coded only
  #            when stdout is a terminal, plain text otherwise
  #######################################
  print_summary() {
    local finished="$1"
    local heavy_rule="========================================================================"
    local light_rule=" ------------------------------------------------------------------------"
    local entry n s w st
    local colour=""
    local reset=""
    local use_colour=0

    # Decide once whether to colour; the answer cannot change between rows.
    if [[ -t 1 ]]; then
      use_colour=1
      reset=$'\033[0m'
    fi

    echo ""
    echo "$heavy_rule"
    printf " SCRAPE SUMMARY — finished %s UTC\n" "$finished"
    echo "$heavy_rule"
    printf " %-32s %6s %5s %s\n" "Council" "Saved" "Warns" "Status"
    echo "$light_rule"

    # The ${arr[@]+...} form keeps an empty SUMMARY array from tripping
    # `set -u` on bash releases older than 4.4.
    for entry in ${SUMMARY[@]+"${SUMMARY[@]}"}; do
      IFS='|' read -r n s w st <<< "$entry"

      colour=""
      if (( use_colour )); then
        case "$st" in
          ok) colour=$'\033[0;32m' ;;             # green
          warn | blocked) colour=$'\033[0;33m' ;; # yellow
          ERROR) colour=$'\033[0;31m' ;;          # red
          *) colour="" ;;
        esac
      fi

      # Escape sequences are passed as data, never spliced into the format
      # string; both are empty when stdout is not a terminal.
      printf ' %-32s %6s %5s %s%s%s\n' "$n" "$s" "$w" "$colour" "$st" "$reset"
    done

    echo "$light_rule"
    printf " %-32s %6s %5s\n" "TOTAL (${count} scrapers)" "$total_saved" "$total_warns"
    echo "$heavy_rule"
  }

  finish_time=$(date -u +"%Y-%m-%d %H:%M:%S")
  print_summary "$finish_time"