Browse Source

Send Email

Benjamin Harris 2 months ago
parent
commit
9ff5b421df
3 changed files with 197 additions and 0 deletions
  1. 8 0
      docker-compose.yml
  2. 26 0
      run_all.sh
  3. 163 0
      tools/send_summary_email.rb

+ 8 - 0
docker-compose.yml

@@ -43,6 +43,14 @@ services:
       LOOKUP_THROTTLE_MS: ${LOOKUP_THROTTLE_MS:-150}
       # Log verbosity: debug | info (default) | warn | error
       LOG_LEVEL: ${LOG_LEVEL:-info}
+      # SMTP — summary error emails (optional; leave blank to disable)
+      SMTP_HOST: ${SMTP_HOST:-}
+      SMTP_PORT: ${SMTP_PORT:-587}
+      SMTP_USERNAME: ${SMTP_USERNAME:-}
+      SMTP_PASSWORD: ${SMTP_PASSWORD:-}
+      SMTP_SMTPSecure: ${SMTP_SMTPSecure:-tls}
+      SMTP_SENTFROM: ${SMTP_SENTFROM:-}
+      SMTP_ADDADDRESS: ${SMTP_ADDADDRESS:-}
     volumes:
       - ./scrapers:/app/scrapers:ro
       - ./downloads:/downloads

+ 26 - 0
run_all.sh

@@ -144,3 +144,29 @@ done
 echo "  ------------------------------------------------------------------------"
 printf "  %-32s  %6s  %5s\n" "TOTAL (${count} scrapers)" "$total_saved" "$total_warns"
 echo "========================================================================"
+
+# ---------------------------------------------------------------------------
+# Email summary if any scraper errored (requires SMTP_HOST to be configured)
+# ---------------------------------------------------------------------------
+has_errors=0  # becomes 1 as soon as one SUMMARY entry has status ERROR
+for entry in "${SUMMARY[@]}"; do
+  IFS='|' read -r _n _s _w st <<< "$entry"  # split on '|'; only the status field (st) is inspected
+  if [[ "$st" == "ERROR" ]]; then
+    has_errors=1
+    break
+  fi
+done
+
+if [[ $has_errors -eq 1 ]] && [[ -n "${SMTP_HOST:-}" ]]; then
+  echo ""
+  echo "Sending error summary email..."
+  {  # stdin for the Ruby script: four header lines, then one line per SUMMARY entry
+    echo "$finish_time"
+    echo "$count"
+    echo "$total_saved"
+    echo "$total_warns"
+    for entry in "${SUMMARY[@]}"; do
+      echo "$entry"
+    done
+  } | ruby /app/tools/send_summary_email.rb
+fi

+ 163 - 0
tools/send_summary_email.rb

@@ -0,0 +1,163 @@
+# tools/send_summary_email.rb
+#
+# Sends an HTML summary email via SMTP when scraper errors are present.
+# Called by run_all.sh; reads summary data from stdin in the format:
+#
+#   Line 1: finish timestamp (UTC)
+#   Line 2: total scraper count
+#   Line 3: total saved
+#   Line 4: total warns
+#   Lines 5+: pipe-delimited summary rows — name|saved|warns|status
+#
+# Env vars (set via docker-compose.yml from .env):
+#   required — SMTP_HOST, SMTP_SENTFROM, SMTP_ADDADDRESS (email is skipped if any is blank)
+#   optional — SMTP_PORT (587), SMTP_USERNAME (""), SMTP_PASSWORD (""), SMTP_SMTPSecure ("tls")
+
+require "net/smtp"
+require "openssl"
+
+# ── Read SMTP config ────────────────────────────────────────────────────────
+host     = ENV.fetch("SMTP_HOST",       "")
+port     = ENV.fetch("SMTP_PORT",       "587").to_i
+username = ENV.fetch("SMTP_USERNAME",   "")
+password = ENV.fetch("SMTP_PASSWORD",   "")
+secure   = ENV.fetch("SMTP_SMTPSecure", "tls").downcase   # "ssl" selects enable_tls below; any other value uses STARTTLS
+from     = ENV.fetch("SMTP_SENTFROM",   "")  # envelope sender and From: header address
+to       = ENV.fetch("SMTP_ADDADDRESS", "")  # recipient; used for the To: header and the envelope
+
+if host.empty? || from.empty? || to.empty?  # host, sender and recipient are mandatory
+  warn "[send_summary_email] SMTP not configured — skipping email"
+  exit 0  # exit status 0: missing configuration is not reported as a failure
+end
+
+# ── Read stdin ───────────────────────────────────────────────────────────────
+lines = $stdin.read.split("\n")
+finish_time  = lines[0].to_s.strip
+total_count  = lines[1].to_s.strip
+total_saved  = lines[2].to_s.strip
+total_warns  = lines[3].to_s.strip
+entries      = lines[4..] || []
+
+rows = entries.map do |e|
+  parts = e.split("|")
+  { name: parts[0].to_s, saved: parts[1].to_s, warns: parts[2].to_s, status: parts[3].to_s.strip }
+end
+
+error_rows   = rows.select { |r| r[:status] == "ERROR" }
+blocked_rows = rows.select { |r| r[:status] == "blocked" }
+warn_rows    = rows.select { |r| r[:status] == "warn" }
+
+# ── Build subject ────────────────────────────────────────────────────────────
+error_count = error_rows.size
+subject = if error_count > 0
+  "TAS Councils Scraper — #{error_count} error(s) — #{finish_time} UTC"
+else  # no row has status ERROR
+  "TAS Councils Scraper — completed with warnings — #{finish_time} UTC"
+end  # NOTE(review): the subject contains non-ASCII dashes and goes into the Subject: header unencoded — confirm the mail server accepts this
+
+# ── Build HTML body ──────────────────────────────────────────────────────────
+STATUS_COLOUR = {  # status → text colour of the status cell
+  "ok"      => "#198754",
+  "warn"    => "#856404",
+  "blocked" => "#856404",
+  "ERROR"   => "#dc3545"
+}.freeze
+STATUS_BG = {  # status → background colour of the whole table row
+  "ok"      => "#d1e7dd",
+  "warn"    => "#fff3cd",
+  "blocked" => "#fff3cd",
+  "ERROR"   => "#f8d7da"
+}.freeze
+
+row_html = rows.map do |r|  # one <tr> per scraper; values are interpolated as-is, without HTML escaping
+  colour = STATUS_COLOUR.fetch(r[:status], "#333")  # fallback for a status missing from the table
+  bg     = STATUS_BG.fetch(r[:status], "#fff")  # fallback for a status missing from the table
+  <<~TR
+    <tr style="background:#{bg}">
+      <td style="padding:4px 10px;font-family:monospace">#{r[:name]}</td>
+      <td style="padding:4px 10px;text-align:right">#{r[:saved]}</td>
+      <td style="padding:4px 10px;text-align:right">#{r[:warns]}</td>
+      <td style="padding:4px 10px;font-weight:bold;color:#{colour}">#{r[:status]}</td>
+    </tr>
+  TR
+end.join
+
+html_body = <<~HTML  # HTML part: heading, optional error/blocked call-outs, per-scraper table, totals footer
+  <html><body style="font-family:sans-serif;color:#333;max-width:700px">
+    <h2 style="margin-bottom:4px">TAS Councils Scraper Summary</h2>
+    <p style="color:#666;margin-top:0">Finished #{finish_time} UTC</p>
+
+    #{error_rows.any? ? "<p style='color:#dc3545;font-weight:bold'>&#9888; #{error_rows.size} scraper(s) exited with errors: #{error_rows.map { |r| r[:name] }.join(', ')}</p>" : ""}
+    #{blocked_rows.any? ? "<p style='color:#856404'>&#9888; #{blocked_rows.size} scraper(s) blocked by WAF/Cloudflare: #{blocked_rows.map { |r| r[:name] }.join(', ')}</p>" : ""}
+
+    <table cellspacing="0" cellpadding="0" style="border-collapse:collapse;width:100%;margin-top:12px">
+      <thead>
+        <tr style="background:#343a40;color:#fff">
+          <th style="padding:6px 10px;text-align:left">Council</th>
+          <th style="padding:6px 10px;text-align:right">Saved</th>
+          <th style="padding:6px 10px;text-align:right">Warns</th>
+          <th style="padding:6px 10px;text-align:left">Status</th>
+        </tr>
+      </thead>
+      <tbody>
+        #{row_html}
+      </tbody>
+      <tfoot>
+        <tr style="background:#f8f9fa;font-weight:bold">
+          <td style="padding:6px 10px">TOTAL (#{total_count} scrapers)</td>
+          <td style="padding:6px 10px;text-align:right">#{total_saved}</td>
+          <td style="padding:6px 10px;text-align:right">#{total_warns}</td>
+          <td></td>
+        </tr>
+      </tfoot>
+    </table>
+  </body></html>
+HTML
+
+# ── Compose RFC2822 message ──────────────────────────────────────────────────
+# multipart/alternative: a text/plain part followed by the text/html part.
+boundary = "boundary_#{Time.now.to_i}"
+
+# Plain-text table rows, joined with a bare "\n". <<~ strips indentation only
+# from the literal heredoc text, never from interpolated values, so a "\n  "
+# separator would leave every row after the first indented by two extra
+# spaces and out of line with the column header.
+plain_rows = rows.map { |r| "%-34s %5s  %5s  %s" % [r[:name], r[:saved], r[:warns], r[:status]] }.join("\n")
+
+message  = <<~MSG
+  From: TAS Scraper <#{from}>
+  To: #{to}
+  Subject: #{subject}
+  MIME-Version: 1.0
+  Content-Type: multipart/alternative; boundary="#{boundary}"
+
+  --#{boundary}
+  Content-Type: text/plain; charset=UTF-8
+
+  TAS Councils Scraper Summary — #{finish_time} UTC
+
+  #{error_rows.any?  ? "ERRORS (#{error_rows.size}): #{error_rows.map { |r| r[:name] }.join(', ')}\n" : ""}#{blocked_rows.any? ? "BLOCKED (#{blocked_rows.size}): #{blocked_rows.map { |r| r[:name] }.join(', ')}\n" : ""}
+  Council                           Saved  Warns  Status
+  #{plain_rows}
+
+  TOTAL (#{total_count} scrapers)  #{total_saved} saved, #{total_warns} warns
+
+  --#{boundary}
+  Content-Type: text/html; charset=UTF-8
+
+  #{html_body}
+  --#{boundary}--
+MSG
+
+# ── Send ─────────────────────────────────────────────────────────────────────
+begin
+  smtp = Net::SMTP.new(host, port)
+
+  if secure == "ssl"
+    smtp.enable_tls(OpenSSL::SSL::SSLContext.new)
+  else
+    # "tls" → STARTTLS
+    smtp.enable_starttls(OpenSSL::SSL::SSLContext.new)
+  end
+
+  smtp.start("localhost", username, password, :login) do |s|
+    s.send_message(message, from, Array(to))
+  end
+
+  warn "[send_summary_email] Email sent to #{to}"
+rescue StandardError => e
+  warn "[send_summary_email] Failed to send email: #{e.class} #{e.message}"
+  exit 1
+end