Sfoglia il codice sorgente

Derwent Valley Update

Benjamin Harris 2 mesi fa
parent
commit
7bb466cba8
2 file modificati con 291 aggiunte e 154 eliminazioni
  1. 5 1
      .claude/settings.local.json
  2. 286 153
      scrapers/derwentvalley.rb

+ 5 - 1
.claude/settings.local.json

@@ -50,7 +50,11 @@
       "Read(//c/Users/lumion/AppData/Local/Temp/**)",
       "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' https://www.latrobe.tas.gov.au/services/building-and-planning-services/planningapp -o C:/Users/lumion/AppData/Local/Temp/latrobe.html)",
       "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Upgrade-Insecure-Requests: 1' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' -c C:/Users/lumion/AppData/Local/Temp/latrobe_cookies.txt https://www.latrobe.tas.gov.au/ -o /dev/null)",
-      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.latrobe.tas.gov.au/' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/latrobe_cookies.txt https://www.latrobe.tas.gov.au/services/building-and-planning-services/planningapp -o C:/Users/lumion/AppData/Local/Temp/latrobe2.html)"
+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.latrobe.tas.gov.au/' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/latrobe_cookies.txt https://www.latrobe.tas.gov.au/services/building-and-planning-services/planningapp -o C:/Users/lumion/AppData/Local/Temp/latrobe2.html)",
+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Upgrade-Insecure-Requests: 1' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' https://www.derwentvalley.tas.gov.au/ -c C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt -o /dev/null -w '%{http_code}')",
+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.derwentvalley.tas.gov.au/' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt https://www.derwentvalley.tas.gov.au/home/card-listing/development-applications -o C:/Users/lumion/AppData/Local/Temp/dv_list.html -w '%{http_code} %{size_download} bytes')",
+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.derwentvalley.tas.gov.au/' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt 'https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice' -o C:/Users/lumion/AppData/Local/Temp/dv_news.html -w '%{http_code} %{size_download} bytes')",
+      "Bash(curl -s -L --max-time 30 -H 'User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/127.0.0.0 Safari/537.36' -H 'Accept-Encoding: identity' -H 'Referer: https://www.derwentvalley.tas.gov.au/home/latest-news' -H 'Sec-Fetch-Site: same-origin' -b C:/Users/lumion/AppData/Local/Temp/dv_cookies.txt https://www.derwentvalley.tas.gov.au/home/latest-news/application-for-planning-approval-160-wyre-forest-road-molesworth -o C:/Users/lumion/AppData/Local/Temp/dv_detail1.html -w '%{http_code} %{size_download} bytes')"
     ]
   }
 }

+ 286 - 153
scrapers/derwentvalley.rb

@@ -1,185 +1,318 @@
 # Derwent Valley Council — Development Applications being advertised
-# Primary list: https://www.derwentvalley.tas.gov.au/home/card-listing/development-applications
-# Fallback list (Public Notice posts): https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice
+#
+# Source: https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice
+#
+# The site is Cloudflare-protected — requires homepage warmup with browser-like
+# headers (same technique as burnie.rb / kingisland.rb).
+#
+# The news listing links go through lgasa-search.lga.sa.gov.au → squiz.cloud.
+# Rather than following that full redirect chain, we extract the `index_url`
+# parameter from each lgasa href, which points at lgasa-web.squiz.cloud/?a=ID.
+# A single (non-following) GET to that URL returns a Location header with the
+# real derwentvalley.tas.gov.au detail page URL.
+#
+# Detail page structure:
+#   <table><tbody>
+#     <tr><td>APP No.</td><td>SITE</td><td>PROPOSAL</td></tr>
+#     <tr><td>DA 2026/023</td><td>160 Wyre Forest Road, Molesworth</td>
+#         <td>Dwelling and outbuilding</td></tr>
+#   </tbody></table>
+#   <p>...received no later than 5.00pm on 15 April 2026...</p>
+#   <div class="content-container"><a href="...DA-2026-023.pdf">plans</a></div>
 
+require "date"
 require "nokogiri"
+require "net/http"
+require "uri"
 
-require_relative "../lib/scraper_helpers"
-require_relative "../lib/util"
+require_relative "../lib/db"
+require_relative "../lib/enrich"
 require_relative "../lib/log"
-TABLE        = ENV.fetch("TABLE_NAME")  # run_all.sh -> da_derwentvalley
-LIST_URL     = "https://www.derwentvalley.tas.gov.au/home/card-listing/development-applications"
-NEWS_URL     = "https://www.derwentvalley.tas.gov.au/home/latest-news?f.News+category%7CnewsCategory=Public+Notice"
+require_relative "../lib/util"
+
+TABLE    = ENV.fetch("TABLE_NAME")  # run_all.sh sets from filename: da_derwentvalley
+BASE_URL = "https://www.derwentvalley.tas.gov.au"
+NEWS_URL = "#{BASE_URL}/home/latest-news?f.News+category%7CnewsCategory=Public+Notice"
 
 DB.ensure_table!(TABLE)
 
-# Common reference forms: "DA 2025/097"
-REF_RX = %r{\bDA\s*(20\d{2})\s*/\s*([A-Za-z0-9\-_.]+)}i
-def extract_ref(s)
-  t = s.to_s
-  if (m = t.match(REF_RX))
-    return "DA #{m[1]} / #{m[2]}"
-  end
-  nil
-end
+# ----- Browser-like headers (WAF/Cloudflare warmup) -----
+BASE_HEADERS = {
+    "User-Agent"                => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
+    "Accept"                    => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language"           => "en-AU,en;q=0.9",
+    "Accept-Encoding"           => "identity",
+    "Upgrade-Insecure-Requests" => "1",
+    "Sec-Fetch-Dest"            => "document",
+    "Sec-Fetch-Mode"            => "navigate",
+    "Sec-Fetch-Site"            => "none",
+    "Sec-Fetch-User"            => "?1",
+    "sec-ch-ua"                 => '"Chromium";v="127", "Not)A;Brand";v="99", "Google Chrome";v="127"',
+    "sec-ch-ua-mobile"          => "?0",
+    "sec-ch-ua-platform"        => '"Windows"',
+    "Connection"                => "close",
+}.freeze
 
-def extract_date_token(s)
-  text = s.to_s
-  return $1 if text =~ /(\b\d{1,2}\/\d{1,2}\/\d{2,4}\b)/
-  return $1 if text =~ /(\b\d{1,2}\s+[A-Za-z]{3,}\s+\d{4}\b)/
-  return $1 if text =~ /(\b[A-Za-z]{3,}\s+\d{1,2},?\s+\d{4}\b)/
-  ""
-end
+class CookieJar
+    def initialize; @h = {}; end
+
+    def for(host)
+        @h[host] || ""
+    end
 
-def extract_on_notice_raw(text)
-  s = text.to_s.gsub(/\s+/, " ")
-  # Look for wording like "Submissions must be received by ...", "close on ...", "on notice until ..."
-  if s =~ /(submissions?|representations?)\s+(must\s+be\s+)?(received|made|close|closing)\s+(by|on)\s*[:\-]?\s*([A-Za-z0-9\/ ,]+)/i
-    d = extract_date_token($5)
-    return d unless d.empty?
-  end
-  if s =~ /\bon\s*notice\s*(until|to)\s*[:\-]?\s*([A-Za-z0-9\/ ,]+)/i
-    d = extract_date_token($2)
-    return d unless d.empty?
-  end
-  extract_date_token(s)
+    def merge_from(resp, host)
+        cookies = resp.get_fields("Set-Cookie") || []
+        return if cookies.empty?
+        existing = parse_header(@h[host])
+        cookies.each do |sc|
+            kv = sc.split(";", 2).first
+            k, v = kv.split("=", 2)
+            existing[k.to_s.strip] = v.to_s unless k.to_s.strip.empty?
+        end
+        @h[host] = existing.map { |k, v| "#{k}=#{v}" }.join("; ")
+    end
+
+    private
+
+    def parse_header(s)
+        s.to_s.split(";").map(&:strip).filter_map { |kv|
+            k, v = kv.split("=", 2)
+            [k, v] unless k.to_s.empty?
+        }.to_h
+    end
 end
 
-def parse_detail(url)
-  html = Http.get(url)
-  doc  = Nokogiri::HTML(html)
-
-  title = doc.at_css("h1, .entry-title")&.text&.strip.to_s
-  body_text = doc.at_css("main")&.text.to_s
-  body_text = doc.text.to_s if body_text.strip.empty?
-
-  council_reference = extract_ref(title) || extract_ref(body_text)
-  # Address often sits in the title after " - "
-  address = if title.include?(" - ")
-    title.split(" - ", 2)[1].to_s.strip
-  else
-    # Fallback: first line with a number and street
-    line = body_text.split(/\n/).find { |l| l =~ /\d{1,4}\s+\S+/ }
-    line.to_s.strip
-  end
-  address = title if address.to_s.strip.empty?
-
-  pdf_a = doc.at_css("a[href$='.pdf'], a[href*='.pdf?']")
-  pdf   = pdf_a ? abs_url(url, pdf_a["href"].to_s) : ""
-
-  on_raw = extract_on_notice_raw(body_text)
-  on_dt  = Util.parse_aus_date(on_raw)
-
-  return nil if council_reference.to_s.strip.empty? || address.to_s.strip.empty?
-
-  {
-    council_reference: council_reference,
-    address: address,
-    description: "Development Application",
-    date_received_raw: on_raw,
-    date_received: on_dt,
-    document_url: pdf,
-    title_reference: title
-  }
+# GET url, following redirects (max 8 requests) unless follow: false; returns [final_url_string, body, http_code], or [url, "", 0] on error
+def http_get(url, jar:, referer: nil, fetch_site: "none", follow: true)
+    uri  = URI(url)
+    hdrs = BASE_HEADERS.merge("Sec-Fetch-Site" => fetch_site)
+    hdrs["Referer"] = referer if referer
+    cookie = jar.for(uri.host)
+    hdrs["Cookie"] = cookie unless cookie.empty?
+
+    limit = 8
+    code  = 0
+    body  = ""
+
+    while limit > 0
+        limit -= 1
+        redirect_to = nil
+
+        req = Net::HTTP::Get.new(uri, hdrs)
+        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
+                        read_timeout: 30, open_timeout: 15) do |http|
+            resp = http.request(req)
+            jar.merge_from(resp, uri.host)
+            code = resp.code.to_i
+
+            if follow && [301, 302, 303, 307, 308].include?(code) && resp["location"]
+                redirect_to = URI.join(uri, resp["location"])
+            else
+                body = resp.body.to_s
+            end
+        end
+
+        if redirect_to
+            uri = redirect_to
+            # Update Referer and Sec-Fetch-Site for subsequent hops
+            hdrs["Referer"]        = uri.to_s
+            hdrs["Sec-Fetch-Site"] = "same-origin"
+            cookie = jar.for(uri.host)
+            hdrs["Cookie"] = cookie.empty? ? nil : cookie
+            next
+        end
+        break
+    end
+
+    [uri.to_s, body, code]
+rescue StandardError => e
+    Log.warn "derwentvalley", "HTTP error for #{url}: #{e.class} #{e.message}"
+    [url, "", 0]
 end
 
-def detail_links_from_list(list_url)
-  html = Http.get(list_url)
-  doc  = Nokogiri::HTML(html)
-  # Cards or list items link to detail posts
-  links = doc.css("a").map { |a|
-    href = a["href"].to_s
-    next if href.empty? || href.start_with?("#")
-    abs_url(list_url, href)
-  }.compact.uniq
-
-  # Keep obvious news or notice items
-  links.select { |u|
-    u.include?("/home/latest-news/") || u.include?("/news/") || u =~ /application-for-planning-approval/i
-  }
+# Resolve an lgasa-search redirect href to the real derwentvalley.tas.gov.au URL.
+# Extracts index_url from the lgasa query string, makes a single non-following GET
+# to it, and returns the Location header (nil if either is missing or the GET fails).
+def resolve_detail_url(lgasa_href)
+    query = URI.decode_www_form(URI(lgasa_href).query.to_s).to_h
+    index_url = query["index_url"]
+    return nil if index_url.to_s.empty?
+
+    uri = URI(index_url)
+    req = Net::HTTP::Get.new(uri, "User-Agent" => BASE_HEADERS["User-Agent"])
+    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
+                    open_timeout: 10, read_timeout: 10) do |http|
+        resp = http.request(req)
+        loc  = resp["location"].to_s
+        return loc unless loc.empty?
+    end
+    nil
+rescue StandardError => e
+    Log.warn "derwentvalley", "Could not resolve squiz redirect #{index_url}: #{e.class} #{e.message}"
+    nil
 end
 
-def detail_links_from_news(news_url)
-  html = Http.get(news_url)
-  doc  = Nokogiri::HTML(html)
-  doc.css("a").map { |a|
-    href = a["href"].to_s
-    next if href.empty? || href.start_with?("#")
-    u = abs_url(news_url, href)
-    u if u =~ /application-for-planning-approval/i
-  }.compact.uniq
+# Parse a detail page into an array of DA record hashes, one per "DA yyyy/nnn" table row ([] if none); page_url is currently unused
+def parse_detail(html, page_url)
+    doc = Nokogiri::HTML(html)
+
+    # Table: header row "APP No. | SITE | PROPOSAL", then data rows
+    data_rows = []
+    doc.css("table").each do |tbl|
+        tbl.css("tr").each do |tr|
+            cells = tr.css("td").map { |td| td.text.gsub(/\u00a0|\s+/, " ").strip }
+            next if cells.empty?
+            next if cells.join =~ /\AAPP\s*No\.?/i   # skip header row
+            next unless cells[0] =~ /\bDA\s*\d{4}\/\d+/i
+            data_rows << cells
+        end
+    end
+
+    return [] if data_rows.empty?
+
+    body_text = doc.css(".content-container, main").first&.text.to_s
+                   .gsub(/\u00a0/, " ").gsub(/\s+/, " ")
+
+    # Closing date from "received no later than ... DATE"
+    on_notice_to_raw = ""
+    on_notice_to     = nil
+    if (m = body_text.match(/no\s+later\s+than\b.{0,60}?(\d{1,2}\s+[A-Za-z]{3,}\s+\d{4})/i))
+        on_notice_to_raw = m[1].strip
+        on_notice_to     = Util.parse_aus_date(on_notice_to_raw)
+    end
+
+    # Commencing / start date → date_received
+    date_received_raw = ""
+    date_received     = nil
+    if (m = body_text.match(/commencing\s+on\s+(\d{1,2}\s+[A-Za-z]{3,}\s+\d{4})/i)) ||
+       (m = body_text.match(/Start\s+Date\s+(\d{1,2}\/\d{1,2}\/\d{2,4})/i))
+        date_received_raw = m[1].strip
+        date_received     = Util.parse_aus_date(date_received_raw)
+    end
+
+    # PDF link within the content area only
+    doc_url = nil
+    doc.css(".content-container a[href]").each do |a|
+        href = a["href"].to_s
+        if href =~ /\.pdf/i && href.include?("derwentvalley")
+            doc_url = href
+            break
+        end
+    end
+
+    data_rows.map do |cells|
+        ref         = cells[0].to_s.gsub(/\A\s+/, "").strip
+        address     = cells[1].to_s.strip
+        description = cells[2].to_s.strip
+        description = "Development Application" if description.empty?
+
+        next if ref.empty? || address.empty?
+
+        {
+            council_reference: ref,
+            address:           address,
+            description:       description,
+            date_received:     date_received,
+            date_received_raw: date_received_raw,
+            on_notice_to:      on_notice_to,
+            on_notice_to_raw:  on_notice_to_raw,
+            document_url:      doc_url
+        }
+    end.compact
 end
 
-def cloudflare_blocked?(html)
-  html.to_s.include?("Just a moment") || html.to_s.include?("Enable JavaScript and cookies")
+# ----- Warmup then fetch news listing -----
+jar = CookieJar.new
+
+Log.info "derwentvalley", "Warming up via homepage..."
+_url0, _body0, code0 = http_get("#{BASE_URL}/", jar: jar)
+Log.info "derwentvalley", "Homepage: #{code0}"
+
+sleep(0.5)
+
+Log.info "derwentvalley", "Fetching news listing..."
+_url1, html1, code1 = http_get(NEWS_URL, jar: jar, referer: "#{BASE_URL}/", fetch_site: "same-origin")
+Log.info "derwentvalley", "News listing: #{code1} (#{html1.bytesize} bytes)"
+
+if code1 != 200 || html1.bytesize < 5_000
+    Log.warn "derwentvalley", "Could not fetch news listing (status #{code1}). " \
+        "DAs for this council are also available via planbuild.rb (council code DER)."
+    puts "Done #{TABLE}. Saved 0 item(s)."
+    exit 0
 end
 
-links = []
-begin
-  links = detail_links_from_list(LIST_URL)
-rescue StandardError => e
-  Log.warn "derwentvalley", "List fetch failed, will try news listing: #{e.class} #{e.message}"
+if html1.include?("Just a moment") || html1.include?("Enable JavaScript and cookies")
+    Log.warn "derwentvalley", "Cloudflare challenge page returned. " \
+        "DAs for this council are also available via planbuild.rb (council code DER)."
+    puts "Done #{TABLE}. Saved 0 item(s)."
+    exit 0
 end
 
-if links.empty?
-  begin
-    links = detail_links_from_news(NEWS_URL)
-  rescue StandardError => e
-    Log.warn "derwentvalley", "News fetch failed: #{e.class} #{e.message}"
-  end
+# ----- Extract detail page URLs from news listing -----
+list_doc     = Nokogiri::HTML(html1)
+detail_urls  = []
+
+list_doc.css("li.news-listing__item a[href]").each do |a|
+    href = a["href"].to_s
+    next unless href.include?("lgasa-search")
+
+    detail_url = resolve_detail_url(href)
+    detail_urls << detail_url if detail_url && !detail_url.empty?
 end
 
-# Both URLs return a Cloudflare JS-challenge page (HTTP 200 with challenge HTML).
-# We can't solve this without browser-level JS execution.
-# Derwent Valley DAs are also published on PlanBuild (council code DER),
-# so planbuild.rb covers this council independently.
-if links.empty?
-  begin
-    probe = Http.get(LIST_URL)
-    if cloudflare_blocked?(probe)
-      Log.warn "derwentvalley", "Site is returning a Cloudflare challenge page — cannot scrape without browser-level JS execution. DAs for this council are available via planbuild.rb (council code DER)."
-      puts "Done #{TABLE}. Saved 0 item(s) — site blocked by Cloudflare."
-      exit 0
+detail_urls.uniq!
+Log.info "derwentvalley", "Found #{detail_urls.length} detail page(s)"
+
+saved = 0
+
+detail_urls.each do |detail_url|
+    Log.info "derwentvalley", "Fetching #{detail_url}"
+    sleep(0.4)
+
+    _final_url, html2, code2 = http_get(
+        detail_url, jar: jar,
+        referer:    NEWS_URL,
+        fetch_site: "same-origin"
+    )
+
+    if code2 != 200 || html2.bytesize < 5_000
+        Log.warn "derwentvalley", "Detail page failed (#{code2}): #{detail_url}"
+        next
     end
-  rescue StandardError => e
-    Log.warn "derwentvalley", "Probe fetch failed: #{e.class} #{e.message}"
-  end
-end
 
-links.uniq!
+    records = parse_detail(html2, detail_url)
+    if records.empty?
+        Log.warn "derwentvalley", "No DA records parsed from #{detail_url}"
+        next
+    end
 
-puts "Found #{links.length} candidate link(s) for #{TABLE}"
+    records.each do |r|
+        begin
+            DB.upsert(TABLE, {
+                council_reference: r[:council_reference],
+                address:           r[:address][0, 255],
+                description:       r[:description],
+                date_received:     r[:date_received],
+                date_received_raw: r[:date_received_raw],
+                on_notice_to:      r[:on_notice_to],
+                on_notice_to_raw:  r[:on_notice_to_raw],
+                document_url:      r[:document_url],
+                applicant:         "",
+                owner:             ""
+            })
 
-saved = 0
+            enrich_after_upsert!(
+                table:             TABLE,
+                council_reference: r[:council_reference],
+                address:           r[:address]
+            )
 
-links.each do |u|
-  begin
-    item = parse_detail(u)
-  rescue StandardError => e
-    Log.warn "scraper", "Skip #{u}: #{e.class} #{e.message}"
-    next
-  end
-  next unless item
-
-  upsert_and_enrich!(
-    table: TABLE,
-    row: {
-      description: item[:description],
-      date_received: item[:date_received],
-      date_received_raw: item[:date_received_raw],
-      address: item[:address],
-      council_reference: item[:council_reference],
-      applicant: "",
-      owner: ""
-    },
-    extras: {
-      document_url:     item[:document_url],
-      on_notice_to:     item[:date_received],
-      on_notice_to_raw: item[:date_received_raw],
-      title_reference:  item[:title_reference]
-    }
-  )
-  saved += 1
+            Log.info "derwentvalley", "Upserted #{r[:council_reference]} -> #{r[:address]}"
+            saved += 1
+        rescue StandardError => e
+            Log.warn "derwentvalley", "DB error for #{r[:council_reference]}: #{e.class} #{e.message}"
+        end
+    end
 end
 
 puts "Done #{TABLE}. Saved #{saved} item(s)."