|
|
@@ -202,8 +202,13 @@ def probe_common_docs(base_url:, key:, danum:, referer:)
|
|
|
"Onsite Notice ", # trailing space variant seen on this site
|
|
|
]
|
|
|
|
|
|
- prefix = "#{BASE_URL}/eProperty/Publicnotices/#{key}/#{danum_slug} - "
|
|
|
- candidates = names.map { |n| "#{prefix}#{n}.pdf" }
|
|
|
+ # Build candidate URLs, encoding spaces in the filename as %20 (only spaces are escaped).
|
|
|
+ # URI.parse rejects bare spaces, so the filename portion must be encoded.
|
|
|
+ candidates = names.map do |n|
|
|
|
+ filename = "#{danum_slug} - #{n}.pdf"
|
|
|
+ encoded = filename.gsub(" ", "%20")
|
|
|
+ "#{BASE_URL}/eProperty/Publicnotices/#{key}/#{encoded}"
|
|
|
+ end
|
|
|
|
|
|
found = []
|
|
|
candidates.each do |pdf_url|
|
|
|
@@ -330,6 +335,7 @@ tables.each do |t|
|
|
|
documents = [] if documents.nil?
|
|
|
anchors_added = 0
|
|
|
used_url = nil
|
|
|
+ probe_done = false # ensure probe_common_docs fires at most once per DA
|
|
|
|
|
|
referers = [
|
|
|
info_url, # details page
|
|
|
@@ -373,10 +379,9 @@ tables.each do |t|
|
|
|
anchors_added += 1
|
|
|
end
|
|
|
|
|
|
- # Regex fallback
|
|
|
- if anchors_added == 0
|
|
|
- # Final fallback: probe known filenames directly
|
|
|
- # Extract KEY and DANUM from the original doc_list_url
|
|
|
+ # Final fallback: probe known filenames directly (runs at most once per DA)
|
|
|
+ if anchors_added == 0 && !probe_done
|
|
|
+ probe_done = true
|
|
|
begin
|
|
|
u = URI.parse(doc_list_url)
|
|
|
q = URI.decode_www_form(u.query || "").to_h
|
|
|
@@ -387,7 +392,7 @@ tables.each do |t|
|
|
|
base_url: BASE_URL,
|
|
|
key: key,
|
|
|
danum: danum,
|
|
|
- referer: doc_list_url # better context for this server
|
|
|
+ referer: doc_list_url
|
|
|
)
|
|
|
documents.concat(probed)
|
|
|
anchors_added = probed.size if probed.any?
|