|
@@ -26,10 +26,12 @@ DB.ensure_table!(TABLE)
|
|
|
REF_RX = /\bDA[-\s]?\d{1,4}\/20\d{2}\b/i
|
|
REF_RX = /\bDA[-\s]?\d{1,4}\/20\d{2}\b/i
|
|
|
|
|
|
|
|
def abs_url(base, href)
|
|
def abs_url(base, href)
|
|
|
- return nil if href.to_s.strip.empty?
|
|
|
|
|
- URI.join(base, href).to_s
|
|
|
|
|
|
|
+ h = href.to_s.strip
|
|
|
|
|
+ return nil if h.empty?
|
|
|
|
|
+ return h if h.start_with?("http://", "https://")
|
|
|
|
|
+ URI.join(base, h).to_s
|
|
|
rescue URI::InvalidURIError
|
|
rescue URI::InvalidURIError
|
|
|
- nil
|
|
|
|
|
|
|
+ h
|
|
|
end
|
|
end
|
|
|
|
|
|
|
|
def parse_page(html, base_url)
|
|
def parse_page(html, base_url)
|