# lib/http.rb
require "net/http"
require "uri"
require "openssl"
require "open3"

# HTTP GET helpers built on Net::HTTP: browser-like default headers, a
# per-call cookie jar (a plain Hash of name => value), retries with
# exponential backoff, and a curl fallback for 403/406 responses.
module Http
  BASE_HEADERS = {
    "User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
    "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language" => "en-AU,en;q=0.9",
    # keep identity to avoid manual gzip handling; servers still work with this
    "Accept-Encoding" => "identity",
    "Connection" => "keep-alive"
  }.freeze

  # Connection-level failures that are retried (in .get) or tolerated
  # (during warm-up requests and the Dorset HTTPS -> HTTP fallback).
  TRANSIENT_ERRORS = [
    OpenSSL::SSL::SSLError,
    EOFError,
    Errno::ECONNRESET,
    Net::ReadTimeout,
    Net::OpenTimeout
  ].freeze

  # Status codes that trigger the same-site warm-up and then the curl fallback.
  BLOCKED_CODES = [403, 406].freeze

  # Default cap on the number of redirects .get follows before giving up.
  DEFAULT_MAX_REDIRECTS = 10

  # Paths requested (best-effort) before fetching a Dorset eServices page.
  DORSET_WARM_PATHS = %w[/ /eservice/].freeze

  # Separator between cookies folded into one Set-Cookie value. A comma only
  # counts when it is directly followed by a `name=` token, so the comma inside
  # an `Expires=Wed, 21 Oct 2015 ...` date is never treated as a separator.
  COOKIE_LIST_SEPARATOR = /,\s*(?=[^;,=\s]+=)/

  # Copies every cookie found in the response's Set-Cookie headers into +jar+.
  # Only the leading name=value pair of each cookie is kept; attributes such
  # as Path or Expires are discarded.
  #
  # @param jar [Hash] cookie name => value, mutated in place
  # @param response [#get_fields] object returning the Set-Cookie values (or nil)
  def self.merge_set_cookie!(jar, response)
    Array(response.get_fields("set-cookie")).each do |raw|
      raw.split(COOKIE_LIST_SEPARATOR).each do |cookie|
        # Everything before the first ";" is the name=value pair.
        name, value = cookie[/\A[^;]*/].split("=", 2)
        next if name.nil? || value.nil?

        name = name.strip
        next if name.empty? # a nameless cookie would corrupt the Cookie header

        jar[name] = value.strip
      end
    end
  end

  # Serialises the jar into a Cookie request-header value.
  #
  # @param jar [Hash] cookie name => value
  # @return [String, nil] "k1=v1; k2=v2", or nil when the jar is empty
  def self.cookie_header(jar)
    return nil if jar.empty?

    jar.map { |k, v| "#{k}=#{v}" }.join("; ")
  end

  # Performs a single GET (no redirect following, no retries) and records any
  # cookies the server sets into +jar+.
  #
  # TLS certificate verification is disabled when ENV["ALLOW_INSECURE"] == "1".
  #
  # @param uri [URI::HTTP] target; its scheme decides whether TLS is used
  # @param headers [Hash] extra request headers, merged over BASE_HEADERS
  # @param jar [Hash] cookie jar, sent with the request and updated in place
  # @param referer [String, nil] value for the Referer header, if any
  # @return [Net::HTTPResponse] the response, with its body already read
  def self.request(uri, headers: {}, jar: {}, referer: nil)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == "https"
    http.verify_mode =
      ENV["ALLOW_INSECURE"] == "1" ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER
    http.read_timeout = 30
    http.open_timeout = 15
    http.keep_alive_timeout = 10

    merged = BASE_HEADERS.merge(headers)
    merged["Referer"] = referer if referer
    cookies = cookie_header(jar)
    merged["Cookie"] = cookies if cookies

    # leave decode_content default to let Net::HTTP handle it
    req = Net::HTTP::Get.new(uri.request_uri, merged)

    # The block form of #start closes the connection itself, even on error.
    http.start do |conn|
      response = conn.request(req)
      merge_set_cookie!(jar, response)
      response
    end
  end

  # Generic GET with retries, cookie jar persistence, and 403 warmup + curl fallback.
  #
  # Redirects are followed with the same jar, up to +max_redirects+ times.
  # On the first 403/406 a same-site warm-up request is made (to collect
  # cookies) and the request is repeated; on a later 403/406 curl is tried.
  # Transient connection errors are retried with exponential backoff.
  #
  # @param url [String] absolute URL to fetch
  # @param headers [Hash] extra request headers
  # @param tries [Integer] maximum number of transient failures tolerated
  # @param referer [String, nil] Referer to send; defaults to the site root
  # @param max_redirects [Integer] maximum number of redirects to follow
  # @return [String] response body
  # @raise [RuntimeError] on a non-success status, a redirect without a
  #   Location header, or too many redirects
  # @raise the last transient error once +tries+ is exhausted
  def self.get(url, headers: {}, tries: 4, referer: nil, max_redirects: DEFAULT_MAX_REDIRECTS)
    uri = URI.parse(url)
    jar = {}
    warmed = false
    attempts = 0
    redirects = 0

    loop do
      ref = referer || "#{uri.scheme}://#{uri.host}/"
      begin
        res = request(uri, headers: headers, jar: jar, referer: ref)
        case res
        when Net::HTTPRedirection
          loc = res["location"]
          raise "redirect without location" unless loc

          redirects += 1
          raise "too many redirects (limit #{max_redirects})" if redirects > max_redirects

          # follow redirect on the next iteration with the same jar
          uri = URI.join(uri.to_s, loc)
        when Net::HTTPSuccess
          return res.body
        else
          blocked = BLOCKED_CODES.include?(res.code.to_i)
          if blocked && !warmed
            # warm up same-site, then try again once
            warm_up(ref, jar: jar, headers: headers, referer: ref)
            warmed = true
            next
          end

          if blocked
            body = curl_fallback(uri, ref)
            return body if body
          end

          raise "#{res.code} #{res.message}"
        end
      rescue *TRANSIENT_ERRORS
        attempts += 1
        raise if attempts >= tries

        sleep(2**attempts)
      end
    end
  end

  # Dorset eServices: tolerant warm-up and HTTPS→HTTP fallback.
  #
  # For each scheme (HTTPS first, then HTTP) the warm-up paths are requested
  # to collect cookies, then the target is fetched on that scheme, following
  # at most one redirect. The target's host is kept; its scheme and any
  # explicit port are replaced by the scheme being tried.
  #
  # @param target_url [String] absolute http(s) URL of the page to fetch
  # @return [String] response body of the first successful fetch
  # @raise [RuntimeError] when neither scheme yields a success response
  def self.dorset_session_get(target_url)
    host = URI.parse(target_url).host

    %w[https http].each do |scheme|
      base = "#{scheme}://#{host}"
      jar = {}
      begin
        DORSET_WARM_PATHS.each do |path|
          warm_up("#{base}#{path}", jar: jar, referer: "#{base}/")
        end

        tgt = URI.parse(target_url.sub(%r{\Ahttps?://[^/]+}, base))
        referer = "#{base}/eservice/"
        res = request(tgt, headers: {}, jar: jar, referer: referer)
        if res.is_a?(Net::HTTPRedirection) && res["location"]
          res = request(URI.join(tgt.to_s, res["location"]), headers: {}, jar: jar, referer: referer)
        end
        return res.body if res.is_a?(Net::HTTPSuccess)
      rescue *TRANSIENT_ERRORS
        next # this scheme is unusable; try the next one
      end
    end

    raise "Dorset fetch failed via HTTPS and HTTP"
  end

  # Best-effort GET whose only purpose is to populate +jar+ with cookies;
  # transient connection errors are deliberately ignored.
  def self.warm_up(url, jar:, headers: {}, referer: url)
    request(URI.parse(url), headers: headers, jar: jar, referer: referer)
  rescue *TRANSIENT_ERRORS
    nil
  end
  private_class_method :warm_up

  # Fetches +uri+ with the curl binary, using the array form of the command to
  # avoid shell injection. `--fail` makes curl exit non-zero on HTTP errors so
  # an error page is never mistaken for a successful body.
  #
  # @return [String, nil] the body, or nil when curl is not installed, exits
  #   non-zero, or produces only whitespace
  def self.curl_fallback(uri, referer)
    out, status = Open3.capture2(
      "curl", "-sSL", "--fail", "--compressed",
      "-A", BASE_HEADERS["User-Agent"],
      "-H", "Accept: #{BASE_HEADERS["Accept"]}",
      "-H", "Accept-Language: #{BASE_HEADERS["Accept-Language"]}",
      "-e", referer,
      uri.to_s
    )
    return nil unless status.success?

    out.to_s.strip.empty? ? nil : out
  rescue Errno::ENOENT
    nil # curl not installed: let the caller report the original HTTP error
  end
  private_class_method :curl_fallback
end