# lib/http.rb
require "net/http"
require "open3"
require "openssl"
require "uri"
  5. module Http
  6. BASE_HEADERS = {
  7. "User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
  8. "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  9. "Accept-Language" => "en-AU,en;q=0.9",
  10. # keep identity to avoid manual gzip handling; servers still work with this
  11. "Accept-Encoding" => "identity",
  12. "Connection" => "keep-alive"
  13. }.freeze
  14. def self.merge_set_cookie!(jar, response)
  15. Array(response.get_fields("set-cookie")).each do |raw|
  16. raw.split(/,\s*(?=[^;]+=[^;]+)/).each do |cookie|
  17. pair = cookie.split(";", 2).first
  18. k, v = pair.split("=", 2)
  19. next if k.nil? || v.nil?
  20. jar[k.strip] = v.strip
  21. end
  22. end
  23. end
  24. def self.cookie_header(jar)
  25. return nil if jar.empty?
  26. jar.map { |k, v| "#{k}=#{v}" }.join("; ")
  27. end
  28. def self.request(uri, headers: {}, jar: {}, referer: nil)
  29. http = Net::HTTP.new(uri.host, uri.port)
  30. http.use_ssl = uri.scheme == "https"
  31. http.verify_mode = (ENV["ALLOW_INSECURE"] == "1") ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER
  32. http.read_timeout = 30
  33. http.open_timeout = 15
  34. http.keep_alive_timeout = 10
  35. h = BASE_HEADERS.merge(headers)
  36. h["Referer"] = referer if referer
  37. ck = cookie_header(jar)
  38. h["Cookie"] = ck if ck
  39. req = Net::HTTP::Get.new(uri.request_uri, h)
  40. # leave decode_content default to let Net::HTTP handle it
  41. http.start do |hcli|
  42. res = hcli.request(req)
  43. merge_set_cookie!(jar, res)
  44. res
  45. end
  46. ensure
  47. http.finish if http&.started?
  48. end
  49. # Generic GET with retries, cookie jar persistence, and 403 warmup + curl fallback
  50. def self.get(url, headers: {}, tries: 4, referer: nil)
  51. uri = URI.parse(url)
  52. jar = {}
  53. warmed = false
  54. attempts = 0
  55. loop do
  56. ref = referer || "#{uri.scheme}://#{uri.host}/"
  57. begin
  58. res = request(uri, headers: headers, jar: jar, referer: ref)
  59. case res
  60. when Net::HTTPRedirection
  61. loc = res["location"] or raise "redirect without location"
  62. uri = URI.join(uri.to_s, loc)
  63. next # follow redirect immediately with same jar
  64. when Net::HTTPSuccess
  65. return res.body
  66. else
  67. code = res.code.to_i
  68. if [403, 406].include?(code) && !warmed
  69. # warm up same-site, then try again once
  70. begin
  71. request(URI.parse(ref), headers: headers, jar: jar, referer: ref)
  72. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout
  73. end
  74. warmed = true
  75. next
  76. end
  77. if [403, 406].include?(code)
  78. # final curl fallback
  79. ua = BASE_HEADERS["User-Agent"]
  80. acc = BASE_HEADERS["Accept"]
  81. lang = BASE_HEADERS["Accept-Language"]
  82. cmd = %Q{curl -sSL --compressed -A "#{ua}" -H "Accept: #{acc}" -H "Accept-Language: #{lang}" -e "#{ref}" "#{uri}"}
  83. out = `#{cmd}`
  84. return out unless out.to_s.strip.empty?
  85. end
  86. raise "#{res.code} #{res.message}"
  87. end
  88. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout => e
  89. attempts += 1
  90. raise e if attempts >= tries
  91. sleep(2**attempts)
  92. next
  93. end
  94. end
  95. end
  96. # Dorset eServices: tolerant warm-up and HTTPS→HTTP fallback
  97. def self.dorset_session_get(target_url)
  98. tgt_uri = URI.parse(target_url)
  99. host = tgt_uri.host
  100. https_base = "https://#{host}"
  101. http_base = "http://#{host}"
  102. warm_candidates = ["/", "/eservice/"]
  103. [https_base, http_base].each do |base|
  104. jar = {}
  105. begin
  106. warm_candidates.each do |p|
  107. begin
  108. request(URI.parse("#{base}#{p}"), headers: {}, jar: jar, referer: "#{base}/")
  109. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout
  110. end
  111. end
  112. tgt = URI.parse(target_url.sub(%r{\Ahttps?://[^/]+}, base))
  113. res = request(tgt, headers: {}, jar: jar, referer: "#{base}/eservice/")
  114. if res.is_a?(Net::HTTPRedirection) && res["location"]
  115. res = request(URI.join(tgt.to_s, res["location"]), headers: {}, jar: jar, referer: "#{base}/eservice/")
  116. end
  117. return res.body if res.is_a?(Net::HTTPSuccess)
  118. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout
  119. next
  120. end
  121. end
  122. raise "Dorset fetch failed via HTTPS and HTTP"
  123. end
  124. end