http.rb 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. # lib/http.rb
  2. require "net/http"
  3. require "uri"
  4. require "openssl"
  5. require "open3"
  6. module Http
  7. BASE_HEADERS = {
  8. "User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
  9. "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  10. "Accept-Language" => "en-AU,en;q=0.9",
  11. # keep identity to avoid manual gzip handling; servers still work with this
  12. "Accept-Encoding" => "identity",
  13. "Connection" => "keep-alive"
  14. }.freeze
  15. def self.merge_set_cookie!(jar, response)
  16. Array(response.get_fields("set-cookie")).each do |raw|
  17. raw.split(/,\s*(?=[^;]+=[^;]+)/).each do |cookie|
  18. pair = cookie.split(";", 2).first
  19. k, v = pair.split("=", 2)
  20. next if k.nil? || v.nil?
  21. jar[k.strip] = v.strip
  22. end
  23. end
  24. end
  25. def self.cookie_header(jar)
  26. return nil if jar.empty?
  27. jar.map { |k, v| "#{k}=#{v}" }.join("; ")
  28. end
  29. def self.request(uri, headers: {}, jar: {}, referer: nil)
  30. http = Net::HTTP.new(uri.host, uri.port)
  31. http.use_ssl = uri.scheme == "https"
  32. http.verify_mode = (ENV["ALLOW_INSECURE"] == "1") ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER
  33. http.read_timeout = 30
  34. http.open_timeout = 15
  35. http.keep_alive_timeout = 10
  36. h = BASE_HEADERS.merge(headers)
  37. h["Referer"] = referer if referer
  38. ck = cookie_header(jar)
  39. h["Cookie"] = ck if ck
  40. req = Net::HTTP::Get.new(uri.request_uri, h)
  41. # leave decode_content default to let Net::HTTP handle it
  42. http.start do |hcli|
  43. res = hcli.request(req)
  44. merge_set_cookie!(jar, res)
  45. res
  46. end
  47. ensure
  48. http.finish if http&.started?
  49. end
  50. # Generic GET with retries, cookie jar persistence, and 403 warmup + curl fallback
  51. def self.get(url, headers: {}, tries: 4, referer: nil)
  52. uri = URI.parse(url)
  53. jar = {}
  54. warmed = false
  55. attempts = 0
  56. loop do
  57. ref = referer || "#{uri.scheme}://#{uri.host}/"
  58. begin
  59. res = request(uri, headers: headers, jar: jar, referer: ref)
  60. case res
  61. when Net::HTTPRedirection
  62. loc = res["location"] or raise "redirect without location"
  63. uri = URI.join(uri.to_s, loc)
  64. next # follow redirect immediately with same jar
  65. when Net::HTTPSuccess
  66. return res.body
  67. else
  68. code = res.code.to_i
  69. if [403, 406].include?(code) && !warmed
  70. # warm up same-site, then try again once
  71. begin
  72. request(URI.parse(ref), headers: headers, jar: jar, referer: ref)
  73. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout
  74. end
  75. warmed = true
  76. next
  77. end
  78. if [403, 406].include?(code)
  79. # final curl fallback — use array form to avoid shell injection
  80. out, = Open3.capture2(
  81. "curl", "-sSL", "--compressed",
  82. "-A", BASE_HEADERS["User-Agent"],
  83. "-H", "Accept: #{BASE_HEADERS["Accept"]}",
  84. "-H", "Accept-Language: #{BASE_HEADERS["Accept-Language"]}",
  85. "-e", ref,
  86. uri.to_s
  87. )
  88. return out unless out.to_s.strip.empty?
  89. end
  90. raise "#{res.code} #{res.message}"
  91. end
  92. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout => e
  93. attempts += 1
  94. raise e if attempts >= tries
  95. sleep(2**attempts)
  96. next
  97. end
  98. end
  99. end
  100. # Dorset eServices: tolerant warm-up and HTTPS→HTTP fallback
  101. def self.dorset_session_get(target_url)
  102. tgt_uri = URI.parse(target_url)
  103. host = tgt_uri.host
  104. https_base = "https://#{host}"
  105. http_base = "http://#{host}"
  106. warm_candidates = ["/", "/eservice/"]
  107. [https_base, http_base].each do |base|
  108. jar = {}
  109. begin
  110. warm_candidates.each do |p|
  111. begin
  112. request(URI.parse("#{base}#{p}"), headers: {}, jar: jar, referer: "#{base}/")
  113. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout
  114. end
  115. end
  116. tgt = URI.parse(target_url.sub(%r{\Ahttps?://[^/]+}, base))
  117. res = request(tgt, headers: {}, jar: jar, referer: "#{base}/eservice/")
  118. if res.is_a?(Net::HTTPRedirection) && res["location"]
  119. res = request(URI.join(tgt.to_s, res["location"]), headers: {}, jar: jar, referer: "#{base}/eservice/")
  120. end
  121. return res.body if res.is_a?(Net::HTTPSuccess)
  122. rescue OpenSSL::SSL::SSLError, EOFError, Errno::ECONNRESET, Net::ReadTimeout, Net::OpenTimeout
  123. next
  124. end
  125. end
  126. raise "Dorset fetch failed via HTTPS and HTTP"
  127. end
  128. end