module NetHTTPUtils

Attributes

logger[RW]

Public Class Methods

<<(msg) click to toggle source
# File lib/nethttputils.rb, line 59
# Sink for Net::HTTP debug output: collects the fragments it is given into a
# shared buffer and forwards them to NetHTTPUtils.logger at debug level once a
# fragment ends the buffer with a newline.
#
# The buffer is capped at 1000 characters (997 characters plus "...").
# Returns nil.
def << msg
  @@buffer ||= "[Net::HTTP debug] "
  @@buffer.concat msg
  # Decide whether the line is complete BEFORE truncating. Truncation replaces
  # the tail of the buffer (including a trailing newline) with "...", so
  # checking afterwards would leave an overflowed buffer unflushed forever and
  # silently drop every later debug message.
  line_complete = @@buffer.end_with? ?\n
  @@buffer = @@buffer[0...997] + "..." if @@buffer.size > 1000
  return unless line_complete
  NetHTTPUtils.logger.debug @@buffer.sub ?\n, "  "
  @@buffer = nil
end
remove_tags(str) click to toggle source
# File lib/nethttputils.rb, line 30
# Reduces an HTML string to its visible text.
#
# <script> and <style> elements are dropped together with their contents,
# every remaining tag is removed, then each line is stripped and blank lines
# are discarded. Returns the surviving lines joined with "\n".
#
# The opening-tag patterns accept any mix of double-quoted, single-quoted,
# unquoted and boolean attributes, in any letter case, so that e.g.
# `<script async>` or `<STYLE media='screen'>` do not leak JS/CSS source into
# the result. Quoted attribute values may contain ">".
def remove_tags str
  str.gsub(/<script\b(?:"[^"]*"|'[^']*'|[^>"'])*>.*?<\/script\s*>/mi, "").
      gsub(/<style\b(?:"[^"]*"|'[^']*'|[^>"'])*>.*?<\/style\s*>/mi, "").
      gsub(/<[^>]*>/, "").split(?\n).map(&:strip).reject(&:empty?).join(?\n)
end
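request_data(http, mtd = :GET, type = :form, form: {}, header: {}, auth: nil, proxy: nil, timeout: nil, no_redirect: false, max_start_http_retry_delay: 3600, max_read_retry_delay: 3600, patch_request: nil, &block) click to toggle source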
# File lib/nethttputils.rb, line 307
# Performs an HTTP request and returns the response body as a String that
# carries the Net::HTTPResponse in its @last_response instance variable.
#
# http     - an already prepared Net::HTTP object, or anything accepted by
#            start_http (it is passed through start_http when it is not a
#            Net::HTTP).
# mtd      - HTTP method as a Symbol.
# type, form, header, auth, patch_request, block - handed to `read` unchanged.
# proxy    - splatted into the start_http call.
# timeout  - defaults to 30 when nil.
# no_redirect, max_start_http_retry_delay, max_read_retry_delay - forwarded to
#            start_http / read.
#
# For GET requests to a host that is not yet in @@_405, a HEAD request is sent
# first. If it yields no body (an Error with code 400 was swallowed) or the
# answer is 405, the host is remembered in @@_405 and the pre-flight is
# skipped for it from then on.
#
# Raises Error (with the body and the numeric status code) for any status
# other than 20x, 3xx and 405.
def request_data http, mtd = :GET, type = :form, form: {}, header: {}, auth: nil, proxy: nil,
    timeout: nil, no_redirect: false,
    max_start_http_retry_delay: 3600,
    max_read_retry_delay: 3600,
    patch_request: nil, &block
  timeout ||= 30
  http = start_http http, max_start_http_retry_delay, timeout, no_redirect, *proxy unless http.is_a? Net::HTTP

  # Raises Error unless the response attached to +body+ has an accepted status.
  check_code = lambda do |body|
    fail unless code = body.instance_variable_get(:@last_response).code
    case code
      # TODO: raise on 405
      when /\A(20\d|3\d\d|405)\z/
        nil
      else
        ct = body.instance_variable_get(:@last_response).to_hash["content-type"]
        raise Error.new(
          # do not put binary PNG data into the error, only its content type
          (ct == ["image/png"] ? "<#{ct.first}>" : body),
          code.to_i
        )
    end
  end
  if mtd == :GET && !@@_405.include?(http.address)
    body = begin
      # The pre-flight must obey the same redirect policy and timeout as the
      # real request, otherwise it may follow (and fail on) a redirect the
      # caller explicitly asked not to follow.
      request_data http, :HEAD, form: form, header: header, auth: auth,
        timeout: timeout, no_redirect: no_redirect,
        max_start_http_retry_delay: max_start_http_retry_delay,
        max_read_retry_delay: max_read_retry_delay
    rescue NetHTTPUtils::Error => e
      # a 400 answer to HEAD is tolerated: `body` becomes nil
      raise unless e.code == 400
    end
    if !body || "405" == body.instance_variable_get(:@last_response).code
      @@_405.add http.address
    else
      check_code.call body
    end
  end
  body = read http, mtd, type, form: form, header: header, auth: auth,
    timeout: timeout, no_redirect: no_redirect,
    max_read_retry_delay: max_read_retry_delay,
    patch_request: patch_request, &block
  check_code.call body

  last_response = body.instance_variable_get :@last_response
  # Net::HTTPResponse#to_hash maps every header to an Array of values, so the
  # header has to be compared with ["gzip"], not "gzip". The payload is only
  # inflated when it really starts with the gzip magic number (1f 8b): this
  # skips empty bodies (e.g. HEAD answers), non-String bodies and bodies that
  # have already been decoded.
  gzipped = last_response.to_hash["content-encoding"] == ["gzip"] &&
            body.is_a?(String) && body.getbyte(0) == 0x1f && body.getbyte(1) == 0x8b
  if gzipped
    Zlib::GzipReader.new(StringIO.new(body)).read.tap do |inflated|
      # keep the response reachable from the returned string, like `read` does
      inflated.instance_variable_set :@last_response, last_response
    end
  else
    body
  end
# ensure
#   response.instance_variable_get("@nethttputils_close").call if response
end
start_http(url, max_start_http_retry_delay = 3600, timeout = nil, no_redirect = false, proxy = nil) click to toggle source
# File lib/nethttputils.rb, line 36
# Builds a Net::HTTP object for +url+ and returns it with two extra instance
# variables set: @uri (the parsed URI) and @max_start_http_retry_delay.
#
# url      - a URI::HTTP, or a String (escaped first if it does not parse).
# max_start_http_retry_delay - connection errors are retried with a delay that
#            doubles on every attempt (10, 20, 40... seconds); the error is
#            re-raised once the next delay would exceed this value.
# timeout  - used as both open and read timeout; defaults to 30 when nil.
# no_redirect - accepted for signature compatibility; not used in this method.
# proxy    - optional "host:port" String, split on ":" and passed to
#            Net::HTTP.start as the proxy arguments.
#
# NOTE(review): Net::HTTP.start is used in its block form, which presumably
# finishes the session when the block returns -- confirm that callers rely on
# Net::HTTP re-opening the connection on each request.
def start_http url, max_start_http_retry_delay = 3600, timeout = nil, no_redirect = false, proxy = nil
  timeout ||= 30
  uri = url
  uri = URI.parse begin
    URI url
    url
  rescue URI::InvalidURIError
    # URI.escape was removed in Ruby 3.0; the parser method is available
    # before and after that release.
    URI::DEFAULT_PARSER.escape url
  end unless url.is_a? URI::HTTP
  delay = 5
  begin
    Net::HTTP.start(
      uri.host, uri.port,
      *(proxy.split ?: if proxy),
      use_ssl: uri.scheme == "https",
      # NOTE(review): certificate verification is disabled for every HTTPS
      # connection -- confirm this is intended.
      verify_mode: OpenSSL::SSL::VERIFY_NONE,
      **({open_timeout: timeout}), #  if timeout
      **({read_timeout: timeout}), #  if timeout
    ) do |http|
      # http.open_timeout = timeout   # seems like when opening hangs, this line in unreachable
      # http.read_timeout = timeout
      http.set_debug_output( Object.new.tap do |obj|
        obj.instance_eval do
          # Collects Net::HTTP debug fragments and logs them line by line.
          def << msg
            @@buffer ||= "[Net::HTTP debug] "
            @@buffer.concat msg
            # Check for the end of line BEFORE truncating: truncation replaces
            # a trailing newline with "...", which would otherwise leave an
            # overflowed buffer unflushed forever.
            line_complete = @@buffer.end_with? ?\n
            @@buffer = @@buffer[0...997] + "..." if @@buffer.size > 1000
            return unless line_complete
            NetHTTPUtils.logger.debug @@buffer.sub ?\n, "  "
            @@buffer = nil
          end
        end
      end ) if logger.level == Logger::DEBUG # use `logger.debug?`?
      http
    end
  rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ENETUNREACH, Errno::ECONNRESET => e
    if max_start_http_retry_delay < delay *= 2
      e.message.concat " to #{uri}"
      raise
    end
    logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}'"
    sleep delay
    retry
  rescue SocketError => e
    if max_start_http_retry_delay < delay *= 2
      e.message.concat " to #{uri}"
      raise e
    end
    logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{uri}"
    sleep delay
    retry
  rescue Errno::ETIMEDOUT, Net::OpenTimeout => e
    raise if max_start_http_retry_delay < delay *= 2
    logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{uri}"
    sleep delay
    retry
  rescue OpenSSL::SSL::SSLError => e
    raise if max_start_http_retry_delay < delay *= 2
    logger.error "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{uri}"
    sleep delay
    retry
  end.tap do |http|
    # `read` and `request_data` pick these up from the returned object
    http.instance_variable_set :@uri, uri
    http.instance_variable_set :@max_start_http_retry_delay, max_start_http_retry_delay
  end
end

Private Class Methods

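read(http, mtd = :GET, type = :form, form: {}, header: {}, auth: nil, timeout: nil, no_redirect: false, max_read_retry_delay: 3600, patch_request: nil, &block) click to toggle source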
# File lib/nethttputils.rb, line 104
# Sends one request through +http+ (following redirects unless +no_redirect+)
# and returns the body of the final response as a String ("" when the response
# has no body) with the Net::HTTPResponse stored in its @last_response
# instance variable.
#
# http   - object created by start_http; its @uri instance variable is the
#          request target. NOTE: @uri.query is overwritten when +form+ is sent
#          with HEAD/GET.
# mtd    - :HEAD, :GET, :POST, :PUT, :DELETE or :PATCH (any letter case).
# type   - how +form+ is encoded for POST/PATCH: :json, :multipart or :form.
# form   - query parameters (HEAD/GET) or body fields (POST/PATCH).
# header - extra request headers; for Array values only the first item is used.
# auth   - arguments for Net::HTTPHeader#basic_auth.
# timeout - passed to start_http when a redirect needs a new connection;
#          defaults to 30 when nil.
# max_read_retry_delay - network errors are retried with a doubling delay
#          (10, 20, 40... seconds) until the next delay would exceed this.
# patch_request - optional callable invoked as (uri, form, request) before the
#          request is sent.
# block  - passed to Net::HTTP#request.
#
# Sleeps when rate-limit headers report 100 or fewer remaining requests.
# Cookies received via Set-Cookie are sent back on the following requests of
# the same call (i.e. across redirects).
def read http, mtd = :GET, type = :form, form: {}, header: {}, auth: nil, timeout: nil, no_redirect: false, max_read_retry_delay: 3600, patch_request: nil, &block
  timeout ||= 30
  logger = NetHTTPUtils.logger

  uri = http.instance_variable_get :@uri
  logger.debug "Warning: query params included in `url` argument are discarded because `:form` isn't empty" if uri.query && !form.empty?
  # we can't just merge because URI fails to parse such queries as "/?1"

  uri.query = URI.encode_www_form form if %i{ HEAD GET }.include?(mtd = mtd.upcase) && !form.empty?
  cookies = {}
  # Builds the Net::HTTP request object for +uri+ using the current +mtd+ and
  # the cookies collected so far; in debug mode also logs an equivalent curl
  # command and a condensed call stack.
  prepare_request = lambda do |uri|
    case mtd.upcase
      when :HEAD   ; Net::HTTP::Head
      when :GET    ; Net::HTTP::Get
      when :POST   ; Net::HTTP::Post
      when :PUT    ; Net::HTTP::Put
      when :DELETE ; Net::HTTP::Delete
      when :PATCH  ; Net::HTTP::Patch
      else         ; raise "unknown method '#{mtd}'"
    end.new(uri).tap do |request| # somehow Get eats even raw url, not URI object
      patch_request.call uri, form, request if patch_request
      # p Object.instance_method(:method).bind(request).call(:basic_auth).source_location
      # p Object.instance_method(:method).bind(request).call(:set_form).source_location
      # request.basic_auth *p(auth.map(&URI.method(:escape))) if auth
      request.basic_auth *auth if auth
      if (mtd == :POST || mtd == :PATCH) && !form.empty?
        case type
          when :json
            request.body = JSON.dump form
            request.content_type = "application/json"
          when :multipart
            request.set_form form, "multipart/form-data"
          when :form
            # values that look like files force a multipart upload
            if form.any?{ |k, v| v.respond_to? :to_path }
              request.set_form form, "multipart/form-data"
            else
              request.set_form_data form
              request.content_type = "application/x-www-form-urlencoded;charset=UTF-8"
            end
          else
            raise "unknown content-type '#{type}'"
        end
      end
      header.each{ |k, v| request[k.to_s] = v.is_a?(Array) ? v.first : v }
      request["cookie"] = [*request["cookie"], cookies.map{ |k, v| "#{k}=#{v}" }].join "; " unless cookies.empty?

      logger.info "> #{request.class} #{uri.host} #{request.path}"
      next unless logger.debug?
      logger.debug "content-length: #{request.content_length.to_i}, content-type: #{request.content_type}" unless %i{ HEAD GET }.include? mtd
      logger.debug "query: #{uri.query.inspect}"
      logger.debug "content-type: #{request.content_type.inspect}"
      # NOTE(review): the `else` branch below raises for any non-HEAD/GET
      # request without one of the three known content types, and only when
      # debug logging is on -- confirm that is intended.
      curl_form = case request.content_type
        when "application/json" ; "-d #{JSON.dump form} "
        when "multipart/form-data" ; form.map{ |k, v| "-F \"#{k}=#{v.respond_to?(:to_path) ? "@#{v.to_path}" : v}\" " }.join
        when "application/x-www-form-urlencoded" ; "-d \"#{URI.encode_www_form form}\" "
        else %i{ HEAD GET }.include?(mtd) ? "" : fail("unknown content-type '#{request.content_type}'")
      end
      logger.debug "curl -vsSL --compressed -o /dev/null #{"-X HEAD " if request.is_a? Net::HTTP::Head}#{
        request.each_header.map{ |k, v| "-H \"#{k}: #{v}\" " unless k == "host" }.join
      }#{curl_form}'#{uri.scheme}://#{uri.host}#{uri.path}#{"?#{uri.query}" if uri.query && !uri.query.empty?}'"
      logger.debug "> header: #{request.each_header.to_a}"
      logger.debug "> body: #{request.body.inspect.tap{ |body| body.replace body[0...997] + "..." if body.size > 1000 }}"
      # TODO this is buggy -- mixes lines from different files into one line
      stack = caller.reverse.map do |level|
        /((?:[^\/:]+\/)?[^\/:]+):([^:]+)/.match(level).captures
      end.chunk(&:first).map do |file, group|
        "#{file}:#{group.map(&:last).chunk{|_|_}.map(&:first).join(",")}"
      end
      logger.debug stack.join " -> "
    end
  end
  # Sends +request+, retrying on network errors, honours rate-limit headers,
  # tracks cookies and recursively follows redirects. Returns the final
  # Net::HTTPResponse.
  do_request = lambda do |request|
    delay = 5
    response = begin
      http.request request, &block
    rescue Errno::ECONNREFUSED, Net::ReadTimeout, Net::OpenTimeout, Zlib::BufError, Errno::ECONNRESET, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT, Errno::ENETUNREACH => e
      raise if max_read_retry_delay < delay *= 2
      logger.error "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{request.uri}"
      sleep delay
      retry
    rescue EOFError => e
      raise unless e.backtrace.empty?
      # https://bugs.ruby-lang.org/issues/13018
      # https://blog.kalina.tech/2019/04/exception-without-backtrace-in-ruby.html
      raise EOFError_from_rbuf_fill.new "probably the old Ruby empty backtrace EOFError exception from net/protocol.rb"
    end
    # response.instance_variable_set "@nethttputils_close", http.method(:finish)
    # response.singleton_class.instance_eval{ attr_accessor :nethttputils_socket_to_close }

    now = Time.now
    # All three values are plain Integers (requests left, reset moment and
    # current moment, both in seconds) so that the arithmetic below works for
    # every header flavour.
    remaining, reset_time, current_timestamp = if response.key? "x-ratelimit-userremaining"
      logger.debug "x-ratelimit-clientremaining: #{response.fetch("x-ratelimit-clientremaining").to_i}"
      [
        response.fetch("x-ratelimit-userremaining").to_i,
        response.fetch("x-ratelimit-userreset").to_i,
        response.fetch("x-timer")[/\d+/].to_i,
      ]
    elsif response.key? "x-rate-limit-remaining"
      [
        response.fetch("x-rate-limit-remaining").to_i,
        response.fetch("x-rate-limit-reset").to_i,
        now.to_i,
      ]
    elsif response.key? "x-ratelimit-remaining"
      [
        response.fetch("x-ratelimit-remaining").to_i,
        # the header value is added to the current time here; it must be added
        # to `now.to_i`, not to `now`, because a Time result would make the
        # `.fdiv` call below fail with NoMethodError
        now.to_i + response.fetch("x-ratelimit-reset").to_i,
        now.to_i,
      ]
    end
    if remaining
      logger.debug "x-remaining: #{remaining}"
      if remaining <= 100
        # spread the time left until the reset over the requests left
        t = (reset_time - current_timestamp + 1).fdiv([remaining - 5, 1].max)
        logger.warn "x-ratelimit sleep #{t} seconds"
        sleep t
      end
    end

    # TODO: use WEBrick::Cookie
    old_cookies = cookies.dup
    response.to_hash.fetch("set-cookie", []).each do |c|
      next logger.warn "bad cookie: #{c.inspect}" unless /\A([^\s=]+)=([^\s]*)\z/.match c.split(/\s*;\s*/).first
      logger.debug "set-cookie: #{$1}=#{$2}"
      old_cookies.delete $1
      cookies.store $1, $2
    end
    # cookies from earlier responses that this response did not set again
    old_cookies.each do |k, v|
      logger.debug "faking an old cookie into new response: #{k}=#{v}"
      response.add_field "Set-Cookie", "#{k}=#{v}"
    end

    case response.code
    when /\A20/
      response
    when /\A30\d\z/
      next response if no_redirect
      logger.info "redirect: #{response["location"]}"
      require "addressable"
      new_uri = URI.join request.uri.to_s, Addressable::URI.escape(response["location"])
      new_host = new_uri.host
      raise Error.new "redirected in place" if new_uri == http.instance_variable_get(:@uri)
      if http.address != new_host ||
         http.port != new_uri.port ||
         http.use_ssl? != (new_uri.scheme == "https")
        logger.debug "changing host from '#{http.address}' to '#{new_host}'"
        # http.finish   # why commented out?
        http = NetHTTPUtils.start_http new_uri, http.instance_variable_get(:@max_start_http_retry_delay), timeout, no_redirect
      end
      if request.method == "POST"
        logger.info "POST redirects to GET (RFC)"
        mtd = :GET
      end
      do_request.call prepare_request[new_uri]
    when "404"
      logger.error "404 at #{request.method} #{request.uri} with body: #{
        if !response.body
          response.body.class
        elsif response.body.is_a? Net::ReadAdapter
          "<<impossible to reread Net::ReadAdapter -- check the IO you've used in block form>>"
        elsif response.to_hash["content-type"] == ["image/png"]
          response.to_hash["content-type"].to_s
        else
          response.body.tap do |body|
            body.replace NetHTTPUtils.remove_tags body if body[/<html[> ]/]
          end.inspect
        end
      }"
      response
    when "429"
      logger.error "429 at #{request.method} #{request.uri} with body: #{response.body.inspect}"
      response
    when /\A50\d\z/
      logger.error "#{response.code} at #{request.method} #{request.uri} with body: #{
        if !response.body
          response.body.class
        else
          response.body.tap do |body|
            body.replace NetHTTPUtils.remove_tags body if body[/<html[> ]/]
          end.inspect
        end
      }"
      response
    else
      logger.warn "code #{response.code} at #{request.method} #{request.uri} from #{
        [__FILE__, caller.map{ |i| i[/(?<=:)\d+/] }].join ?:
      }"
      logger.debug "< body: #{
        response.body.tap do |body|
          body.replace NetHTTPUtils.remove_tags body if body[/<html[> ]/]
        end.inspect
      }" if request.is_a? Net::HTTP::Get
      response
    end
  end
  response = do_request.call prepare_request[uri]
  logger.debug "< header: #{response.to_hash}"
  (response.body || "").tap{ |r| r.instance_variable_set :@last_response, response }
end