class Capybara::Session

Attributes

spider[RW]

Public Instance Methods

current_response(response_type = :html) click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 88
# Parses the current page body into a structured object.
#
# response_type - :html (default) builds a Nokogiri HTML document,
#                 :json runs the body through JSON.parse.
#
# For :html the encoding comes from config.encoding: when it is not set the
# body is parsed without an explicit encoding, :auto looks for a charset
# declared in a <meta> tag of the page, and any other value is passed to
# Nokogiri unchanged.
#
# Returns the parsed object, or nil for any other response_type.
def current_response(response_type = :html)
  case response_type
  when :json
    JSON.parse(body)
  when :html
    encoding = config.encoding
    return Nokogiri::HTML(body) unless encoding
    return Nokogiri::HTML(body, nil, encoding) unless encoding == :auto

    # Relabel the raw bytes as ISO-8859-1 and transcode them to UTF-8 so the
    # regexp below can scan the markup for a declared charset.
    markup = body.force_encoding("ISO-8859-1").encode("UTF-8")
    declared_charset = markup[/<meta.*?charset=["]?([\w+\d+\-]*)/i, 1]
    Nokogiri::HTML(body, nil, declared_charset)
  end
end
destroy_driver!() click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 60
# Quits the current driver, if there is one, and forgets it.
#
# A Net::ReadTimeout raised by the first quit (seen with Selenium-like
# drivers) triggers exactly one more quit attempt. Afterwards @driver is
# reset to nil. When no driver is present only a warning is logged.
#
# Returns whatever the logger call returns.
def destroy_driver!
  return logger.warn("Browser: driver #{mode} is not present") unless @driver

  begin
    @driver.quit
  rescue Net::ReadTimeout
    # One more attempt; an error raised here propagates to the caller.
    @driver.quit
  end

  @driver = nil
  logger.info "Browser: driver #{mode} has been destroyed"
end
original_visit(visit_uri, delay: config.before_request[:delay], skip_request_options: false, max_retries: 3)
Alias for: visit
restart!() click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 76
# Restarts the browser behind this session and logs the new pid/port.
#
# When mode matches /poltergeist/ the existing browser is restarted in place
# and the driver's request/response counters are zeroed. For every other
# mode the driver is destroyed and `driver` is called again (presumably
# building a fresh one -- confirm against the driver accessor).
def restart!
  if mode.match?(/poltergeist/)
    @driver.browser.restart
    @driver.requests = 0
    @driver.responses = 0
  else
    destroy_driver!
    driver
  end

  pid = driver.pid
  port = driver.port
  logger.info "Browser: driver has been restarted: name: #{mode}, pid: #{pid}, port: #{port}"
end
scroll_to_bottom() click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 139
# Scrolls the window 10000 pixels further down by running a JavaScript
# snippet through execute_script.
#
# NOTE(review): the offset is fixed, so a page taller than that may not
# actually reach its bottom in one call -- confirm this is intended.
def scroll_to_bottom
  scroll_script = "window.scrollBy(0,10000)"
  execute_script(scroll_script)
end
visit(visit_uri, delay: config.before_request[:delay], skip_request_options: false, max_retries: 3) click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 11
# Opens visit_uri. When a spider is attached, the plain visit is wrapped with
# an optional delay, per-request options, request/response counters, logging
# and skip/retry error handling; without a spider it simply delegates to
# original_visit.
#
# visit_uri             - the address to open.
# delay:                - value handed to process_delay before the request
#                         (skipped when falsy); defaults to
#                         config.before_request[:delay].
# skip_request_options: - when true, check_request_options is not run.
# max_retries:          - how many times an error from the "to_retry" list is
#                         retried before giving up.
#
# Returns true when the request finished, false when the error matched the
# "to_skip" list, and nil when all retries are used up and
# skip_error_on_failure? says the error may be swallowed. Without a spider
# the result of original_visit is returned.
#
# Raises the original error when it matches neither list, or when retries
# are exhausted and the error is not marked to be skipped on failure.
def visit(visit_uri, delay: config.before_request[:delay], skip_request_options: false, max_retries: 3)
  if spider
    process_delay(delay) if delay
    retries, sleep_interval = 0, 0

    begin
      check_request_options(visit_uri) unless skip_request_options
      # `and` just chains the log line onto the counter increment.
      driver.requests += 1 and logger.info "Browser: started get request to: #{visit_uri}"
      spider.class.update(:visits, :requests) if spider.with_info

      # NOTE(review): original_visit is presumably the un-wrapped visit that
      # was aliased before this override -- confirm in the full file.
      original_visit(visit_uri)
    rescue => e
      if match_error?(e, type: :to_skip)
        # Skippable error: record it and report failure with `false`.
        logger.error "Browser: skip request error: #{e.inspect}, url: #{visit_uri}"
        spider.add_event(:requests_errors, e.inspect) if spider.with_info
        false
      elsif match_error?(e, type: :to_retry)
        logger.error "Browser: retry request error: #{e.inspect}, url: #{visit_uri}"
        spider.add_event(:requests_errors, e.inspect) if spider.with_info

        if (retries += 1) <= max_retries
          # The pause grows by 15 seconds with every attempt; `retry` then
          # re-runs the begin block from its first line.
          logger.info "Browser: sleep #{(sleep_interval += 15)} seconds and process retry № #{retries} to the url: #{visit_uri}"
          sleep sleep_interval and retry
        else
          logger.error "Browser: all retries (#{retries - 1}) to the url #{visit_uri} are gone"
          # Evaluates to nil (the method's result) when the error is skipped.
          raise e unless skip_error_on_failure?(e)
        end
      else
        raise e
      end
    else
      # Reached only when the begin block raised nothing.
      driver.responses += 1 and logger.info "Browser: finished get request to: #{visit_uri}"
      spider.class.update(:visits, :responses) if spider.with_info
      driver.visited = true unless driver.visited
      true
    ensure
      # Runs when the begin block is left, whatever the outcome; its value
      # does not affect the method's result.
      if spider.with_info
        logger.info "Info: visits: requests: #{spider.class.visits[:requests]}, responses: #{spider.class.visits[:responses]}"
      end

      if memory = driver.current_memory
        logger.debug "Browser: driver.current_memory: #{memory}"
      end
    end
  else
    original_visit(visit_uri)
  end
end
Also aliased as: original_visit
within_new_window_by(action: nil, url: nil) { || ... } click to toggle source

Handy method to perform some processing in a new tab within a block and then automatically close that tab.

Usage (url):

    browser.within_new_window_by(url: "google.com") do
      # do some stuff; the tab is then closed automatically and control returns to the first tab
    end

Usage (action) - when the new tab is opened by some action, for example by clicking on a particular element:

    action = -> { browser.find("//some/element/path").click }
    browser.within_new_window_by(action: action) do
      # do some stuff; the tab is then closed automatically and control returns to the first tab
    end

# File lib/kimurai/capybara_ext/session.rb, line 119
# Runs the given block inside a newly opened browser window/tab and closes
# that window afterwards.
#
# action: - a callable that causes a new window to open (for example a
#           click); the block runs in the window it opened.
# url:    - an address to visit in a freshly opened window before the block
#           runs.
#
# When both are given, action takes precedence. When neither is given the
# block is not run and nil is returned.
#
# The window is closed from an ensure clause, so it is not left open when
# the block (or the initial visit of url) raises; the error still
# propagates to the caller.
def within_new_window_by(action: nil, url: nil)
  if action
    opened_window = window_opened_by { action.call }
    within_window(opened_window) do
      begin
        yield
      ensure
        current_window.close
      end
    end
  elsif url
    within_window(open_new_window) do
      begin
        visit(url)
        yield
      ensure
        current_window.close
      end
    end
  end
end

Private Instance Methods

check_request_options(url_to_visit) click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 182
# Applies the configured per-request options before url_to_visit is
# requested: restart limits, cookies, user agent and proxy, in that order.
#
# url_to_visit - the address about to be requested; it is only visited here
#                in the Selenium cases below, where a page is loaded before
#                cookies are set.
#
# `driver` is looked up again for every step on purpose: restart! may
# destroy the current driver, so a reference taken earlier could be stale.
def check_request_options(url_to_visit)
  # restart_if
  # Restart the browser once it uses at least memory_limit memory
  # (skipped when the driver reports no current_memory).
  if memory_limit = config.restart_if[:memory_limit]
    memory = driver.current_memory
    if memory && memory >= memory_limit
      logger.warn "Browser: memory_limit #{memory_limit} of driver.current_memory (#{memory}) is exceeded (engine: #{mode})"
      restart!
    end
  end

  # Restart the browser once it has served at least requests_limit requests.
  if requests_limit = config.restart_if[:requests_limit]
    requests = driver.requests
    if requests >= requests_limit
      logger.warn "Browser: requests_limit #{requests_limit} of driver.requests (#{requests}) is exceeded (engine: #{mode})"
      restart!
    end
  end

  # cookies
  # (Selenium only) if config.cookies present and browser was just created,
  # visit url_to_visit first and only then set cookies:
  if driver.visited.nil? && config.cookies && mode.match?(/selenium/)
    # skip_request_options: true keeps this nested visit from re-entering
    # this method.
    visit(url_to_visit, skip_request_options: true)
    config.cookies.each do |cookie|
      driver.set_cookie(cookie[:name], cookie[:value], cookie)
    end
  end

  if config.before_request[:clear_cookies]
    driver.clear_cookies
    logger.debug "Browser: cleared cookies before request"
  end

  if config.before_request[:clear_and_set_cookies]
    driver.clear_cookies

    # (Selenium only) if the browser has not visited any page yet, visit
    # url_to_visit first and then set cookies (needed after a browser restart):
    if driver.visited.nil? && mode.match?(/selenium/)
      visit(url_to_visit, skip_request_options: true)
    end

    # NOTE(review): config.cookies is not nil-checked here, unlike the block
    # above -- presumably validated elsewhere; confirm.
    config.cookies.each do |cookie|
      driver.set_cookie(cookie[:name], cookie[:value], cookie)
    end

    logger.debug "Browser: cleared and set cookies before request"
  end

  # user_agent
  # config.user_agent is called to obtain the header value for this request.
  if config.before_request[:change_user_agent]
    driver.add_header("User-Agent", config.user_agent.call)
    logger.debug "Browser: changed user_agent before request"
  end

  # proxy
  # config.proxy is called to obtain a colon-separated string whose parts
  # are passed to set_proxy as positional arguments.
  if config.before_request[:change_proxy]
    proxy_string = config.proxy.call
    driver.set_proxy(*proxy_string.split(":"))
    logger.debug "Browser: changed proxy before request"
  end
end
logger() click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 245
# Logger used by the session helpers; it is the logger of the attached
# spider, so a spider must be set before anything here logs.
def logger
  owner = spider
  owner.logger
end
match_error?(e, type:) click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 151
# Tells whether the error e matches one of the configured error rules.
#
# e     - the rescued exception.
# type: - :to_retry checks config.retry_request_errors,
#         :to_skip checks config.skip_request_errors.
#
# A rule is either an exception class, or a Hash with :error (a class) and
# an optional :message. A class rule matches when it is among the ancestors
# of e's class. A Hash rule additionally requires e.message to match
# :message (a Regexp) or to include it (anything else) when :message is
# present.
#
# Returns true or false.
def match_error?(e, type:)
  rules =
    case type
    when :to_retry then config.retry_request_errors
    when :to_skip then config.skip_request_errors
    end

  lineage = e.class.ancestors

  rules.any? do |rule|
    next lineage.include?(rule) unless rule.kind_of?(Hash)

    class_matches = lineage.include?(rule[:error])
    expected = rule[:message]
    next class_matches unless expected.present?

    text = e.message
    message_matches =
      if expected.kind_of?(Regexp)
        text&.match?(expected)
      else
        text&.include?(expected)
      end

    message_matches && class_matches
  end
end
process_delay(delay) click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 176
# Pauses before a request.
#
# delay - either a Range, from which a random value is drawn with rand, or a
#         value that is used as the interval directly.
#
# The chosen interval is logged (rounded to two decimals) and then slept.
# Returns what sleep returns.
def process_delay(delay)
  interval =
    if delay.instance_of?(Range)
      rand(delay)
    else
      delay
    end

  shown = interval.round(2)
  unit = 'second'.pluralize(interval)
  logger.debug "Browser: sleep #{shown} #{unit} before request..."
  sleep interval
end
skip_error_on_failure?(e) click to toggle source
# File lib/kimurai/capybara_ext/session.rb, line 145
# Tells whether e may be swallowed once all retries are used up.
#
# Only Hash rules in config.retry_request_errors are considered: the rule
# must have a truthy :skip_on_failure and its :error class must be among the
# ancestors of e's class.
#
# Returns true or false.
def skip_error_on_failure?(e)
  lineage = e.class.ancestors

  config.retry_request_errors.any? do |rule|
    next false unless rule.kind_of?(Hash)
    next false unless rule[:skip_on_failure]

    lineage.include?(rule[:error])
  end
end