module Scraypa

Constants

USER_AGENT_LIST
USER_AGENT_MOBILE_LIST
VERSION

Attributes

agent[RW]
driver_resetter[RW]
throttle[RW]
tor_ip_control[RW]
tor_process[RW]
tor_proxy[RW]
user_agent_retriever[RW]

Public Class Methods

change_tor_ip_address() click to toggle source
# File lib/scraypa.rb, line 54
# Asks the Tor IP controller for a new IP address.
#
# Does nothing and returns nil when Tor is not enabled; otherwise returns
# whatever the controller's +get_new_ip+ returns.
def change_tor_ip_address
  return unless using_tor?

  @tor_ip_control.get_new_ip
end
configuration() click to toggle source
# File lib/scraypa.rb, line 27
# Returns the current configuration, creating a default Configuration the
# first time it is requested.
def configuration
  return @configuration if @configuration

  @configuration = Configuration.new
end
configuration=(config) click to toggle source
# File lib/scraypa.rb, line 31
# Replaces the stored configuration object.
#
# Only the reference is swapped; nothing is validated or rebuilt here.
#
# @param config the configuration object to store
def configuration=(config)
  @configuration = config
end
configure() { |configuration| ... } click to toggle source
# File lib/scraypa.rb, line 42
# Yields the configuration to the caller's block, then validates it and
# rebuilds the collaborators from it.
#
# @yieldparam configuration the object returned by +configuration+
# @return the value returned by the block
def configure
  block_result = yield(configuration)
  validate_configuration
  setup_scraypa
  block_result
end
reset() click to toggle source
# File lib/scraypa.rb, line 35
# Discards the current configuration in favour of a fresh Configuration,
# clears the throttle and rebuilds the collaborators.
#
# @return the configuration in effect after the reset
def reset
  # The new configuration must be in place before the steps below run,
  # since the setup steps read @configuration.
  @configuration = Configuration.new

  reset_throttle
  setup_scraypa

  @configuration
end
user_agent() click to toggle source
# File lib/scraypa.rb, line 58
# Returns the user agent currently reported by the user agent retriever,
# or nil when no retriever has been set up.
def user_agent
  return nil unless @user_agent_retriever

  @user_agent_retriever.current_user_agent
end
visit(params={}) click to toggle source
# File lib/scraypa.rb, line 49
# Performs a request through the agent, honouring the throttle.
#
# @param params [Hash] request parameters handed on to the agent unchanged
# @return whatever +visit_with_throttle+ returns for the request
def visit(params = {})
  # Build the agent (and everything it depends on) lazily on first use.
  setup_scraypa unless @agent
  visit_with_throttle(params)
end

Private Class Methods

destruct_tor() click to toggle source
# File lib/scraypa.rb, line 143
# Stops the Tor process held by this module (if any), asks TorManager to
# stop obsolete Tor processes, and forgets all Tor collaborators.
def destruct_tor
  @tor_process.stop if @tor_process
  TorManager::TorProcess.stop_obsolete_processes

  # Drop every Tor-related reference.
  @tor_ip_control = @tor_proxy = @tor_process = nil
end
ensure_tor_options_are_configured() click to toggle source
# File lib/scraypa.rb, line 105
# Normalises +tor_options+ on the configuration.
#
# With Tor enabled, makes sure the options hash exists and fills in the
# default ports (9050 for Tor, 50500 for the control port) where the caller
# supplied none. With Tor disabled, the options are cleared.
def ensure_tor_options_are_configured
  unless using_tor?
    @configuration.tor_options = nil
    return
  end

  @configuration.tor_options ||= {}
  options = @configuration.tor_options
  options[:tor_port] ||= 9050
  options[:control_port] ||= 50500
end
headless_chromium_with_tor_is_invalid() click to toggle source
# File lib/scraypa.rb, line 69
# Rejects the combination of Tor with the :headless_chromium driver.
#
# Raises TorNotSupportedByAgent when Tor is enabled and the configured
# driver is :headless_chromium; otherwise returns nil.
def headless_chromium_with_tor_is_invalid
  return unless using_tor? && @configuration.driver == :headless_chromium

  raise TorNotSupportedByAgent,
        "Capybara :headless_chromium does not support Tor"
end
initialize_tor(params={}) click to toggle source
# File lib/scraypa.rb, line 134
# Builds the Tor process, its proxy and its IP address control, then starts
# the process.
#
# @param params [Hash, nil] options for TorManager::TorProcess; nil is
#   treated as an empty hash
# @return the result of starting the Tor process
def initialize_tor(params = {})
  @tor_process = TorManager::TorProcess.new(params || {})
  @tor_proxy = TorManager::Proxy.new(tor_process: @tor_process)
  @tor_ip_control = TorManager::IpAddressControl.new(
    tor_process: @tor_process,
    tor_proxy: @tor_proxy
  )

  # The process is started only after all three objects are wired together.
  @tor_process.start
end
merge_user_agent_list_limit_for_chrome(config) click to toggle source
# File lib/scraypa.rb, line 91
# Adds a default +list_limit+ of 30 to the user agent settings when the
# driver is :headless_chromium and the caller did not set a limit.
#
# @param config [Hash] user agent settings
# @return [Hash] a merged copy when the default applies, otherwise +config+
#   itself
def merge_user_agent_list_limit_for_chrome(config)
  needs_default_limit =
    @configuration.driver == :headless_chromium && !config[:list_limit]
  return config unless needs_default_limit

  config.merge(list_limit: 30)
end
reset_throttle() click to toggle source
# File lib/scraypa.rb, line 184
# Clears the last request time on the current throttle (if there is one)
# and then discards the throttle itself.
def reset_throttle
  if @throttle
    @throttle.last_request_time = nil
  end

  @throttle = nil
end
reset_tor() click to toggle source
# File lib/scraypa.rb, line 129
# Tears down any existing Tor state and, when Tor is enabled in the
# configuration, starts it again with the configured options.
def reset_tor
  destruct_tor
  return unless @configuration.tor

  initialize_tor(@configuration.tor_options)
end
setup_agent() click to toggle source
# File lib/scraypa.rb, line 157
# Builds the visiting agent through Scraypa::VisitFactory, passing it the
# configuration and the collaborators currently held by this module.
def setup_agent
  collaborators = {
    config: @configuration,
    tor_proxy: @tor_proxy,
    driver_resetter: @driver_resetter,
    user_agent_retriever: @user_agent_retriever
  }

  @agent = Scraypa::VisitFactory.build(**collaborators)
end
setup_driver_resetter() click to toggle source
# File lib/scraypa.rb, line 151
# Creates the DriverResetter from the configured
# +reset_driver_every_n_requests+ value.
def setup_driver_resetter
  every_n_requests = @configuration.reset_driver_every_n_requests
  @driver_resetter = DriverResetter.new(every_n_requests)
end
setup_scraypa() click to toggle source
# File lib/scraypa.rb, line 75
# Builds every collaborator from the current configuration, in dependency
# order: user agent retriever, Tor, driver resetter, agent, throttle.
#
# The individual setup steps read @configuration directly, but that variable
# is only created lazily by +configuration+. Calling the accessor first
# guarantees a configuration exists even when this runs before any
# configure/reset call (e.g. from the lazy setup in +visit+), instead of
# failing with NoMethodError on nil.
def setup_scraypa
  configuration # materialise the default configuration if none is set yet

  setup_user_agent
  setup_tor
  setup_driver_resetter
  setup_agent
  setup_throttle
end
setup_throttle() click to toggle source
# File lib/scraypa.rb, line 165
# Brings @throttle in line with the configured +throttle_seconds+.
#
# * Throttling disabled (nil/false, or a non-Hash value whose +to_f+ is not
#   positive): any existing throttle is dropped. Previously a throttle
#   created by an earlier configuration stayed active after throttling was
#   switched off, because +throttle_config_has_changed?+ is falsy for
#   disabled values.
# * Throttling enabled and the setting changed: a new Throttle is created.
# * Throttling enabled and unchanged: the existing throttle is kept.
def setup_throttle
  seconds = @configuration.throttle_seconds
  enabled = seconds && (seconds.is_a?(Hash) || seconds.to_f > 0)

  unless enabled
    @throttle = nil
    return
  end

  @throttle = Throttle.new(seconds: seconds) if throttle_config_has_changed?
end
setup_tor() click to toggle source
# File lib/scraypa.rb, line 97
# Reconciles the Tor process with the configuration.
#
# After normalising the Tor options: starts (resets) Tor when it is wanted
# but not running under this process, tears it down when it is running but
# no longer wanted, and otherwise does nothing (returns nil).
def setup_tor
  ensure_tor_options_are_configured

  if using_tor? && !tor_running_in_current_process?
    reset_tor
  elsif !using_tor? && tor_running_in_current_process?
    destruct_tor
  end
end
setup_user_agent() click to toggle source
# File lib/scraypa.rb, line 83
# Builds the user agent retriever from the configured user agent settings
# (after applying the headless-chromium list limit default), or clears it
# when no user agent settings are configured.
def setup_user_agent
  settings = @configuration.user_agent

  @user_agent_retriever =
    if settings
      UserAgentFactory.build(merge_user_agent_list_limit_for_chrome(settings))
    end
end
throttle_config_has_changed?() click to toggle source
# File lib/scraypa.rb, line 170
# Tells whether a (new) throttle must be built for the configured
# +throttle_seconds+.
#
# Truthy only when throttling is enabled — a Hash, or a value whose +to_f+
# is positive — and there is either no throttle yet or the current throttle
# was built with different seconds.
def throttle_config_has_changed?
  seconds = @configuration.throttle_seconds
  return seconds unless seconds # nil/false: throttling not configured

  enabled = seconds.is_a?(Hash) || seconds.to_f > 0
  return false unless enabled

  !@throttle || @throttle.seconds != seconds
end
tor_running_in_current_process?() click to toggle source
# File lib/scraypa.rb, line 119
# Asks TorManager whether a Tor process whose parent is the current Ruby
# process is running — on the configured Tor port when one is set,
# otherwise without a port restriction.
def tor_running_in_current_process?
  if @configuration.tor_options && @configuration.tor_options[:tor_port]
    TorManager::TorProcess.tor_running_on?(
      port: @configuration.tor_options[:tor_port],
      parent_pid: Process.pid
    )
  else
    TorManager::TorProcess.tor_running_on?(parent_pid: Process.pid)
  end
end
using_tor?() click to toggle source
# File lib/scraypa.rb, line 115
# Whether Tor is enabled in the configuration.
#
# Reads through the memoizing +configuration+ accessor rather than the raw
# instance variable, so that callers such as +change_tor_ip_address+ do not
# fail with NoMethodError on nil when invoked before any configure/reset.
#
# @return the configuration's +tor+ setting (used for its truthiness)
def using_tor?
  configuration.tor
end
validate_configuration() click to toggle source
# File lib/scraypa.rb, line 65
# Runs the configuration checks. The only check at present rejects Tor
# combined with the :headless_chromium driver.
def validate_configuration
  headless_chromium_with_tor_is_invalid
end
visit_with_throttle(params) click to toggle source
# File lib/scraypa.rb, line 177
# Executes a request through the agent, waiting on the throttle first and
# recording the completion time on it afterwards. Both throttle steps are
# skipped when no throttle is set.
#
# @param params request parameters handed to the agent's +execute+
# @return the agent's response
def visit_with_throttle(params)
  @throttle.throttle if @throttle

  @agent.execute(params).tap do
    # Stamp the throttle only after the request has returned.
    @throttle.last_request_time = Time.now if @throttle
  end
end