module Scraypa
Constants
- USER_AGENT_LIST
- USER_AGENT_MOBILE_LIST
- VERSION
Attributes
agent[RW]
driver_resetter[RW]
throttle[RW]
tor_ip_control[RW]
tor_process[RW]
tor_proxy[RW]
user_agent_retriever[RW]
Public Class Methods
change_tor_ip_address()
click to toggle source
# File lib/scraypa.rb, line 54
# Requests a new IP address from the Tor IP controller (@tor_ip_control).
# Does nothing and returns nil when using_tor? is falsy.
def change_tor_ip_address
  return unless using_tor?

  @tor_ip_control.get_new_ip
end
configuration()
click to toggle source
# File lib/scraypa.rb, line 27
# Returns the current configuration object, creating a fresh Configuration
# the first time it is requested (or whenever none is set).
def configuration
  @configuration = Configuration.new unless @configuration
  @configuration
end
configuration=(config)
click to toggle source
# File lib/scraypa.rb, line 31
# Replaces the stored configuration with +config+.
# Only stores the object; no validation or setup is triggered here.
def configuration=(config)
  @configuration = config
end
configure() { |configuration| ... }
click to toggle source
# File lib/scraypa.rb, line 42
# Yields the configuration object to the caller's block, then validates the
# configuration and runs setup_scraypa.
#
# Returns whatever the block returned. Validation errors raised by
# validate_configuration propagate to the caller.
def configure
  block_result = yield(configuration)
  validate_configuration
  setup_scraypa
  block_result
end
reset()
click to toggle source
# File lib/scraypa.rb, line 35
# Discards the current configuration in favour of a new Configuration,
# clears the throttle and runs the full setup again.
#
# Returns the configuration held once setup has finished.
def reset
  @configuration = Configuration.new

  reset_throttle
  setup_scraypa

  @configuration
end
user_agent()
click to toggle source
# File lib/scraypa.rb, line 58
# Returns the user agent currently reported by the user agent retriever,
# or nil when no retriever has been set up.
def user_agent
  return unless @user_agent_retriever

  @user_agent_retriever.current_user_agent
end
visit(params={})
click to toggle source
# File lib/scraypa.rb, line 49
# Performs a visit with the given +params+, going through the throttle.
# Runs setup_scraypa first when no agent has been built yet.
#
# Returns the result of visit_with_throttle.
def visit(params = {})
  setup_scraypa unless @agent

  visit_with_throttle(params)
end
Private Class Methods
destruct_tor()
click to toggle source
# File lib/scraypa.rb, line 143
# Tears down Tor state: stops the tracked Tor process (if any), asks
# TorManager to stop obsolete processes, and clears the Tor-related
# instance variables. Returns nil.
def destruct_tor
  @tor_process.stop if @tor_process
  TorManager::TorProcess.stop_obsolete_processes

  # Drop every reference to the old Tor objects in one go.
  @tor_ip_control = @tor_proxy = @tor_process = nil
end
ensure_tor_options_are_configured()
click to toggle source
# File lib/scraypa.rb, line 105
# Normalises the Tor options on the configuration.
#
# When Tor is not in use the options are cleared (set to nil). When it is,
# an options hash is guaranteed to exist, with :tor_port defaulting to 9050
# and :control_port defaulting to 50500 if they were not supplied.
def ensure_tor_options_are_configured
  unless using_tor?
    @configuration.tor_options = nil
    return
  end

  @configuration.tor_options ||= {}
  @configuration.tor_options[:tor_port] ||= 9050
  @configuration.tor_options[:control_port] ||= 50500
end
headless_chromium_with_tor_is_invalid()
click to toggle source
# File lib/scraypa.rb, line 69
# Validation rule: the :headless_chromium driver cannot be combined with Tor.
#
# Raises TorNotSupportedByAgent when both are configured; otherwise returns nil.
def headless_chromium_with_tor_is_invalid
  return unless using_tor? && @configuration.driver == :headless_chromium

  raise TorNotSupportedByAgent,
        "Capybara :headless_chromium does not support Tor"
end
initialize_tor(params={})
click to toggle source
# File lib/scraypa.rb, line 134
# Builds the TorManager objects (process, proxy and IP address control)
# and starts the Tor process.
#
# params - options passed to TorManager::TorProcess.new; nil is treated
#          as an empty hash.
#
# Returns the result of starting the Tor process.
def initialize_tor(params = {})
  @tor_process = TorManager::TorProcess.new(params || {})
  @tor_proxy = TorManager::Proxy.new(tor_process: @tor_process)
  @tor_ip_control = TorManager::IpAddressControl.new(
    tor_process: @tor_process,
    tor_proxy: @tor_proxy
  )

  @tor_process.start
end
merge_user_agent_list_limit_for_chrome(config)
click to toggle source
# File lib/scraypa.rb, line 91
# Adds a default list_limit of 30 to the user agent config when the
# :headless_chromium driver is configured and no :list_limit was given.
#
# config - the user agent configuration hash.
#
# Returns a new merged hash in that case; otherwise returns +config+ itself.
def merge_user_agent_list_limit_for_chrome(config)
  return config unless @configuration.driver == :headless_chromium
  return config if config[:list_limit]

  config.merge(list_limit: 30)
end
reset_throttle()
click to toggle source
# File lib/scraypa.rb, line 184
# Clears the last request time on the current throttle (if there is one)
# and then discards the throttle. Returns nil.
def reset_throttle
  if @throttle
    @throttle.last_request_time = nil
  end

  @throttle = nil
end
reset_tor()
click to toggle source
# File lib/scraypa.rb, line 129
# Tears down any existing Tor state and, when the configuration enables
# Tor, initializes it again with the configured Tor options.
def reset_tor
  destruct_tor
  return unless @configuration.tor

  initialize_tor(@configuration.tor_options)
end
setup_agent()
click to toggle source
# File lib/scraypa.rb, line 157
# Builds the visiting agent through Scraypa::VisitFactory, handing it the
# configuration, Tor proxy, driver resetter and user agent retriever that
# are currently set, and stores it in @agent.
def setup_agent
  @agent = Scraypa::VisitFactory.build(
    config: @configuration,
    tor_proxy: @tor_proxy,
    driver_resetter: @driver_resetter,
    user_agent_retriever: @user_agent_retriever
  )
end
setup_driver_resetter()
click to toggle source
# File lib/scraypa.rb, line 151
# Creates the DriverResetter from the configured
# reset_driver_every_n_requests value and stores it in @driver_resetter.
def setup_driver_resetter
  requests_between_resets = @configuration.reset_driver_every_n_requests

  @driver_resetter = DriverResetter.new(requests_between_resets)
end
setup_scraypa()
click to toggle source
# File lib/scraypa.rb, line 75
# Runs every setup step in order: user agent, Tor, driver resetter, agent
# and throttle. The agent is built after the user agent retriever, Tor
# proxy and driver resetter because setup_agent passes those objects on.
#
# Returns the result of setup_throttle.
def setup_scraypa
  # visit can reach this method before the configuration was ever touched;
  # the setup steps read @configuration directly, so make sure the default
  # Configuration exists first instead of failing on nil.
  configuration

  setup_user_agent
  setup_tor
  setup_driver_resetter
  setup_agent
  setup_throttle
end
setup_throttle()
click to toggle source
# File lib/scraypa.rb, line 165
# Creates a new Throttle from the configured throttle_seconds, but only
# when throttle_config_has_changed? reports a change. Otherwise the
# existing throttle (if any) is left as it is and nil is returned.
def setup_throttle
  return unless throttle_config_has_changed?

  @throttle = Throttle.new(seconds: @configuration.throttle_seconds)
end
setup_tor()
click to toggle source
# File lib/scraypa.rb, line 97
# Brings the Tor state in line with the configuration.
#
# After normalising the Tor options:
# * Tor wanted but not running in this process -> reset_tor
# * Tor not wanted but running in this process  -> destruct_tor
# * otherwise nothing happens and nil is returned.
def setup_tor
  ensure_tor_options_are_configured

  if using_tor?
    reset_tor unless tor_running_in_current_process?
  elsif tor_running_in_current_process?
    destruct_tor
  end
end
setup_user_agent()
click to toggle source
# File lib/scraypa.rb, line 83
# Builds the user agent retriever from the configured user_agent settings
# (with the headless-chromium list limit merged in where applicable).
# When no user_agent is configured the retriever is set to nil.
def setup_user_agent
  user_agent_config = @configuration.user_agent

  @user_agent_retriever =
    if user_agent_config
      UserAgentFactory.build(
        merge_user_agent_list_limit_for_chrome(user_agent_config)
      )
    end
end
throttle_config_has_changed?()
click to toggle source
# File lib/scraypa.rb, line 170
# Tells whether a (new) throttle has to be created.
#
# Truthy only when throttle_seconds is configured as a Hash or as a value
# whose to_f is greater than zero, and either no throttle exists yet or the
# existing throttle's seconds differ from the configured value.
def throttle_config_has_changed?
  seconds = @configuration.throttle_seconds

  # Nothing configured: hand back the falsy value itself (nil or false).
  return seconds unless seconds
  return false unless seconds.is_a?(Hash) || seconds.to_f > 0

  !@throttle || @throttle.seconds != seconds
end
tor_running_in_current_process?()
click to toggle source
# File lib/scraypa.rb, line 119
# Asks TorManager whether a Tor process owned by the current Ruby process
# (parent_pid: Process.pid) is running. When a :tor_port is configured the
# check is narrowed to that port.
def tor_running_in_current_process?
  tor_options = @configuration.tor_options

  if tor_options && tor_options[:tor_port]
    TorManager::TorProcess.tor_running_on?(
      port: tor_options[:tor_port],
      parent_pid: Process.pid
    )
  else
    TorManager::TorProcess.tor_running_on?(parent_pid: Process.pid)
  end
end
using_tor?()
click to toggle source
# File lib/scraypa.rb, line 115
# Returns the configuration's +tor+ setting as-is (truthy when Tor is
# enabled). Requires @configuration to be set.
def using_tor?
  @configuration.tor
end
validate_configuration()
click to toggle source
# File lib/scraypa.rb, line 65
# Runs the configuration validation rules. Currently the only rule is that
# :headless_chromium must not be combined with Tor; that rule raises when
# it is violated.
def validate_configuration
  headless_chromium_with_tor_is_invalid
end
visit_with_throttle(params)
click to toggle source
# File lib/scraypa.rb, line 177
# Executes the visit through the agent, wrapped by the throttle when one
# is set: the throttle is invoked before the request and its
# last_request_time is stamped with the current time afterwards.
#
# params - passed straight to @agent.execute.
#
# Returns the agent's response.
def visit_with_throttle(params)
  @throttle.throttle if @throttle

  @agent.execute(params).tap do
    # Only reached when execute did not raise.
    @throttle.last_request_time = Time.now if @throttle
  end
end