class Kimurai::BrowserBuilder::PoltergeistPhantomjsBuilder
Attributes
logger[R]
spider[R]
Public Class Methods
new(config, spider:)
click to toggle source
# File lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb, line 11 def initialize(config, spider:) @config = config @spider = spider @logger = spider.logger end
Public Instance Methods
build()
click to toggle source
# File lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb, line 17 def build # Register driver Capybara.register_driver :poltergeist_phantomjs do |app| # Create driver options driver_options = { js_errors: false, debug: false, inspector: false, phantomjs_options: [] } if extensions = @config[:extensions].presence driver_options[:extensions] = extensions logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled extensions" end # Window size if size = @config[:window_size].presence driver_options[:window_size] = size logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled window_size" end # SSL if ssl_cert_path = @config[:ssl_cert_path].presence driver_options[:phantomjs_options] << "--ssl-certificates-path=#{ssl_cert_path}" logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled custom ssl_cert" end if @config[:ignore_ssl_errors].present? driver_options[:phantomjs_options].push("--ignore-ssl-errors=yes", "--ssl-protocol=any") logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled ignore_ssl_errors" end # Disable images if @config[:disable_images].present? driver_options[:phantomjs_options] << "--load-images=no" logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled disable_images" end Capybara::Poltergeist::Driver.new(app, driver_options) end # Create browser instance (Capybara session) @browser = Capybara::Session.new(:poltergeist_phantomjs) @browser.spider = spider logger.debug "BrowserBuilder (poltergeist_phantomjs): created browser instance" # Proxy if proxy = @config[:proxy].presence proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip ip, port, type = proxy_string.split(":") if %w(http socks5).include?(type) @browser.driver.set_proxy(*proxy_string.split(":")) logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled #{type} proxy, ip: #{ip}, port: #{port}" else logger.error "BrowserBuilder (poltergeist_phantomjs): wrong type of proxy: #{type}, skipped" end end # Headers if headers = @config[:headers].presence @browser.driver.headers = headers logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled custom headers" end if user_agent = @config[:user_agent].presence user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip @browser.driver.add_header("User-Agent", user_agent_string) logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled custom user_agent" end # Cookies if cookies = @config[:cookies].presence cookies.each do |cookie| @browser.driver.set_cookie(cookie[:name], cookie[:value], cookie) end logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled custom cookies" end # Browser instance options # skip_request_errors if skip_errors = @config[:skip_request_errors].presence @browser.config.skip_request_errors = skip_errors logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled skip_request_errors" end # retry_request_errors if retry_errors = @config[:retry_request_errors].presence @browser.config.retry_request_errors = retry_errors logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled retry_request_errors" end # restart_if if requests_limit = @config.dig(:restart_if, :requests_limit).presence @browser.config.restart_if[:requests_limit] = requests_limit logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled restart_if.requests_limit >= #{requests_limit}" end if memory_limit = @config.dig(:restart_if, :memory_limit).presence @browser.config.restart_if[:memory_limit] = memory_limit logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled restart_if.memory_limit >= #{memory_limit}" end # before_request clear_cookies if @config.dig(:before_request, :clear_cookies) @browser.config.before_request[:clear_cookies] = true logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.clear_cookies" end # before_request clear_and_set_cookies if @config.dig(:before_request, :clear_and_set_cookies) if cookies = @config[:cookies].presence @browser.config.cookies = cookies @browser.config.before_request[:clear_and_set_cookies] = true logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.clear_and_set_cookies" else logger.error "BrowserBuilder (poltergeist_phantomjs): cookies should be present to enable before_request.clear_and_set_cookies, skipped" end end # before_request change_user_agent if @config.dig(:before_request, :change_user_agent) if @config[:user_agent].present? && @config[:user_agent].class == Proc @browser.config.user_agent = @config[:user_agent] @browser.config.before_request[:change_user_agent] = true logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.change_user_agent" else logger.error "BrowserBuilder (poltergeist_phantomjs): user_agent should be present and has lambda format to enable before_request.change_user_agent, skipped" end end # before_request change_proxy if @config.dig(:before_request, :change_proxy) if @config[:proxy].present? && @config[:proxy].class == Proc @browser.config.proxy = @config[:proxy] @browser.config.before_request[:change_proxy] = true logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.change_proxy" else logger.error "BrowserBuilder (poltergeist_phantomjs): proxy should be present and has lambda format to enable before_request.change_proxy, skipped" end end # before_request delay if delay = @config.dig(:before_request, :delay).presence @browser.config.before_request[:delay] = delay logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.delay" end # encoding if encoding = @config[:encoding] @browser.config.encoding = encoding logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled encoding: #{encoding}" end # return Capybara session instance @browser end