class Kimurai::BrowserBuilder::SeleniumFirefoxBuilder
Attributes
virtual_display[RW]
logger[R]
spider[R]
Public Class Methods
new(config, spider:)
click to toggle source
# File lib/kimurai/browser_builder/selenium_firefox_builder.rb, line 15 def initialize(config, spider:) @config = config @spider = spider @logger = spider.logger end
Public Instance Methods
build()
click to toggle source
# File lib/kimurai/browser_builder/selenium_firefox_builder.rb, line 21 def build # Register driver Capybara.register_driver :selenium_firefox do |app| # Create driver options driver_options = Selenium::WebDriver::Firefox::Options.new driver_options.profile = Selenium::WebDriver::Firefox::Profile.new driver_options.profile["browser.link.open_newwindow"] = 3 # open windows in tabs driver_options.profile["media.peerconnection.enabled"] = false # disable web rtc # Proxy if proxy = @config[:proxy].presence proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip ip, port, type, user, password = proxy_string.split(":") if user.nil? && password.nil? driver_options.profile["network.proxy.type"] = 1 if type == "http" driver_options.profile["network.proxy.http"] = ip driver_options.profile["network.proxy.http_port"] = port.to_i driver_options.profile["network.proxy.ssl"] = ip driver_options.profile["network.proxy.ssl_port"] = port.to_i logger.debug "BrowserBuilder (selenium_firefox): enabled http proxy, ip: #{ip}, port: #{port}" elsif type == "socks5" driver_options.profile["network.proxy.socks"] = ip driver_options.profile["network.proxy.socks_port"] = port.to_i driver_options.profile["network.proxy.socks_version"] = 5 driver_options.profile["network.proxy.socks_remote_dns"] = true logger.debug "BrowserBuilder (selenium_firefox): enabled socks5 proxy, ip: #{ip}, port: #{port}" else logger.error "BrowserBuilder (selenium_firefox): wrong type of proxy: #{type}, skipped" end else logger.error "BrowserBuilder (selenium_firefox): proxy with authentication doesn't supported by selenium, skipped" end end if proxy_bypass_list = @config[:proxy_bypass_list].presence if proxy driver_options.profile["network.proxy.no_proxies_on"] = proxy_bypass_list.join(", ") logger.debug "BrowserBuilder (selenium_firefox): enabled proxy_bypass_list" else logger.error "BrowserBuilder (selenium_firefox): provide `proxy` to set proxy_bypass_list, skipped" end end # SSL if @config[:ignore_ssl_errors].present? driver_options.profile.secure_ssl = false driver_options.profile.assume_untrusted_certificate_issuer = true logger.debug "BrowserBuilder (selenium_firefox): enabled ignore_ssl_errors" end # Disable images if @config[:disable_images].present? driver_options.profile["permissions.default.image"] = 2 logger.debug "BrowserBuilder (selenium_firefox): enabled disable_images" end # Headers if @config[:headers].present? logger.warn "BrowserBuilder: (selenium_firefox): custom headers doesn't supported by selenium, skipped" end if user_agent = @config[:user_agent].presence user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip driver_options.profile["general.useragent.override"] = user_agent_string logger.debug "BrowserBuilder (selenium_firefox): enabled custom user_agent" end # Headless mode if ENV["HEADLESS"] != "false" if @config[:headless_mode] == :virtual_display if Gem::Platform.local.os == "linux" unless self.class.virtual_display require 'headless' self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false) self.class.virtual_display.start end logger.debug "BrowserBuilder (selenium_firefox): enabled virtual_display headless_mode" else logger.error "BrowserBuilder (selenium_firefox): virtual_display headless_mode works only " \ "on Linux platform. Browser will run in normal mode. Set `native` mode instead." end else driver_options.args << "--headless" logger.debug "BrowserBuilder (selenium_firefox): enabled native headless_mode" end end Capybara::Selenium::Driver.new(app, browser: :firefox, options: driver_options) end # Create browser instance (Capybara session) @browser = Capybara::Session.new(:selenium_firefox) @browser.spider = spider logger.debug "BrowserBuilder (selenium_firefox): created browser instance" if @config[:extensions].present? logger.error "BrowserBuilder (selenium_firefox): `extensions` option not supported by Selenium, skipped" end # Window size if size = @config[:window_size].presence @browser.current_window.resize_to(*size) logger.debug "BrowserBuilder (selenium_firefox): enabled window_size" end # Cookies if cookies = @config[:cookies].presence @browser.config.cookies = cookies logger.debug "BrowserBuilder (selenium_firefox): enabled custom cookies" end # Browser instance options # skip_request_errors if skip_errors = @config[:skip_request_errors].presence @browser.config.skip_request_errors = skip_errors logger.debug "BrowserBuilder (selenium_firefox): enabled skip_request_errors" end # retry_request_errors if retry_errors = @config[:retry_request_errors].presence @browser.config.retry_request_errors = retry_errors logger.debug "BrowserBuilder (selenium_firefox): enabled retry_request_errors" end # restart_if if requests_limit = @config.dig(:restart_if, :requests_limit).presence @browser.config.restart_if[:requests_limit] = requests_limit logger.debug "BrowserBuilder (selenium_firefox): enabled restart_if.requests_limit >= #{requests_limit}" end if memory_limit = @config.dig(:restart_if, :memory_limit).presence @browser.config.restart_if[:memory_limit] = memory_limit logger.debug "BrowserBuilder (selenium_firefox): enabled restart_if.memory_limit >= #{memory_limit}" end # before_request clear_cookies if @config.dig(:before_request, :clear_cookies) @browser.config.before_request[:clear_cookies] = true logger.debug "BrowserBuilder (selenium_firefox): enabled before_request.clear_cookies" end # before_request clear_and_set_cookies if @config.dig(:before_request, :clear_and_set_cookies) if cookies = @config[:cookies].presence @browser.config.cookies = cookies @browser.config.before_request[:clear_and_set_cookies] = true logger.debug "BrowserBuilder (selenium_firefox): enabled before_request.clear_and_set_cookies" else logger.error "BrowserBuilder (selenium_firefox): cookies should be present to enable before_request.clear_and_set_cookies, skipped" end end # before_request change_user_agent if @config.dig(:before_request, :change_user_agent) logger.error "BrowserBuilder (selenium_firefox): before_request.change_user_agent option not supported by Selenium, skipped" end # before_request change_proxy if @config.dig(:before_request, :change_proxy) logger.error "BrowserBuilder (selenium_firefox): before_request.change_proxy option not supported by Selenium, skipped" end # before_request delay if delay = @config.dig(:before_request, :delay).presence @browser.config.before_request[:delay] = delay logger.debug "BrowserBuilder (selenium_firefox): enabled before_request.delay" end # encoding if encoding = @config[:encoding] @browser.config.encoding = encoding logger.debug "BrowserBuilder (selenium_firefox): enabled encoding: #{encoding}" end # return Capybara session instance @browser end