class AppleSystemStatus::Crawler

Constants

DEFAULT_CHROME_OPTIONS_ARGS
MAX_RETRY_COUNT
USER_AGENT

Public Class Methods

blank_string?(str) click to toggle source
# File lib/apple_system_status/crawler.rb, line 116
# Tells whether +str+ carries no usable text.
#
# @param str [String, nil] value to inspect
# @return [Boolean] true when +str+ is nil/false, or is empty once
#   surrounding whitespace has been stripped
def self.blank_string?(str)
  !str || str.strip.empty?
end
new(chrome_options_args: DEFAULT_CHROME_OPTIONS_ARGS, chrome_options_binary: nil) click to toggle source

@param chrome_options_args [Array<String>]
@param chrome_options_binary [String]

# File lib/apple_system_status/crawler.rb, line 20
# Registers the :chrome_headless Capybara driver and opens a session on it.
#
# @param chrome_options_args [Array<String>] passed to Chrome options as +args+
# @param chrome_options_binary [String, nil] passed to Chrome options as
#   +binary+; the key is omitted entirely when this is nil/false
def initialize(chrome_options_args: DEFAULT_CHROME_OPTIONS_ARGS, chrome_options_binary: nil)
  Capybara.register_driver(:chrome_headless) do |app|
    # HTTP client used to talk to the driver, with a 120 second read timeout.
    http_client = Selenium::WebDriver::Remote::Http::Default.new
    http_client.read_timeout = 120

    option_params = { args: chrome_options_args }
    option_params[:binary] = chrome_options_binary if chrome_options_binary

    browser_options = Selenium::WebDriver::Chrome::Options.new(profile: nil, **option_params)
    base_capabilities = Selenium::WebDriver::Remote::Capabilities.chrome

    Capybara::Selenium::Driver.new(
      app,
      browser: :chrome,
      capabilities: [base_capabilities, browser_options],
      http_client: http_client,
    )
  end

  @session = Capybara::Session.new(:chrome_headless)
end
perform(country: nil, title: nil, chrome_options_args: DEFAULT_CHROME_OPTIONS_ARGS, chrome_options_binary: nil) click to toggle source

Crawls the Apple System Status page. When crawling has finished, the Capybara session is quit.
@param country [String] country code (e.g. jp, ca, fr; default: us)
@param title [String] if specified, only services with exactly this title are returned
@param chrome_options_args [Array<String>]
@param chrome_options_binary [String]
@return [Hash]
@example response format

{
  title: ,
  services: [
    { title: , description: , status:  }
  ]
}

@link github.com/teampoltergeist/poltergeist#memory-leak

# File lib/apple_system_status/crawler.rb, line 109
# Builds a one-off crawler, crawls the Apple System Status page with it and
# always quits the crawler afterwards, whether or not crawling succeeded.
#
# @param country [String, nil] country code forwarded to the instance-level #perform
# @param title [String, nil] service title filter forwarded to the instance-level #perform
# @param chrome_options_args [Array<String>] forwarded to the constructor
# @param chrome_options_binary [String, nil] forwarded to the constructor
# @return [Hash] the value returned by the instance-level #perform
# @raise whatever the constructor or the instance-level #perform raises
def self.perform(country: nil, title: nil, chrome_options_args: DEFAULT_CHROME_OPTIONS_ARGS, chrome_options_binary: nil)
  crawler = AppleSystemStatus::Crawler.new(chrome_options_args: chrome_options_args, chrome_options_binary: chrome_options_binary)
  crawler.perform(country: country, title: title)
ensure
  # `crawler` is still nil when the constructor itself raised. Without this
  # guard the cleanup would raise NoMethodError and hide the real error.
  crawler.quit! if crawler
end

Public Instance Methods

apple_url(country) click to toggle source
# File lib/apple_system_status/crawler.rb, line 87
# Picks the System Status URL for a country.
#
# @param country [String, nil] country code; blank values and "us" select
#   the URL without a country segment
# @return [String] the status page URL
def apple_url(country)
  use_default_site = self.class.blank_string?(country) || country == "us"
  return "https://www.apple.com/support/systemstatus/" if use_default_site

  "https://www.apple.com/#{country}/support/systemstatus/"
end
perform(country: nil, title: nil) click to toggle source

Crawls the Apple System Status page.
@param country [String] country code (e.g. jp, ca, fr; default: us)
@param title [String] if specified, only services with exactly this title are returned
@return [Hash]
@example response format

{
  title: ,
  services: [
    { title: , description: , status:  }
  ]
}
# File lib/apple_system_status/crawler.rb, line 56
# Visits the status page for +country+ and collects its services.
#
# The service table is polled up to MAX_RETRY_COUNT times, sleeping one
# second after every attempt that yields nothing, because the table may not
# be populated as soon as the page has been visited.
#
# @param country [String, nil] country code handed to #apple_url
# @param title [String, nil] when not blank, keep only services whose title
#   equals this value exactly
# @return [Hash] +{ title: String, services: Array<Hash> }+ with the
#   services sorted by title
# @raise [RuntimeError] "Not found services" when every attempt came back empty
def perform(country: nil, title: nil)
  @session.visit(apple_url(country))
  page_title = @session.find(".section-date .date-copy").text.strip

  found = []
  MAX_RETRY_COUNT.times do
    found = fetch_services
    break unless found.empty?

    # wait until the page is fully loaded
    sleep 1
  end

  raise "Not found services" if found.empty?

  found.select! { |entry| entry[:title] == title } unless self.class.blank_string?(title)
  found.sort_by! { |entry| entry[:title] }

  { title: page_title, services: found }
end
quit!() click to toggle source
# File lib/apple_system_status/crawler.rb, line 41
# Quits the driver behind the session, if a session exists.
#
# @return [Object, nil] the result of the driver's +quit+, or nil when there
#   is no session
def quit!
  return unless @session

  @session.driver.quit
end

Private Instance Methods

fetch_services() click to toggle source
# File lib/apple_system_status/crawler.rb, line 123
# Reads every cell of the status table and turns each populated cell into a
# service entry.
#
# The cell's name text is split on "-" or ":"; the first piece becomes the
# title and the second the description. The status is the indicator div's
# class attribute with every "light-" removed.
#
# @return [Array<Hash>] entries shaped +{ title:, description:, status: }+;
#   cells missing either element are skipped
def fetch_services
  collected = []

  @session.all("#ssp-lights-table td").each do |cell|
    begin
      name_parts = cell.find(".light-container .light-content.light-name").text.split(/[-:]/).map { |part| part.strip }
      indicator_class = cell.find(".light-container .light-content.light-image div")["class"]

      collected.push(
        title:       name_parts[0],
        description: name_parts[1],
        status:      indicator_class.gsub("light-", ""),
      )
    rescue Capybara::ElementNotFound
      # suppress error (for blank cell)
      # NOTE: Capybara::Node::Matchers#has_css? is very slow!
      next
    end
  end

  collected
end