class MetacriticGames::Scraper

Public Class Methods

doc() click to toggle source
# File lib/scraper.rb, line 7
# Class-level reader for the @@doc class variable.
# Returns whatever object was last stored through doc= (scrape_platform
# stores the parsed index page there). Ruby raises NameError if @@doc has
# never been assigned.
def self.doc
  @@doc
end
doc=(name) click to toggle source
# File lib/scraper.rb, line 3
# Class-level writer for the @@doc class variable.
# Stores the given object (despite the parameter name, scrape_platform
# passes the parsed index page) so later calls to doc can reuse it.
# Returns the assigned value, as any Ruby assignment does.
def self.doc= (name)
  @@doc = name
end
get_title_platform(game) click to toggle source

Extracts the platform name from the parenthesised part of a scraped title, e.g. "Some Game (PS4)" yields "PS4".

# File lib/scraper.rb, line 40
# Extracts the platform name from the first parenthesised segment of a
# scraped title, e.g. "Some Game (PS4)" => "PS4".
#
# @param game [#text] scraped title node; only its #text is read
# @return [String, nil] the matched segment with every "(" and ")" removed,
#   or nil when the text contains no "(...)" segment
def self.get_title_platform(game)
  # slice returns nil when nothing matches; &. avoids a NoMethodError there.
  game.text.slice(/\(([^)]+)\)/)&.delete("()")
end
get_title_text(game) click to toggle source

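Removes every parenthesised segment (the platform) from a scraped title and trims surrounding whitespace, leaving only the game name.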

# File lib/scraper.rb, line 35
# Returns the game's title with every "(...)" segment removed and the
# surrounding whitespace trimmed.
#
# @param game [#text] scraped title node; only its #text is read
# @return [String] the cleaned title
def self.get_title_text(game)
  without_platform = game.text.gsub(/\(([^)]+)\)/, "")
  without_platform.strip
end
get_title_url(game) click to toggle source

Converts the relative URL found on the index page into an absolute metacritic.com URL.

# File lib/scraper.rb, line 45
# Builds the absolute URL for a game from the relative href of the anchor
# found inside the scraped title node.
#
# @param game [#css] scraped title node that responds to css("a")
# @return [String] "http://www.metacritic.com" followed by the href value
def self.get_title_url(game)
  relative_path = game.css("a").attribute("href").value
  "http://www.metacritic.com" + relative_path
end
scrape_game(url) click to toggle source

Scrapes an individual game page and returns a hash with its metascore, user score, and genre list.

# File lib/scraper.rb, line 51
# Fetches a single game page and extracts its scores and genres.
#
# The CLI progress bar is incremented once per genre found.
#
# @param url [String] absolute URL of the game page
# @return [Hash] { :metascore => { :platform => String },
#   :user_score => { :platform => String }, :genre => Array<String> }
def self.scrape_game(url)
  # URI.open (from open-uri) replaces Kernel#open, which stopped opening URLs
  # in Ruby 3.0 and treats a leading "|" as a shell command. The block form
  # closes the downloaded stream once it has been parsed.
  # NOTE(review): VERIFY_NONE disables TLS certificate checks; kept as in the
  # original, but confirm this is still intended.
  page = URI.open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, 'User-Agent' => 'safari') do |io|
    Nokogiri::HTML(io)
  end

  genres = page.css("li.summary_detail.product_genre").css("span.data").map do |genre|
    MetacriticGames::CLI.progressbar.increment
    genre.text
  end

  {
    :metascore => {
      :platform => page.css("div.metascore_w.xlarge").text
    },
    :user_score => {
      :platform => page.css(".metascore_anchor .user").text
    },
    :genre => genres
  }
end
scrape_new_releases() click to toggle source

Returns an array with one entry per title on the index page: a game information hash (name, platform, url) when the title text contains "(", and nil otherwise.

# File lib/scraper.rb, line 21
# Builds one entry per ".product_wrap .product_title" node of the cached
# index document (read through doc).
#
# The CLI progress bar is incremented once per node. A node whose text has
# no "(" produces nil in the returned array rather than being dropped.
#
# @return [Array<Hash, nil>] hashes with :name, :platform and :url keys
def self.scrape_new_releases
  doc.css(".product_wrap .product_title").map do |title|
    MetacriticGames::CLI.progressbar.increment
    # Titles without a parenthesised platform yield nil for this slot.
    next unless title.text.include?("(")

    {
      :name => get_title_text(title),
      :platform => get_title_platform(title),
      :url => get_title_url(title)
    }
  end
end
scrape_platform(url) click to toggle source

Scrapes the index page for platform names and caches the parsed document in the class-level doc variable, so the index page does not have to be fetched a second time. Returns the array of platform names to the CLI.

# File lib/scraper.rb, line 12
# Fetches the index page, caches the parsed document through doc=, and
# returns the text of every ".platform_item" node.
#
# The CLI progress bar is incremented once per platform found.
#
# @param url [String] absolute URL of the index page
# @return [Array<String>] platform names in page order
def self.scrape_platform(url)
  # URI.open (from open-uri) replaces Kernel#open, which stopped opening URLs
  # in Ruby 3.0 and treats a leading "|" as a shell command. The block form
  # closes the downloaded stream once it has been parsed.
  # NOTE(review): VERIFY_NONE disables TLS certificate checks; kept as in the
  # original, but confirm this is still intended.
  parsed = URI.open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, 'User-Agent' => 'safari') do |io|
    Nokogiri::HTML(io)
  end
  # Cached so scrape_new_releases can reuse the page without another request.
  self.doc = parsed

  self.doc.css(".platform_item").map do |platform|
    MetacriticGames::CLI.progressbar.increment
    platform.text
  end
end