class MetacriticGames::Scraper
Public Class Methods
doc()
click to toggle source
# File lib/scraper.rb, line 7
# Reader for the class-level @@doc variable.
#
# @return [Object] the value most recently stored through doc=
def self.doc
  @@doc
end
doc=(name)
click to toggle source
# File lib/scraper.rb, line 3
# Writer for the class-level @@doc variable.
#
# @param name [Object] value to store (scrape_platform assigns a parsed
#   Nokogiri document here)
# @return [Object] the assigned value
def self.doc=(name)
  @@doc = name
end
get_title_platform(game)
click to toggle source
Extracts the platform name from the parenthesized part of the scraped title text.
# File lib/scraper.rb, line 40
# Extracts the platform label from the first parenthesized group in a
# title node's text, e.g. "Halo (XONE)" yields "XONE".
#
# @param game [#text] object whose #text returns the scraped title string
# @return [String, nil] the parenthesized text with every "(" and ")"
#   removed, or nil when the text holds no complete "(...)" group
def self.get_title_platform(game)
  parenthesized = game.text.slice(/\(([^)]+)\)/)
  # slice returns nil when nothing matches; pass that through instead of
  # raising NoMethodError on the nil.
  parenthesized && parenthesized.delete("()")
end
get_title_text(game)
click to toggle source
Cleans up the scraped title text by removing the parenthesized platform and surrounding whitespace.
# File lib/scraper.rb, line 35
# Returns the title text with every parenthesized group removed,
# e.g. "Halo (XONE)" yields "Halo".
#
# @param game [#text] object whose #text returns the scraped title string
# @return [String] the title without "(...)" groups, with runs of spaces
#   collapsed and leading/trailing whitespace stripped
def self.get_title_text(game)
  without_groups = game.text.gsub(/\(([^)]+)\)/, "")
  # Removing a group from the middle of a title leaves two adjacent spaces
  # behind; squeeze collapses them before the ends are trimmed.
  without_groups.squeeze(" ").strip
end
get_title_url(game)
click to toggle source
Converts the relative URL found on the index page into an absolute URL.
# File lib/scraper.rb, line 45
# Builds the absolute URL for a title node from the href of its link.
#
# @param game [#css] node whose css("a") result responds to
#   attribute("href") with an object exposing #value
# @return [String] the href prefixed with "http://www.metacritic.com", or
#   the href unchanged when it already starts with "http://" or "https://"
def self.get_title_url(game)
  href = game.css("a").attribute("href").value
  # An href that is already absolute must not get the host prepended again.
  return href if href.start_with?("http://", "https://")

  "http://www.metacritic.com" + href
end
scrape_game(url)
click to toggle source
Scrapes an individual game page and returns its scores and genre listings.
# File lib/scraper.rb, line 51
# Fetches a single game page and collects its scores and genres.
#
# The CLI progress bar is incremented once per genre entry found.
#
# @param url [String] address of the game page to fetch
# @return [Hash] { :metascore => { :platform => String },
#   :user_score => { :platform => String }, :genre => Array<String> }
def self.scrape_game(url)
  # URI.open is used instead of Kernel#open: Kernel#open no longer accepts
  # URLs on Ruby 3.0+ and would run a subprocess for input starting with "|".
  # NOTE(review): certificate verification is switched off by
  # ssl_verify_mode below - confirm this is still required.
  page = Nokogiri::HTML(URI.open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, 'User-Agent' => 'safari'))

  genres = page.css("li.summary_detail.product_genre").css("span.data").map do |genre|
    MetacriticGames::CLI.progressbar.increment
    genre.text
  end

  {
    :metascore => { :platform => page.css("div.metascore_w.xlarge").text },
    :user_score => { :platform => page.css(".metascore_anchor .user").text },
    :genre => genres
  }
end
scrape_new_releases()
click to toggle source
Returns the array of game information hashes scraped from the index page.
# File lib/scraper.rb, line 21
# Builds one entry per title node found in the stored index document.
#
# The CLI progress bar is incremented once per title node, whether or not
# the node produces a hash.
#
# @return [Array<Hash, nil>] for each title whose text contains "(", a hash
#   with :name, :platform and :url; nil for every other title
def self.scrape_new_releases
  title_nodes = self.doc.css(".product_wrap .product_title")

  title_nodes.map do |game|
    MetacriticGames::CLI.progressbar.increment
    # Titles without a parenthesized platform yield nil for this slot.
    next unless game.text.include?("(")

    {
      :name => self.get_title_text(game),
      :platform => self.get_title_platform(game),
      :url => self.get_title_url(game)
    }
  end
end
scrape_platform(url)
click to toggle source
Scrapes the page for platforms and stores the parsed document in the class-level doc variable so the index page does not have to be fetched a second time; returns the platform array to the CLI.
# File lib/scraper.rb, line 12
# Fetches the page at url, stores the parsed document through doc= so later
# calls can reuse it, and returns the text of every ".platform_item" node.
#
# The CLI progress bar is incremented once per platform node.
#
# @param url [String] address of the page to fetch
# @return [Array<String>] text of each ".platform_item" element
def self.scrape_platform(url)
  # URI.open is used instead of Kernel#open: Kernel#open no longer accepts
  # URLs on Ruby 3.0+ and would run a subprocess for input starting with "|".
  # NOTE(review): certificate verification is switched off by
  # ssl_verify_mode below - confirm this is still required.
  self.doc = Nokogiri::HTML(URI.open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, 'User-Agent' => 'safari'))

  self.doc.css(".platform_item").map do |platform|
    MetacriticGames::CLI.progressbar.increment
    platform.text
  end
end