class Google::Scholar::Scraper
Attributes
documents [RW] — the documents loaded so far, in the order they were added.
Public Class Methods
class_lookup(url="")
click to toggle source
# File lib/google/scholar/scraper.rb, line 13
# Chooses the document class used to wrap a page, based on the query
# parameters of +url+.
#
# Only the query string (the text after the first "?" and before any "#")
# is inspected, so path segments or fragments can never be mistaken for
# parameters.
#
# @param url [String, nil] the URL about to be loaded; nil is treated as ""
# @return [Class] Google::Scholar::AuthorsDocument when the query contains
#   exactly "view_op=search_authors", Google::Scholar::AuthorsProfileDocument
#   when it contains a "user" parameter, otherwise Google::Scholar::Document
def self.class_lookup(url = "")
  # Strip the fragment first so it cannot stay attached to the last parameter.
  without_fragment = url.to_s.split("#", 2).first.to_s
  query = without_fragment.split("?", 2)[1]
  arguments = query ? query.split("&") : []

  return Google::Scholar::AuthorsDocument if arguments.include?("view_op=search_authors")

  # Anchor at the start of the pair: "other_user=1" is not a "user" parameter.
  return Google::Scholar::AuthorsProfileDocument if arguments.any? { |argument| argument.start_with?("user=") }

  Google::Scholar::Document
end
load_url(url)
click to toggle source
# File lib/google/scholar/scraper.rb, line 34
# Fetches +url+ and wraps the parsed HTML in the document class chosen by
# class_lookup.
#
# The page is opened through the URI object rather than Kernel#open:
# Kernel#open stopped accepting URLs in Ruby 3.0, and it treats some strings
# specially (e.g. a leading "|"). The block form also guarantees the
# underlying IO is closed once parsing has finished. As before, open-uri
# must already be loaded by the file.
#
# @param url [String] an absolute http or https URL
# @return [Object] a new instance of the class returned by class_lookup(url),
#   built from Nokogiri::HTML of the response body
# @raise [RuntimeError] when the URL has no scheme or a scheme other than
#   http/https
def self.load_url(url)
  uri = URI(url)
  raise "Invalid scheme for #{url}" unless %w[http https].include?(uri.scheme)

  document_class = class_lookup(url)
  uri.open { |page| document_class.new(Nokogiri::HTML(page)) }
end
new(url,initial_document=nil)
click to toggle source
# File lib/google/scholar/scraper.rb, line 7
# Builds a scraper and seeds its document list.
#
# @param url [String, nil] when truthy, the page is fetched through
#   self.class.load_url and appended after any initial document
# @param initial_document [Object, nil] when truthy, stored as the first
#   entry of the document list
def initialize(url, initial_document = nil)
  @documents = initial_document ? [initial_document] : []
  @documents.push(self.class.load_url(url)) if url
  self
end
Public Instance Methods
has_more_pages?()
click to toggle source
# File lib/google/scholar/scraper.rb, line 39
# Reports whether the most recently loaded document has a following page.
#
# A scraper built without a URL or an initial document holds no documents;
# in that case there is nothing to continue from and the answer is false
# instead of a NoMethodError on nil.
#
# @return [Boolean, Object] false when no document is loaded, otherwise
#   whatever the last document's has_next_page? returns
def has_more_pages?
  last_document = @documents.last
  return false if last_document.nil?

  last_document.has_next_page?
end
load_next_page()
click to toggle source
# File lib/google/scholar/scraper.rb, line 30
# Fetches the page that follows the last loaded document and appends it to
# the document list.
#
# @return [Array, nil] the document list after appending, or nil when
#   has_more_pages? is falsy and nothing was loaded
def load_next_page
  if has_more_pages?
    next_url = @documents.last.next_page_url
    @documents.push(self.class.load_url(next_url))
  end
end
valid?()
click to toggle source
# File lib/google/scholar/scraper.rb, line 24
# Checks every loaded document, stopping at the first one that is not valid.
#
# @return [Boolean] true when each document's valid? is truthy (including
#   when no documents are loaded), false otherwise
def valid?
  @documents.all? { |document| document.valid? }
end