class Google::Scholar::Scraper

Attributes

documents[RW]

Public Class Methods

class_lookup(url="") click to toggle source
# File lib/google/scholar/scraper.rb, line 13
# Picks the document class that should wrap the page found at +url+.
#
# Only the query string is inspected, i.e. the part after the first "?" and
# before any "#fragment":
# * a +view_op=search_authors+ parameter selects Google::Scholar::AuthorsDocument
# * otherwise a +user+ parameter selects Google::Scholar::AuthorsProfileDocument
# * anything else falls back to Google::Scholar::Document
#
# url - String URL; nil is treated like an empty string.
#
# Returns one of the three document classes named above.
def self.class_lookup(url="")
  # Drop the fragment first so it cannot stick to the last parameter.
  without_fragment = url.to_s.split("#", 2).first.to_s
  query = without_fragment.split("?", 2)[1].to_s
  params = query.split("&")

  return Google::Scholar::AuthorsDocument if params.include?("view_op=search_authors")
  # Match the parameter name itself, not any parameter that merely contains "user=".
  return Google::Scholar::AuthorsProfileDocument if params.any? { |param| param.start_with?("user=") }

  Google::Scholar::Document
end
load_url(url) click to toggle source
# File lib/google/scholar/scraper.rb, line 34
# Fetches +url+ and wraps the parsed HTML in the document class chosen by
# class_lookup.
#
# url - String URL; must use the http or https scheme.
#
# Returns a new instance of the class returned by class_lookup(url).
# Raises RuntimeError when the URL has no scheme or a scheme other than
# http/https.
def self.load_url(url)
  uri = URI(url)
  raise "Invalid scheme for #{url}" unless %w{http https}.include?(uri.scheme)

  # Open through the URI object (open-uri) instead of Kernel#open, which
  # stopped fetching URLs in Ruby 3.0. The block form closes the stream as
  # soon as the HTML has been parsed.
  html = uri.open { |io| Nokogiri::HTML(io) }
  class_lookup(url).new(html)
end
new(url,initial_document=nil) click to toggle source
# File lib/google/scholar/scraper.rb, line 7
# Builds a scraper, optionally seeded with an already-parsed document and/or
# a URL to fetch right away.
#
# url              - URL handed to load_url; skipped when nil or false.
# initial_document - document stored ahead of the fetched one; skipped when
#                    nil or false.
def initialize(url,initial_document=nil)
  @documents = initial_document ? [initial_document] : []
  if url
    fetched = self.class.load_url(url)
    @documents.push(fetched)
  end
  self
end

Public Instance Methods

has_more_pages?() click to toggle source
# File lib/google/scholar/scraper.rb, line 39
# Reports whether the most recently loaded document links to a further page.
#
# Returns false when no document has been loaded yet; otherwise returns
# whatever the last document's has_next_page? returns.
def has_more_pages?
  latest = @documents.last
  # An empty document list has nothing to page through; avoid calling
  # has_next_page? on nil.
  return false if latest.nil?

  latest.has_next_page?
end
load_next_page() click to toggle source
# File lib/google/scholar/scraper.rb, line 30
# Fetches the page that follows the most recently loaded document and appends
# it to @documents.
#
# Returns nil without fetching anything when has_more_pages? is falsy;
# otherwise returns @documents with the new page appended.
def load_next_page
  if has_more_pages?
    next_url = @documents.last.next_page_url
    @documents.push(self.class.load_url(next_url))
  end
end
valid?() click to toggle source
# File lib/google/scholar/scraper.rb, line 24
# True only when every loaded document reports itself valid; an empty
# document list counts as valid.
def valid?
  @documents.all? { |document| document.valid? }
end