class GentleScholar::Publication

This class loads a single publication from Google Scholar and returns all of its attributes, including dynamic attributes such as the number of citations.

Constants

GS_CIT_URL
GS_HOST_URL
SCAN_LAMBDAS
SCAN_STR
TABLE_ATTR
TABLE_LAMBDAS

Public Class Methods

extract_cite_trend(doc) click to toggle source
# File lib/gentle-scholar/publication.rb, line 102
# Builds the citation trend of a publication as a Hash.
#
# Keys are the texts of the children of every <span class="gsc_g_t">
# (converted to symbols); values are the texts of the children of every
# <span class="gsc_g_al"> (converted with to_i). Keys and values are paired
# by position, so a key without a matching value maps to nil.
#
# doc - any object answering #xpath the way a parsed document does.
def self.extract_cite_trend(doc)
  year_keys = doc.xpath('//span[@class="gsc_g_t"]').children.map do |node|
    node.text.to_sym
  end
  totals = doc.xpath('//span[@class="gsc_g_al"]').children.map do |node|
    node.text.to_i
  end

  Hash[year_keys.zip(totals)]
end
extract_from_document(doc) click to toggle source
# File lib/gentle-scholar/publication.rb, line 62
# Collects every attribute that can be read from an already parsed document.
#
# The result is the Hash from extract_html_elements merged with the Hash from
# extract_html_table; when both provide the same key the table value wins.
def self.extract_from_document(doc)
  element_attrs = extract_html_elements(doc)
  table_attrs = extract_html_table(doc)
  element_attrs.merge(table_attrs)
end
extract_from_http(scholar_pub_id) click to toggle source
# File lib/gentle-scholar/publication.rb, line 57
# Fetches the publication identified by +scholar_pub_id+ through
# get_document_from_http and returns the attributes that
# extract_from_document reads from the fetched document.
def self.extract_from_http(scholar_pub_id)
  extract_from_document(get_document_from_http(scholar_pub_id))
end
extract_html_elements(doc) click to toggle source
# File lib/gentle-scholar/publication.rb, line 66
# Extracts the attributes that are located through the XPath expressions in
# SCAN_STR.
#
# Every expression in SCAN_STR is evaluated against +doc+ first. Then, for
# each entry of SCAN_LAMBDAS, the callable is applied to the matching query
# result, but only when that result reports any? as true. Attributes whose
# query matched nothing are left out of the returned Hash.
def self.extract_html_elements(doc)
  node_sets = {}
  SCAN_STR.each { |elem, path| node_sets[elem] = doc.xpath(path) }

  SCAN_LAMBDAS.each_with_object({}) do |(key, lam), found|
    nodes = node_sets[key]
    # Check for matches before invoking the lambda, never the other way round.
    found[key] = lam.call(nodes) if nodes.any?
  end
end
extract_html_table(doc) click to toggle source
# File lib/gentle-scholar/publication.rb, line 75
# Extracts the attributes listed in TABLE_ATTR from +doc+.
#
# First pass: each TABLE_ATTR value is looked up with extract_table_item;
# attributes for which the lookup returns nil/false are dropped.
# Second pass: if TABLE_LAMBDAS holds a callable for the attribute, it is
# applied to the extracted value. The raw extracted value is kept when there
# is no callable or when the callable returns nil/false.
#
# Returns a Hash of attribute => value in TABLE_ATTR order.
def self.extract_html_table(doc)
  raw = {}
  TABLE_ATTR.each do |attr, label|
    value = GentleScholar::Publication.extract_table_item(label, doc)
    raw[attr] = value if value
  end

  raw.each_with_object({}) do |(attr, value), result|
    handler = TABLE_LAMBDAS[attr]
    # Fall back to the unprocessed text whenever processing yields nothing.
    result[attr] = (handler && handler.call(value)) || value
  end
end
extract_table_item(name, doc) click to toggle source
# File lib/gentle-scholar/publication.rb, line 92
# Reads one row of the publication table.
#
# Selects the <div class="gs_scl"> elements that have a child div containing
# +name+ and returns the text of their <div class="gsc_value"> children, or
# nil when no such row exists.
#
# Any StandardError raised while reading the row is reported on STDERR
# together with +name+ and then re-raised. The initial row query runs outside
# that handler.
def self.extract_table_item(name, doc)
  row = doc.xpath("//div[@class='gs_scl' and contains(div,'#{name}')]")
  begin
    return nil if row.empty?

    row.xpath('div[@class="gsc_value"]').text
  rescue => err
    STDERR.puts "ERROR PROCESSING TABLE ITEM: #{name}"
    raise err
  end
end
get_document_from_http(scholar_pub_id) click to toggle source
# File lib/gentle-scholar/publication.rb, line 49
# Downloads the page of one publication and parses it with Nokogiri::HTML.
#
# scholar_pub_id - String of the form "<author id>:<publication id>". It is
#                  split on ':' and both parts are appended to GS_CIT_URL as
#                  the +user+ and +citation_for_view+ query parameters.
#
# The request is issued synchronously through Typhoeus; the response body is
# what gets parsed. A TypeError is raised while building the URL if either
# part of the id is missing.
def self.get_document_from_http(scholar_pub_id)
  author, publication = scholar_pub_id.split(/:/)
  # String#+ is used on purpose: it rejects a nil id part instead of
  # silently producing a malformed URL.
  query = '&user=' + author + '&citation_for_view=' + author + ':' + publication
  response = Typhoeus::Request.new(GS_CIT_URL + query).run
  Nokogiri::HTML(response.response_body)
end
http_to_file(scholar_pub_id, filename) click to toggle source

Useful for creating new test docs

# File lib/gentle-scholar/publication.rb, line 111
# Fetches the publication page for +scholar_pub_id+ with
# get_document_from_http and writes its string form to +filename+,
# replacing any existing content. Returns the number of bytes written.
def self.http_to_file(scholar_pub_id, filename)
  File.write(filename, get_document_from_http(scholar_pub_id))
end
text_to_document(text) click to toggle source
# File lib/gentle-scholar/publication.rb, line 116
# Turns +text+ into a parsed document by handing it to Nokogiri.parse and
# returns whatever that call returns.
# NOTE(review): presumably meant for loading documents previously saved to
# disk (e.g. test fixtures) — confirm against callers.
def self.text_to_document(text)
  Nokogiri.parse(text)
end