class GentleScholar::Publication
This class loads a single publication from Google Scholar and returns all of its attributes, including dynamic attributes such as the number of citations.
Constants
- GS_CIT_URL
- GS_HOST_URL
- SCAN_LAMBDAS
- SCAN_STR
- TABLE_ATTR
- TABLE_LAMBDAS
Public Class Methods
extract_cite_trend(doc)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 102
# Builds the citation trend for a publication page.
#
# Reads the text of every child of the +span.gsc_g_t+ nodes (used as the
# keys, converted to Symbols) and of the +span.gsc_g_al+ nodes (used as the
# values, converted with +to_i+), then pairs them up by position.
#
# doc - a parsed document responding to +xpath+.
#
# Returns a Hash of Symbol => Integer. Pairing is purely positional: when
# there are fewer value nodes than key nodes, the trailing keys map to nil.
def self.extract_cite_trend(doc)
  year_keys = doc.xpath('//span[@class="gsc_g_t"]').children.map do |node|
    node.text.to_sym
  end
  tallies = doc.xpath('//span[@class="gsc_g_al"]').children.map do |node|
    node.text.to_i
  end
  Hash[year_keys.zip(tallies)]
end
extract_from_document(doc)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 62
# Collects every attribute that can be read from an already-parsed page.
#
# Runs extract_html_elements first and extract_html_table second, then
# merges the two results; on a key collision the table value wins.
#
# doc - the parsed document handed to both extractors.
#
# Returns the merged Hash.
def self.extract_from_document(doc)
  element_attrs = extract_html_elements(doc)
  table_attrs = extract_html_table(doc)
  element_attrs.merge(table_attrs)
end
extract_from_http(scholar_pub_id)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 57
# Downloads the page for +scholar_pub_id+ and extracts its attributes.
#
# scholar_pub_id - identifier passed straight to get_document_from_http.
#
# Returns whatever extract_from_document returns for the fetched document.
def self.extract_from_http(scholar_pub_id)
  extract_from_document(get_document_from_http(scholar_pub_id))
end
extract_html_elements(doc)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 66
# Extracts the attributes that are located by a dedicated XPath query.
#
# SCAN_STR is walked as (element, xpath string) pairs and each query is run
# against +doc+. SCAN_LAMBDAS is then walked as (key, callable) pairs; a
# callable is invoked with the query result for its key only when that
# result reports +any?+, so keys without a match are left out entirely.
#
# doc - a parsed document responding to +xpath+.
#
# Returns a Hash of key => value produced by the matching callable.
def self.extract_html_elements(doc)
  matches = {}
  SCAN_STR.each { |elem, path| matches[elem] = doc.xpath(path) }

  SCAN_LAMBDAS.each_with_object({}) do |(key, extractor), found|
    nodes = matches[key]
    found[key] = extractor.call(nodes) if nodes.any?
  end
end
extract_html_table(doc)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 75
# Extracts the attributes that live in the page's label/value table.
#
# First pass: for every (attribute, label) pair in TABLE_ATTR, look the label
# up with extract_table_item and keep the attribute only when the lookup
# returned something truthy.
# Second pass: when TABLE_LAMBDAS has a callable for an attribute, run it on
# the raw text; a nil/false result (or no callable) falls back to the raw
# text.
#
# doc - the parsed document handed to extract_table_item.
#
# Returns a Hash of attribute => processed (or raw) value.
def self.extract_html_table(doc)
  raw = {}
  TABLE_ATTR.each do |attr, label|
    text = GentleScholar::Publication.extract_table_item(label, doc)
    raw[attr] = text if text
  end

  cooked = raw.map do |attr, text|
    handler = TABLE_LAMBDAS[attr]
    converted = handler && handler.call(text)
    [attr, converted || text]
  end

  raw.merge(Hash[cooked])
end
extract_table_item(name, doc)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 92
# Reads the value cell of the table row whose label contains +name+.
#
# The row is located with an XPath query for a +div.gs_scl+ whose child div
# contains +name+; the value is the text of that row's +div.gsc_value+.
#
# name - label text interpolated into the XPath query.
# doc  - a parsed document responding to +xpath+.
#
# Returns the value text, or nil when no row matches.
# Any error raised while reading the matched row is reported on STDERR and
# then re-raised unchanged; errors from the initial row query are not
# intercepted.
def self.extract_table_item(name, doc)
  rows = doc.xpath("//div[@class='gs_scl' and contains(div,'#{name}')]")
  begin
    if rows.empty?
      nil
    else
      rows.xpath('div[@class="gsc_value"]').text
    end
  rescue => err
    STDERR.puts "ERROR PROCESSING TABLE ITEM: #{name}"
    raise err
  end
end
get_document_from_http(scholar_pub_id)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 49
# Fetches the citation page for a publication and parses it as HTML.
#
# scholar_pub_id - identifier of the form "AUTHOR_ID:PUBLICATION_ID". The
#                  part before the first colon is sent as +user+; both parts
#                  are sent, re-joined with a colon, as +citation_for_view+.
#
# Returns the result of Nokogiri::HTML applied to the response body.
# Raises ArgumentError when the identifier has no colon or either half is
# empty. Without this check a malformed id only failed later with an opaque
# TypeError from string concatenation.
def self.get_document_from_http(scholar_pub_id)
  auth_id, pub_id = scholar_pub_id.to_s.split(':')
  if auth_id.nil? || auth_id.empty? || pub_id.nil? || pub_id.empty?
    raise ArgumentError,
          "scholar_pub_id must look like 'AUTHOR_ID:PUBLICATION_ID', " \
          "got #{scholar_pub_id.inspect}"
  end

  url = "#{GS_CIT_URL}&user=#{auth_id}&citation_for_view=#{auth_id}:#{pub_id}"
  res = Typhoeus::Request.new(url).run
  Nokogiri::HTML(res.response_body)
end
http_to_file(scholar_pub_id, filename)
click to toggle source
Useful for creating new test docs
# File lib/gentle-scholar/publication.rb, line 111
# Downloads the page for +scholar_pub_id+ and writes it to +filename+.
#
# The document is fetched before the file is opened, so a failed fetch never
# truncates an existing file. The file is opened in 'w' mode (created or
# overwritten).
#
# scholar_pub_id - identifier passed to get_document_from_http.
# filename       - path of the file to write.
#
# Returns the value of the block's +write+ call.
def self.http_to_file(scholar_pub_id, filename)
  fetched = get_document_from_http(scholar_pub_id)
  File.open(filename, 'w') do |out|
    out.write(fetched)
  end
end
text_to_document(text)
click to toggle source
# File lib/gentle-scholar/publication.rb, line 116
# Turns raw markup text into a parsed document.
#
# text - the markup to parse.
#
# Returns whatever Nokogiri.parse produces for +text+.
# NOTE(review): get_document_from_http parses with Nokogiri::HTML while this
# method uses Nokogiri.parse -- confirm both yield the same document type for
# saved test pages.
def self.text_to_document(text)
  Nokogiri.parse(text)
end