module BibURI::Driver::COinS
This driver reads COinS
citations in an HTML page and returns them. It defaults to returning a single citation, but can be used to return all citations.
Public Class Methods
The canonical form of this identifier is the normalized URI, IF it has a scheme of ‘http’ or ‘https’.
# File lib/biburi/driver/coins.rb, line 36
#
# Returns the canonical form of +id+ as a String: the parsed URI rendered
# back with to_s, provided its scheme is 'http' or 'https' and it names a
# host. Returns nil for everything else, including strings that cannot be
# parsed as a URI at all.
def self.canonical(id)
  uri = URI.parse(id)

  # Only http(s) pages can be fetched and scanned for COinS.
  return nil unless uri.scheme == 'http' || uri.scheme == 'https'

  # Strings such as "http:foo" parse without error but carry no host, so
  # there is no page they could identify.
  host = uri.host
  return nil if host.nil? || host.empty?

  uri.to_s
rescue URI::InvalidURIError
  # If there is an error in the URI, it is not an identifier.
  nil
end
Converts a COinS
string to a BibTeX entry.
# File lib/biburi/driver/coins.rb, line 117 def self.coins_to_bibtex(coins) # Create a BibTeX entry to store these values. bibentry = BibTeX::Entry.new # If we have COinS data, we have a lot more data. coins_kv = CGI::parse(coins) metadata = {} coins_kv.each do |key, val| if val.length == 1 then metadata[key] = val[0] else metadata[key] = val end end # COinS values are explained at http://ocoins.info/cobg.html # and in http://ocoins.info/cobgbook.html. # If we're not Z3988-2004, skip out. ctx_ver = metadata['ctx_ver'] if ctx_ver != 'Z39.88-2004' then ctx_ver = "" if ctx_ver.nil? raise "ctx_ver is: '#{ctx_ver}'" end # Add ALL the identifiers. bibentry[:identifiers] = "" if metadata.key?('rft_id') then if metadata['rft_id'].kind_of?(Array) then bibentry[:identifiers] = metadata['rft_id'].join("\n") else bibentry[:identifiers] = metadata['rft_id'] end end # COinS supports some types genre = metadata['rft.genre'] if genre == 'article' then bibentry.type = "article" elsif genre == 'book' then bibentry.type = "book" elsif genre == 'bookitem' then bibentry.type = "inbook" elsif genre == 'proceeding' then bibentry.type = "proceedings" elsif genre == 'conference' then bibentry.type = "inproceedings" elsif genre == 'report' then bibentry.type = "techreport" else # Default to misc. # There is a COinS genre called 'unknown' # which comes here, too. bibentry.type = "misc" end # Journal title: title, jtitle journal_title = metadata['rft.title'] # The old-style journal title. journal_title ||= metadata['rft.stitle'] # An abbreviated title. journal_title ||= metadata['rft.jtitle'] # Complete title. bibentry[:journal] = journal_title # Book title: btitle if metadata.key?('rft.btitle') if journal_title bibentry[:booktitle] = metadata['rft.btitle'] else bibentry[:title] = metadata['rft.btitle'] end end # Pages: spage, epage pages = metadata['rft.pages'] # If only pages are provided # Expand a single dash to a BibTeX-y double-dash. 
pages.gsub!(/([^\-])\-([^\-])/, '\1--\2') unless pages.nil? pages ||= metadata['rft.spage'] + "--" + metadata['rft.epage'] # If we have start and end pages bibentry[:pages] = pages # Authors are all in 'rft.au' authors = [] metadata['rft.au'] = [ metadata['rft.au'] ] unless metadata['rft.au'].kind_of?(Array) metadata['rft.au'].each do |author| authors.push(BibTeX::Name.parse(author)) unless author.nil? end # However! Sometimes a name is in aufirst/aulast # and also in au; and sometimes it's only in aufirst/aulast. first_author = BibTeX::Name.new first_author.last = metadata['rft.aulast'] first_author.suffix = metadata['rft.ausuffix'] if metadata.key?('rft.ausuffix') if metadata.key?('rft.aufirst') then first_author.first = metadata['rft.aufirst'] elsif metadata.key?('rft.auinit') then first_author.first = metadata['rft.auinit'] elsif first_author.first = metadata['rft.auinit1'] first_author.first += " " + metadata['rftinitm'] if metadata.key?('rftinitm') end if !authors.include?(first_author) then authors.unshift(first_author) end bibentry[:author] = BibTeX::Names.new(authors) # Dates. date = metadata['rft.date'] bibentry[:date] = date # Citeulike dates are easy to parse. unless date.nil? then if match = date.match(/^(\d{4})$/) then bibentry[:year] = match[1] elsif match = date.match(/^(\d{4})-(\d{1,2})$/) then bibentry[:year] = match[1] bibentry[:month] = match[2] elsif match = date.match(/^(\d{4})-(\d{1,2})-(\d{1,2})$/) then bibentry[:year] = match[1] bibentry[:month] = match[2] bibentry[:day] = match[3] end end # Map remaining fields to BibTeX. 
standard_mappings = { "rft.atitle" => "title", "rft.volume" => "volume", "rft.issue" => "number", "rft.artnum" => "article_number", "rft.issn" => "issn", "rft.eissn" => "eissn", "rft.isbn" => "isbn", "rft.coden" => "CODEN", "rft.sici" => "SICI", "rft.chron" => "chronology", "rft.ssn" => "season", "rft.quarter" => "quarter", "rft.part" => "part", "rft.place" => "address", "rft.pub" => "publisher", "rft.edition" => "edition", "rft.tpages" => "total_pages", "rft.series" => "series", "rft.bici" => "bici" } standard_mappings.keys.each do |field| if metadata.key?(field) then bibentry[standard_mappings[field]] = metadata[field] end end return bibentry end
Returns a list of parsed values with BibTeX names by looking up the provided id (a URL).
This will call self.lookup_all(), and then only return the first match. For pages like Mendeley, this is necessary to avoid pulling in ‘related to’ citations; use self.lookup_all() instead to pull in all entries.
# File lib/biburi/driver/coins.rb, line 57
#
# Looks up +id+ (a URL) and hands back only the first entry that
# lookup_all yields. When lookup_all yields nothing, whatever lookup_all
# itself returns is passed through.
def self.lookup(id)
  # Returning from inside the block leaves lookup itself, so iteration stops
  # at the first entry.
  lookup_all(id) { |entry| return entry }
end
This method returns ALL COinS
on this page. For Mendeley, this will return the page’s own COinS
as well as those of all related papers. Use self.lookup() to find a single one.
# File lib/biburi/driver/coins.rb, line 66
#
# Fetches the page at +id+ and converts every COinS span
# (<span class="Z3988">) on it into a BibTeX entry.
#
# With a block, each entry is yielded as it is built and nil is returned.
# Without a block, an array of all entries is returned. Returns nil when
# +id+ has no canonical form.
def self.lookup_all(id)
  # Calculate the canonical identifier.
  url = canonical(id)
  return nil if url.nil?

  # Retrieve the HTML and locate the COinS spans.
  content = Net::HTTP.get(URI(url))
  doc = Nokogiri::HTML(content)
  spans = doc.css('span.Z3988')

  # Identifier forms that carry a DOI: resolver URLs (http or https, with or
  # without the legacy "dx." host prefix), "doi:" and "info:doi/".
  doi_pattern = %r{\A(?:https?://(?:dx\.)?doi\.org/|doi:|info:doi/)(.*)\z}i

  # We support both calling styles: yield to a block, or collect an array.
  results = [] unless block_given?

  spans.each do |span|
    coins = span['title']
    # A span without a title attribute carries no COinS data to convert.
    next if coins.nil?

    bibentry = self.coins_to_bibtex(coins)

    # Set identifiers so we know where this came from.
    bibentry[:url] = url
    identifiers = bibentry[:identifiers].split("\n")
    identifiers.push(url)
    bibentry.add(:identifiers, identifiers.join("\n"))

    # See if we have a DOI. If several identifiers match, the last one wins.
    identifiers.each do |identifier|
      match = identifier.match(doi_pattern)
      bibentry[:doi] = match[1] if match
    end

    # Yield values or collect them.
    if block_given?
      yield(bibentry)
    else
      results.push(bibentry)
    end
  end

  # The array when we built one; nil when entries were yielded instead.
  results
end
We support an identifier if we can make it look canonical.
# File lib/biburi/driver/coins.rb, line 28
#
# True when +id+ has a canonical form, i.e. canonical(id) is not nil.
def self.supported?(id)
  !canonical(id).nil?
end