class BibSync::Actions::SynchronizeMetadata
Public Class Methods
new(options)
click to toggle source
# File lib/bibsync/actions/synchronize_metadata.rb, line 7
# Build the action from an options hash.
#
# options[:bib] is stored in @bib and must be truthy; options[:resync]
# is stored in @force.
#
# Raises RuntimeError ('Option --bib is required') when options[:bib]
# is nil or false.
def initialize(options)
  @bib = options[:bib]
  raise 'Option --bib is required' unless @bib

  @force = options[:resync]
end
Public Instance Methods
run()
click to toggle source
# File lib/bibsync/actions/synchronize_metadata.rb, line 12
# Walk over a snapshot of the bibliography (@bib.to_a) and refresh each
# non-comment entry.
#
# Metadata is re-fetched when @force is set or when any of :title,
# :author or :year is missing. arXiv is consulted first (when :arxiv is
# set), then DOI (when :doi is set). Entries whose DOI starts with
# "10.1103/" and that still have no abstract get the APS abstract.
# Finally every processed entry receives an :added date if it has none.
def run
  notice 'Synchronize with arXiv and DOI'

  @bib.to_a.each do |entry|
    next if entry.comment?

    entry.delete(:abstract) if @force

    # Same short-circuit order as title && author && year.
    if @force || [:title, :author, :year].any? { |field| !entry[field] }
      if entry[:arxiv]
        # A key equal to the versioned arXiv id triggers the rename step.
        if entry.key == arxiv_id(entry, prefix: false, version: true)
          entry = rename_arxiv_file(entry)
          # rename_arxiv_file returns nil when the entry was dropped.
          next unless entry
        end
        update_arxiv(entry)
      end

      update_doi(entry) if entry[:doi]
    end

    update_aps_abstract(entry) if entry[:doi] =~ /\A10\.1103\// && !entry[:abstract]

    # Add timestamp when this entry was added
    entry[:added] ||= Date.today.to_s
  end
end
Private Instance Methods
rename_arxiv_file(entry)
click to toggle source
Renames the arXiv file when the entry key contains a version suffix.
# File lib/bibsync/actions/synchronize_metadata.rb, line 76
# Rename arxiv file if key contains version.
#
# Computes the version-less key via arxiv_id. If @bib already holds an
# entry under that key, the given entry is removed from @bib and the
# existing entry takes over the new :arxiv and :doi values, unless the
# existing entry's version is greater than or equal to the new one. In
# that case the attached file (if any) is deleted and nil is returned.
# If no entry exists under the key, the given entry is simply re-keyed.
# An attached file is renamed so its path uses the version-less key.
#
# Returns the entry that now represents the paper, or nil when the
# incoming entry was discarded.
def rename_arxiv_file(entry)
  file = entry.file
  key = arxiv_id(entry, prefix: false, version: false)

  if (old_entry = @bib[key])
    # Existing entry found
    @bib.delete(entry)

    # Compare versions numerically: as strings "9" >= "10" would be true
    # and v10 would wrongly be treated as older than v9.
    old_version = Regexp.last_match(1).to_i if old_entry[:arxiv] =~ /v(\d+)$/
    new_version = Regexp.last_match(1).to_i if entry[:arxiv] =~ /v(\d+)$/

    if old_version && new_version && old_version >= new_version
      info('Not updating existing entry with older version', key: old_entry)
      File.delete(file[:path]) if file
      return nil
    end

    old_entry[:arxiv] = entry[:arxiv]
    old_entry[:doi] = entry[:doi]
    entry = old_entry
    info('Updating existing entry', key: entry)
  else
    # This is a new entry
    entry.key = key
  end

  if file
    # Replace the versioned id inside the path with the version-less key.
    new_path = file[:path].sub(arxiv_id(entry, prefix: false, version: true), key)
    File.rename(file[:path], new_path)
    entry.file = new_path
  end

  entry
end
update_aps_abstract(entry)
click to toggle source
# File lib/bibsync/actions/synchronize_metadata.rb, line 43
# Fetch the page at http://link.aps.org/doi/<doi> and, when it contains
# an 'aps-abstractbox' div, store that div's content with all tags
# stripped in entry[:abstract].
#
# Any StandardError raised along the way is reported through error and
# not re-raised.
def update_aps_abstract(entry)
  info("Downloading APS abstract", key: entry)
  page = fetch("http://link.aps.org/doi/#{entry[:doi]}")
  return unless page =~ %r{<div class='aps-abstractbox'>(.*?)</div>}

  # Drop any markup nested inside the abstract box.
  entry[:abstract] = Regexp.last_match(1).gsub(/<[^>]+>/, '')
rescue => ex
  error('Abstract download failed', key: entry, ex: ex)
end
update_arxiv(entry)
click to toggle source
# File lib/bibsync/actions/synchronize_metadata.rb, line 112
# Query the arXiv OAI endpoint (GetRecord, metadataPrefix 'arXiv') for
# the entry's version-less arXiv id and copy the returned metadata into
# the entry: title, abstract, categories, primary class, authors,
# journal, eprint, archive prefix, created/updated dates, year, month,
# and - when present in the record - doi, journal-ref and comments.
#
# If the response contains an <error> element its text is raised. Any
# StandardError removes :arxiv from the entry and is reported through
# error rather than re-raised.
def update_arxiv(entry)
  info('Downloading arXiv metadata', key: entry)
  oai_identifier = "oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}"
  xml = fetch_xml('http://export.arxiv.org/oai2',
                  verb: 'GetRecord',
                  identifier: oai_identifier,
                  metadataPrefix: 'arXiv')

  failure = xml.elements['//error']
  raise failure.text if failure

  record = xml.elements['//arXiv']

  # Copy the text of an optional child element only when it exists.
  copy_optional = lambda do |field, tag|
    node = record.elements[tag]
    entry[field] = node.text if node
  end

  entry[:title] = record.elements['title'].text
  entry[:abstract] = record.elements['abstract'].text
  entry[:arxivcategories] = record.elements['categories'].text
  # The first listed category becomes the primary class.
  entry[:primaryclass] = entry[:arxivcategories].split(/\s+/).first

  authors = record.get_elements('authors/author').map do |person|
    "{#{person.elements['keyname'].text}}, {#{person.elements['forenames'].text}}"
  end
  entry[:author] = authors.join(' and ')

  # Default journal; replaced below when the record has a journal-ref.
  entry[:journal] = 'ArXiv e-prints'
  entry[:eprint] = entry[:arxiv]
  entry[:archiveprefix] = 'arXiv'

  copy_optional.call(:arxivcreated, 'created')
  copy_optional.call(:arxivupdated, 'updated')

  # Prefer the update date over the creation date.
  timestamp = entry[:arxivupdated] || entry[:arxivcreated]
  date = Date.parse(timestamp)
  entry[:year] = date.year
  month_names = %w(jan feb mar apr may jun jul aug sep oct nov dec)
  entry[:month] = Literal.new(month_names[date.month - 1])

  copy_optional.call(:doi, 'doi')
  copy_optional.call(:journal, 'journal-ref')
  copy_optional.call(:comments, 'comments')

  entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
rescue => ex
  entry.delete(:arxiv)
  error('arXiv download failed', key: entry, ex: ex)
end
update_doi(entry)
click to toggle source
# File lib/bibsync/actions/synchronize_metadata.rb, line 53
# Request BibTeX-style metadata for the entry's DOI from dx.doi.org and
# merge every parsed field into the entry.
#
# A response body of 'Unknown DOI' is raised as an error. Every failure
# is reported through error. When the exception exposes a response with
# status 500 the request is retried, giving up on the tenth failure.
# Once no retry happens, :doi is removed from the entry.
def update_doi(entry)
  url = "http://dx.doi.org/#{entry[:doi]}"
  info("Downloading DOI metadata from #{url}", key: entry)
  body = fetch(url, nil, 'Accept' => 'text/bibliography; style=bibtex')
  raise body if body == 'Unknown DOI'

  Entry.parse(body).each do |field, value|
    entry[field] = value
  end
rescue => ex
  error('DOI download failed', key: entry, ex: ex)
  # dx.doi.org shows spurious 500 errors
  if ex.respond_to?(:response) && ex.response[:status] == 500
    # The counter is a method local, so it survives each retry.
    attempts = (attempts || 0) + 1
    if attempts < 10
      info('Retrying...', key: entry)
      retry
    end
    error('Giving up :(', key: entry)
  end
  entry.delete(:doi)
end