class BibSync::Actions::DetermineArXivDOI
Public Class Methods
new(options)
click to toggle source
# File lib/bibsync/actions/determine_arxiv_doi.rb, line 7
# Sets up the action from the given options hash.
#
# Reads options[:bib] (required; stored in @bib) and options[:resync]
# (stored in @force, consulted by #run to decide whether entries that
# already have title/author/year are processed again).
#
# Raises RuntimeError with 'Option --bib is required' when options[:bib]
# is nil or false.
def initialize(options)
  @bib = options[:bib]
  raise 'Option --bib is required' unless @bib

  @force = options[:resync]
end
Public Instance Methods
run()
click to toggle source
# File lib/bibsync/actions/determine_arxiv_doi.rb, line 12
# Walks every entry of @bib and tries to determine its arXiv and DOI
# identifiers via #determine_arxiv_and_doi.
#
# An entry is skipped when any of the following holds:
# * it is a comment entry,
# * it already has both :doi and :arxiv,
# * @force is not set and it already has :title, :author and :year.
def run
  notice 'Determine arXiv and DOI identifiers'

  @bib.each do |entry|
    next if entry.comment?
    next if entry[:doi] && entry[:arxiv]
    next if !@force && entry[:title] && entry[:author] && entry[:year]

    determine_arxiv_and_doi(entry)
  end
end
Private Instance Methods
determine_arxiv_and_doi(entry)
click to toggle source
# File lib/bibsync/actions/determine_arxiv_doi.rb, line 26
# Tries to fill in the :arxiv and :doi fields of +entry+.
#
# Sources are consulted in this order:
# 1. If the entry has an attached file of type 'PDF' and neither identifier
#    is set, pages 1-2 are converted with the external +pdftotext+ command
#    and scanned for "arXiv:<id>" and "doi:<id>" (the latter
#    case-insensitively).
# 2. The attached file's name: "<digits>.<digits>v<digits>.<ext>" is taken
#    as an arXiv identifier; a name starting with "PhysRev" or "RevModPhys"
#    is turned into a DOI under the 10.1103 prefix.
# 3. If a DOI is known but no arXiv identifier, the arXiv export API is
#    queried by DOI and the identifier is taken from the returned entry id,
#    provided the returned DOI equals the entry's DOI.
#
# Failures of the arXiv query are reported through +error+ and not
# re-raised. A warning is emitted when neither identifier could be found.
def determine_arxiv_and_doi(entry)
  file = entry.file
  if file
    if file[:type] == 'PDF' && !entry[:arxiv] && !entry[:doi]
      debug('Searching for arXiv or doi identifier in pdf file', key: entry)
      # Only the first two pages are converted; stderr is discarded.
      text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
      entry[:arxiv] = Regexp.last_match(1) if text =~ /arXiv:\s*([\w\.\/\-]+)/
      entry[:doi] = Regexp.last_match(1) if text =~ /doi:\s*([\w\.\/\-]+)/i
    end

    # The dot between the two digit groups must be a literal dot; anchoring
    # with \A/\z makes the pattern apply to the whole name, not to one line.
    if !entry[:arxiv] && file[:name] =~ /\A(\d+\.\d+v\d+)\.\w+\z/
      debug('Interpreting file name as arXiv identifier', key: entry)
      entry[:arxiv] = Regexp.last_match(1)
    end

    if !entry[:doi] && file[:name] =~ /\A(PhysRev.*?|RevModPhys.*?)\.\w+\z/
      debug('Interpreting file name as doi identifier', key: entry)
      entry[:doi] = "10.1103/#{Regexp.last_match(1)}"
    end
  end

  if !entry[:arxiv] && entry[:doi]
    begin
      info('Fetch missing arXiv identifier', key: entry)
      xml = fetch_xml('http://export.arxiv.org/api/query', search_query: "doi:#{entry[:doi]}", max_results: 1)
      doi = xml.elements['//arxiv:doi']
      # Accept the result only if it is for exactly the DOI that was asked for.
      if doi && doi.text == entry[:doi]
        id = xml.elements['//entry/id'].text
        entry[:arxiv] = Regexp.last_match(1) if id =~ %r{\Ahttps?://arxiv\.org/abs/(.+)\Z}
      end
    rescue => ex
      # Best effort: a failed lookup is logged and must not abort the run.
      error('arXiv query by DOI failed', ex: ex, key: entry)
    end
  end

  warning('No arXiv or DOI identifier found', key: entry) unless entry[:arxiv] || entry[:doi]
end