class Terrier::HtmlData
Constants
- AUTHOR_META_TAGS
- DOI_META_TAGS
- ISSN_TAGS
- LICENSING_TAGS
- PUBLICATION_DATE_META_TAGS
- PUBLICATION_META_TAGS
- TITLE_META_TAGS
Attributes
url[R]
Public Class Methods
new(url)
click to toggle source
# File lib/terrier/html_data.rb, line 13
# Validates +url+, downloads the page it points at and parses the body.
#
# url:: the address of the page to inspect; must satisfy uri?.
#
# Raises Terrier::UrlError ("bad url given") when uri? rejects +url+.
#
# Stores the url in @url, the result of the class-level +get+ in @raw,
# and the Nokogiri document built from @raw in @parsed_html.
# NOTE(review): +get+ is presumably supplied by an HTTP client mixin
# that is not visible here -- confirm.
def initialize(url)
  raise(Terrier::UrlError, "bad url given") unless uri?(url)

  @url = url
  @raw = self.class.get(@url)
  @parsed_html = Nokogiri::HTML(@raw)
end
Public Instance Methods
data()
click to toggle source
# File lib/terrier/html_data.rb, line 20
# Returns the metadata hash for the page, building it on first use and
# memoizing it in @_data.
#
# Keys: :url, :journal, :title, :authors (de-duplicated), :publication_date,
# :doi, :issn (always nil), :zenodo_pdf and :bibliography.
#
# The bibliography is stored in the memoized hash itself, so every call
# returns the same keys. (Previously the bibliography was only merged into
# the value returned by the first call and was missing from later calls.)
#
# NOTE(review): :issn is hard-coded to nil even though the class lists an
# ISSN_TAGS constant -- confirm whether it should be collected.
def data
  return @_data if @_data

  # @_data has to be assigned before the bibliography is built:
  # bibliography_reference reads data[:doi] and relies on this memo
  # being in place to avoid re-entering the construction below.
  @_data = {
    url: url,
    journal: collect_meta_data(@parsed_html, PUBLICATION_META_TAGS).first,
    title: collect_meta_data(@parsed_html, TITLE_META_TAGS).first,
    authors: collect_meta_data(@parsed_html, AUTHOR_META_TAGS).uniq,
    publication_date: collect_meta_data(@parsed_html, PUBLICATION_DATE_META_TAGS).first,
    doi: collect_meta_data(@parsed_html, DOI_META_TAGS).first,
    issn: nil,
    zenodo_pdf: zenodo_pdf
  }
  @_data[:bibliography] = bibliography(@_data)
  @_data
end
Private Instance Methods
bibliography(parsed_data)
click to toggle source
# File lib/terrier/html_data.rb, line 42
# Formats a one-line citation string from +parsed_data+.
#
# parsed_data:: a hash read at :authors (must respond to join),
#               :publication_date, :title and :journal.
#
# Layout: "<authors joined by ', '>. (<date>). <title>. <journal>. <ref>",
# where <ref> is whatever bibliography_reference returns. Missing (nil)
# fields render as empty strings.
def bibliography(parsed_data)
  author_list = parsed_data[:authors].join(', ')
  published   = parsed_data[:publication_date]
  title       = parsed_data[:title]
  journal     = parsed_data[:journal]

  "#{author_list}. (#{published}). #{title}. #{journal}. #{bibliography_reference}"
end
bibliography_reference()
click to toggle source
# File lib/terrier/html_data.rb, line 64
# Returns the trailing reference of the citation: an HTML link to
# https://doi.org/<doi> when data[:doi] is set, otherwise data[:url].
#
# NOTE(review): the DOI is interpolated into the markup as-is (no HTML
# escaping) -- confirm that callers treat it as trusted.
def bibliography_reference
  doi = data[:doi]
  return data[:url] unless doi

  "<a href='https://doi.org/#{doi}'>DOI: #{doi}</a>"
end
citation_header()
click to toggle source
# File lib/terrier/html_data.rb, line 72
# Returns a fresh single-entry hash holding an "Accept" header value that
# names the CSL JSON media type.
def citation_header
  accepted_type = "application/vnd.citationstyles.csl+json;q=1.0"
  { "Accept" => accepted_type }
end
collect_meta_data(parsed_html, meta_names_array)
click to toggle source
# File lib/terrier/html_data.rb, line 55
# Looks up <meta name="..."> content values, trying each name in order.
#
# parsed_html::      an object responding to xpath (the parsed document).
# meta_names_array:: meta tag names to try, highest priority first.
#
# Returns the content values for the first name that yields at least one
# match; an empty array when no name matches. Names after the first hit
# are not queried.
def collect_meta_data(parsed_html, meta_names_array)
  first_hit = meta_names_array.lazy.map do |meta_name|
    parsed_html.xpath("//meta[@name='#{meta_name}']/@content").map(&:value)
  end.reject(&:empty?).first

  first_hit || []
end
uri?(string)
click to toggle source
# File lib/terrier/html_data.rb, line 46
# Tells whether +string+ is a fetchable web address.
#
# string:: the candidate URL.
#
# Returns true only when the value parses as a URI whose scheme is http or
# https AND which names a host. Scheme-only inputs such as "http:" used to
# be accepted even though there is nothing to request. Returns false for
# anything URI.parse rejects.
def uri?(string)
  uri = URI.parse(string)
  %w[http https].include?(uri.scheme) && !uri.host.to_s.empty?
rescue URI::BadURIError, URI::InvalidURIError
  false
end
zenodo_pdf()
click to toggle source
# File lib/terrier/html_data.rb, line 38
# Returns the first https://zenodo.org/...pdf link found in the raw page
# body (@raw), or "" when there is none. The result is memoized in
# @zenodo_pdf.
#
# The host dot is escaped so only the literal "zenodo.org" matches, and
# the link stops at whitespace, quotes and angle brackets so that
# surrounding markup (e.g. href="...pdf">paper.pdf) is not swallowed.
def zenodo_pdf
  @zenodo_pdf ||= %r{\bhttps://zenodo\.org[^\s"'<>]*pdf\b}.match(@raw).to_s
end