class RelatonCalconnect::DataFetcher
Relaton-calconnect data fetcher
Constants
- ENDPOINT
DOMAIN = "standards.calconnect.org/"
SCHEME, HOST = DOMAIN.split(%r{:?/?/})
Public Class Methods
fetch(output: "data", format: "yaml")
click to toggle source
# File lib/relaton_calconnect/data_fetcher.rb, line 21
#
# Create the output directory if needed, run a fetch with a new instance,
# and print start/stop timestamps plus the elapsed time to stdout.
#
# @param output [String] directory the documents are written to
# @param format [String] passed to the instance; write_doc treats "xml"
#   specially and uses the value as the file extension
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # The duration is taken from the monotonic clock so that a wall-clock
  # adjustment during a long fetch cannot skew the reported number.
  started_tick = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"

  # mkdir_p is already a no-op for an existing directory, so no
  # Dir.exist? guard (and no check-then-act window) is needed.
  FileUtils.mkdir_p output
  new(output, format).fetch

  stopped_at = Time.now
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_tick
  puts "Stopped at: #{stopped_at}"
  puts "Done in: #{elapsed.round} sec."
end
new(output, format)
click to toggle source
DATADIR = "data"
DATAFILE = File.join DATADIR, "bibliography.yml"
ETAGFILE = File.join DATADIR, "etag.txt"
# File lib/relaton_calconnect/data_fetcher.rb, line 15
#
# Store the output directory and format, and derive the path of the
# ETag cache file ("etag.txt" inside the output directory).
#
# @param output [String] directory the documents are written to
# @param format [String] output format / file extension
def initialize(output, format)
  @output, @format = output, format
  @etagfile = File.join(@output, "etag.txt")
end
Public Instance Methods
fetch()
click to toggle source
Fetch data from the server and save it to files.
# File lib/relaton_calconnect/data_fetcher.rb, line 34
#
# Request ENDPOINT with the stored ETag in the If-None-Match header,
# hand every entry under root/items of the YAML response to parse_page,
# and store the response's ETag only if every entry succeeded.
def fetch
  connection = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag })
  resp = connection.get
  # Any status other than 200 is treated as "nothing new since last fetch".
  return if resp.status != 200

  index = YAML.safe_load(resp.body)
  # map first, then all?: every entry is processed even after a failure.
  outcomes = index["root"]["items"].map { |doc| parse_page(doc) }
  self.etag = resp[:etag] if outcomes.all?
end
Private Instance Methods
etag()
click to toggle source
Read ETag from file
@return [String, NilClass]
# File lib/relaton_calconnect/data_fetcher.rb, line 81
#
# Read the ETag from the cache file, memoizing it in @etag.
# A missing file yields nil; since nil is not memoized, the file is
# checked again on the next call.
#
# @return [String, NilClass]
def etag
  return @etag if @etag

  @etag = (File.read(@etagfile, encoding: "UTF-8") if File.exist?(@etagfile))
end
etag=(e_tag)
click to toggle source
Save ETag to file
@param e_tag [String]
# File lib/relaton_calconnect/data_fetcher.rb, line 89
#
# Write the ETag to the cache file as UTF-8, replacing any previous
# content. The memoized @etag value is not touched.
#
# @param e_tag [String]
def etag=(e_tag)
  File.open(@etagfile, "w", encoding: "UTF-8") { |io| io.write(e_tag) }
end
parse_page(doc)
click to toggle source
Parse document and write it to file
@param [Hash] doc
# File lib/relaton_calconnect/data_fetcher.rb, line 55
#
# Parse one index entry with Scrapper and write the result to a file.
# Any StandardError is reported and turned into a false return value so
# the caller can carry on with the remaining entries.
#
# @param doc [Hash] index entry; its id is read from doc["docid"]["id"]
# @return [Boolean] true when the document was written, false on error
def parse_page(doc)
  bib = Scrapper.parse_page doc
  # bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
  write_doc doc["docid"]["id"], bib
  true
rescue StandardError => e
  # The entry itself may be what is malformed, so look the id up
  # defensively: an error raised here would escape this handler and abort
  # the whole run instead of just reporting this one entry.
  docid = doc["docid"]["id"] if doc.is_a?(Hash) && doc["docid"].is_a?(Hash)
  warn "Document: #{docid || doc.inspect}"
  warn e.message
  puts e.backtrace
  false
end
write_doc(docid, bib)
click to toggle source
# File lib/relaton_calconnect/data_fetcher.rb, line 67
#
# Serialize a bibliographic item and write it into the output directory.
# The file name is the upper-cased docid with slashes, whitespace and
# colons replaced by underscores, plus the format as extension.
# An existing file is overwritten.
#
# @param docid [String] document identifier
# @param bib [#to_xml, #to_hash] item to serialize
def write_doc(docid, bib)
  serialized =
    if @format == "xml"
      bib.to_xml(bibdata: true)
    else
      bib.to_hash.to_yaml
    end
  basename = docid.upcase.gsub(%r{[/\s:]}, "_")
  path = File.join(@output, "#{basename}.#{@format}")
  File.write(path, serialized, encoding: "UTF-8")
end