class RelatonCalconnect::DataFetcher

Relaton-calconnect data fetcher

Constants

ENDPOINT

DOMAIN = "standards.calconnect.org/"
SCHEME, HOST = DOMAIN.split(%r{:?/?/})

Public Class Methods

fetch(output: "data", format: "yaml") click to toggle source
# File lib/relaton_calconnect/data_fetcher.rb, line 21
# Run a complete fetch and report start time, stop time and duration on
# standard output.
#
# Creates the output directory when it does not exist yet, then builds a
# fetcher instance and runs it.
#
# @param output [String] directory the fetched documents are written to
# @param format [String] output format handed to the fetcher instance
def self.fetch(output: "data", format: "yaml")
  started = Time.now
  puts "Started at: #{started}"
  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  new(output, format).fetch
  finished = Time.now
  puts "Stopped at: #{finished}"
  elapsed = (finished - started).round
  puts "Done in: #{elapsed} sec."
end
new(output, format) click to toggle source

DATADIR = "data"
DATAFILE = File.join DATADIR, "bibliography.yml"
ETAGFILE = File.join DATADIR, "etag.txt"

# File lib/relaton_calconnect/data_fetcher.rb, line 15
# Set up a fetcher that works inside the +output+ directory.
#
# @param output [String] directory for generated files; the ETag file
#   ("etag.txt") is kept in the same directory
# @param format [String] output format
def initialize(output, format)
  @output = output
  @format = format
  @etagfile = File.join(output, "etag.txt")
end

Public Instance Methods

fetch() click to toggle source

Fetch data from the server and save it to a file.

# File lib/relaton_calconnect/data_fetcher.rb, line 34
# Request the document index from ENDPOINT and write every listed document.
#
# The stored ETag is sent in the "If-None-Match" header. Any response whose
# status is not 200 ends the run without touching the files. The new ETag is
# saved only when every document was processed successfully.
def fetch
  connection = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag })
  resp = connection.get
  # nothing has changed since the last fetch
  return unless resp.status == 200

  items = YAML.safe_load(resp.body)["root"]["items"]
  # map first, check afterwards: every document is attempted even when an
  # earlier one has failed
  outcomes = items.map { |doc| parse_page doc }
  self.etag = resp[:etag] if outcomes.all?
end

Private Instance Methods

etag() click to toggle source

Read ETag from file

@return [String, NilClass]

# File lib/relaton_calconnect/data_fetcher.rb, line 81
# Return the ETag stored in the ETag file, memoized after the first
# successful read.
#
# @return [String, NilClass] file content, or nil when the file is missing
def etag
  @etag ||= (File.read(@etagfile, encoding: "UTF-8") if File.exist?(@etagfile))
end
etag=(e_tag) click to toggle source

Save ETag to file

@param e_tag [String]

# File lib/relaton_calconnect/data_fetcher.rb, line 89
# Save the ETag to the ETag file and keep the in-memory copy in sync.
#
# @param e_tag [String] ETag value to store
def etag=(e_tag)
  File.write @etagfile, e_tag, encoding: "UTF-8"
  # The reader memoizes its value in @etag; refresh it so that a later read
  # on this instance does not return the previous ETag.
  @etag = e_tag
end
parse_page(doc) click to toggle source

Parse document and write it to file

@param [Hash] doc

# File lib/relaton_calconnect/data_fetcher.rb, line 55
# Parse one document and write it to a file.
#
# Any StandardError raised while parsing or writing is reported and turned
# into a +false+ result, so one bad document does not stop the others.
#
# @param doc [Hash] document entry; its ID is read from doc["docid"]["id"]
# @return [Boolean] true when the document was written, false on error
def parse_page(doc)
  bib = Scrapper.parse_page doc
  # bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
  write_doc doc["docid"]["id"], bib
  true
rescue StandardError => e
  # The failure may itself be a missing or malformed "docid", so look the ID
  # up defensively: raising again here would escape this handler and abort
  # the whole run instead of returning false.
  docid = begin
    doc["docid"]["id"]
  rescue StandardError
    nil
  end
  warn "Document: #{docid || '(unknown)'}"
  warn e.message
  puts e.backtrace
  false
end
write_doc(docid, bib) click to toggle source
# File lib/relaton_calconnect/data_fetcher.rb, line 67
# Serialize a bibliographic item and store it in the output directory.
#
# The file name is the upper-cased document ID with slashes, whitespace and
# colons replaced by underscores, followed by the format as extension.
# An already existing file is overwritten.
#
# @param docid [String] document identifier
# @param bib [#to_xml, #to_hash] item to serialize
def write_doc(docid, bib)
  serialized =
    if @format == "xml"
      bib.to_xml(bibdata: true)
    else
      bib.to_hash.to_yaml
    end
  basename = docid.upcase.gsub(%r{[/\s:]}, '_')
  path = File.join(@output, "#{basename}.#{@format}")
  File.write(path, serialized, encoding: "UTF-8")
end