class Relaton::Cli::RelatonFile

Attributes

options[R]
outdir[R]
outfile[R]
source[R]

Public Class Methods

concatenate(source, outfile, options = {}) click to toggle source

Concatenate files

This interface expects us to provide a source directory, an output file and custom configuration options. Normally, it expects the source directory to contain RXL files, but it also converts any YAML files to RXL and then finally combines them all together.

This interface also allows us to provide options such as title and organization, and it then uses those details to generate the collection file.

@param source [Dir] The source directory for files @param outfile [String] The collection output file @param options [Hash] Options as hash key value pair

# File lib/relaton/cli/relaton_file.rb, line 57
# Builds an instance for +source+ with +outfile+ merged into the options
# (the caller's hash is not modified) and runs #concatenate on it.
#
# @param source [String] the source directory for files
# @param outfile [String] the collection output file
# @param options [Hash] options as hash key value pair
# @return whatever the instance's #concatenate returns
def self.concatenate(source, outfile, options = {})
  merged_options = options.merge(outfile: outfile)
  new(source, merged_options).concatenate
end
extract(source, outdir, options = {}) click to toggle source

Extract files

This interface expects us to provide a source file / directory, an output directory and custom configuration options. It will then extract the Relaton XML file / files from the source file / directory to the output directory. During this process it will use custom options when available.

@param source [Dir] The source directory for files @param outdir [Dir] The output directory for files @param options [Hash] Options as hash key value pair

# File lib/relaton/cli/relaton_file.rb, line 38
# Builds an instance for +source+ with +outdir+ merged into the options
# (the caller's hash is not modified) and runs #extract on it.
#
# @param source [String] the source file or directory
# @param outdir [String] the output directory for files
# @param options [Hash] options as hash key value pair
# @return whatever the instance's #extract returns
def self.extract(source, outdir, options = {})
  merged_options = options.merge(outdir: outdir)
  new(source, merged_options).extract
end
new(source, options = {}) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 7
# Stores the source and options, and caches the :outdir and :outfile
# option values (nil when the key is absent).
#
# @param source [String] source file or directory
# @param options [Hash] options as hash key value pair
def initialize(source, options = {})
  @source, @options = source, options
  @outdir, @outfile = %i[outdir outfile].map { |key| options.fetch(key, nil) }
end
split(source, outdir = nil, options = {}) click to toggle source

Split collection

This interface expects us to provide a Relaton Collection file and also an output directory, then it will split that collection into multiple files.

By default it uses the `rxl` extension for these new files, but we can also customize that by providing the desired one as the `extension` option parameter.

@param source [File] The source collection file @param outdir [Dir] The output directory for files @param options [Hash] Options as hash key value pair

# File lib/relaton/cli/relaton_file.rb, line 75
# Builds an instance for +source+ with +outdir+ merged into the options
# (the caller's hash is not modified) and runs #split on it.
#
# @param source [String] the source collection file
# @param outdir [String, nil] the output directory for files
# @param options [Hash] options as hash key value pair
# @return whatever the instance's #split returns
def self.split(source, outdir = nil, options = {})
  merged_options = options.merge(outdir: outdir)
  new(source, merged_options).split
end

Public Instance Methods

concatenate() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 18
# Public entry point for concatenation; delegates all work to
# concatenate_and_write_to_files and returns its result.
def concatenate
  concatenate_and_write_to_files
end
extract() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 14
# Public entry point for extraction; delegates all work to
# extract_and_write_to_files and returns its result.
def extract
  extract_and_write_to_files
end
split() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 22
# Public entry point for splitting a collection; delegates all work to
# split_and_write_to_files and returns its result.
def split
  split_and_write_to_files
end

Private Instance Methods

bibcollection() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 83
# Builds the Bibcollection to be written: title, doctype and author come
# from the :title, :doctype and :organization options, and the items are
# whatever concatenate_files returns.
def bibcollection
  attributes = {
    title: options[:title],
    items: concatenate_files,
    doctype: options[:doctype],
    author: options[:organization],
  }

  Bibcollection.new(**attributes)
end
bibdata_instance(document, file) click to toggle source

@param document [Nokogiri::XML::Document] @param file [String] path to file @return [Relaton::Bibdata]

# File lib/relaton/cli/relaton_file.rb, line 194
# Parses a bibdata object out of +document+ and, when parsing yields an
# object, records the sibling files found next to +file+ on it.
#
# @param document [Nokogiri::XML::Document]
# @param file [String] path to file
# @return [Relaton::Bibdata] the value returned by Relaton::Bibdata.from_xml
def bibdata_instance(document, file)
  cleaned = clean_nokogiri_document(document)

  Relaton::Bibdata.from_xml(cleaned.root).tap do |bibdata|
    build_bibdata_relaton(bibdata, file) if bibdata
  end
end
build_bibdata_relaton(bibdata, file) click to toggle source

@param bibdata [Relaton::Bibdata] @param file [String] path to file

# File lib/relaton/cli/relaton_file.rb, line 204
# For each known extension, checks whether a file with the same path as
# +file+ but that extension exists and, if so, assigns its path to the
# matching writer on +bibdata+ (xml=, pdf=, doc=, html=, rxl=, txt=).
#
# @param bibdata [Relaton::Bibdata]
# @param file [String] path to file
def build_bibdata_relaton(bibdata, file)
  base = Pathname.new(file)

  %w[xml pdf doc html rxl txt].each do |type|
    candidate = base.sub_ext(".#{type}")
    next unless File.file?(candidate)

    bibdata.send("#{type}=", candidate.to_s)
  end
end
build_dirname(filename) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 252
# Derives a directory name from +filename+ and makes sure it exists.
#
# The base name has a trailing ".xml" or ".rxl" extension removed, is run
# through sanitize_string, and a directory of that name is created
# (relative to the current working directory) unless something with that
# name already exists.
#
# @param filename [String] path of the source collection file
# @return [String] the sanitized directory name
def build_dirname(filename)
  # Escape the dot and anchor at the end so only a real extension is
  # stripped; "xml"/"rxl" occurring inside the name must be kept.
  basename = File.basename(filename).sub(/\.(xml|rxl)\z/, "")
  directory_name = sanitize_string(basename)
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  Dir.mkdir(directory_name) unless File.exist?(directory_name)

  directory_name
end
build_filename(file, identifier = nil, ext = "rxl") click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 260
# Builds an output file name of the form "<sanitized stem>.<extension>".
#
# The stem is +identifier+ when given, otherwise the base name of +file+
# without a ".xml" suffix. The extension is options[:extension] when set,
# otherwise +ext+.
#
# @param file [String, nil] path the name is derived from
# @param identifier [String, nil] explicit stem
# @param ext [String] fallback extension
# @return [String]
def build_filename(file, identifier = nil, ext = "rxl")
  stem = identifier || File.basename(file.to_s, ".xml")
  sanitized = sanitize_string(stem)
  extension = options[:extension] || ext

  "#{sanitized}.#{extension}"
end
clean_nokogiri_document(document) click to toggle source

Force a namespace otherwise Nokogiri won't parse. The reason is we use Bibcollection's from_xml, but that one has an xmlns. We don't want to change the code for bibdata, hence this hack: bibdata_doc.root['xmlns'] = "xmlns"

@param document [Nokogiri::XML::Document] @return [Nokogiri::XML::Document]

# File lib/relaton/cli/relaton_file.rb, line 218
# Strips all namespaces from +document+ (in place), adds a default
# namespace "xmlns" to its root, then re-parses the serialized XML into a
# new document.
#
# @param document [Nokogiri::XML::Document]
# @return [Nokogiri::XML::Document]
def clean_nokogiri_document(document)
  document.remove_namespaces!

  root = document.root
  root.add_namespace(nil, "xmlns")

  serialized = document.to_xml
  nokogiri_document(serialized)
end
concatenate_and_write_to_files() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 156
# Builds the collection, serializes it with the method chosen by
# output_type and writes the result via write_to_file (no directory or
# file name is passed, so write_to_file falls back to +outfile+).
def concatenate_and_write_to_files
  collection = bibcollection
  serializer = output_type

  write_to_file(collection.send(serializer))
end
concatenate_files() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 139
# Collects bibdata objects for every RXL, YAML and XML source file.
#
# Each file's XML is parsed; a document with an <rfc> root is fetched through
# RelatonIetf::Scrapper and re-parsed as bibdata, a document whose root
# is <bibitem> or <bibdata> is used directly, and anything else is
# skipped.
#
# @return [Array] results of bibdata_instance, in source order
def concatenate_files
  sources = [convert_rxl_to_xml, convert_yamls_to_xml, convert_xml_to_xml].flatten

  sources.each_with_object([]) do |entry, items|
    doc = nokogiri_document(entry[:content])
    rfc = doc.at("/rfc")

    if rfc
      # Loaded lazily: only needed when an RFC document is encountered.
      require "relaton_ietf/scrapper"
      fetched = RelatonIetf::Scrapper.fetch_rfc(rfc)
      bibdata_doc = nokogiri_document(fetched.to_xml(bibdata: true))
      items << bibdata_instance(bibdata_doc, entry[:file])
    elsif %w[bibitem bibdata].include?(doc.root&.name)
      items << bibdata_instance(doc, entry[:file])
    end
  end
end
convert_rxl_to_xml() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 224
# Reads every file matched by select_files_with("{rxl}") as UTF-8.
#
# @return [Array<Hash>] one { file:, content: } hash per file
def convert_rxl_to_xml
  select_files_with("{rxl}").map do |path|
    contents = File.read(path, encoding: "utf-8")
    { file: path, content: contents }
  end
end
convert_xml_to_xml() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 236
# Reads every file matched by select_files_with("{xml}") as UTF-8.
#
# @return [Array<Hash>] one { file:, content: } hash per file
def convert_xml_to_xml
  select_files_with("{xml}").map do |path|
    contents = File.read(path, encoding: "utf-8")
    { file: path, content: contents }
  end
end
convert_yamls_to_xml() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 230
# Converts every file matched by select_files_with("yaml") through
# YAMLConvertor.to_xml with write: false.
#
# @return [Array<Hash>] one { file:, content: } hash per file
def convert_yamls_to_xml
  select_files_with("yaml").map do |path|
    converted = YAMLConvertor.to_xml(path, write: false)
    { file: path, content: converted }
  end
end
extract_and_write_to_files() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 112
# Extracts one bibdata file per source file into +outdir+.
#
# For each source file the namespaces are stripped, then the first
# <bibdata> element is used if present; otherwise the first <rfc> element
# is fetched through RelatonIetf::Scrapper. Files with neither are
# skipped, as are files for which Relaton::Bibdata.from_xml returns
# nothing.
def extract_and_write_to_files
  select_source_files.each do |file|
    xml = nokogiri_document(nil, file)
    xml.remove_namespaces!

    bib =
      if (bibdata_node = xml.at("//bibdata"))
        nokogiri_document(bibdata_node.to_xml)
      elsif (rfc = xml.at("//rfc"))
        # Loaded lazily: only needed when an RFC document is encountered.
        require "relaton_ietf/scrapper"
        fetched = RelatonIetf::Scrapper.fetch_rfc(rfc)
        nokogiri_document(fetched.to_xml(bibdata: true))
      end
    next unless bib

    # Same namespace normalisation as clean_nokogiri_document, minus the
    # re-parse.
    bib.remove_namespaces!
    bib.root.add_namespace(nil, "xmlns")

    bibdata = Relaton::Bibdata.from_xml(bib.root)
    next unless bibdata

    build_bibdata_relaton(bibdata, file)
    write_to_file(bibdata.send(output_type), outdir, build_filename(file))
  end
end
find_available_bibrxl_file(name, _ouputdir, content) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 170
# When the output extension is YAML ("yaml" or "yml"), looks in
# +outputdir+ for a file named like +name+ but with a ".rxl" extension
# and, if one exists, stores its path in content.bib_rxl.
#
# The lookup uses the directory passed by the caller rather than the
# +outdir+ reader, so it also works when the caller derived the directory
# itself because no +outdir+ option was given.
#
# @param name [String] file name of the item being written
# @param outputdir [String, nil] directory the item is written to
# @param content [#bib_rxl=] collection item to annotate
def find_available_bibrxl_file(name, outputdir, content)
  return unless %w[yaml yml].include?(options[:extension])

  # compact so a nil directory yields a relative path, not "/name".
  candidate = [outputdir, name].compact.join("/")
  bib_rxl = Pathname.new(candidate).sub_ext(".rxl")
  content.bib_rxl = bib_rxl.to_s if File.file?(bib_rxl)
end
nokogiri_document(document, file = nil) click to toggle source

@param document [String] XML @param file [String, nil] path to file @return [Nokogiri::XML::Document]

# File lib/relaton/cli/relaton_file.rb, line 95
# Parses XML into a Nokogiri document. When +document+ is nil (or false)
# the XML is read from +file+ as UTF-8 instead.
#
# @param document [String, nil] XML
# @param file [String, nil] path to file
# @return [Nokogiri::XML::Document]
def nokogiri_document(document, file = nil)
  xml = document || File.read(file, encoding: "utf-8")
  Nokogiri.XML(xml)
end
output_type(ext = options[:extension]) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 177
# Maps an extension to the serializer method name to call.
#
# When +ext+ is not given it defaults to options[:extension]; if that is
# unset too and +outfile+ is present, the extension of +outfile+ is used.
# Unsupported extensions print a notice to standard output and fall back
# to :to_xml.
#
# @param ext [String, nil]
# @return [Symbol] :to_xml or :to_yaml
def output_type(ext = options[:extension])
  ext = File.extname(outfile)[1..-1] if outfile && !ext

  return :to_xml if %w[rxl xml].include?(ext)
  return :to_yaml if %w[yml yaml].include?(ext)

  puts "[relaton-cli] the given extension of '#{ext}' is "\
  "not supported. Use 'rxl'."
  :to_xml
end
relaton_collection() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 108
# Parses +source+ into a Bibcollection on first use and memoizes the
# result in @relaton_collection.
def relaton_collection
  return @relaton_collection if @relaton_collection

  @relaton_collection = Bibcollection.from_xml(nokogiri_document(nil, source))
end
replace_bad_characters(string) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 270
# Lower-cases +string+ and replaces each of the characters
# / \ ? % * : | " < > . and space with "-".
#
# @param string [String]
# @return [String]
def replace_bad_characters(string)
  forbidden = Regexp.union("/", "\\", "?", "%", "*", ":", "|", '"', "<", ">", ".", " ")
  string.downcase.gsub(forbidden, "-")
end
sanitize_string(string) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 265
# Lower-cases +string+, passes it through replace_bad_characters, removes
# whitespace at the start and end of each line and turns every remaining
# whitespace run into a single "-".
#
# @param string [String]
# @return [String]
def sanitize_string(string)
  lowered = replace_bad_characters(string.downcase)

  substitutions = [[/^\s+/, ""], [/\s+$/, ""], [/\s+/, "-"]]
  substitutions.reduce(lowered) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
select_files_with(extension) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 242
# Recursively globs +source+ for "*.<extension>" entries and drops any
# match that is a directory.
#
# @param extension [String] extension or glob fragment such as "{xml}"
# @return [Array<String>] matching file paths
def select_files_with(extension)
  pattern = File.join(source, "**", "*.#{extension}")
  Dir.glob(pattern).reject { |path| File.directory?(path) }
end
select_source_files() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 100
# Returns the files to process: +source+ itself when it is a regular
# file, otherwise every "xml" file found by select_files_with.
#
# @return [Array<String>]
def select_source_files
  File.file?(source) ? [source] : select_files_with("xml")
end
split_and_write_to_files() click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 160
# Writes every item of the source collection to its own file.
#
# The target directory is +outdir+, or one derived from +source+ by
# build_dirname when +outdir+ is not set. Each file is named after the
# item's docidentifier and serialized with the method chosen by
# output_type.
def split_and_write_to_files
  target_dir = outdir || build_dirname(source)

  relaton_collection.items.each do |item|
    filename = build_filename(nil, item.docidentifier)
    find_available_bibrxl_file(filename, target_dir, item)

    serialized = item.send(output_type)
    write_to_file(serialized, target_dir, filename)
  end
end
write_to_file(content, directory = nil, output_file = nil) click to toggle source
# File lib/relaton/cli/relaton_file.rb, line 247
# Writes +content+ as UTF-8 to "<directory>/<output_file>".
#
# +output_file+ falls back to +outfile+ when nil, and a nil +directory+
# is simply left out of the path.
#
# @param content [String] data to write
# @param directory [String, nil] target directory
# @param output_file [String, nil] target file name
# @return [Integer] number of bytes written
def write_to_file(content, directory = nil, output_file = nil)
  target = output_file || outfile
  path = [directory, target].compact.join("/")

  File.write(path, content, mode: "w:utf-8")
end