class UniversaTools::UNS

Tools for working with UNS contracts, part of the parsec protocol family.

Public Class Methods

reduce(name) click to toggle source

Reduces a string to its glyph archetypes, removing homoglyph similarities and ambiguity.

@param [String] name to reduce
@return [String] reduced name
@raise [ArgumentError] if name contains unprocessable characters

# File lib/universa_tools/uns.rb, line 11
# Reduce a name to its glyph archetypes.
#
# Processing steps:
# 1. downcase, strip, and collapse every run of whitespace/punctuation into a
#    single underscore;
# 2. apply Unicode NFKD normalization;
# 3. translate each character through +xlat1+ (characters without an entry
#    are kept as they are);
# 4. translate each character through +xlat2+; a character without an entry
#    there is rejected.
#
# @param [String] name to reduce
# @return [String] reduced name
# @raise [ArgumentError] if name contains a character that +xlat2+ does not map
def self.reduce(name)
  # NOTE: the '#' must be escaped. An unescaped "#@" followed by non-ASCII
  # characters is parsed by Ruby as interpolation of an instance variable,
  # which silently removed '#', '@', '±' and '§' from the character class.
  separators = /([-_=+()*&^%\#@±§~`<,>\/?'";:{}\[\]]|\s)+/

  # steps 1 and 2: replace separators, then NFKD
  normalized = name.downcase.strip.gsub(separators, '_').unicode_normalize(:nfkd)

  # step 3: XLAT1, unmapped characters pass through unchanged
  first_pass = xlat1
  name = normalized.each_char.map { |ch| first_pass[ch] || ch }.join

  # step 4: reduce to glyph archetype, every character must be known to XLAT2
  archetypes = xlat2
  name.each_char.map do |ch|
    archetypes[ch] || raise(ArgumentError, "illegal character: #{ch.ord}:'#{ch}' in #{name.inspect}")
  end.join
end

Private Class Methods

decode(char) click to toggle source
# File lib/universa_tools/uns.rb, line 41
# Decode one character specification.
#
# The specification is either a code point written as "U+" followed by
# hexadecimal digits, or literal text. Surrounding whitespace is ignored.
#
# @param [String] char specification to decode; it is not modified
# @return [Array(Integer, String)] for the "U+" form, the code point and the
#   corresponding UTF-8 character; otherwise the code point of the first
#   character and the stripped text itself
def self.decode(char)
  # Work on a stripped copy: an in-place strip would alter the caller's
  # string and raise FrozenError when given a frozen one.
  char = char.strip
  if char.start_with?('U+')
    code = char[2..].to_i(16)
    [code, code.chr(Encoding::UTF_8)]
  else
    [char.ord, char]
  end
end
read_xlat(xlat, missing = '') click to toggle source
# File lib/universa_tools/uns.rb, line 51
# Parse a translation table from its textual form.
#
# Each line is processed independently. Everything from the first '#' on is
# discarded and blank lines are skipped. The remaining text is split on
# whitespace into a left part (what to translate) and an optional right part
# (the replacement). The left part may be:
#
# * a range "start:stop", where each end is decoded with +decode+ and every
#   code point from start to stop inclusive gets an entry;
# * a single code point in "U+XXXX" form;
# * any other text, in which case every character of it gets an entry.
#
# @param [String] xlat table source text
# @param [String, Symbol] missing value used when a line has no replacement;
#   the special value +:self+ maps such a character to itself
# @return [Hash{String => String}] character to replacement map
# @raise [StandardError] re-raises any error hit while parsing a line, after
#   printing the offending line to standard output
def self.read_xlat(xlat, missing = '')
  xlat.lines.each_with_object({}) do |line, table|
    begin
      line = line.split('#', 2)[0].strip
      next if line.empty?

      left, right = line.split(/\s+/)
      # One rule for all three forms: explicit replacement, else the default.
      assign = ->(ch) { table[ch] = right || (missing == :self ? ch : missing) }

      if (range = left.match(/\A(.+):(.+)\z/))
        # range of code points
        first_code = decode(range[1])[0]
        last_code = decode(range[2])[0]
        (first_code..last_code).each { |code| assign.call(code.chr(Encoding::UTF_8)) }
      elsif left.start_with?('U+')
        # single character in U+00000 form: key and default are the decoded
        # character, never the "U+..." text itself
        assign.call(decode(left)[1])
      else
        # sequence of characters or a single character
        left.each_char(&assign)
      end
    rescue StandardError => e
      puts "Error in line: #{line.inspect}: #{e}"
      raise
    end
  end
end
xlat1() click to toggle source
# File lib/universa_tools/uns.rb, line 22
# First-stage translation table, built on first use and cached afterwards.
#
# The table is parsed from DEFAULT_XLAT1 by +read_xlat+, with an empty string
# as the replacement for entries that do not specify one.
#
# @return [Hash] the cached table
def self.xlat1
  return @xlat1 if @xlat1

  @xlat1 = read_xlat(DEFAULT_XLAT1, '')
end
xlat2() click to toggle source
# File lib/universa_tools/uns.rb, line 26
# Second-stage translation table, built on first use and cached afterwards.
#
# The base table is parsed from DEFAULT_XLAT2 and then adjusted by every
# entry of the table parsed from DEFAULT_XLAT2_FINALIZER, in order. Both are
# read with +:self+ as the default replacement.
#
# @return [Hash] the cached table
def self.xlat2
  return @xlat2 if @xlat2

  table = read_xlat(DEFAULT_XLAT2, :self)
  read_xlat(DEFAULT_XLAT2_FINALIZER, :self).each_pair do |from, to|
    # Resolve the finalizer while building the table rather than per lookup:
    # every entry that currently maps to +from+ is redirected to +to+ ...
    table.transform_values! { |target| target == from ? to : target }
    # ... and +from+ itself is mapped to +to+ as well.
    table[from] = to
  end
  @xlat2 = table
end