module Uniscribe

Constants

COLORS
EMOJI_VERSION
SUPPORTED_ENCODINGS
UNICODE_VERSION
UNICODE_VERSION_GLYPH_DETECTION
VERSION

Public Class Methods

convert_to_encoding_or_raise(string, encoding) click to toggle source
# File lib/uniscribe.rb, line 44
# Checks that +string+ can be described by uniscribe and returns it.
#
# string   - the String to check; nil or an empty string is rejected
# encoding - optional encoding (name or Encoding object) the bytes should be
#            interpreted as; when nil/false the string's own encoding is used
#
# When +encoding+ is given, a re-tagged copy is returned, so the caller's
# string is never modified and frozen strings are accepted. Without
# +encoding+, +string+ itself is returned.
#
# Raises ArgumentError when no data is given, when the bytes are not valid in
# the resulting encoding, or when that encoding is not listed in
# SUPPORTED_ENCODINGS.
def self.convert_to_encoding_or_raise(string, encoding)
  raise ArgumentError, "no data given to uniscribe" if !string || string.empty?

  # String#force_encoding changes its receiver in place, so work on a copy
  string = string.dup.force_encoding(encoding) if encoding
  encoding_name = string.encoding.name

  case encoding_name
  when *SUPPORTED_ENCODINGS
    unless string.valid_encoding?
      raise ArgumentError, "uniscribe can only describe strings with a valid encoding"
    end

    string
  when 'UTF-16', 'UTF-32'
    # Matched only if these names are not part of SUPPORTED_ENCODINGS
    raise ArgumentError, "uniscribe only supports #{encoding_name} with specified endianness, please use #{encoding_name}LE or #{encoding_name}BE"
  else
    raise ArgumentError, "uniscribe can only describe Unicode strings (or US-ASCII or ISO-8859-1)"
  end
end
determine_codepoint_color(char_info) click to toggle source
# File lib/uniscribe.rb, line 133
# Picks the color used to print a codepoint.
#
# char_info - object answering assigned?, ignorable?, blank?, control?,
#             format?, unicode? and category
#
# Unassigned codepoints get COLORS[:ignorable] or COLORS[:unassigned].
# Assigned ones are checked in this order: blank, control, format, mark
# (category starting with "M"). Anything else gets a random color.
def self.determine_codepoint_color(char_info)
  unless char_info.assigned?
    return char_info.ignorable? ? COLORS[:ignorable] : COLORS[:unassigned]
  end

  return COLORS[:blank]   if char_info.blank?
  return COLORS[:control] if char_info.control?
  return COLORS[:format]  if char_info.format?
  return COLORS[:mark]    if char_info.unicode? && char_info.category[0] == "M"

  random_color
end
determine_codepoint_name(char) click to toggle source
# File lib/uniscribe.rb, line 157
# Returns the name to display for +char+.
#
# The corrected name from Unicode::Name is used when there is one.
# Otherwise the codepoint label is returned, followed by a space and the
# first alias found when looking through the alias kinds in this order:
# control, figment, alternate, abbreviation.
def self.determine_codepoint_name(char)
  corrected = Unicode::Name.correct(char)
  return corrected if corrected

  label = Unicode::Name.label(char)
  aliases = Unicode::Name.aliases(char)
  return label unless aliases

  %i[control figment alternate abbreviation].each do |kind|
    entries = aliases[kind]
    first_alias = entries && entries[0]
    return label + " " + first_alias if first_alias
  end

  label
end
determine_padding(char, composed, wide_ambiguous) click to toggle source
# File lib/uniscribe.rb, line 174
# Returns the tab padding to print after a symbolified character.
#
# char           - the string whose display width is measured
# composed       - truthy adds one column to the measured width
# wide_ambiguous - truthy measures ambiguous-width characters as 2 columns
#                  instead of 1
#
# Widths below 5 yield two tabs, widths from 5 to 9 one tab, anything
# wider an empty string. Negative widths are treated as 0.
def self.determine_padding(char, composed, wide_ambiguous)
  ambiguous_width = wide_ambiguous ? 2 : 1
  width = Unicode::DisplayWidth.of(char, ambiguous_width, {}, emoji: true)
  width += 1 if composed
  width = [width, 0].max

  if width < 5
    "\t\t"
  elsif width < 10
    "\t"
  else
    ""
  end
end
of(string, encoding: nil, wide_ambiguous: false) click to toggle source
# File lib/uniscribe.rb, line 37
# Describes +string+: validates it, transcodes it to UTF-8, splits it into
# extended grapheme clusters (regexp \X) and passes those to visualize.
#
# string         - the data to describe
# encoding       - passed on to convert_to_encoding_or_raise
# wide_ambiguous - passed on to visualize
#
# Returns whatever visualize returns.
def self.of(string, encoding: nil, wide_ambiguous: false)
  utf8 = convert_to_encoding_or_raise(string, encoding).encode("UTF-8")

  visualize(utf8.scan(/\X/), wide_ambiguous: wide_ambiguous)
end
puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false) click to toggle source
# File lib/uniscribe.rb, line 107
# Prints one output row for the codepoint +cp+: its hex value, a tree
# connector, the symbolified character, padding, the connector again and
# the codepoint's name. Hex value, symbol and name are colored via Paint.
#
# cp             - Integer codepoint
# composed       - truthy when the codepoint is printed as a member of a
#                  composition; selects the nested connectors
# last           - truthy for the final member of a composition; only has
#                  an effect on the connector when +composed+ is truthy
# wide_ambiguous - forwarded to determine_padding
def self.puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false)
  char = [cp].pack("U*")
  char_info = UnicodeCharacteristics.new(char)
  color = determine_codepoint_color(char_info)
  # at least four hex digits, right-aligned in a six character column
  hex = cp.to_s(16).upcase.rjust(4, "0").rjust(6)
  symbol = Symbolify.unicode(char, char_info)
  connector =
    if !composed
      "├─"
    elsif last
      "│└─"
    else
      "│├─"
    end
  name = determine_codepoint_name(char)
  padding = determine_padding(symbol, composed, wide_ambiguous)

  painted_hex = Paint[hex, color]
  painted_symbol = Paint[symbol, color]
  painted_name = Paint[name, color]

  puts format(" %s %s %s%s%s %s", painted_hex, connector, painted_symbol, padding, connector, painted_name)
end
puts_composition(cps, wide_ambiguous = false) click to toggle source
# File lib/uniscribe.rb, line 83
# Prints a multi-codepoint glyph: first a header row for the whole
# composition (labelled with its sequence name when Unicode::SequenceName
# knows one), then one nested row per codepoint via puts_codepoint. The
# final codepoint is printed with last = true.
#
# cps            - Array of Integer codepoints forming the glyph
# wide_ambiguous - forwarded to determine_padding and puts_codepoint
def self.puts_composition(cps, wide_ambiguous = false)
  char = cps.pack("U*")
  sequence_name = Unicode::SequenceName.of(char)
  name = sequence_name ? "Composition: #{sequence_name}" : "Composition"
  color = random_color
  symbol = symbolify_composition(char)
  padding = determine_padding(symbol, false, wide_ambiguous)

  # the header row has no codepoint of its own, hence the dashes
  painted_hex = Paint["----", color]
  painted_symbol = Paint[symbol, color]
  painted_name = Paint[name, color]
  puts format("   %s ├┬ %s%s├┬ %s", painted_hex, painted_symbol, padding, painted_name)

  *leading, final = cps
  leading.each do |cp|
    puts_codepoint(cp, true, false, wide_ambiguous)
  end
  puts_codepoint(final, true, true, wide_ambiguous)
end
random_color() click to toggle source
# File lib/uniscribe.rb, line 153
# Returns a random color as six lowercase hex digits (no leading "#").
# Each of the three components is drawn from 60..149.
def self.random_color
  red, green, blue = Array.new(3) { rand(90) + 60 }

  format("%.2x%.2x%.2x", red, green, blue)
end
symbolify_composition(char) click to toggle source
# File lib/uniscribe.rb, line 189
# Returns the printable stand-in for a multi-codepoint glyph.
#
# char - String holding the whole glyph
#
# Checked in this order:
# - any codepoint unassigned             -> "n/a"
# - all codepoints are separators        -> "⏎"
# - all codepoints have category Mn/Me   -> the glyph prefixed with "◌"
#                                           (if an Mn is present) or " "
# - all codepoints are blank             -> the glyph wrapped in "]" and "["
# - otherwise                            -> the glyph itself
def self.symbolify_composition(char)
  infos = char.chars.map { |c| UnicodeCharacteristics.new(c) }

  return "n/a" unless infos.all?(&:assigned?)
  return "⏎" if infos.all?(&:separator?)

  if infos.all? { |info| %w[Mn Me].include?(info.category) }
    has_nonspacing = infos.any? { |info| info.category == "Mn" }
    return (has_nonspacing ? "◌" : " ") + char
  end

  return "]" + char + "[" if infos.all?(&:blank?)

  char
end
visualize(glyphs, wide_ambiguous: false) click to toggle source
# File lib/uniscribe.rb, line 63
# Prints the description of every glyph, framed by a blank line before and
# after.
#
# glyphs         - Array of Strings, one per glyph. Glyphs made of several
#                  codepoints are printed with puts_composition, the others
#                  with puts_codepoint. An empty Array prints only the two
#                  framing blank lines.
# wide_ambiguous - forwarded to the printing methods
def self.visualize(glyphs, wide_ambiguous: false)
  puts

  final_index = glyphs.size - 1
  glyphs.each_with_index do |glyph, index|
    cps = glyph.codepoints
    if cps.size > 1
      puts_composition(cps, wide_ambiguous)
    else
      # only the final glyph is flagged as last
      puts_codepoint(cps[0], false, index == final_index, wide_ambiguous)
    end
  end

  puts
end