module Stringex::Unidecoder

Constants

CODEPOINTS

Contains Unicode codepoints, loading as needed from YAML files

Public Class Methods

decode(string) click to toggle source

Returns string with its UTF-8 characters transliterated to ASCII ones

You're probably better off just using the added String#to_ascii

   # File lib/stringex/unidecoder.rb
# Transliterates +string+ one character at a time: every character is
# passed through +decoded+ and the results are concatenated into a new
# string.
def decode(string)
  string.each_char.map { |char| decoded(char) }.join
end
encode(codepoint) click to toggle source

Returns the character for the given Unicode codepoint, where the codepoint is given in hexadecimal (e.g. "00e9")

   # File lib/stringex/unidecoder.rb
# Builds the UTF-8 character for +codepoint+, which is interpolated into
# a "0x"-prefixed string and read as a hexadecimal number (so "00e9" and
# "e9" both yield "é").
def encode(codepoint)
  ordinal = "0x#{codepoint}".to_i(16)
  [ordinal].pack("U")
end
get_codepoint(character) click to toggle source

Returns the Unicode codepoint for the given character as a lowercase hexadecimal string, zero-padded to at least four digits

   # File lib/stringex/unidecoder.rb
# Formats the Unicode codepoint of the first character of +character+ as
# lowercase hexadecimal, left-padded with zeros to at least four digits.
def get_codepoint(character)
  ordinal = character.unpack("U").first
  format("%04x", ordinal)
end
in_yaml_file(character) click to toggle source

Returns string indicating which file (and line) contains the transliteration value for the character

   # File lib/stringex/unidecoder.rb
# Describes where the transliteration for +character+ lives, in the form
# "<group>.yml (line <n>)". The group name comes from +code_group+ and
# the line number from +grouped_point+.
def in_yaml_file(character)
  codepoint = character.unpack("U").first
  file_name = code_group(codepoint)
  # NOTE(review): the +2 presumably accounts for a YAML header line plus
  # 1-based line numbering -- confirm against the data files.
  line_number = grouped_point(codepoint) + 2
  "#{file_name}.yml (line #{line_number})"
end

Private Class Methods

code_group(unpacked_character) click to toggle source

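Returns the name of the Unicode codepoint group (an "x" followed by the hexadecimal value of the codepoint shifted right by eight bits) for the given unpacked character, i.e. its integer codepoint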

   # File lib/stringex/unidecoder.rb
# Names the group that +unpacked_character+ (an integer codepoint) falls
# into: the letter "x" followed by the codepoint shifted right by eight
# bits, in lowercase hex padded to at least two digits.
def code_group(unpacked_character)
  high_bits = unpacked_character >> 8
  format("x%02x", high_bits)
end
decoded(character) click to toggle source
   # File lib/stringex/unidecoder.rb
# Transliterates a single character. A truthy result from +localized+
# wins; otherwise the value from +from_yaml+ is returned.
def decoded(character)
  translation = localized(character)
  return translation if translation

  from_yaml(character)
end
from_yaml(character) click to toggle source
   # File lib/stringex/unidecoder.rb
# Looks up the transliteration of +character+ in the CODEPOINTS tables.
#
# ASCII characters (codepoints 0..127) are returned unchanged. For any
# other character the codepoint is split into a group name (+code_group+)
# and an index within that group (+grouped_point+), and the matching
# CODEPOINTS entry is returned.
#
# Any error raised along the way (for example a missing group) is
# swallowed on purpose and "?" is returned instead.
def from_yaml(character)
  # ASCII ends at 127, so codepoint 128 (U+0080) must be looked up too.
  return character if character.ord < 128

  unpacked = character.unpack("U")[0]
  CODEPOINTS[code_group(unpacked)][grouped_point(unpacked)]
rescue
  # Hopefully this won't come up much
  # TODO: Make this note something to the user that is reportable to me perhaps
  "?"
end
grouped_point(unpacked_character) click to toggle source

Returns the index of the given character in the YAML file for its codepoint group

   # File lib/stringex/unidecoder.rb
# Keeps only the low eight bits of +unpacked_character+, giving its
# position (0..255) within its codepoint group.
def grouped_point(unpacked_character)
  low_byte_mask = 0xff
  unpacked_character & low_byte_mask
end
localized(character) click to toggle source
   # File lib/stringex/unidecoder.rb
# Asks Localization for a translation of +character+ under the
# :transliterations scope and returns whatever it answers.
def localized(character)
  scope = :transliterations
  Localization.translate(scope, character)
end