module Wanakana
Constants
- FOUR_CHARACTER_EDGE_CASES
- HIRAGANA_END
- HIRAGANA_START
- J_TO_R
- KATAKANA_END
- KATAKANA_START
- LOWERCASE_END
- LOWERCASE_FULLWIDTH_END
- LOWERCASE_FULLWIDTH_START
- LOWERCASE_START
Based on wanakana.js version 1.3.7.
- R_TO_J
- UPPERCASE_END
- UPPERCASE_FULLWIDTH_END
- UPPERCASE_FULLWIDTH_START
- UPPERCASE_START
- VERSION
Public Class Methods
default_options(opt=nil)
click to toggle source
# File lib/wanakana.rb, line 29
# Returns the module-wide default options.
#
# When +opt+ is truthy it is first combined with the current defaults
# (via +extend+) and the result is stored as the new defaults.
def self.default_options(opt=nil)
  return @@default_options unless opt

  @@default_options = extend(opt, @@default_options)
end
is_hiragana?(input)
click to toggle source
# File lib/wanakana.rb, line 34
# True when every character of +input+ is hiragana or matches the
# whitespace / punctuation / full-width / CJK-symbol character class below.
# An empty string therefore yields true.
def self.is_hiragana?(input)
  filler = /[\s[:punct:][\uFF01-\uFF5E][\u3000-\u303F]]/
  input.split('').all? do |c|
    is_char_hiragana?(c) || c.match(filler)
  end
end
is_kana?(input)
click to toggle source
# File lib/wanakana.rb, line 46
# True when every character of +input+ is hiragana or katakana
# (no allowance is made here for punctuation or whitespace).
def self.is_kana?(input)
  input.split('').all? do |c|
    is_char_hiragana?(c) || is_char_katakana?(c)
  end
end
is_katakana?(input)
click to toggle source
# File lib/wanakana.rb, line 40
# True when no character of +input+ falls outside katakana and the
# whitespace / punctuation / full-width / CJK-symbol character class below.
def self.is_katakana?(input)
  offenders = input.split('').reject do |c|
    is_char_katakana?(c) || c.match(/[\s[:punct:][\uFF01-\uFF5E][\u3000-\u303F]]/)
  end
  offenders.empty?
end
is_romaji?(input)
click to toggle source
# File lib/wanakana.rb, line 52
# True when +input+ contains no hiragana and no katakana characters.
# Note that this is a "contains no kana" test, not a Latin-alphabet test.
def self.is_romaji?(input)
  input.split('').none? do |c|
    is_char_hiragana?(c) || is_char_katakana?(c)
  end
end
to_hiragana(input, options={})
click to toggle source
# File lib/wanakana.rb, line 58
# Converts +input+ to hiragana.
#
# Kana-free input goes through the romaji converter; input accepted by
# +is_katakana?+ is shifted to hiragana; anything else (e.g. mixed scripts)
# is returned unchanged.
def self.to_hiragana(input, options={})
  if is_romaji?(input)
    romaji_to_hiragana(input, options)
  elsif is_katakana?(input)
    katakana_to_hiragana(input)
  else
    input
  end
end
to_kana(input, options={})
click to toggle source
# File lib/wanakana.rb, line 73
# Public entry point for romaji -> kana conversion; delegates straight to
# +romaji_to_kana+ with case handling left enabled (its +ignore_case+
# argument keeps its default).
def self.to_kana(input, options={})
  romaji_to_kana(input, options)
end
to_katakana(input, options={})
click to toggle source
# File lib/wanakana.rb, line 64
# Converts +input+ to katakana.
#
# Input accepted by +is_hiragana?+ is shifted directly; kana-free input is
# first turned into hiragana and then shifted; anything else is returned
# unchanged.
def self.to_katakana(input, options={})
  return hiragana_to_katakana(input) if is_hiragana?(input)
  return input unless is_romaji?(input)

  hiragana_to_katakana(romaji_to_hiragana(input, options))
end
to_romaji(input, options={})
click to toggle source
# File lib/wanakana.rb, line 77
# Public entry point for kana -> romaji conversion; delegates to
# +hiragana_to_romaji+, which also handles katakana chunks.
def self.to_romaji(input, options={})
  hiragana_to_romaji(input, options)
end
Protected Class Methods
convert_full_width_chars_to_ASCII(string)
click to toggle source
# File lib/wanakana.rb, line 125
# Replaces full-width Latin letters in +string+ with their ASCII
# counterparts and returns the resulting string.
#
# Characters inside the LOWERCASE_FULLWIDTH / UPPERCASE_FULLWIDTH ranges are
# shifted to the LOWERCASE / UPPERCASE ranges; every other character is
# passed through untouched. (Previously the block had no +else+ branch, so
# all non-full-width characters became +nil+ and vanished from the result.)
def self.convert_full_width_chars_to_ASCII(string)
  converted = string.each_char.map do |char|
    code = char.ord
    if is_char_in_range?(char, Wanakana::LOWERCASE_FULLWIDTH_START, Wanakana::LOWERCASE_FULLWIDTH_END)
      [code - Wanakana::LOWERCASE_FULLWIDTH_START + Wanakana::LOWERCASE_START].pack('U*')
    elsif is_char_in_range?(char, Wanakana::UPPERCASE_FULLWIDTH_START, Wanakana::UPPERCASE_FULLWIDTH_END)
      [code - Wanakana::UPPERCASE_FULLWIDTH_START + Wanakana::UPPERCASE_START].pack('U*')
    else
      # Not a full-width letter: keep the character as-is.
      char
    end
  end
  converted.join('')
end
extend(target, source)
click to toggle source
Adds default values to an options hash.
# File lib/wanakana.rb, line 84
# Fills in missing option keys from a hash of defaults.
#
# target - options hash supplied by the caller (may be nil or empty).
# source - hash of default values.
#
# Returns +source+ itself when +target+ is nil/false or empty. Otherwise
# returns a NEW hash containing every key of +source+, with values from
# +target+ taking precedence (a key present in +target+ wins even when its
# value is nil). Neither argument is modified, so frozen option hashes are
# accepted and the caller's hash is never aliased as the stored defaults.
def self.extend(target, source)
  return source if !target || target.empty?

  source.merge(target)
end
hiragana_to_katakana(hira)
click to toggle source
# File lib/wanakana.rb, line 151
# Shifts every hiragana character of +hira+ into the katakana block by
# adding the distance between the two block starts; all other characters
# are copied through unchanged.
def self.hiragana_to_katakana(hira)
  offset = Wanakana::KATAKANA_START - Wanakana::HIRAGANA_START
  shifted = hira.split('').map do |char|
    next char unless is_char_hiragana?(char)

    [char[0].ord + offset].pack('U*')
  end
  shifted.join('')
end
hiragana_to_romaji(hira, options)
click to toggle source
# File lib/wanakana.rb, line 164
# Transliterates kana in +hira+ to romaji using the J_TO_R table.
#
# The string is scanned left to right. At each position a 2-character chunk
# is tried first, then a 1-character chunk; the first chunk found in J_TO_R
# is emitted. Chunks accepted by +is_katakana?+ are converted to hiragana
# before lookup and upcased afterwards when the
# :convertKatakanaToUppercase option is set. A lone small tsu ("っ") that is
# not the last character emits nothing itself and doubles the first letter
# of the next romaji piece. Characters with no table entry are copied as-is.
def self.hiragana_to_romaji(hira, options)
  options = extend(options, @@default_options)
  total = hira.length
  pieces = []
  pos = 0
  width = 0
  piece = nil
  romaji = nil
  double_next = false
  from_katakana = false

  until pos >= total
    width = [2, total - pos].min
    until width <= 0
      piece = hira.slice(pos, width)
      if is_katakana?(piece)
        from_katakana = true
        piece = katakana_to_hiragana(piece)
      end

      # Small tsu: remember to double the next consonant, emit nothing now.
      # (The katakana flag is intentionally left untouched on this path.)
      if piece[0] == "っ" && width == 1 && pos < (total - 1)
        double_next = true
        romaji = ''
        break
      end

      romaji = Wanakana::J_TO_R[piece.to_sym]
      if romaji
        if double_next
          romaji = romaji[0].concat(romaji)
          double_next = false
        end
        if from_katakana && options[:convertKatakanaToUppercase]
          romaji = romaji.upcase
        end
      end
      from_katakana = false
      break if romaji

      width -= 1
    end

    # No table entry at any width: fall back to the last (1-char) chunk.
    romaji ||= piece
    pieces.push(romaji)
    pos += width > 0 ? width : 1
  end

  pieces.join('')
end
is_char_consonant?(char, includeY=true)
click to toggle source
# File lib/wanakana.rb, line 103
# Reports whether the first character of +char+ is a Latin consonant
# (case-insensitive).
#
# char     - String to inspect; nil/false and "" yield false.
# includeY - when true (default) "y" counts as a consonant.
#
# Always returns true or false (the original leaked the Integer/nil result
# of =~, which is truthy-compatible but not a real boolean).
def self.is_char_consonant?(char, includeY=true)
  return false unless char

  first = char.downcase[0]
  return false unless first

  pattern = includeY ? /[bcdfghjklmnpqrstvwxyz]/ : /[bcdfghjklmnpqrstvwxz]/
  !!(first =~ pattern)
end
is_char_hiragana?(char)
click to toggle source
# File lib/wanakana.rb, line 113
# True when the first character of +char+ has a code point between
# HIRAGANA_START and HIRAGANA_END (inclusive).
def self.is_char_hiragana?(char)
  is_char_in_range?(
    char,
    Wanakana::HIRAGANA_START,
    Wanakana::HIRAGANA_END
  )
end
is_char_in_range?(char, start, finish)
click to toggle source
# File lib/wanakana.rb, line 92
# Reports whether the code point of the first character of +char+ lies
# within start..finish (both ends inclusive).
#
# char   - String to inspect; nil and "" yield false instead of raising,
#          matching the nil guards in is_char_vowel? / is_char_consonant?.
# start  - lowest accepted code point (Integer).
# finish - highest accepted code point (Integer).
def self.is_char_in_range?(char, start, finish)
  return false if char.nil? || char.empty?

  char[0].ord.between?(start, finish)
end
is_char_kana?(char)
click to toggle source
# File lib/wanakana.rb, line 117
# True when +char+ is in either the hiragana or the katakana range.
def self.is_char_kana?(char)
  return true if is_char_hiragana?(char)

  is_char_katakana?(char)
end
is_char_katakana?(char)
click to toggle source
# File lib/wanakana.rb, line 109
# True when the first character of +char+ has a code point between
# KATAKANA_START and KATAKANA_END (inclusive).
def self.is_char_katakana?(char)
  is_char_in_range?(
    char,
    Wanakana::KATAKANA_START,
    Wanakana::KATAKANA_END
  )
end
is_char_not_kana?(char)
click to toggle source
# File lib/wanakana.rb, line 121
# True when +char+ is in neither the hiragana nor the katakana range.
def self.is_char_not_kana?(char)
  !(is_char_hiragana?(char) || is_char_katakana?(char))
end
is_char_vowel?(char, includeY=true)
click to toggle source
# File lib/wanakana.rb, line 97
# Reports whether the first character of +char+ is a Latin vowel
# (case-insensitive).
#
# char     - String to inspect; nil/false and "" yield false.
# includeY - when true (default) "y" counts as a vowel.
#
# Always returns true or false (the original leaked the Integer/nil result
# of =~, which is truthy-compatible but not a real boolean).
def self.is_char_vowel?(char, includeY=true)
  return false unless char

  first = char.downcase[0]
  return false unless first

  pattern = includeY ? /[aeiouy]/ : /[aeiou]/
  !!(first =~ pattern)
end
katakana_to_hiragana(kata)
click to toggle source
# File lib/wanakana.rb, line 138
# Shifts every katakana character of +kata+ into the hiragana block by
# adding the (negative) distance between the two block starts; all other
# characters are copied through unchanged.
def self.katakana_to_hiragana(kata)
  delta = Wanakana::HIRAGANA_START - Wanakana::KATAKANA_START
  kata.split('').map { |c|
    is_char_katakana?(c) ? [c[0].ord + delta].pack('U*') : c
  }.join('')
end
romaji_to_hiragana(roma, options)
click to toggle source
# File lib/wanakana.rb, line 206
# Romaji -> hiragana: runs +romaji_to_kana+ with +ignore_case+ set to true,
# so uppercase input is not turned into katakana.
def self.romaji_to_hiragana(roma, options)
  ignore_case = true
  romaji_to_kana(roma, options, ignore_case)
end
romaji_to_kana(roma, options, ignore_case=false)
click to toggle source
# File lib/wanakana.rb, line 210
# Transliterates romaji in +roma+ to kana using the R_TO_J table.
#
# roma        - input string.
# options     - options hash; missing keys are filled from the module
#               defaults. :useObsoleteKana maps "wi"/"we" to ゐ/ゑ.
# ignore_case - when false (default), a chunk starting with an uppercase
#               ASCII letter is emitted as katakana.
#
# The string is scanned left to right. At each position the longest chunk
# (3 characters, or 4 for FOUR_CHARACTER_EDGE_CASES) is tried first and then
# shortened until R_TO_J has an entry. A doubled non-"n" consonant emits a
# small tsu; an "n" followed by consonant+vowel is handled on its own.
# Text with no table entry is copied through one character at a time.
def self.romaji_to_kana(roma, options, ignore_case=false)
  options = extend(options, @@default_options)
  total = roma.length
  out = []
  pos = 0
  width = 0
  piece = nil
  kana_char = nil
  piece_lc = nil

  # Re-reads the current chunk (and its lowercase form) from pos/width.
  take_piece = lambda do
    piece = roma.slice(pos, width)
    piece_lc = piece.downcase
  end
  upper_case = lambda do |ch|
    is_char_in_range?(ch, Wanakana::UPPERCASE_START, Wanakana::UPPERCASE_END)
  end

  while pos < total
    width = [3, total - pos].min
    while width > 0
      take_piece.call
      if Wanakana::FOUR_CHARACTER_EDGE_CASES.include?(piece_lc) && (total - pos) >= 4
        # Known 3-letter prefix of a 4-letter syllable: widen the chunk.
        width += 1
        take_piece.call
      elsif piece_lc[0] == 'n'
        # IME-mode handling, disabled in the original source:
        # if (options[:IMEMode] && chunk_LC[1] == "'" && chunk_size ==2)
        #   kana_char = 'ん'
        #   break
        # end
        if is_char_consonant?(piece_lc[1], false) && is_char_vowel?(piece_lc[2])
          # "n" + consonant + vowel: treat the "n" as a chunk of its own.
          width = 1
          take_piece.call
        end
      elsif is_char_consonant?(piece_lc[0]) && piece[0] == piece[1]
        # Doubled consonant -> small tsu (katakana form for uppercase input).
        width = 1
        piece_lc = piece = (upper_case.call(piece[0]) ? 'ッ' : 'っ')
      end

      kana_char = Wanakana::R_TO_J[piece_lc.to_sym]
      break if kana_char

      # A widened 4-char chunk that missed drops straight back to 2 chars.
      width -= (width == 4 ? 2 : 1)
    end

    # No table entry at any width: pass the current chunk through.
    kana_char ||= piece

    if options[:useObsoleteKana]
      case piece_lc
      when 'wi' then kana_char = 'ゐ'
      when 'we' then kana_char = 'ゑ'
      end
    end

    # IME-mode handling, disabled in the original source:
    # if (options[:IMEMode] && chunk_LC[0] == 'n')
    #   kana_char = chunk[0] if ( roma[cursor+1].downcase == 'y' &&
    #     !is_char_vowel?(roma[cursor + 2]) ||
    #     cursor == (len - 1) ||
    #     is_kana?(roma[cursor + 1]) )
    # end

    if !ignore_case && upper_case.call(piece[0])
      kana_char = hiragana_to_katakana(kana_char)
    end

    out.push(kana_char)
    pos += width > 0 ? width : 1
  end

  out.join('')
end