module Wanakana

Constants

FOUR_CHARACTER_EDGE_CASES
HIRAGANA_END
HIRAGANA_START
J_TO_R
KATAKANA_END
KATAKANA_START
LOWERCASE_END
LOWERCASE_FULLWIDTH_END
LOWERCASE_FULLWIDTH_START
LOWERCASE_START

based on wanakana.js version 1.3.7

R_TO_J
UPPERCASE_END
UPPERCASE_FULLWIDTH_END
UPPERCASE_FULLWIDTH_START
UPPERCASE_START
VERSION

Public Class Methods

default_options(opt=nil) click to toggle source
# File lib/wanakana.rb, line 29
# Returns the module-wide default options, optionally updating them first.
#
# @param opt [Hash, nil] when truthy, it is combined with the current defaults
#   through +extend+ and the result is stored as the new defaults
# @return [Hash] the (possibly updated) default options
def self.default_options(opt=nil)
  return @@default_options unless opt

  # An assignment evaluates to the assigned value, so this is also the result.
  @@default_options = extend(opt, @@default_options)
end
is_hiragana?(input) click to toggle source
# File lib/wanakana.rb, line 34
# Tells whether every character of +input+ is either hiragana or an accepted
# filler character (whitespace, POSIX punctuation, U+FF01..U+FF5E or
# U+3000..U+303F).
#
# @param input [String] text to inspect
# @return [Boolean] true when no character falls outside those groups
#   (an empty string therefore yields true)
def self.is_hiragana?(input)
  offenders = input.split('').reject do |c|
    is_char_hiragana?(c) || c.match(/[\s[:punct:][\uFF01-\uFF5E][\u3000-\u303F]]/)
  end
  offenders.empty?
end
is_kana?(input) click to toggle source
# File lib/wanakana.rb, line 46
# Tells whether +input+ consists solely of hiragana and/or katakana
# characters. Unlike is_hiragana?/is_katakana?, no punctuation or whitespace
# is tolerated here.
#
# @param input [String] text to inspect
# @return [Boolean] true when every character is kana (true for "")
def self.is_kana?(input)
  input.split('').all? do |c|
    is_char_hiragana?(c) || is_char_katakana?(c)
  end
end
is_katakana?(input) click to toggle source
# File lib/wanakana.rb, line 40
# Tells whether every character of +input+ is either katakana or an accepted
# filler character (whitespace, POSIX punctuation, U+FF01..U+FF5E or
# U+3000..U+303F).
#
# @param input [String] text to inspect
# @return [Boolean] true when no character falls outside those groups
#   (an empty string therefore yields true)
def self.is_katakana?(input)
  filler = /[\s[:punct:][\uFF01-\uFF5E][\u3000-\u303F]]/
  input.split('').all? { |c| is_char_katakana?(c) || c.match(filler) }
end
is_romaji?(input) click to toggle source
# File lib/wanakana.rb, line 52
# Tells whether +input+ is free of kana. Note that this is a purely negative
# test: any string without hiragana or katakana characters counts as romaji.
#
# @param input [String] text to inspect
# @return [Boolean] true when no character is hiragana or katakana
#   (true for "")
def self.is_romaji?(input)
  input.split('').none? do |c|
    is_char_hiragana?(c) || is_char_katakana?(c)
  end
end
to_hiragana(input, options={}) click to toggle source
# File lib/wanakana.rb, line 58
# Converts +input+ to hiragana.
#
# Kana-free input (per is_romaji?) goes through romaji_to_hiragana; otherwise
# katakana input (per is_katakana?) goes through katakana_to_hiragana;
# anything else is returned untouched.
#
# @param input [String] text to convert
# @param options [Hash] forwarded to romaji_to_hiragana
# @return [String] the converted text, or +input+ itself when neither
#   conversion applies
def self.to_hiragana(input, options={})
  if is_romaji?(input)
    romaji_to_hiragana(input, options)
  elsif is_katakana?(input)
    katakana_to_hiragana(input)
  else
    input
  end
end
to_kana(input, options={}) click to toggle source
# File lib/wanakana.rb, line 73
# Converts romaji to kana by delegating to romaji_to_kana with its default
# case handling (ignore_case is left at false).
#
# @param input [String] text to convert
# @param options [Hash] forwarded unchanged to romaji_to_kana
# @return [String] whatever romaji_to_kana produces
def self.to_kana(input, options={})
  kana = romaji_to_kana(input, options)
  kana
end
to_katakana(input, options={}) click to toggle source
# File lib/wanakana.rb, line 64
# Converts +input+ to katakana.
#
# Hiragana input (per is_hiragana?) is shifted directly with
# hiragana_to_katakana. Kana-free input (per is_romaji?) is first turned into
# hiragana and then shifted. Anything else is returned untouched.
#
# @param input [String] text to convert
# @param options [Hash] forwarded to romaji_to_hiragana
# @return [String] the converted text, or +input+ itself when neither
#   conversion applies
def self.to_katakana(input, options={})
  return hiragana_to_katakana(input) if is_hiragana?(input)
  return input unless is_romaji?(input)

  hiragana_to_katakana(romaji_to_hiragana(input, options))
end
to_romaji(input, options={}) click to toggle source
# File lib/wanakana.rb, line 77
# Converts kana to romaji by delegating to hiragana_to_romaji.
#
# @param input [String] text to convert
# @param options [Hash] forwarded unchanged to hiragana_to_romaji
# @return [String] whatever hiragana_to_romaji produces
def self.to_romaji(input, options={})
  romaji = hiragana_to_romaji(input, options)
  romaji
end

Protected Class Methods

convert_full_width_chars_to_ASCII(string) click to toggle source
# File lib/wanakana.rb, line 125
# Replaces full-width Latin letters with their ASCII counterparts.
#
# A character inside LOWERCASE_FULLWIDTH_START..LOWERCASE_FULLWIDTH_END is
# re-based onto LOWERCASE_START; one inside
# UPPERCASE_FULLWIDTH_START..UPPERCASE_FULLWIDTH_END is re-based onto
# UPPERCASE_START. Every other character is kept unchanged.
#
# @param string [String] text that may contain full-width letters
# @return [String] a new string with those letters replaced
def self.convert_full_width_chars_to_ASCII(string)
  converted = string.each_char.map do |char|
    code = char.ord
    if is_char_in_range?(char, Wanakana::LOWERCASE_FULLWIDTH_START, Wanakana::LOWERCASE_FULLWIDTH_END)
      [code - Wanakana::LOWERCASE_FULLWIDTH_START + Wanakana::LOWERCASE_START].pack('U*')
    elsif is_char_in_range?(char, Wanakana::UPPERCASE_FULLWIDTH_START, Wanakana::UPPERCASE_FULLWIDTH_END)
      [code - Wanakana::UPPERCASE_FULLWIDTH_START + Wanakana::UPPERCASE_START].pack('U*')
    else
      # Without this branch the block would evaluate to nil for ordinary
      # characters and they would vanish from the joined result.
      char
    end
  end
  converted.join
end
extend(target, source) click to toggle source

Fills in missing keys of an options hash with default values.

# File lib/wanakana.rb, line 84
# Builds an options hash in which every key missing from +target+ is filled
# in from +source+. Keys present in +target+ always win.
#
# Neither argument is modified and the result is always a fresh Hash, so
# callers can neither have their own hash silently altered nor receive (and
# later mutate) the shared defaults object.
#
# NOTE: being a singleton method named +extend+, this shadows Object#extend
# on the receiver it is defined on.
#
# @param target [Hash, nil, false] caller-supplied options; nil/false is
#   treated as "no options"
# @param source [Hash] default values
# @return [Hash] a new hash containing +source+ overlaid with +target+
def self.extend(target, source)
  source.merge(target || {})
end
hiragana_to_katakana(hira) click to toggle source
# File lib/wanakana.rb, line 151
# Shifts every hiragana character of +hira+ into the katakana block by adding
# the distance between KATAKANA_START and HIRAGANA_START to its code point.
# Characters that is_char_hiragana? rejects are passed through unchanged.
#
# @param hira [String] text to convert
# @return [String] a new string with hiragana replaced by katakana
def self.hiragana_to_katakana(hira)
  shifted = hira.split('').map do |char|
    next char unless is_char_hiragana?(char)

    offset = Wanakana::KATAKANA_START - Wanakana::HIRAGANA_START
    [char[0].ord + offset].pack('U*')
  end
  shifted.join('')
end
hiragana_to_romaji(hira, options) click to toggle source
# File lib/wanakana.rb, line 164
# Converts kana text to romaji by scanning +hira+ left to right and looking up
# chunks of at most two characters in Wanakana::J_TO_R, trying the longer
# chunk first.
#
# @param hira [String] text to convert
# @param options [Hash] combined with @@default_options through +extend+;
#   the only key read here is :convertKatakanaToUppercase
# @return [String] the concatenation of the romaji found for each chunk;
#   a chunk with no table entry contributes its last tried one-character form
def self.hiragana_to_romaji(hira, options)
  options = extend(options, @@default_options)
  len = hira.length
  roma = []
  cursor = 0
  chunk_size = 0
  chunk = nil
  roma_char = nil
  # Set when a small tsu was consumed; stays set until a lookup succeeds.
  next_char_is_double_consonant = false
  is_kata = false
  while cursor < len do
    # Start with the longest chunk available (two characters, or one at the end).
    chunk_size = [2, len - cursor].min
    while chunk_size > 0 do
      chunk = hira.slice(cursor, chunk_size)
      # Chunks that is_katakana? accepts are looked up in their hiragana form.
      if is_katakana?(chunk)
        is_kata = true
        chunk = katakana_to_hiragana(chunk)
      end
      # A lone small tsu that is not the final character emits nothing itself
      # and asks for the next converted chunk to have its first letter doubled.
      if (chunk[0] == "っ" && chunk_size == 1 && cursor < (len - 1))
        next_char_is_double_consonant = true
        roma_char = ''
        break
      end
      roma_char = Wanakana::J_TO_R[chunk.to_sym]
      if (roma_char && next_char_is_double_consonant)
        # roma_char[0] is a new one-character string, so concat builds a fresh
        # value rather than modifying the table entry.
        roma_char = roma_char[0].concat(roma_char)
        next_char_is_double_consonant = false
      end
      if roma_char && is_kata
        roma_char = roma_char.upcase if options[:convertKatakanaToUppercase]
      end
      # The katakana marker only applies to the chunk that was just looked up.
      is_kata = false
      break if roma_char
      # No entry for this chunk: retry with one character fewer.
      chunk_size -= 1
    end
    # Nothing matched (roma_char is nil): emit the last chunk tried as-is.
    roma_char = chunk unless roma_char
    roma.push(roma_char)
    # chunk_size is 0 when every lookup failed; still advance by one character.
    cursor += chunk_size > 0 ? chunk_size : 1
  end
  roma.join('')
end
is_char_consonant?(char, includeY=true) click to toggle source
# File lib/wanakana.rb, line 103
# Tells whether the first character of +char+ is a Latin consonant
# (case-insensitive).
#
# Always answers with a strict boolean. Previously a match produced the
# Integer 0, a miss produced nil and only the nil-guard produced false.
#
# @param char [String, nil] text whose first character is tested
# @param includeY [Boolean] whether "y" counts as a consonant (default true)
# @return [Boolean] true for a consonant; false otherwise, including for nil
#   and for the empty string
def self.is_char_consonant?(char, includeY=true)
  return false unless char

  consonants = includeY ? /[bcdfghjklmnpqrstvwxyz]/ : /[bcdfghjklmnpqrstvwxz]/
  # "".downcase[0] is nil and nil =~ regexp is nil, so empty input yields false.
  !!(char.downcase[0] =~ consonants)
end
is_char_hiragana?(char) click to toggle source
# File lib/wanakana.rb, line 113
# Tells whether the first character of +char+ has a code point between
# Wanakana::HIRAGANA_START and Wanakana::HIRAGANA_END (both inclusive).
#
# @param char [String] text whose first character is tested
# @return [Boolean] the answer given by is_char_in_range?
def self.is_char_hiragana?(char)
  lower = Wanakana::HIRAGANA_START
  upper = Wanakana::HIRAGANA_END
  is_char_in_range?(char, lower, upper)
end
is_char_in_range?(char, start, finish) click to toggle source
# File lib/wanakana.rb, line 92
# Tells whether the code point of the first character of +char+ lies between
# +start+ and +finish+ (both inclusive).
#
# nil and the empty string have no first character; they yield false instead
# of raising NoMethodError, in line with the nil handling of
# is_char_vowel? and is_char_consonant?.
#
# @param char [String, nil] text whose first character is tested
# @param start [Integer] lowest accepted code point
# @param finish [Integer] highest accepted code point
# @return [Boolean] true when the first character is inside the range
def self.is_char_in_range?(char, start, finish)
  return false if char.nil? || char.empty?

  char[0].ord.between?(start, finish)
end
is_char_kana?(char) click to toggle source
# File lib/wanakana.rb, line 117
# Tells whether +char+ is accepted by is_char_hiragana? or, failing that, by
# is_char_katakana?.
#
# @param char [String] text whose first character is tested
# @return [Boolean] true when either check succeeds
def self.is_char_kana?(char)
  hira = is_char_hiragana?(char)
  # Only consult the katakana check when the hiragana check fails.
  hira ? hira : is_char_katakana?(char)
end
is_char_katakana?(char) click to toggle source
# File lib/wanakana.rb, line 109
# Tells whether the first character of +char+ has a code point between
# Wanakana::KATAKANA_START and Wanakana::KATAKANA_END (both inclusive).
#
# @param char [String] text whose first character is tested
# @return [Boolean] the answer given by is_char_in_range?
def self.is_char_katakana?(char)
  lower = Wanakana::KATAKANA_START
  upper = Wanakana::KATAKANA_END
  is_char_in_range?(char, lower, upper)
end
is_char_not_kana?(char) click to toggle source
# File lib/wanakana.rb, line 121
# Tells whether +char+ is rejected by both is_char_hiragana? and
# is_char_katakana?.
#
# @param char [String] text whose first character is tested
# @return [Boolean] true when neither kana check succeeds
def self.is_char_not_kana?(char)
  # De Morgan form of "not hiragana and not katakana".
  !(is_char_hiragana?(char) || is_char_katakana?(char))
end
is_char_vowel?(char, includeY=true) click to toggle source
# File lib/wanakana.rb, line 97
# Tells whether the first character of +char+ is a Latin vowel
# (case-insensitive).
#
# Always answers with a strict boolean. Previously a match produced the
# Integer 0, a miss produced nil and only the nil-guard produced false.
#
# @param char [String, nil] text whose first character is tested
# @param includeY [Boolean] whether "y" counts as a vowel (default true)
# @return [Boolean] true for a vowel; false otherwise, including for nil and
#   for the empty string
def self.is_char_vowel?(char, includeY=true)
  return false unless char

  vowels = includeY ? /[aeiouy]/ : /[aeiou]/
  # "".downcase[0] is nil and nil =~ regexp is nil, so empty input yields false.
  !!(char.downcase[0] =~ vowels)
end
katakana_to_hiragana(kata) click to toggle source
# File lib/wanakana.rb, line 138
# Shifts every katakana character of +kata+ into the hiragana block by adding
# the distance between HIRAGANA_START and KATAKANA_START to its code point.
# Characters that is_char_katakana? rejects are passed through unchanged.
#
# @param kata [String] text to convert
# @return [String] a new string with katakana replaced by hiragana
def self.katakana_to_hiragana(kata)
  shifted = kata.split('').map do |char|
    next char unless is_char_katakana?(char)

    offset = Wanakana::HIRAGANA_START - Wanakana::KATAKANA_START
    [char[0].ord + offset].pack('U*')
  end
  shifted.join('')
end
romaji_to_hiragana(roma, options) click to toggle source
# File lib/wanakana.rb, line 206
# Converts romaji to kana through romaji_to_kana with its +ignore_case+
# argument set to true, which skips that method's uppercase-to-katakana step.
#
# @param roma [String] text to convert
# @param options [Hash] forwarded unchanged to romaji_to_kana
# @return [String] whatever romaji_to_kana produces
def self.romaji_to_hiragana(roma, options)
  ignore_case = true
  romaji_to_kana(roma, options, ignore_case)
end
romaji_to_kana(roma, options, ignore_case=false) click to toggle source
# File lib/wanakana.rb, line 210
# Converts romaji to kana by scanning +roma+ left to right and looking up
# lower-cased chunks of at most three characters (four for the entries of
# Wanakana::FOUR_CHARACTER_EDGE_CASES) in Wanakana::R_TO_J, longest first.
#
# @param roma [String] text to convert
# @param options [Hash] combined with @@default_options through +extend+;
#   the only key read by the active code is :useObsoleteKana
# @param ignore_case [Boolean] when false, the kana produced for a chunk whose
#   first character lies in UPPERCASE_START..UPPERCASE_END is passed through
#   hiragana_to_katakana; when true that step is skipped
# @return [String] the concatenation of the kana found for each chunk; a chunk
#   with no table entry contributes its last tried form unchanged
def self.romaji_to_kana(roma, options, ignore_case=false)
  options = extend(options, @@default_options)
  len = roma.length
  kana = []
  cursor = 0
  chunk_size = 0
  chunk = nil
  kana_char = nil
  chunk_LC = nil
  # Re-reads the chunk at the current cursor/chunk_size; the lambda closes over
  # the locals above, so calling it updates both chunk and chunk_LC.
  set_chunk = lambda {
    chunk = roma.slice(cursor, chunk_size)
    chunk_LC = chunk.downcase
  }
  is_char_upper_case = lambda { |char| is_char_in_range?(char, Wanakana::UPPERCASE_START, Wanakana::UPPERCASE_END) }
  while cursor < len do
    # Start with the longest chunk available (three characters, fewer near the end).
    chunk_size = [3, len - cursor].min
    while chunk_size > 0 do
      set_chunk.call()
      # A listed edge case with at least four characters remaining is widened
      # by one character before the lookup.
      if (Wanakana::FOUR_CHARACTER_EDGE_CASES.include?(chunk_LC) && (len - cursor) >= 4)
        chunk_size += 1
        set_chunk.call()
      else
        if chunk_LC[0] == 'n'
          # if (options[:IMEMode] && chunk_LC[1] == "'" && chunk_size ==2)
          #   kana_char = 'ん'
          #   break
          # end
          # "n" + consonant (other than y) + vowel: look up the "n" on its own.
          if (is_char_consonant?(chunk_LC[1], false) && is_char_vowel?(chunk_LC[2]))
            chunk_size = 1
            set_chunk.call()
          end
        end
        # A doubled consonant other than "n": consume only the first letter and
        # replace the chunk with a small tsu (the katakana one when that letter
        # is in the uppercase range).
        if (chunk_LC[0] != 'n' && is_char_consonant?(chunk_LC[0]) && chunk[0] == chunk[1])
          chunk_size = 1
          chunk_LC = chunk = (is_char_in_range?(chunk[0], Wanakana::UPPERCASE_START, Wanakana::UPPERCASE_END) ? 'ッ' : 'っ')
        end
      end
      kana_char = Wanakana::R_TO_J[chunk_LC.to_sym]
      break if kana_char
      # No entry: shrink and retry. A widened four-character chunk drops
      # straight to two characters.
      chunk_size -= chunk_size == 4 ? 2 : 1
    end
    # Nothing matched (kana_char is nil): emit the last chunk tried as-is.
    kana_char = chunk unless kana_char
    if options[:useObsoleteKana]
      kana_char = 'ゐ' if chunk_LC == 'wi'
      kana_char = 'ゑ' if chunk_LC == 'we'
    end
    # if (options[:IMEMode] && chunk_LC[0] == 'n')
    #   kana_char = chunk[0] if ( roma[cursor+1].downcase == 'y' &&
    #                             !is_char_vowel?(roma[cursor + 2]) ||
    #                             cursor == (len - 1) ||
    #                             is_kana?(roma[cursor + 1]) )
    # end
    unless ignore_case
      kana_char = hiragana_to_katakana(kana_char) if is_char_upper_case.call(chunk[0])
    end
    kana.push(kana_char)
    # chunk_size is 0 when every lookup failed; still advance by one character.
    cursor += chunk_size > 0 ? chunk_size : 1
  end
  kana.join('')
end