class Pinyin
Constants
- TONE_MARK
Attributes
ruby1[RW]
table[RW]
Public Class Methods
init_table()
click to toggle source
# File lib/chinese_pinyin.rb, line 21
# Loads the character-to-pinyin lookup table into @table.
#
# Does nothing when @table is already set. The data file is chosen from the
# running interpreter: 'Mandarin.dat' on Ruby 1.x, 'pinyin-utf8.dat'
# otherwise, both resolved relative to this source file under ../data.
# Each line is split at its first whitespace run into a key and a value;
# the value is stored exactly as read (including its trailing newline).
#
# Raises whatever File.open raises when the data file cannot be read; in
# that case @table is left unset so a later call can retry.
def init_table
  return if @table

  # Since Ruby 2.0 UTF-8 is the default encoding, so the newer table keyed
  # by the characters themselves is used for better efficiency.
  @ruby1 = !!(RUBY_VERSION =~ /^1/)
  datfile = @ruby1 ? 'Mandarin.dat' : 'pinyin-utf8.dat'
  path = File.join(File.dirname(__FILE__), "../data/#{datfile}")

  # Build into a local first: assigning @table before the read succeeds
  # would memoize an empty table if File.open raised.
  table = {}
  File.open(path, "r:UTF-8") do |io|
    io.each_line do |line|
      key, value = line.split(' ', 2)
      next if key.nil? # blank line

      table[key] = value
    end
  end
  @table = table
end
init_word_table()
click to toggle source
# File lib/chinese_pinyin.rb, line 38
# Loads the optional phrase table into @words_table.
#
# Does nothing when @words_table is already set. When the WORDS_FILE
# environment variable is set, the file it names is read line by line; each
# line is split at the first '|' into a key and a value. When the variable
# is not set, @words_table becomes an empty Hash.
#
# Raises whatever File.open raises when WORDS_FILE cannot be read; in that
# case @words_table is left unset so a later call can retry.
def init_word_table
  return if @words_table

  words = {}
  path = ENV["WORDS_FILE"]
  if path
    # Read as UTF-8 explicitly (as init_table does): translate looks keys up
    # with a UTF-8 string, which would never match keys read under another
    # default external encoding.
    File.open(path, "r:UTF-8") do |io|
      io.each_line do |line|
        # chomp also strips "\r\n", which sub("\n", "") left half-done.
        key, value = line.chomp.split('|', 2)
        next if key.nil? # blank line

        words[key] = value
      end
    end
  end
  # Assign only after a successful read so a failure is not memoized.
  @words_table = words
end
translate(chars, options={}) { |pinyin, size| ... }
click to toggle source
# File lib/chinese_pinyin.rb, line 53
# Converts +chars+ to pinyin and returns the result as a String.
#
# chars   - the String to convert; it is re-encoded to UTF-8 first.
# options - a Hash of:
#           :splitter  - separator placed between syllables (default ' ').
#           :tonemarks - replace the trailing tone digit with a tone mark
#                        taken from TONE_MARK (default false).
#           :tone      - keep the trailing tone digit (default false; always
#                        on when :tonemarks is set).
#           :camelcase - capitalize every syllable (default false).
#
# A whole-string match in the phrase table (@words_table) wins over
# per-character lookup. Otherwise each character is looked up in @table;
# ASCII letters and digits that are not in the table are copied through and
# any other unknown character becomes a single splitter.
#
# When a block is given it is yielded each pinyin syllable together with the
# current size of the result list, and its return value is appended instead
# of the syllable plus splitter.
def translate(chars, options={})
  chars = chars.encode("UTF-8")
  splitter = options.fetch(:splitter, ' ')
  tonemarks = options.fetch(:tonemarks, false)
  # Tone marks are derived from the trailing tone digit, so the digit has to
  # be kept whenever marks are requested, even with an explicit tone: false.
  tone = options.fetch(:tone, false) || tonemarks
  camel = options.fetch(:camelcase, false)

  init_word_table
  phrase = @words_table[chars]
  if phrase
    syllables = phrase.split.map do |syllable|
      syllable = syllable.downcase
      syllable = syllable.capitalize if camel
      syllable = syllable.sub(/[0-9]\z/, '') unless tone
      syllable
    end
    return syllables.join(splitter)
  end

  init_table
  results = []
  is_english = false # true while the previous character was not in @table

  chars.scan(/./) do |char|
    # The Ruby 1.x table is keyed by the uppercase hex code point.
    key = @ruby1 ? sprintf("%X", char.unpack("U").first) : char
    entry = @table[key]

    if entry
      # Separate pinyin from preceding pass-through text, but do not double
      # up when that text already ended in a splitter.
      results << splitter if is_english && results.last != splitter
      is_english = false

      # Only the first reading listed for the character is used.
      pinyin = entry.chomp.split(' ', 2)[0]
      pinyin.downcase! if @ruby1
      pinyin.chop! unless tone
      pinyin.capitalize! if camel

      if tonemarks
        tone_index = pinyin[-1].to_i
        pinyin = pinyin[0...-1]
        # Mark the first vowel (in a, o, e, i, u, v priority) that occurs;
        # tr! returns nil when nothing was replaced.
        # NOTE(review): assumes TONE_MARK[vowel] is indexable by tone - 1 for
        # every tone digit in the data; confirm against its definition.
        %w(a o e i u v).each { |v| break if pinyin.tr! v, TONE_MARK[v.to_sym][tone_index - 1] }
      end

      if block_given?
        results << (yield pinyin, results.size)
      else
        results << pinyin
        results << splitter
      end
    else
      if char =~ /[a-zA-Z0-9]/
        results << char
      elsif results.last != splitter
        results << splitter
      end
      is_english = true
    end
  end

  results.join('').chomp(splitter)
end
Also aliased as: t