class Tataki::Converter::SkkJisyo
Constants
- DEFAULT_CONFIG_PATH
- DEFAULT_JISYO_SUFFIXES
Public Class Methods
new(jisyo_types = DEFAULT_JISYO_SUFFIXES)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 12
# Builds a converter backed by the SKK dictionaries named by +jisyo_types+.
#
# jisyo_types - list of dictionary suffixes; each is resolved to a file path
#               through Skk::Jisyo.path, and the suffixes joined with "_"
#               name the table cache file.
#
# Also loads the YAML config found at DEFAULT_CONFIG_PATH (resolved relative
# to this file) for the "roman_table" and "ignore_kana" settings, then obtains
# the lookup tables from setup_jisyo and freezes both of them.
def initialize(jisyo_types = DEFAULT_JISYO_SUFFIXES)
  @jisyo_paths = jisyo_types.map { |suffix| Skk::Jisyo.path(suffix) }
  @table_cache_path = table_cache_path(jisyo_types.join("_"))

  settings = YAML.load_file(File.expand_path(DEFAULT_CONFIG_PATH, __FILE__))
  @roman_data = settings["roman_table"]
  @ignore_kana = settings["ignore_kana"]

  match, okurigana = setup_jisyo
  @match_table = match.freeze
  @okurigana_table = okurigana.freeze
end
Public Instance Methods
add_jisyo(match_table, okurigana_table, jisyo_path)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 41
# Reads the EUC-JP dictionary file at +jisyo_path+ and merges its entries into
# the two lookup tables (both are mutated in place).
#
# match_table     - Hash: first character of a candidate => list of
#                   [candidate, reading] pairs.
# okurigana_table - Hash: first character of a candidate => list of
#                   [candidate, reading_stem, trailing_letter] triples, used
#                   for readings that end in a single a-z letter.
# jisyo_path      - path of the dictionary file to read.
#
# Lines starting with ";" or containing "#" are skipped, as are readings
# listed in @ignore_kana. Every bucket that received entries ends up ordered
# longest candidate first; candidates of equal length keep the order in which
# they were added.
def add_jisyo(match_table, okurigana_table, jisyo_path)
  # Buckets that received new entries, tracked by identity so each one is
  # sorted exactly once after the whole file has been read.
  touched = {}.compare_by_identity

  File.open(jisyo_path, "rb:euc-jp") do |jisyo_file|
    jisyo_file.each_line do |line|
      next if line[0] == ";" || line.include?("#")

      kana, kanji_part = line.encode("utf-8").split(" ")
      next unless kana && kanji_part

      kana = kana.gsub(/[^ぁ-んa-z]/, "")
      next if kana.empty? || kana !~ /^[ぁ-ん]+[a-z]?/ || @ignore_kana.include?(kana)

      # A trailing a-z letter marks an okurigana entry: stem + consonant.
      okurigana = kana.match(/^(.+)([a-z])$/)
      table = okurigana ? okurigana_table : match_table

      # Drop the outer slashes and each candidate's ";annotation". The
      # annotation stops at the next "/" so later candidates are kept.
      candidates = kanji_part.gsub(%r{^/|;[^/]*|/$}, "")

      candidates.split("/").each do |kanji|
        next if kanji.empty?

        bucket = (table[kanji[0]] ||= [])
        bucket.push(okurigana ? [kanji, okurigana[1], okurigana[2]] : [kanji, kana])
        touched[bucket] = true
      end
    end
  end

  # Longest candidate first; the index keeps the sort stable for ties.
  touched.each_key do |bucket|
    ordered = bucket.each_with_index.sort_by { |entry, index| [-entry[0].size, index] }
    bucket.replace(ordered.map(&:first))
  end
end
jisyo_path()
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 63
# Absolute path of the dictionary data directory, resolved relative to this
# source file.
def jisyo_path
  relative_location = "../../../../data/jisyo"
  File.expand_path(relative_location, __FILE__)
end
jisyo_timestamp(path)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 71
# Parses the time stored in the "<path>.timestamp" companion file.
#
# path - base path; ".timestamp" is appended to locate the file to read.
#
# Returns the Time produced by Time.parse on the file's contents.
def jisyo_timestamp(path)
  stamp_file = "#{path}.timestamp"
  recorded = File.read(stamp_file)
  Time.parse(recorded)
end
setup_jisyo()
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 25
# Returns the pair [match_table, okurigana_table].
#
# When the cache file at @table_cache_path exists, the tables are loaded from
# it. Otherwise they are built by feeding every path in @jisyo_paths to
# add_jisyo, and the result is written to the cache file together with a
# "<cache>.timestamp" file holding the current time.
def setup_jisyo
  if File.exist?(@table_cache_path)
    # The cache is written with File.binwrite, so read it back in binary mode
    # too; a text-mode read may apply newline or encoding conversion and
    # corrupt the Marshal stream.
    # NOTE(review): Marshal.load can instantiate arbitrary objects. The cache
    # file must only ever come from this method, never from an untrusted source.
    return Marshal.load(File.binread(@table_cache_path))
  end

  match_table = {}
  okurigana_table = {}
  @jisyo_paths.each do |jisyo_path|
    add_jisyo(match_table, okurigana_table, jisyo_path)
  end

  tables = [match_table, okurigana_table]
  File.binwrite(@table_cache_path, Marshal.dump(tables))
  File.write("#{@table_cache_path}.timestamp", Time.now.to_s)
  tables
end
table_cache_path(name)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 67
# Path of the table cache file for the dictionary set identified by +name+.
#
# name - text interpolated into the cache file name.
#
# Returns "SKK-JISYO.<name>.table.cache" joined onto jisyo_path.
def table_cache_path(name)
  directory = jisyo_path
  cache_file = "SKK-JISYO.#{name}.table.cache"
  File.join(directory, cache_file)
end
to_kana(sentence)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 75
# Converts +sentence+ by delegating to _to_kana with an empty starting
# accumulator.
#
# sentence - the text to convert.
#
# Returns whatever _to_kana returns.
def to_kana(sentence)
  converted_so_far = ""
  _to_kana(sentence, converted_so_far)
end
Private Instance Methods
_to_kana(sentence, kana)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 81
# Walks +sentence+ from left to right, appending the converted text to +kana+.
#
# At each position the okurigana table is tried first, then the match table.
# When an entry is found, its reading (element 1) is appended and the entry's
# kanji (element 0) is consumed; otherwise the leading character is copied
# through unchanged.
#
# sentence - remaining text to convert.
# kana     - text converted so far; it is not mutated.
#
# Returns the accumulated String (+kana+ itself when +sentence+ is empty).
def _to_kana(sentence, kana)
  return kana if sentence.empty?

  # Iterate rather than recurse: one stack frame per consumed token would
  # raise SystemStackError on long input.
  converted = kana.dup
  rest = sentence
  until rest.empty?
    entry = find_okurigana_entry(rest) || find_match_entry(rest)
    if entry
      converted << entry[1]
      rest = rest[entry[0].size..-1]
    else
      converted << rest[0]
      rest = rest[1..-1]
    end
  end
  converted
end
find_match_entry(sentence)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 112
# Finds the first entry in @match_table whose kanji is a prefix of +sentence+.
#
# Entries are looked up under the first character of +sentence+ and scanned in
# stored order.
#
# sentence - text whose beginning should be matched.
#
# Returns the matching entry (an array whose first element is the kanji), or
# nil when there is no bucket for the first character or nothing matches.
def find_match_entry(sentence)
  entries = @match_table[sentence[0]]
  return unless entries

  entries.find { |kanji, _yomi| sentence.start_with?(kanji) }
end
find_okurigana_entry(sentence)
click to toggle source
# File lib/tataki/converters/skk_jisyo.rb, line 95
# Finds the first entry in @okurigana_table that matches the start of
# +sentence+ together with the character that follows it.
#
# An entry [kanji, yomi, alphabet] matches when +sentence+ starts with kanji
# and the character right after it equals one of the values stored in
# @roman_data under alphabet. Entries whose alphabet has no @roman_data value
# are skipped.
#
# sentence - text whose beginning should be matched.
#
# Returns the matching entry, or nil when nothing matches.
def find_okurigana_entry(sentence)
  entries = @okurigana_table[sentence[0]]
  return unless entries

  entries.find do |kanji, _yomi, alphabet|
    next false unless sentence.start_with?(kanji)

    okurigana_candidates = @roman_data[alphabet]
    next false unless okurigana_candidates

    # nil when the kanji is the last thing in the sentence.
    next_ch = sentence[kanji.size]
    okurigana_candidates.any? { |okurigana| okurigana == next_ch }
  end
end