module Dopeness

sentenceを解析してchunkの配列を返す

sentenceを解析して配列を返す

Constants

VERSION

Public Class Methods

dope(verse) click to toggle source
# File lib/dopeness.rb, line 10
# Scores every chunk of +verse+ against the chunks that follow it.
#
# The verse is split by +parse_surfaces+ and +parse_pronunciation+; each
# pronunciation is passed through +to_roman+ and reduced by +parse_vowels+.
# Chunk +i+ is then compared with chunks +i+ through +i + 9+ (fewer near the
# end of the verse), and each pair receives the sum of four terms:
#
# * the trigram comparison of the two vowel strings (NaN is counted as 0),
# * 1 - the normalized Levenshtein distance between the vowel strings,
# * 1 - the normalized Levenshtein distance between the pronunciations,
# * 1 - (j - i) / 10.0, which shrinks as the two chunks get further apart.
#
# @param verse [String] text handed unchanged to the parse helpers
# @return [Array] a two-element array: the result of +parse_surfaces+, and a
#   Hash mapping each chunk index to a Hash of { compared index => score }
#   ordered from the highest score to the lowest
def self.dope(verse)
  window = 10 # how many chunks, starting with the chunk itself, are compared
  surfaces = parse_surfaces(verse)
  pronunciations = parse_pronunciation(verse)
  vowels = pronunciations.map { |pronunciation| parse_vowels(pronunciation.to_roman) }

  chunk_features = {}
  vowels.each_index do |i|
    last = [i + window, vowels.size].min
    scores = {}
    (i...last).each do |j|
      trigram_evaluation = Trigram.compare(vowels[i], vowels[j])
      trigram_evaluation = 0 if trigram_evaluation.nan?
      vowels_similarity = 1 - Levenshtein.normalized_distance(vowels[i], vowels[j])
      pronunciation_similarity = 1 - Levenshtein.normalized_distance(pronunciations[i], pronunciations[j])
      # Float division is required here: with Integer operands (j - i) / 10 is
      # 0 for every pair inside the window, which turns this term into a
      # constant 1 that cannot influence the ranking.
      proximity = 1 - (j - i) / window.to_f
      scores[j] = trigram_evaluation + vowels_similarity + pronunciation_similarity + proximity
    end
    chunk_features[i] = scores.sort_by { |_, score| -score }.to_h
  end

  [surfaces, chunk_features]
end
parse_pronunciation(sentence) click to toggle source
# File lib/dopeness/parse_pronunciation.rb, line 8
# Builds one pronunciation string per chunk of +sentence+.
#
# The sentence is parsed with +Parser+. For every token the last entry of its
# feature list is taken (presumably the reading; confirm against the
# dictionary format in use). When that entry is "*", the token's own surface
# is used instead. The per-token strings of a chunk are concatenated.
#
# @param sentence [String] text handed to +Parser#parse+
# @return [Array<String>] one concatenated string per chunk, in chunk order
def parse_pronunciation(sentence)
  tree = Parser.new.parse(sentence)
  tree.set_output_layer(OUTPUT_RAW_SENTENCE)
  (0...tree.chunk_size).map do |i|
    chunk = tree.chunk(i)
    (0...chunk.token_size).map do |j|
      token = tree.token(chunk.token_pos + j)
      reading = token.feature_list(token.feature_list_size - 1).force_encoding("UTF-8")
      # Fall back to the surface of this same token (index chunk.token_pos + j),
      # not to the surface of the chunk's first token.
      reading == "*" ? token.surface.force_encoding("UTF-8") : reading
    end.join("")
  end
end
parse_surfaces(sentence) click to toggle source
# File lib/dopeness/parse_surfaces.rb, line 8
# Builds one surface string per chunk of +sentence+.
#
# The sentence is parsed with +Parser+. Each token contributes its
# normalized surface; when that value is "*", the normalized surface of the
# chunk's first token is used in its place. A value of "。" contributes
# nothing. The per-token strings of a chunk are concatenated.
#
# @param sentence [String] text handed to +Parser#parse+
# @return [Array<String>] one concatenated string per chunk, in chunk order
def parse_surfaces(sentence)
  tree = Parser.new.parse(sentence)
  tree.set_output_layer(OUTPUT_RAW_SENTENCE)
  tree.chunk_size.times.map do |chunk_index|
    chunk = tree.chunk(chunk_index)
    pieces = chunk.token_size.times.map do |offset|
      text = tree.token(chunk.token_pos + offset).normalized_surface.force_encoding("UTF-8")
      text = tree.token(chunk.token_pos).normalized_surface.force_encoding("UTF-8") if text == "*"
      # nil entries are rendered as empty strings by Array#join below.
      text unless text == "。"
    end
    pieces.join("")
  end
end
parse_vowels(str) click to toggle source
# File lib/dopeness/parse_vowels.rb, line 4
# Extracts the vowels of +str+, keeping their original order.
#
# Only the lowercase characters "a", "i", "u", "e" and "o" are kept; every
# other character is dropped.
#
# @param str [String] text to filter (callers in this file pass romanized text)
# @return [String] the kept characters concatenated, "" when there are none
def parse_vowels(str)
  vowels = %w[a i u e o]
  str.each_char.select { |ch| vowels.include?(ch) }.join
end

Private Instance Methods

parse_pronunciation(sentence) click to toggle source
# File lib/dopeness/parse_pronunciation.rb, line 8
# Builds one pronunciation string per chunk of +sentence+.
#
# The sentence is parsed with +Parser+. For every token the last entry of its
# feature list is taken (presumably the reading; confirm against the
# dictionary format in use). When that entry is "*", the token's own surface
# is used instead. The per-token strings of a chunk are concatenated.
#
# @param sentence [String] text handed to +Parser#parse+
# @return [Array<String>] one concatenated string per chunk, in chunk order
def parse_pronunciation(sentence)
  tree = Parser.new.parse(sentence)
  tree.set_output_layer(OUTPUT_RAW_SENTENCE)
  (0...tree.chunk_size).map do |i|
    chunk = tree.chunk(i)
    (0...chunk.token_size).map do |j|
      token = tree.token(chunk.token_pos + j)
      reading = token.feature_list(token.feature_list_size - 1).force_encoding("UTF-8")
      # Fall back to the surface of this same token (index chunk.token_pos + j),
      # not to the surface of the chunk's first token.
      reading == "*" ? token.surface.force_encoding("UTF-8") : reading
    end.join("")
  end
end
parse_surfaces(sentence) click to toggle source
# File lib/dopeness/parse_surfaces.rb, line 8
# Builds one surface string per chunk of +sentence+.
#
# The sentence is parsed with +Parser+. Each token contributes its
# normalized surface; when that value is "*", the normalized surface of the
# chunk's first token is used in its place. A value of "。" contributes
# nothing. The per-token strings of a chunk are concatenated.
#
# @param sentence [String] text handed to +Parser#parse+
# @return [Array<String>] one concatenated string per chunk, in chunk order
def parse_surfaces(sentence)
  tree = Parser.new.parse(sentence)
  tree.set_output_layer(OUTPUT_RAW_SENTENCE)
  tree.chunk_size.times.map do |chunk_index|
    chunk = tree.chunk(chunk_index)
    pieces = chunk.token_size.times.map do |offset|
      text = tree.token(chunk.token_pos + offset).normalized_surface.force_encoding("UTF-8")
      text = tree.token(chunk.token_pos).normalized_surface.force_encoding("UTF-8") if text == "*"
      # nil entries are rendered as empty strings by Array#join below.
      text unless text == "。"
    end
    pieces.join("")
  end
end
parse_vowels(str) click to toggle source
# File lib/dopeness/parse_vowels.rb, line 4
# Extracts the vowels of +str+, keeping their original order.
#
# Only the lowercase characters "a", "i", "u", "e" and "o" are kept; every
# other character is dropped.
#
# @param str [String] text to filter (callers in this file pass romanized text)
# @return [String] the kept characters concatenated, "" when there are none
def parse_vowels(str)
  vowels = %w[a i u e o]
  str.each_char.select { |ch| vowels.include?(ch) }.join
end