class GroongaSynonym::Sudachi

Public Class Methods

new() click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 24
# Builds the synonym source. The Datasets::SudachiSynonymDictionary
# instance created here is the collection that #each walks through.
def initialize
  @dataset = Datasets::SudachiSynonymDictionary.new
end

Public Instance Methods

each() { |term, synonyms| ... } click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 28
# Walks the dataset and yields every term together with its synonym list.
#
# Consecutive dataset entries that share a group_id are collected into one
# group. Each finished group is handed to emit_synonyms, which accumulates
# per-term synonym lists in a hash. Once the dataset is exhausted the
# accumulated hash is passed through filter_groups and every resulting
# (term, synonyms) pair is yielded to the caller.
#
# Returns an Enumerator when called without a block.
def each
  unless block_given?
    return enum_for(__method__)
  end

  merged = {}
  current_id = nil
  pending = nil
  @dataset.each do |entry|
    if entry.group_id == current_id
      # Still inside the same group: keep collecting.
      pending << entry
      next
    end
    # The group_id changed: flush the finished group and start a new one.
    emit_synonyms(merged, pending)
    current_id = entry.group_id
    pending = [entry]
  end
  # Flush the trailing group (a no-op in emit_synonyms when nothing was read).
  emit_synonyms(merged, pending)
  filter_groups(merged) { |term, synonyms| yield(term, synonyms) }
end

Private Instance Methods

emit_synonyms(groups, group) click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 50
# Registers the synonym lists of one group in +groups+.
#
# groups:: Hash from term (a notation) to an Array of Synonym objects;
#          updated in place.
# group::  Array of dataset entries sharing a group_id, or nil when no
#          group has been collected yet.
#
# Entries whose expansion_type is :never are ignored, and nothing is
# registered unless at least two entries remain. Every remaining entry whose
# expansion_type is :always becomes a key in +groups+. Its value lists all
# remaining entries as Synonym objects with these weights:
#
# * nil for the key entry itself
# * 0.8 for entries with the same lexeme_id as the key entry
# * 0.6 for all other entries
def emit_synonyms(groups, group)
  return if group.nil?

  candidates = group.reject { |entry| entry.expansion_type == :never }
  return unless candidates.size > 1

  candidates.each_with_index do |head, head_index|
    next if head.expansion_type != :always

    key = head.notation
    expansions = candidates.each_with_index.map do |entry, index|
      weight =
        if index == head_index
          nil
        elsif entry.lexeme_id == head.lexeme_id
          0.8
        else
          0.6
        end
      Synonym.new(entry.notation, weight)
    end
    # The same notation can be the key of more than one group
    # (e.g.: 働き手), so union with what is already registered.
    groups[key] = groups.key?(key) ? groups[key] | expansions : expansions
  end
end
filter_groups(groups) { |term, synonyms| ... } click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 79
# Prunes the synonym list of every term and yields (term, synonyms).
#
# groups:: Hash from term to an Array of Synonym objects. The arrays are
#          not modified; each yielded list is a new Array.
#
# For each term the synonyms with a non-nil weight are classified, in this
# order of precedence:
#
# 1. its text contains the text of another weighted synonym -> dropped
# 2. its text is contained in the term -> a "sub" synonym
# 3. its text contains the term -> dropped
#
# When sub synonyms exist, only the one sorted first by text length is
# kept. In that case the nil-weight entry is removed and a Synonym with the
# same text is appended, weighted (1.0 - kept sub synonym's weight) rounded
# to two decimals.
def filter_groups(groups)
  groups.each do |term, synonyms|
    unweighted, weighted = synonyms.partition { |entry| entry.weight.nil? }
    # The last nil-weight entry stands for the term itself.
    head = unweighted.last

    buckets = weighted.group_by do |entry|
      contains_sibling = weighted.any? do |other|
        other != entry && entry.term.include?(other.term)
      end
      if contains_sibling
        :contains_sibling
      elsif term.include?(entry.term)
        :inside_term
      elsif entry.term.include?(term)
        :contains_term
      end
    end

    kept = synonyms - buckets.fetch(:contains_sibling, [])
    kept -= buckets.fetch(:contains_term, [])

    inside_term = buckets.fetch(:inside_term, [])
    unless inside_term.empty?
      shortest, *longer = inside_term.sort_by { |entry| entry.term.size }
      kept -= longer
      # Swap the nil-weight entry for an explicitly weighted one.
      kept.delete(head)
      kept << Synonym.new(head.term, (1.0 - shortest.weight).round(2))
    end

    yield(term, kept)
  end
end