class GroongaSynonym::Sudachi
Public Class Methods
new()
click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 24
#
# Creates a new Datasets::SudachiSynonymDictionary and keeps it in @dataset
# as the record source for this object.
def initialize
  @dataset = Datasets::SudachiSynonymDictionary.new
end
Public Instance Methods
each() { |term, synonyms| ... }
click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 28
#
# Yields every term together with its filtered synonyms.
#
# Consecutive records of @dataset that share a group_id form one group. Each
# group is folded into a term => synonyms table by emit_synonyms, and the
# finished table is handed to filter_groups, whose (term, synonyms) pairs are
# yielded to the caller unchanged.
#
# Returns an Enumerator over the same pairs when called without a block.
def each
  return to_enum(__method__) unless block_given?

  groups = {}
  # slice_when cuts the record stream wherever two neighbouring records
  # disagree on group_id. No "current group id" sentinel is involved, so a
  # first record whose group_id is nil opens a group like any other instead
  # of being appended to a group that does not exist yet.
  @dataset.to_enum(:each)
          .slice_when { |previous, current| previous.group_id != current.group_id }
          .each { |group| emit_synonyms(groups, group) }

  filter_groups(groups) do |term, synonyms|
    yield(term, synonyms)
  end
end
Private Instance Methods
emit_synonyms(groups, group)
click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 50
#
# Folds one synonym group into +groups+ (term => Array of Synonym).
#
# groups - Hash that is updated in place.
# group  - Array of records responding to expansion_type, lexeme_id and
#          notation, or nil (in which case nothing happens).
#
# Records whose expansion_type is :never are ignored, and a group with at
# most one remaining record produces nothing. Every remaining record whose
# expansion_type is :always becomes a key of +groups+; its value lists one
# Synonym per remaining record, weighted nil for the record itself, 0.8 for
# records with the same lexeme_id and 0.6 for all others.
def emit_synonyms(groups, group)
  return if group.nil?

  candidates = group.reject { |entry| entry.expansion_type == :never }
  return if candidates.size <= 1

  candidates.each_with_index do |head, head_index|
    next unless head.expansion_type == :always

    headword = head.notation
    expansions = candidates.each_with_index.map do |entry, index|
      weight =
        if index == head_index
          nil
        elsif entry.lexeme_id == head.lexeme_id
          0.8
        else
          0.6
        end
      Synonym.new(entry.notation, weight)
    end

    # A headword may already have been recorded by an earlier call
    # (e.g.: 働き手); merge with a union instead of overwriting it.
    groups[headword] = groups.key?(headword) ? groups[headword] | expansions : expansions
  end
end
filter_groups(groups) { |term, synonyms| ... }
click to toggle source
# File lib/groonga-synonym/sudachi.rb, line 79
#
# Prunes each synonym list in +groups+ and yields (term, pruned_synonyms).
#
# groups - Hash of term => Array of objects responding to term and weight.
#          The stored arrays are not modified; a new array is yielded.
#
# For every term the weighted synonyms (weight not nil) are classified, in
# this order of precedence:
#   1. its term contains the term of another weighted synonym -> dropped
#   2. its term is contained in the group's term              -> "inside"
#   3. its term contains the group's term                     -> dropped
# When there are "inside" synonyms, only the one with the shortest term is
# kept, the nil-weight entry is removed, and a new Synonym for that entry's
# term is appended with weight (1.0 - shortest.weight).round(2).
def filter_groups(groups)
  groups.each do |term, synonyms|
    unweighted, weighted = synonyms.partition { |synonym| synonym.weight.nil? }
    # When several entries have no weight, the last one is the one used.
    typical = unweighted.last

    containing_others = []
    inside_term = []
    around_term = []
    weighted.each do |candidate|
      contains_other = weighted.any? do |other|
        other != candidate && candidate.term.include?(other.term)
      end

      if contains_other
        containing_others << candidate
      elsif term.include?(candidate.term)
        inside_term << candidate
      elsif candidate.term.include?(term)
        around_term << candidate
      end
    end

    # Array#- returns a new array, so the list stored in +groups+ is untouched.
    kept = synonyms - containing_others - around_term

    unless inside_term.empty?
      shortest, *longer = inside_term.sort_by { |candidate| candidate.term.size }
      kept -= longer
      kept.delete(typical)
      kept << Synonym.new(typical.term, (1.0 - shortest.weight).round(2))
    end

    yield(term, kept)
  end
end