module Synt::Similar
Public Instance Methods
compare(opts)
click to toggle source
# File lib/synt/similar.rb, line 11
# Compute a similarity score between two pieces of Ruby source.
#
# Options read from +opts+:
#   :compare          - first input (required)
#   :to               - second input (required)
#   "string-compare"  - when truthy the inputs are used as source text,
#                       otherwise they are read as file paths (see parse_data)
#   :algorithm        - key into +algorithms+; defaults to 'jaccard'
#   :ngram            - n-gram specification handed to ngram_range
#
# Missing inputs or an unknown algorithm are reported through +error+.
# Returns the algorithm's result as a Float rounded to two decimals.
def compare(opts)
  error 'no compare property provided' unless opts[:compare]
  error 'no to property provided' unless opts[:to]

  # Resolve the algorithm before touching any input so that an unknown name
  # is reported clearly instead of failing later with NoMethodError on nil.
  algorithm_name = (opts[:algorithm] || 'jaccard').to_sym
  algorithm = algorithms[algorithm_name]
  error "unknown algorithm: #{algorithm_name}" unless algorithm

  # TODO: string-compare sucks to reference..
  src = parse_data opts["string-compare"], opts[:compare]
  cmp = parse_data opts["string-compare"], opts[:to]

  n_start, n_end = ngram_range opts[:ngram]

  src_t = normalize_ripper_tokens Synt::Parser.parse(src)
  cmp_t = normalize_ripper_tokens Synt::Parser.parse(cmp)

  a = generate_ngrams src_t, n_start, n_end
  b = generate_ngrams cmp_t, n_start, n_end

  algorithm.compare(a, b).to_f.round(2)
end
Private Instance Methods
algorithms()
click to toggle source
# File lib/synt/similar.rb, line 33
# Lookup table from algorithm name (Symbol) to the constant that implements
# it. Jaccard and Tanimoto are referenced here but defined elsewhere.
def algorithms
  {
    jaccard: Jaccard,
    tanimoto: Tanimoto
  }
end
error(msg)
click to toggle source
# File lib/synt/similar.rb, line 88
# Print +msg+ on standard output, then terminate the process with exit
# status 1. Never returns to the caller.
def error(msg)
  $stdout.puts(msg)
  exit(1)
end
generate_ngrams(arr, start, nend)
click to toggle source
# File lib/synt/similar.rb, line 45
# Build the n-grams of +arr+ for every size from +start+ to +nend+ inclusive.
#
# arr   - Array of tokens.
# start - smallest n-gram size; nil means 1.
# nend  - largest n-gram size; nil means arr.length.
#
# When +nend+ exceeds arr.length a notice is printed and both bounds fall
# back to 1. For the plain unigram case (1..1) +arr+ itself is returned.
# Otherwise returns a new Array of Strings, each the concatenation of +n+
# consecutive tokens, ordered by size and then by position.
def generate_ngrams(arr, start, nend)
  nend ||= arr.length
  start ||= 1

  if nend > arr.length
    start = nend = 1
    puts 'ngram end value exceeds max length- setting start/end to: 1'
  end

  return arr if start == 1 && nend == 1 # short circuit: unigrams are the tokens

  # each_cons rejects sizes below 1, so clamp the lower bound.
  start = 1 if start < 1

  # Iterate over the requested sizes themselves (not their positions in the
  # range) and take exactly +size+ consecutive tokens for each gram.
  (start..nend).flat_map do |size|
    arr.each_cons(size).map(&:join)
  end
end
ngram_range(ngram)
click to toggle source
# File lib/synt/similar.rb, line 68
# Translate an n-gram specification into a [start, end] pair.
#
#   nil / false -> [1, 1]
#   "a..b"      -> [a, b]   (each side converted with to_i)
#   "all"       -> [nil, nil]
#   anything else, e.g. "3" -> [3, 3]
def ngram_range(ngram)
  return [1, 1] unless ngram

  if ngram =~ /\.\./
    low, high = ngram.split('..')
    [low.to_i, high.to_i]
  elsif ngram == 'all'
    [nil, nil]
  else
    size = ngram.to_i
    [size, size]
  end
end
normalize_ripper_tokens(tokens)
click to toggle source
# File lib/synt/similar.rb, line 84
# Drop nil entries and tokens that consist solely of whitespace (including
# the empty string). Returns a new Array; +tokens+ is not modified.
def normalize_ripper_tokens(tokens)
  # \A and \z anchor the whole token. With ^ and $ a multi-line token that
  # merely contains a blank line would be treated as blank and discarded.
  tokens.select { |token| token && token !~ /\A\s*\z/ }
end
parse_data(is_string, value)
click to toggle source
# File lib/synt/similar.rb, line 37
# Resolve an input to its text.
#
# is_string - when truthy, +value+ already is the text and is returned as is.
# value     - the text itself, or the path of a file to read.
#
# Returns the text. Raises the usual Errno errors when the file is unreadable.
def parse_data(is_string, value)
  return value if is_string

  # File.read only ever reads a file. IO.read would run a value starting
  # with "|" as a shell command.
  File.read(value)
end