class Lsi4R::Doc
Constants
- TOKEN_RE
Attributes
transform[R]
key[R]
norm[R]
vector[R]
Public Class Methods
new(key, value, list, freq)
click to toggle source
# File lib/lsi4r/doc.rb, line 61
#
# Builds a document entry.
#
# Stores +key+, +list+ and +freq+, sets <tt>@total</tt> to 1 and
# <tt>@map</tt> to a fresh Hash whose default value is 0.
#
# * When +value+ is a Hash, every key is looked up in +list+ and the
#   associated value is stored in the map under the resulting index.
# * Otherwise +value+ is handed to +build_hash+, which fills the map.
#
# In both cases <tt>freq[index]</tt> is incremented for the indices
# that were touched. Finally the document's vector is set from
# +raw_vector+.
def initialize(key, value, list, freq)
  @key, @list, @freq, @total = key, list, freq, 1
  @map = hash = Hash.new(0)

  if value.is_a?(Hash)
    value.each do |k, v|
      i = list[k]
      hash[i] = v
      freq[i] += 1
    end
  else
    build_hash(value, list, hash).each_key { |i| freq[i] += 1 }
  end

  self.vector = raw_vector
end
transform=(transform)
click to toggle source
# File lib/lsi4r/doc.rb, line 43
#
# Selects the implementation behind +transformed_vector+ and records
# the choice in <tt>@transform</tt>. The receiver must be a
# Class/Module, since +alias_method+ / +define_method+ are used.
#
# transform::
#   * +nil+ is treated as <tt>:raw</tt>.
#   * Symbol or String: +transformed_vector+ becomes an alias of
#     <tt>"#{transform}_vector"</tt>; the Symbol form is stored.
#   * Proc or UnboundMethod: +transformed_vector+ is defined from it;
#     <tt>transform.to_s</tt> is stored.
#
# Raises TypeError for any other argument type.
def transform=(transform)
  target = :transformed_vector

  @transform = case transform ||= :raw
    when Symbol, String
      alias_method(target, "#{transform}_vector")
      transform.to_sym
    when Proc, UnboundMethod
      define_method(target, transform)
      transform.to_s
    else
      # Adjacent literals are joined at parse time, so no string is
      # mutated while building the message.
      raise TypeError, "wrong argument type #{transform.class} " \
                       '(expected Symbol/String or Proc/UnboundMethod)'
  end
end
Public Instance Methods
foat_vector(*args)
click to toggle source
TODO: “first-order association transform” ???
# File lib/lsi4r/doc.rb, line 84
#
# Returns a weighted copy of +raw_vector+: every component +v+ becomes
# <tt>log(v + 1) / q</tt>, where +q+ is the sum over all positive
# components of <tt>-(v / s) * log(v / s)</tt> and +s+ is the sum of
# the raw vector.
#
# The raw vector is returned unchanged when
# * its sum is not greater than 1, or
# * +q+ is zero (all weight sits on a single component), because the
#   division would otherwise yield Infinity/NaN entries.
#
# All arguments are forwarded to +raw_vector+.
def foat_vector(*args)
  vec = raw_vector(*args)
  total = vec.sum
  return vec unless total > 1

  entropy = 0
  vec.each do |v|
    next unless v > 0

    weight = v / total
    entropy -= weight * Math.log(weight)
  end

  # Guard against dividing by zero below.
  return vec if entropy.zero?

  vec.map { |v| Math.log(v + 1) / entropy }
end
inspect()
click to toggle source
# File lib/lsi4r/doc.rb, line 106
#
# Short description of the form <tt>Class@key/size</tt>, where the key
# is rendered with +inspect+ and the size as an integer.
def inspect
  format('%s@%p/%d', self.class, key, size)
end
raw_vector(size = @list.size, *)
click to toggle source
# File lib/lsi4r/doc.rb, line 77
#
# Returns a new GSL::Vector of +size+ components (default: the size of
# <tt>@list</tt>) obtained from <tt>GSL::Vector.calloc</tt>, with the
# value of every pair yielded by +each+ written at the pair's index.
#
# Any further arguments are accepted and ignored.
def raw_vector(size = @list.size, *)
  GSL::Vector.calloc(size).tap do |vec|
    each { |index, value| vec[index] = value }
  end
end
tfidf_vector(*args)
click to toggle source
# File lib/lsi4r/doc.rb, line 92
#
# Returns a weighted copy of +raw_vector+: every positive component
# +v+ at index +i+ becomes <tt>log(d / freq[i]) * v / s</tt>, where
# +s+ is the sum of the raw vector, +freq+ is <tt>@freq</tt> and +d+
# is the total described below. Other components are kept as they are.
#
# All arguments are forwarded to +raw_vector+. The second argument, if
# given, replaces <tt>@total</tt>; in either case <tt>@total</tt> is
# converted to a Float and stored back.
def tfidf_vector(*args)
  vec = raw_vector(*args)
  freq = @freq
  sum = vec.sum
  total = @total = args.fetch(1, @total).to_f

  # enum_for(:map) rather than a blockless #map, so only #map taking a
  # block is required of the vector.
  vec.enum_for(:map).with_index do |v, i|
    v > 0 ? Math.log(total / freq[i]) * v / sum : v
  end
end
vector=(vec)
click to toggle source
# File lib/lsi4r/doc.rb, line 102
#
# Stores +vec+ as the document's vector and the result of
# <tt>vec.normalize</tt> as its norm.
def vector=(vec)
  # Parallel assignment: +normalize+ runs before either variable is
  # set, so a failure there leaves both untouched.
  @vector, @norm = vec, vec.normalize
end
Private Instance Methods
build_enum(value, re = TOKEN_RE)
click to toggle source
# File lib/lsi4r/doc.rb, line 117
#
# Turns +value+ into something that can be iterated over:
#
# 1. if it responds to +read+, it is replaced by what +read+ returns;
# 2. if the result responds to +split+, it is split on +re+
#    (default: TOKEN_RE);
# 3. anything else is returned as is.
def build_enum(value, re = TOKEN_RE)
  value = value.read if value.respond_to?(:read)
  value = value.split(re) if value.respond_to?(:split)
  value
end
build_hash(value, list, hash)
click to toggle source
# File lib/lsi4r/doc.rb, line 112
#
# Counts the items of +value+ into +hash+: each item produced by
# +build_enum+ is looked up in +list+ and the entry of +hash+ under
# the resulting index is incremented by 1.
#
# Returns +hash+.
def build_hash(value, list, hash)
  build_enum(value).each { |token| hash[list[token]] += 1 }
  hash
end