class Lsi4R::Doc

Constants

TOKEN_RE

Attributes

transform[R]
key[R]
norm[R]
vector[R]

Public Class Methods

new(key, value, list, freq) click to toggle source
   # File lib/lsi4r/doc.rb
# Creates a document identified by +key+.
#
# +value+ is either a Hash or anything build_hash accepts:
# * Hash -- each key is looked up in +list+ and its value is stored as given
#   under the resulting index;
# * otherwise -- build_hash fills the map by counting occurrences.
#
# For every entry stored, the matching slot of +freq+ is incremented by one.
# The map defaults to 0 for unknown indexes and <tt>@total</tt> starts at 1.
# Finally the raw vector is installed through vector=.
def initialize(key, value, list, freq)
  @key   = key
  @list  = list
  @freq  = freq
  @total = 1
  @map   = counts = Hash.new(0)

  if value.is_a?(Hash)
    value.each do |term, weight|
      index = list[term]
      counts[index] = weight
      freq[index] += 1
    end
  else
    build_hash(value, list, counts).each_key { |index| freq[index] += 1 }
  end

  self.vector = raw_vector
end
transform=(transform) click to toggle source
   # File lib/lsi4r/doc.rb
# Chooses the implementation behind +transformed_vector+.
#
# +transform+ may be
# * +nil+ -- treated as <tt>:raw</tt>;
# * a Symbol or String +name+ -- +transformed_vector+ becomes an alias of
#   <tt>name_vector</tt> and the name is remembered as a Symbol;
# * a Proc or UnboundMethod -- it becomes the body of +transformed_vector+
#   and its +to_s+ is remembered.
#
# Raises TypeError for any other argument.
#
# Relies on +alias_method+ / +define_method+ being callable on the receiver.
def transform=(transform)
  transform ||= :raw
  target = :transformed_vector

  @transform =
    case transform
    when Symbol, String
      alias_method(target, "#{transform}_vector")
      transform.to_sym
    when Proc, UnboundMethod
      define_method(target, transform)
      transform.to_s
    else
      raise TypeError,
            "wrong argument type #{transform.class} " \
            '(expected Symbol/String or Proc/UnboundMethod)'
    end
end

Public Instance Methods

foat_vector(*args) click to toggle source

TODO: Confirm what “foat” stands for (presumably “first-order association transform”) and document the transform accordingly.

   # File lib/lsi4r/doc.rb
# TODO: "first-order association transform" ???
#
# Rescales the raw vector: every entry +v+ becomes <tt>log(v + 1) / q</tt>,
# where +q+ is the entropy <tt>-sum(w * log(w))</tt> over the positive
# entries and <tt>w = v / sum</tt>.
#
# All arguments are passed on to raw_vector. The raw vector is returned
# unchanged when its sum is not greater than 1, or when the entropy is not
# positive (i.e. a single distinct entry carries all the weight), because
# dividing by a zero entropy would only produce Infinity/NaN.
def foat_vector(*args)
  vec = raw_vector(*args)
  sum = vec.sum
  return vec unless sum > 1

  entropy = 0.0
  vec.each { |v| entropy -= (w = v / sum) * Math.log(w) if v > 0 }

  # w == 1 for a lone repeated entry => log(w) == 0 => entropy == 0.
  return vec unless entropy > 0

  vec.map { |v| Math.log(v + 1) / entropy }
end
inspect() click to toggle source
    # File lib/lsi4r/doc.rb
# Short human-readable form: <tt>Class@key/size</tt>, with the key rendered
# through +inspect+ (<tt>%p</tt>) and the size as an integer.
def inspect
  format('%s@%p/%d', self.class, key, size)
end
raw_vector(size = @list.size, *) click to toggle source
   # File lib/lsi4r/doc.rb
# Returns a GSL::Vector of +size+ elements (default: <tt>@list.size</tt>)
# obtained from <tt>GSL::Vector.calloc</tt>, in which every (index, value)
# pair yielded by +each+ has been written to <tt>vec[index]</tt>.
# Any further arguments are accepted and ignored.
def raw_vector(size = @list.size, *)
  GSL::Vector.calloc(size).tap do |vec|
    each { |index, value| vec[index] = value }
  end
end
tfidf_vector(*args) click to toggle source
   # File lib/lsi4r/doc.rb
# Weights the raw vector: a positive entry +v+ at index +i+ becomes
# <tt>log(d / @freq[i]) * v / s</tt>, where +s+ is the sum of the raw vector
# and +d+ is the second argument converted to Float (falling back to the
# current <tt>@total</tt>). Non-positive entries are passed through as is.
#
# All arguments are forwarded to raw_vector. As a side effect, +d+ is stored
# in <tt>@total</tt>.
def tfidf_vector(*args)
  vec  = raw_vector(*args)
  freq = @freq
  sum  = vec.sum
  docs = @total = args.fetch(1, @total).to_f

  vec.enum_for(:map).with_index do |count, index|
    next count unless count > 0

    Math.log(docs / freq[index]) * count / sum
  end
end
vector=(vec) click to toggle source
    # File lib/lsi4r/doc.rb
# Stores +vec+ as the document vector and caches <tt>vec.normalize</tt> as
# its norm. Nothing is assigned if +normalize+ raises.
def vector=(vec)
  pair = [vec, vec.normalize]
  @vector, @norm = pair
end

Private Instance Methods

build_enum(value, re = TOKEN_RE) click to toggle source
    # File lib/lsi4r/doc.rb
# Turns +value+ into a collection of tokens.
#
# * If +value+ responds to +read+ (e.g. an IO), its content is read first.
# * If the result responds to +split+ (e.g. a String), it is split on +re+
#   and empty tokens are dropped. <tt>String#split</tt> with a Regexp keeps
#   a leading empty string when the text starts with a separator, which
#   would otherwise be counted as a term.
# * Anything else is returned untouched and is expected to be enumerable
#   already.
def build_enum(value, re = TOKEN_RE)
  value = value.read if value.respond_to?(:read)
  return value unless value.respond_to?(:split)

  value.split(re).reject(&:empty?)
end
build_hash(value, list, hash) click to toggle source
    # File lib/lsi4r/doc.rb
# Counts the tokens of +value+ into +hash+ and returns +hash+.
#
# Each token produced by build_enum is looked up in +list+; the entry of
# +hash+ under the resulting index is incremented by one (so +hash+ needs a
# numeric default for unseen indexes).
def build_hash(value, list, hash)
  tokens = build_enum(value)

  tokens.each do |token|
    index = list[token]
    hash[index] += 1
  end

  hash
end