class Lsi4R

Constants

DEFAULT_CUTOFF
DEFAULT_EPSILON
DEFAULT_TRANSFORM
VERSION

Public Class Methods

build(items, options = {}) click to toggle source
   # File lib/lsi4r.rb
# Creates a new instance from +items+ and builds it with +options+.
#
# Returns the instance when its +build+ call yields a truthy result,
# +nil+ otherwise.
def build(items, options = {})
  instance = new(items)
  return unless instance.build(options)

  instance
end
each_norm(items, options = {}, build_options = {}, &block) click to toggle source
   # File lib/lsi4r.rb
# Creates a new instance from +items+, builds it with +build_options+ and,
# if the build yields a truthy result, iterates via the instance-level
# +each_norm+ (all keys) with +options+ and the given block.
#
# Returns whatever the instance-level +each_norm+ returns, or +nil+ when
# the build did not succeed.
def each_norm(items, options = {}, build_options = {}, &block)
  instance = new(items)
  return unless instance.build(build_options)

  instance.each_norm(nil, options, &block)
end
new(items = {}) click to toggle source
   # File lib/lsi4r.rb
# Resets the internal state, then stores every entry of +items+.
#
# +items+ is iterated with two block parameters, so it may yield
# key/value pairs or single elements; when the value is +nil+ or +false+
# the key itself is stored as the value.
def initialize(items = {})
  reset

  items.each do |key, value|
    self[key] = value || key
  end
end

Public Instance Methods

<<(value) click to toggle source
   # File lib/lsi4r.rb
# Adds +value+ keyed by its own +object_id+.
#
# Returns whatever +add+ returns.
def <<(value)
  key = value.object_id
  add(key, value)
end
[]=(key, value) click to toggle source
   # File lib/lsi4r.rb
# Wraps +value+ in a new Doc (constructed with the key, the value and the
# shared @list and @freq tables) and stores it under +key+.
def []=(key, value)
  doc = Doc.new(key, value, @list, @freq)
  @hash[key] = doc
end
add(key, value = key) click to toggle source
   # File lib/lsi4r.rb
# Stores +value+ under +key+ (the value defaults to the key itself).
#
# Returns +self+ so calls can be chained.
def add(key, value = key)
  tap { |lsi| lsi[key] = value }
end
build(options = {}) click to toggle source
    # File lib/lsi4r.rb
# Builds the model from the current documents, provided there is more
# than one of them.
#
# +options+ is either a Hash of build options or a bare value, which is
# treated as the +:cutoff+ option.
#
# Returns the result of +build!+, or +nil+ when +size+ is not greater
# than one.
def build(options = {})
  return unless size > 1

  options = { cutoff: options } unless options.is_a?(Hash)
  build!(docs, @list, options)
end
each(key = nil, options = {}, &block)
Alias for: each_norm
each_norm(key = nil, options = {}, &block) click to toggle source
    # File lib/lsi4r.rb
# Same as +each_term+, but always with the +:norm+ option set to +true+
# (any +:norm+ entry in +options+ is overridden; +options+ itself is not
# modified).
def each_norm(key = nil, options = {}, &block)
  normalized = options.merge(norm: true)
  each_term(key, normalized, &block)
end
Also aliased as: each
each_term(key = nil, options = {}) { |doc, list, v| ... } click to toggle source
min

minimum value to consider

abs

minimum absolute value to consider

nul

exclude values whose absolute value is below a threshold (true to use DEFAULT_EPSILON, or a Float threshold)

new

exclude original terms / only yield new ones

    # File lib/lsi4r.rb
# Yields +doc+, +term+ and +value+ for every vector component delivered
# by +each_vector+ (restricted to +key+ when given), skipping filtered
# components. Without a block, returns an enumerator.
#
# Recognised +options+:
# min::  skip values below this minimum
# abs::  skip values whose absolute value is below this minimum
# nul::  skip values whose absolute value is below this threshold
#        (+true+ selects DEFAULT_EPSILON)
# new::  skip components for which <tt>doc.include?(index)</tt> holds
# norm:: passed on to +each_vector+ as its +norm+ argument
#
# NaN values are always skipped.
def each_term(key = nil, options = {})
  return enum_for(:each_term, key, options) unless block_given?

  min, abs, nul, only_new = options.values_at(:min, :abs, :nul, :new)
  nul = DEFAULT_EPSILON if nul == true

  terms = @invlist

  each_vector(key, options[:norm]) do |doc, vec|
    vec.to_enum(:each).with_index do |value, index|
      next if value.nan?
      next if min && value < min
      next if abs && value.abs < abs
      next if nul && value.abs < nul
      next if only_new && doc.include?(index)

      yield doc, terms[index], value
    end
  end
end
each_vector(key = nil, norm = true) { |doc, vec| ... } click to toggle source
   # File lib/lsi4r.rb
# Yields each document together with its vector: +doc.norm+ when +norm+
# is truthy, +doc.vector+ otherwise. Documents whose selected vector is
# +nil+ or +false+ are skipped.
#
# With a +key+, only the document stored under that key is considered
# (nothing is yielded if there is none); without one, all +docs+ are
# visited. Without a block, returns an enumerator.
#
# Returns +self+.
def each_vector(key = nil, norm = true)
  return enum_for(:each_vector, key, norm) unless block_given?

  targets =
    if key.nil?
      docs
    else
      found = self[key]
      found ? [found] : []
    end

  targets.each do |doc|
    vec = norm ? doc.norm : doc.vector
    yield doc, vec if vec
  end

  self
end
inspect() click to toggle source
    # File lib/lsi4r.rb
# Short summary of the form "Class@<size>/<number of entries in @list>".
def inspect
  format('%s@%d/%d', self.class, size, @list.size)
end
reset() click to toggle source
    # File lib/lsi4r.rb
# Reinitialises all internal tables to their empty state:
#
# @hash::    empty Hash
# @list::    Hash that assigns each previously unseen key the next
#            running index (its size at the time of first lookup)
# @freq::    Hash with a default value of 0
# @invlist:: empty Hash
#
# Returns +self+.
def reset
  @hash    = {}
  @list    = Hash.new { |index, term| index[term] = index.size }
  @freq    = Hash.new(0)
  @invlist = {}

  self
end
to_a(norm = true) click to toggle source
    # File lib/lsi4r.rb
# Collects one array per mapped document (+doc.norm+ when +norm+ is
# truthy, +doc.vector+ otherwise) and returns the transposed result.
#
# NOTE(review): the block takes the document from the SECOND value
# yielded by +map+; confirm that the +each+ backing +map+ yields
# (key, doc) pairs rather than the (doc, term, value) triples produced
# by +each_norm+.
def to_a(norm = true)
  reader = norm ? :norm : :vector
  map { |_, doc| doc.public_send(reader).to_a }.transpose
end

Private Instance Methods

build!(docs, list, options) click to toggle source
    # File lib/lsi4r.rb
# Performs the actual build for +docs+ using the term table +list+.
#
# Sets Doc.transform from the +:transform+ option (default
# DEFAULT_TRANSFORM), stores the inverted +list+ in @invlist, decomposes
# the document matrix with +SV_decomp+, applies +reduce+ with the
# +:cutoff+ option (default DEFAULT_CUTOFF) and assigns each column of
# the recombined matrix to the document at the same position.
#
# Returns the number of documents.
def build!(docs, list, options)
  Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)

  @invlist = list.invert

  size = docs.size

  # TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
  # MxN matrix, M<N, is not implemented (file svd.c, line 61)
  u, v, s = matrix(docs, list.size, size).SV_decomp

  cutoff  = options.fetch(:cutoff, DEFAULT_CUTOFF)
  reduced = u * reduce(s, cutoff) * v.trans

  reduced.enum_for(:each_col).with_index do |column, index|
    docs[index].vector = column.row
  end

  size
end
matrix(d = docs, m = @list.size, n = d.size) click to toggle source
    # File lib/lsi4r.rb
# Allocates an +m+ x +n+ GSL matrix and sets column +j+ to the
# transformed vector of the +j+-th entry of +d+.
#
# Defaults: +d+ is +docs+, +m+ is the size of @list, +n+ is the size
# of +d+.
def matrix(d = docs, m = @list.size, n = d.size)
  GSL::Matrix.alloc(m, n).tap do |mat|
    d.each_with_index do |doc, col|
      mat.set_col(col, doc.transformed_vector(m, n))
    end
  end
end
reduce(s, k, m = s.size) click to toggle source
k == nil

keep all

k >= 1

keep this many

0 < k < 1

keep (at most) this proportion

k <= 0

keep none

    # File lib/lsi4r.rb
# Zeroes trailing entries of +s+ according to +k+ and returns
# +s.to_m_diagonal+. +s+ is modified in place.
#
# k == nil or k >= m:: keep all entries
# k >= 1::             keep the first floor(k) entries
# 0 < k < 1::          keep the first floor(m * k) entries
# k <= 0::             zero every entry
def reduce(s, k, m = s.size)
  if k && k < m
    if k > 0
      keep = (k < 1 ? m * k : k).floor
      s[keep, m - keep] = 0
    else
      s.set_zero
    end
  end

  s.to_m_diagonal
end