class Lsi4R
Constants
- DEFAULT_CUTOFF
- DEFAULT_EPSILON
- DEFAULT_TRANSFORM
- VERSION
Public Class Methods
build(items, options = {})
click to toggle source
# File lib/lsi4r.rb, line 44
#
# Class-level convenience constructor: creates an instance from +items+
# and builds it with +options+.
#
# Returns the instance when the instance-level #build returns a truthy
# value, +nil+ otherwise.
def build(items, options = {})
  instance = new(items)
  return unless instance.build(options)

  instance
end
each_norm(items, options = {}, build_options = {}, &block)
click to toggle source
# File lib/lsi4r.rb, line 49
#
# Class-level shortcut: creates an instance from +items+, builds it with
# +build_options+ and, if the build returns a truthy value, calls the
# instance-level #each_norm for all documents (key +nil+) with +options+
# and the given block.
#
# Returns +nil+ when the build returns a falsy value.
def each_norm(items, options = {}, build_options = {}, &block)
  instance = new(items)
  return unless instance.build(build_options)

  instance.each_norm(nil, options, &block)
end
new(items = {})
click to toggle source
# File lib/lsi4r.rb, line 56
#
# Resets the internal state and registers every entry of +items+ via #[]=.
#
# +items+ is iterated with key/value block parameters; when the value is
# +nil+ or +false+ the key itself is stored as the value.
def initialize(items = {})
  reset

  items.each do |key, value|
    self[key] = value || key
  end
end
Public Instance Methods
<<(value)
click to toggle source
# File lib/lsi4r.rb, line 79
#
# Adds +value+ under its own +object_id+ as key. Returns whatever #add
# returns.
def <<(value)
  key = value.object_id
  add(key, value)
end
[]=(key, value)
click to toggle source
# File lib/lsi4r.rb, line 70
#
# Wraps +value+ in a Doc (constructed with +key+, +value+ and the shared
# @list and @freq tables) and stores it in @hash under +key+.
def []=(key, value)
  doc = Doc.new(key, value, @list, @freq)
  @hash[key] = doc
end
add(key, value = key)
click to toggle source
# File lib/lsi4r.rb, line 74
#
# Stores +value+ under +key+ via #[]= (the value defaults to the key
# itself) and returns +self+ so calls can be chained.
def add(key, value = key)
  tap { self[key] = value }
end
build(options = {})
click to toggle source
# File lib/lsi4r.rb, line 149
#
# Runs build! over the current documents and term list.
#
# +options+ may be a Hash, or any other value, which is then used as the
# +:cutoff+ option. Nothing is built (and +nil+ is returned) unless
# +size+ is greater than 1; otherwise the result of build! is returned.
def build(options = {})
  return unless size > 1

  options = { cutoff: options } unless options.is_a?(Hash)
  build!(docs, @list, options)
end
each_norm(key = nil, options = {}, &block)
click to toggle source
# File lib/lsi4r.rb, line 121
#
# Same as #each_term, but always with the +:norm+ option set to +true+
# (any +:norm+ entry in +options+ is overridden). +options+ itself is
# not modified.
def each_norm(key = nil, options = {}, &block)
  normalized = options.merge(norm: true)
  each_term(key, normalized, &block)
end
Also aliased as: each
each_term(key = nil, options = {}) { |doc, list, v| ... }
click to toggle source
- min: minimum value to consider
- abs: minimum absolute value to consider
- nul: exclude null values (true or Float)
- new: exclude original terms / only yield new ones
# File lib/lsi4r.rb, line 102
#
# Yields +doc+, the @invlist entry for the vector position, and the value
# for every vector component that passes the filters below. Vectors come
# from #each_vector, called with +key+ and <tt>options[:norm]</tt>.
# Without a block an Enumerator is returned.
#
# Filter options (a component is skipped when any applies; NaN values
# are always skipped):
# min:: skip values below this minimum
# abs:: skip values whose absolute value is below this minimum
# nul:: skip values whose absolute value is below this threshold;
#       +true+ selects DEFAULT_EPSILON
# new:: skip positions for which <tt>doc.include?(position)</tt> is true
def each_term(key = nil, options = {})
  return enum_for(:each_term, key, options) unless block_given?

  min, abs, nul, only_new = options.values_at(:min, :abs, :nul, :new)
  # +true+ is a request for the default threshold, not a threshold itself.
  nul = DEFAULT_EPSILON if nul == true

  terms = @invlist

  each_vector(key, options[:norm]) do |doc, vec|
    vec.enum_for(:each).with_index do |value, index|
      next if value.nan?
      next if min && value < min
      next if abs && value.abs < abs
      next if nul && value.abs < nul
      next if only_new && doc.include?(index)

      yield doc, terms[index], value
    end
  end
end
each_vector(key = nil, norm = true) { |doc, vec| ... }
click to toggle source
# File lib/lsi4r.rb, line 83
#
# Yields each document together with its vector: <tt>doc.norm</tt> when
# +norm+ is truthy, <tt>doc.vector</tt> otherwise. Documents whose
# selected vector is +nil+ or +false+ are skipped.
#
# With +key+ set, only the document stored under that key is considered
# (nothing is yielded if there is none); with +key+ +nil+ all documents
# are visited. Returns +self+, or an Enumerator when no block is given.
def each_vector(key = nil, norm = true)
  return enum_for(:each_vector, key, norm) unless block_given?

  emit = lambda do |doc|
    vec = norm ? doc.norm : doc.vector
    yield doc, vec if vec
  end

  if key.nil?
    docs.each(&emit)
  else
    doc = self[key]
    emit.call(doc) if doc
  end

  self
end
inspect()
click to toggle source
# File lib/lsi4r.rb, line 161
#
# Short description of the form <tt>Class@size/list-size</tt>, where the
# second number is the size of @list.
def inspect
  format('%s@%d/%d', self.class, size, @list.size)
end
reset()
click to toggle source
# File lib/lsi4r.rb, line 154
#
# Reinitializes all internal tables and returns +self+.
#
# @hash::    empty Hash
# @list::    Hash that assigns each unseen key the current table size on
#            first lookup (0, 1, 2, ...)
# @freq::    Hash with a default value of 0
# @invlist:: empty Hash
def reset
  @hash    = {}
  @list    = Hash.new { |index, term| index[term] = index.size }
  @freq    = Hash.new(0)
  @invlist = {}

  self
end
to_a(norm = true)
click to toggle source
# File lib/lsi4r.rb, line 165
#
# Returns the document vectors as a nested Array, transposed so that each
# inner Array holds one vector position across all documents.
#
# Uses <tt>doc.norm</tt> when +norm+ is truthy and <tt>doc.vector</tt>
# otherwise; each is converted with +to_a+.
#
# Iterates +docs+ directly instead of going through +map+: +each+ is
# documented as an alias of #each_norm, which yields (doc, term, value),
# so a <tt>|_, doc|</tt> block on +map+ would receive a term instead of
# a document.
def to_a(norm = true)
  docs.map { |doc| (norm ? doc.norm : doc.vector).to_a }.transpose
end
Private Instance Methods
build!(docs, list, options)
click to toggle source
# File lib/lsi4r.rb, line 172
#
# Performs the actual build for +docs+ over the term table +list+:
#
# 1. sets <tt>Doc.transform</tt> from <tt>options[:transform]</tt>
#    (default DEFAULT_TRANSFORM),
# 2. stores the inverted +list+ in @invlist,
# 3. decomposes the matrix built by #matrix via +SV_decomp+,
# 4. reduces +s+ with <tt>options[:cutoff]</tt> (default DEFAULT_CUTOFF)
#    via #reduce and multiplies <tt>u * reduced * v.trans</tt>,
# 5. assigns column +i+ of the product (converted with +row+) to
#    <tt>docs[i].vector</tt>.
#
# Returns the number of documents.
def build!(docs, list, options)
  Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)
  @invlist = list.invert

  doc_count = docs.size

  # TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
  # MxN matrix, M<N, is not implemented (file svd.c, line 61)
  u, v, s = matrix(docs, list.size, doc_count).SV_decomp

  cutoff  = options.fetch(:cutoff, DEFAULT_CUTOFF)
  product = u * reduce(s, cutoff) * v.trans

  product.enum_for(:each_col).with_index do |column, index|
    docs[index].vector = column.row
  end

  doc_count
end
matrix(d = docs, m = @list.size, n = d.size)
click to toggle source
# File lib/lsi4r.rb, line 187
#
# Allocates an +m+ x +n+ GSL::Matrix and sets column +j+ to the result of
# <tt>transformed_vector(m, n)</tt> of the +j+-th entry of +d+.
#
# Defaults: +d+ is +docs+, +m+ the size of @list, +n+ the size of +d+.
# Returns the filled matrix.
def matrix(d = docs, m = @list.size, n = d.size)
  GSL::Matrix.alloc(m, n).tap do |x|
    d.each_with_index do |doc, column|
      x.set_col(column, doc.transformed_vector(m, n))
    end
  end
end
reduce(s, k, m = s.size)
click to toggle source
- k == nil: keep all
- k >= 1: keep this many
- k < 1: keep (at most) this proportion
# File lib/lsi4r.rb, line 196
#
# Zeroes the trailing entries of +s+ in place according to the cutoff +k+
# and returns <tt>s.to_m_diagonal</tt>. +m+ defaults to <tt>s.size</tt>.
#
# k == nil or k >= m:: keep all entries
# 1 <= k < m::         keep the first <tt>k.floor</tt> entries
# 0 < k < 1::          keep the first <tt>(m * k).floor</tt> entries
# k <= 0::             zero everything (<tt>s.set_zero</tt>)
def reduce(s, k, m = s.size)
  if k && k < m
    if k > 0
      # Values below 1 are a proportion of m, anything else is a count.
      keep = (k < 1 ? m * k : k).floor
      s[keep, m - keep] = 0
    else
      s.set_zero
    end
  end

  s.to_m_diagonal
end