class Yanbi::Corpus
Attributes
all[R]
bags[R]
docs[R]
Public Class Methods
new(klass=WordBag)
click to toggle source
# File lib/corpus.rb, line 22
# Sets up an empty corpus.
#
# klass - class used for the aggregate bag; it is instantiated with no
#         arguments here (defaults to WordBag).
def initialize(klass=WordBag)
  # Per-document storage: raw texts and their bags.
  @docs, @bags = [], []
  # No index has been built yet; to_index creates it on demand.
  @index = nil
  # Aggregate bag covering every document added so far.
  @all = klass.new
end
Public Instance Methods
add_doc(doc, comment=nil)
click to toggle source
# File lib/corpus.rb, line 48
# Adds a single document to the corpus.
#
# doc     - String holding the document text. It is not modified; cleaning
#           is performed on a copy, so frozen strings are accepted.
# comment - optional pattern (anything String#gsub accepts); every match is
#           removed from the text before it is stored.
#
# After comment removal the text is stripped of surrounding whitespace.
# A document that ends up empty is ignored. Otherwise a new bag of the same
# class as the aggregate bag is created for it, the text is added to the
# aggregate bag, the cleaned text is stored, and the cached index is cleared.
#
# Always returns nil.
def add_doc(doc, comment=nil)
  # Non-destructive gsub/strip: the caller's string must stay untouched.
  text = comment ? doc.gsub(comment, '') : doc
  text = text.strip
  return if text.empty?

  @bags << @all.class.new(text)
  @all.add_text text
  @docs << text
  # The vocabulary changed, so any previously built index is stale.
  @index = nil
end
add_file(docpath, delim=nil, comment=nil)
click to toggle source
# File lib/corpus.rb, line 33
# Reads a file and adds its contents to the corpus.
#
# docpath - path of the file to read.
# delim   - optional separator passed to String#split; when given, each
#           piece of the file is added as its own document.
# comment - optional comment pattern forwarded to add_doc.
#
# The text is converted to UTF-8 and invalid byte sequences are dropped
# before it is split or added.
#
# Returns the array of pieces when delim is given, otherwise whatever
# add_doc returns.
def add_file(docpath, delim=nil, comment=nil)
  # File.read opens, reads and closes in one call, so the handle is
  # released even if reading raises.
  raw = File.read(docpath)
  raw = raw.encode("UTF-8", invalid: :replace, replace: "")

  if delim
    raw.split(delim).each { |d| add_doc(d, comment) }
  else
    add_doc(raw, comment)
  end
end
each_doc() { |bag, doc| ... }
click to toggle source
# File lib/corpus.rb, line 60
# Yields every document to the given block as (bag, doc), where bag is the
# document's bag and doc is the stored text at the same position in @docs.
#
# The word count of each bag is sampled before and after the block runs.
# If the totals differ, the block changed at least one bag, and the
# aggregate bag is rebuilt via rebuild_all.
#
# Returns nil when nothing changed, otherwise the result of rebuild_all.
def each_doc
  before = 0
  after = 0

  # @bags holds bare bag objects, so the document text has to be paired in
  # from @docs; iterating @bags alone would always yield nil for doc.
  @bags.zip(@docs).each do |bag, doc|
    before += bag.words.count
    yield bag, doc
    after += bag.words.count
  end

  rebuild_all if before != after
end
size()
click to toggle source
# File lib/corpus.rb, line 29
# Returns the number of documents currently stored in the corpus.
def size
  @docs.length
end
to_index()
click to toggle source
# File lib/corpus.rb, line 73
# Returns the dictionary for this corpus, building it on first use.
#
# The dictionary is created from the unique words of the aggregate bag and
# the aggregate bag's class, then cached in @index until it is reset to nil.
def to_index
  return @index unless @index.nil?

  vocabulary = all.words.uniq
  @index = Yanbi::Dictionary.new(vocabulary, @all.class)
end
Private Instance Methods
rebuild_all()
click to toggle source
# File lib/corpus.rb, line 84
# Recreates the aggregate bag from the per-document bags.
#
# A fresh bag of the same class replaces @all, and the words of every
# document bag are fed back into it as space-joined text. The cached index
# is cleared as well, because it was built from the previous aggregate bag.
#
# Returns @bags.
def rebuild_all
  @all = @all.class.new
  # Force to_index to build a new dictionary from the rebuilt bag.
  @index = nil
  @bags.each do |bag|
    @all.add_text bag.words.join(' ')
  end
end