class Yanbi::Corpus

Attributes

all[R]
bags[R]
docs[R]

Public Class Methods

new(klass=WordBag) click to toggle source
# File lib/corpus.rb, line 22
# Sets up an empty corpus.
#
# @param klass [Class] class used for the corpus-wide bag; it is
#   instantiated here with no arguments
def initialize(klass=WordBag)
  # Aggregate bag for the whole corpus.
  @all = klass.new
  # Per-document storage; both start out empty.
  @docs = []
  # No index has been built yet.
  @index = nil
  @bags = []
end

Public Instance Methods

add_doc(doc, comment=nil) click to toggle source
# File lib/corpus.rb, line 48
# Adds a single document to the corpus.
#
# Every match of +comment+ is removed from the text and surrounding
# whitespace is stripped. A document that is empty after cleaning is
# ignored. Cleaning is done on a copy, so the caller's string is never
# modified and frozen strings are accepted.
#
# @param doc [String] raw document text
# @param comment [String, Regexp, nil] pattern whose matches are deleted
#   from the text; nil disables comment removal
# @return [nil]
def add_doc(doc, comment=nil)
  text = comment ? doc.gsub(comment, '') : doc
  text = text.strip
  return if text.empty?

  @bags << @all.class.new(text)
  @all.add_text text
  @docs << text
  # The corpus changed, so any previously built index is out of date.
  @index = nil
end
add_file(docpath, delim=nil, comment=nil) click to toggle source
# File lib/corpus.rb, line 33
# Reads a file and adds its contents to the corpus.
#
# The contents are converted to UTF-8, with invalid byte sequences
# replaced by an empty string. When +delim+ is given the text is split on
# it and every piece is added as its own document; otherwise the whole
# file becomes one document.
#
# @param docpath [String] path of the file to read
# @param delim [String, Regexp, nil] separator between documents in the file
# @param comment [String, Regexp, nil] passed through to add_doc
def add_file(docpath, delim=nil, comment=nil)
  # File.read opens, reads and closes in one call, so the handle is
  # released even when reading raises.
  raw = File.read(docpath)
  raw = raw.encode("UTF-8", invalid: :replace, replace: "")

  if delim
    raw.split(delim).each { |d| add_doc(d, comment) }
  else
    add_doc(raw, comment)
  end
end
each_doc() { |bag, doc| ... } click to toggle source
# File lib/corpus.rb, line 60
# Yields every document's bag together with its text.
#
# Word counts are summed before and after each yield. If the totals
# differ, the block changed a bag, and rebuild_all is called.
# Only the counts are compared, so an edit that leaves the total
# unchanged is not detected.
#
# @yieldparam bag [Object] the bag built for the document
# @yieldparam doc [String] the cleaned document text stored for that bag
def each_doc
  before = 0
  after = 0

  # @bags and @docs are appended to together in add_doc, so a bag and its
  # text share the same position.
  @bags.each_with_index do |bag, i|
    before += bag.words.count
    yield bag, @docs[i]
    after += bag.words.count
  end

  rebuild_all if before != after
end
size() click to toggle source
# File lib/corpus.rb, line 29
# Number of documents currently stored in the corpus.
#
# @return [Integer]
def size
  @docs.length
end
to_index() click to toggle source
# File lib/corpus.rb, line 73
# Returns a dictionary of the corpus' unique words.
#
# The dictionary is built on first use and then reused until @index is
# cleared again.
#
# @return [Yanbi::Dictionary]
def to_index
  @index ||= Yanbi::Dictionary.new(all.words.uniq, @all.class)
end

Private Instance Methods

rebuild_all() click to toggle source
# File lib/corpus.rb, line 84
# Recreates the corpus-wide bag from the per-document bags.
#
# A fresh bag of the same class replaces @all and is fed the words of
# every document bag. The cached index is dropped as well, because it was
# derived from the old @all and would otherwise be returned stale.
def rebuild_all
  @all = @all.class.new
  @index = nil
  @bags.each do |bag|
    @all.add_text bag.words.join(' ')
  end
end