class Yanbi::WordBag

Attributes

words [R] (read-only)

Public Class Methods

load(filename)
# File lib/wordbags/wordbag.rb, line 44
# Creates an empty WordBag and fills it from "<filename>.yml" via the
# instance-level load.
#
# filename - path of the saved bag, without the ".yml" suffix.
#
# Returns whatever the instance-level load returns.
# NOTE(review): that appears to be the loaded word list rather than the bag
# itself - confirm what callers expect before relying on the result.
def self.load(filename)
  bag = WordBag.new
  bag.load(filename)
end
new(corpus=nil)
# File lib/wordbags/wordbag.rb, line 18
# Sets up an empty bag and, when a corpus is supplied, ingests it.
#
# corpus - optional raw text; skipped when nil (or otherwise falsy).
def initialize(corpus=nil)
  @counts = {}
  @words = []
  return unless corpus

  standardize(corpus)
end

Public Instance Methods

add_file(filename)
# File lib/wordbags/wordbag.rb, line 24
# Reads a whole file and adds its words to the bag.
#
# filename - path of the text file to ingest.
#
# Returns the result of standardize.
# Raises the usual File errors (e.g. Errno::ENOENT) when the file cannot be read.
def add_file(filename)
  # File.read opens, reads and closes in one call, so no handle is left
  # dangling until garbage collection.
  standardize(File.read(filename))
end
add_text(text)
# File lib/wordbags/wordbag.rb, line 29
# Adds the words of an in-memory string to the bag.
#
# text - raw text, handed unchanged to standardize.
#
# Returns the result of standardize.
def add_text(text)
  standardize(text)
end
between_counts(min, max=nil)
# File lib/wordbags/wordbag.rb, line 59
# Selects the words whose occurrence count lies within [min, max].
#
# min - lowest count a word may have (inclusive).
# max - highest count a word may have (inclusive); nil means no upper bound.
#
# Returns a new Array with the matching entries of @words, in their original
# order and with repetitions preserved.
def between_counts(min, max=nil)
  in_range = @counts.select do |_word, count|
    count >= min && (max.nil? || count <= max)
  end

  # Hash#key? is a constant-time lookup; the previous keys.include? rebuilt
  # and scanned the key array once per word.
  @words.select { |word| in_range.key?(word) }
end
empty?()
# File lib/wordbags/wordbag.rb, line 69
# Reports whether the bag currently holds no words.
#
# Returns true when @words has zero elements, false otherwise.
def empty?
  @words.size.zero?
end
intersection(other)
# File lib/wordbags/wordbag.rb, line 65
# Finds the words this bag shares with another one.
#
# other - any object responding to #words.
#
# Returns an Array of the distinct words present in both, ordered as they
# first appear in this bag (Array#& semantics).
def intersection(other)
  mine = words
  theirs = other.words
  mine & theirs
end
load(filename)
# File lib/wordbags/wordbag.rb, line 39
# Replaces the bag's contents with the word list stored in "<filename>.yml".
#
# filename - path of the saved bag, without the ".yml" suffix.
#
# Returns the result of update_counts for the loaded words.
# Raises the usual File/YAML errors when the file is missing or malformed.
def load(filename)
  # NOTE(review): depending on the Psych version, YAML.load_file may
  # instantiate arbitrary objects - only load files from trusted sources.
  @words = YAML.load_file(filename + ".yml")

  # The word list is replaced wholesale, so the tallies must start over too;
  # otherwise counts from the previous contents would linger or be inflated.
  @counts = {}
  update_counts(@words)
end
remove(words)
# File lib/wordbags/wordbag.rb, line 52
# Drops every occurrence of the given words from the bag, along with their
# tallies.
#
# words - a collection of words to discard; entries not in the bag are ignored.
#
# Returns the words argument (the value of each).
def remove(words)
  words.each do |unwanted|
    @counts.delete(unwanted)
    # Array#delete removes all elements equal (==) to the argument.
    @words.delete(unwanted)
  end
end
save(filename)
# File lib/wordbags/wordbag.rb, line 33
# Writes the bag's word list to "<filename>.yml" as YAML.
#
# filename - destination path, without the ".yml" suffix.
#
# Returns nil.
# Raises the usual File errors when the destination cannot be written.
def save(filename)
  # Serialize before touching the file: if to_yaml fails, nothing has been
  # opened or truncated. File.write closes the handle even when the write raises.
  File.write(filename + ".yml", @words.to_yaml)
  nil
end
word_counts(min=1)
# File lib/wordbags/wordbag.rb, line 48
# Lists the tallies of words that occur at least min times.
#
# min - lowest count to include (inclusive); defaults to 1.
#
# Returns a new Hash mapping word => count; @counts itself is not modified.
def word_counts(min=1)
  @counts.each_with_object({}) do |(word, tally), kept|
    kept[word] = tally if tally >= min
  end
end

Private Instance Methods

process(raw) { |x| ... }
# File lib/wordbags/wordbag.rb, line 79
# Tokenizes raw text and appends the resulting words to the bag.
#
# The text is lowercased; every character other than word characters,
# whitespace, apostrophes and hyphens becomes a space; the result is split on
# whitespace and hyphenated terms are broken into their parts.
#
# raw - the text to ingest (must respond to #downcase).
#
# Yields each token when a block is given; the block's return value is stored
# in place of the token.
#
# Returns @words after the new tokens have been appended.
def process(raw)
  cleaned = raw.downcase.gsub(/[^\w\s'\-]/, ' ')

  # Splitting "-foo" on the hyphen yields a leading "", which must not be
  # stored or counted as a word.
  tokens = cleaned.split.flat_map { |token| token.split('-') }.reject(&:empty?)

  tokens.map! { |token| yield token } if block_given?

  update_counts(tokens)
  @words.concat(tokens)
end
standardize(raw)
# File lib/wordbags/wordbag.rb, line 75
# Normalizes raw text and adds its words to the bag by delegating to process.
# No block is passed, so tokens are stored exactly as process splits them.
# NOTE(review): presumably a hook for subclasses to override - confirm.
#
# raw - the text to ingest.
#
# Returns the result of process.
def standardize(raw)
  process(raw)
end
update_counts(data)
# File lib/wordbags/wordbag.rb, line 93
# Increments the tally in @counts for every word in data, starting new words
# at 1.
#
# data - a collection of words responding to #each.
#
# Returns data (the value of each).
def update_counts(data)
  data.each do |token|
    seen = @counts[token]
    @counts[token] = seen.nil? ? 1 : seen + 1
  end
end