class Yanbi::WordBag
Attributes
words[R]
Public Class Methods
load(filename)
click to toggle source
# File lib/wordbags/wordbag.rb, line 44
#
# Creates a new bag and fills it from "<filename>.yml" through the instance
# method #load.
#
# filename - path of the saved bag, without the ".yml" extension.
#
# Returns the newly created WordBag. The instance is captured and returned
# explicitly because the instance-level #load hands back the value of its
# last expression rather than the bag, so chaining `WordBag.new.load(...)`
# did not give callers a WordBag.
def self.load(filename)
  bag = WordBag.new
  bag.load(filename)
  bag
end
new(corpus=nil)
click to toggle source
# File lib/wordbags/wordbag.rb, line 18
#
# Sets up an empty bag (no words, no counts) and, when a corpus is given,
# passes it to standardize.
#
# corpus - optional initial text; nothing is added when it is nil or false.
def initialize(corpus=nil)
  @words  = []
  @counts = {}
  return unless corpus

  standardize(corpus)
end
Public Instance Methods
add_file(filename)
click to toggle source
# File lib/wordbags/wordbag.rb, line 24
#
# Reads the entire file at +filename+ and adds its text to the bag by passing
# it to standardize.
#
# filename - path of the text file to read.
#
# File.read is used so the file handle is closed as soon as the contents have
# been read; the previous File.open(filename).read form never closed it.
#
# Returns whatever standardize returns.
def add_file(filename)
  standardize(File.read(filename))
end
add_text(text)
click to toggle source
# File lib/wordbags/wordbag.rb, line 29
#
# Adds a string of text to the bag by passing it to standardize.
#
# text - the raw text to add.
#
# Returns whatever standardize returns.
def add_text(text)
  standardize(text)
end
between_counts(min, max=nil)
click to toggle source
# File lib/wordbags/wordbag.rb, line 59
#
# Returns the entries of @words (order and duplicates preserved) whose
# recorded count lies between +min+ and +max+, both inclusive.
#
# min - lowest count to keep.
# max - highest count to keep; nil (the default) means no upper limit.
#
# Words that have no entry in @counts are left out of the result.
def between_counts(min, max=nil)
  in_range = @counts.select do |_word, count|
    count >= min && (max.nil? || count <= max)
  end

  # Hash#key? is a direct lookup; the former counts.keys.include? rebuilt the
  # key array and scanned it linearly for every word in the bag.
  @words.select { |word| in_range.key?(word) }
end
empty?()
click to toggle source
# File lib/wordbags/wordbag.rb, line 69
#
# Reports whether the bag currently holds no words.
#
# Returns true when @words has no entries, false otherwise.
def empty?
  @words.length.zero?
end
intersection(other)
click to toggle source
# File lib/wordbags/wordbag.rb, line 65
#
# Computes the words this bag has in common with another.
#
# other - any object that responds to #words.
#
# Returns the result of `&` applied to this bag's words and other.words.
def intersection(other)
  mine   = words
  theirs = other.words
  mine & theirs
end
load(filename)
click to toggle source
# File lib/wordbags/wordbag.rb, line 39
#
# Replaces the bag's words with the list stored in "<filename>.yml" and
# rebuilds the counts from it.
#
# filename - path of the saved bag, without the ".yml" extension.
#
# @counts is cleared before recounting. @words is replaced wholesale, so
# keeping the earlier tallies would leave counts for words that are no longer
# in the bag and inflate the counts of words that are.
#
# NOTE(review): YAML.load_file parses whatever the file contains, and what it
# is willing to instantiate depends on the installed YAML library version.
# Only load files from a trusted source.
#
# Returns the result of update_counts.
def load(filename)
  @words = YAML.load_file(filename + ".yml")
  @counts = {}
  update_counts(@words)
end
remove(words)
click to toggle source
# File lib/wordbags/wordbag.rb, line 52
#
# Drops every occurrence of each given word from the bag and discards its
# count.
#
# words - collection of words to remove.
#
# Returns the value of words.each.
def remove(words)
  words.each do |unwanted|
    # Array#delete removes all elements equal to the argument.
    @words.delete(unwanted)
    @counts.delete(unwanted)
  end
end
save(filename)
click to toggle source
# File lib/wordbags/wordbag.rb, line 33
#
# Writes the bag's word list as YAML to "<filename>.yml", overwriting any
# existing file.
#
# filename - destination path, without the ".yml" extension.
#
# The block form of File.open closes the file even if writing raises; the
# previous File.new / write / close sequence leaked the handle in that case.
#
# Returns nil.
def save(filename)
  File.open(filename + ".yml", "w") do |out|
    out.write(@words.to_yaml)
  end
  nil
end
word_counts(min=1)
click to toggle source
# File lib/wordbags/wordbag.rb, line 48
#
# Returns a hash of word => count containing only the words whose count is
# at least +min+.
#
# min - lowest count to include (defaults to 1).
def word_counts(min=1)
  @counts.reject { |_word, count| count < min }
end
Private Instance Methods
process(raw) { |x| ... }
click to toggle source
# File lib/wordbags/wordbag.rb, line 79
#
# Tokenizes raw text and adds the resulting words to the bag.
#
# The text is lowercased, and every character that is not a word character,
# whitespace, apostrophe or hyphen is replaced by a space. The result is
# split on whitespace, and each piece is split again on hyphens.
#
# raw - the text to tokenize.
#
# Yields each token to the optional block and stores the block's return value
# in place of the token.
#
# Returns @words after the new tokens have been appended.
def process(raw)
  cleaned = raw.downcase.gsub(/[^\w\s'\-]/, ' ')

  # Splitting "world--wide" or "-test" on hyphens yields empty strings; these
  # are dropped so that "" is never stored or counted as a word.
  tokens = cleaned.split
                  .flat_map { |piece| piece.split(/-/) }
                  .reject(&:empty?)

  tokens.map! { |token| yield token } if block_given?

  update_counts(tokens)
  @words.concat(tokens)
end
standardize(raw)
click to toggle source
# File lib/wordbags/wordbag.rb, line 75
#
# Entry point used by the public add/initialize methods; passes the raw text
# to process without a block.
#
# raw - the text to add.
#
# Returns whatever process returns.
def standardize(raw)
  process(raw)
end
update_counts(data)
click to toggle source
# File lib/wordbags/wordbag.rb, line 93
#
# Adds one to the tally in @counts for every element of +data+, starting a
# word at 1 the first time it is seen.
#
# data - collection of words to count.
#
# Returns the value of data.each.
def update_counts(data)
  data.each do |word|
    seen = @counts[word]
    @counts[word] = seen.nil? ? 1 : seen + 1
  end
end