class RNlp::Idf
Attributes
lang[R]
Language code of the corpus; the supported values are 'ja' and 'en'.
Public Class Methods
new(lang)
click to toggle source
# File lib/r_nlp/idf.rb, line 6
#
# Creates an IDF calculator bound to one language.
#
# @param lang [String] language code; only 'ja' and 'en' are accepted
# @raise [ArgumentError] when lang is neither 'ja' nor 'en'
def initialize(lang)
  # Raise instead of printing and calling exit: library code should not
  # terminate the host process, and the caller can now rescue the failure.
  unless %w[ja en].include?(lang)
    raise ArgumentError, "#{lang} is not compatible language\nlang should be 'ja' or 'en'"
  end

  @lang = lang
end
Public Instance Methods
calc_idf(word, documents)
click to toggle source
documents should be an array of strings
# File lib/r_nlp/idf.rb, line 14
#
# Computes the inverse document frequency of +word+ over +documents+ as
# log2(N / df) + 1, where N is the number of documents and df is the number
# of documents containing +word+.
#
# @param word [String] term to look for; matched literally and
#   case-sensitively as a substring of each document
# @param documents [Array<String>] the corpus, one string per document
# @return [Float] the IDF score; Float::INFINITY when no document contains
#   +word+, NaN when +documents+ is empty
def calc_idf(word, documents)
  # Still stored on the instance, as the original implementation did.
  @word = word
  @documents = documents

  # Literal substring test: interpolating the word into a regexp let
  # metacharacters ("c++", "a.b", "(") alter the match or raise RegexpError.
  term = word.to_s
  df = documents.count { |document| document.include?(term) }

  # Float division keeps the ratio fractional; df == 0 yields Infinity.
  Math.log2(documents.size / df.to_f) + 1
end