class RNlp::Idf

Attributes

lang[R]

The language code; must be 'ja' or 'en'.

Public Class Methods

new(lang) click to toggle source
# File lib/r_nlp/idf.rb, line 6
# Stores +lang+ on the instance. When +lang+ is neither 'ja' nor 'en',
# a message is printed to standard output and the process is terminated
# via Kernel#exit.
def initialize(lang)
  @lang = lang
  # Supported languages need no further handling.
  return if lang == 'ja' || lang == 'en'

  puts "#{@lang} is not compatible language\nlang should be 'ja' or 'en'"
  exit
end

Public Instance Methods

calc_idf(word, documents) click to toggle source

documents should be an array of strings

# File lib/r_nlp/idf.rb, line 14
# Computes the inverse document frequency of +word+ over +documents+ as
# log2(N / df) + 1, where N is the number of documents and df is the
# number of documents that contain +word+.
#
# word      - the term to look for. It is converted with to_s and matched
#             literally (regex metacharacters are escaped); a Regexp
#             argument is used as the pattern as is.
# documents - an Array of String.
#
# Returns a Float. The result is Float::INFINITY when no document contains
# +word+, and NaN when +documents+ is empty.
def calc_idf(word, documents)
  # Escape the term so characters such as ".", "+" or "(" are matched
  # literally instead of being interpreted as regex syntax.
  pattern = word.is_a?(Regexp) ? word : Regexp.new(Regexp.escape(word.to_s))
  df = documents.count { |document| document =~ pattern }
  # fdiv forces floating-point division, so df == 0 yields Infinity
  # rather than raising ZeroDivisionError.
  Math.log2(documents.size.fdiv(df)) + 1
end