class TextStat

Constants

GEM_PATH
VERSION

Public Class Methods

automated_readability_index(text) click to toggle source
# File lib/textstat.rb, line 113
# Computes the Automated Readability Index (ARI) of +text+.
#
# The score is 4.71 * (characters / words) + 0.5 * (words / sentences) - 21.43,
# rounded to one decimal place.
#
# Returns 0.0 when the text has no words or no sentences. Float division by
# zero yields NaN/Infinity instead of raising ZeroDivisionError, so the empty
# case is checked explicitly rather than rescued.
def self.automated_readability_index(text)
  words = lexicon_count(text)
  sentences = sentence_count(text)
  return 0.0 if words.zero? || sentences.zero?

  chars_per_word = char_count(text).to_f / words
  words_per_sentence = words.to_f / sentences
  (4.71 * chars_per_word + 0.5 * words_per_sentence - 21.43).round(1)
end
avg_letter_per_word(text) click to toggle source
# File lib/textstat.rb, line 53
# Average number of characters (spaces excluded, per char_count) per word,
# rounded to two decimal places.
#
# Returns 0.0 when the text contains no words. The check is explicit because
# dividing a Float by zero produces NaN/Infinity rather than raising
# ZeroDivisionError.
def self.avg_letter_per_word(text)
  words = lexicon_count(text)
  return 0.0 if words.zero?

  (char_count(text).to_f / words).round(2)
end
avg_sentence_length(text) click to toggle source
# File lib/textstat.rb, line 35
# Average number of words per sentence, rounded to one decimal place.
#
# Returns 0.0 when sentence_count reports no sentences. The guard is explicit
# because Float division by zero yields NaN/Infinity instead of raising
# ZeroDivisionError.
def self.avg_sentence_length(text)
  sentences = sentence_count(text)
  return 0.0 if sentences.zero?

  (lexicon_count(text).to_f / sentences).round(1)
end
avg_sentence_per_word(text) click to toggle source
# File lib/textstat.rb, line 60
# Average number of sentences per word, rounded to two decimal places.
#
# Returns 0.0 when the text contains no words. The guard is explicit because
# Float division by zero yields Infinity/NaN instead of raising
# ZeroDivisionError.
def self.avg_sentence_per_word(text)
  words = lexicon_count(text)
  return 0.0 if words.zero?

  (sentence_count(text).to_f / words).round(2)
end
avg_syllables_per_word(text) click to toggle source
# File lib/textstat.rb, line 42
# Average number of syllables per word, rounded to one decimal place.
#
# Returns 0.0 when the text contains no words. The guard is explicit because
# Float division by zero yields NaN/Infinity instead of raising
# ZeroDivisionError.
def self.avg_syllables_per_word(text)
  words = lexicon_count(text)
  return 0.0 if words.zero?

  (syllable_count(text).to_f / words).round(1)
end
char_count(text, ignore_spaces = true) click to toggle source
# File lib/textstat.rb, line 6
# Number of characters in +text+.
#
# With +ignore_spaces+ (the default) only the space character ' ' is left out
# of the count; tabs, newlines and other whitespace are still counted.
def self.char_count(text, ignore_spaces = true)
  return text.length unless ignore_spaces

  # '^ ' is the negated character set: count everything that is not a space.
  text.count('^ ')
end
coleman_liau_index(text) click to toggle source
# File lib/textstat.rb, line 106
# Coleman-Liau index of +text+, rounded to two decimal places.
#
# Built from avg_letter_per_word and avg_sentence_per_word, each scaled by 100
# and rounded to two decimals before entering the formula
# 0.0588 * L - 0.296 * S - 15.8.
def self.coleman_liau_index(text)
  letters_per_hundred = (avg_letter_per_word(text) * 100).round(2)
  sentences_per_hundred = (avg_sentence_per_word(text) * 100).round(2)

  letter_term = 0.0588 * letters_per_hundred
  sentence_term = 0.296 * sentences_per_hundred
  (letter_term - sentence_term - 15.8).round(2)
end
dale_chall_readability_score(text) click to toggle source
# File lib/textstat.rb, line 165
# Dale-Chall readability score of +text+, rounded to two decimal places.
#
# score = 0.1579 * (percentage of difficult words)
#       + 0.0496 * (average sentence length)
# plus 3.6365 when more than 5% of the words are difficult.
#
# Returns 0.0 when the text contains no words. The guard is explicit because
# Float division by zero yields NaN/Infinity instead of raising
# ZeroDivisionError.
def self.dale_chall_readability_score(text)
  word_count = lexicon_count(text)
  return 0.0 if word_count.zero?

  easy_count = word_count - difficult_words(text)
  percent_easy = 100.0 * easy_count / word_count
  percent_difficult = 100 - percent_easy

  score = 0.1579 * percent_difficult + 0.0496 * avg_sentence_length(text)
  score += 3.6365 if percent_difficult > 5
  score.round(2)
end
dictionary_path() click to toggle source
# File lib/textstat.rb, line 306
# Directory the word-list dictionaries are read from.
#
# Returns the value previously stored in @dictionary_path when it is set;
# otherwise stores and returns <GEM_PATH>/lib/dictionaries.
def self.dictionary_path
  return @dictionary_path if @dictionary_path

  @dictionary_path = File.join(TextStat::GEM_PATH, 'lib', 'dictionaries')
end
dictionary_path=(path) click to toggle source
# File lib/textstat.rb, line 302
# Overrides the directory used by dictionary_path by storing +path+ in
# @dictionary_path.
def self.dictionary_path=(path)
  instance_variable_set(:@dictionary_path, path)
end
difficult_words(text, language = 'en_us') click to toggle source
# File lib/textstat.rb, line 148
# Counts the distinct "difficult" words in +text+: words that are not listed
# in the easy-word file "<dictionary_path>/<language>.txt" and that have more
# than one syllable according to syllable_count.
#
# text     - String to analyse; it is lower-cased and stripped of everything
#            except ASCII letters, digits and whitespace before splitting.
# language - base name of the dictionary file (default 'en_us').
#
# Returns an Integer. Raises whatever File.foreach raises when the dictionary
# file cannot be read.
def self.difficult_words(text, language = 'en_us')
  require 'set'

  word_list = File.join(dictionary_path, "#{language}.txt")
  # chomp (not chop) so a final line without a trailing newline keeps its
  # last letter.
  easy_words = File.foreach(word_list).map(&:chomp).to_set

  # Whitespace is preserved while stripping punctuation so that words
  # separated by newlines or tabs are not glued together.
  candidates = text.downcase.gsub(/[^0-9a-z\s]/i, '').split(' ').uniq
  candidates.count do |word|
    !easy_words.include?(word) && syllable_count(word) > 1
  end
end
flesch_kincaid_grade(text) click to toggle source
# File lib/textstat.rb, line 74
# Flesch-Kincaid grade level of +text+, rounded to one decimal place:
# 0.39 * (average sentence length) + 11.8 * (average syllables per word) - 15.59.
def self.flesch_kincaid_grade(text)
  length_term = 0.39 * avg_sentence_length(text)
  syllable_term = 11.8 * avg_syllables_per_word(text)
  (length_term + syllable_term - 15.59).round(1)
end
flesch_reading_ease(text) click to toggle source
# File lib/textstat.rb, line 67
# Flesch reading-ease score of +text+, rounded to two decimal places:
# 206.835 - 1.015 * (average sentence length) - 84.6 * (average syllables per word).
def self.flesch_reading_ease(text)
  length_penalty = 1.015 * avg_sentence_length(text)
  syllable_penalty = 84.6 * avg_syllables_per_word(text)
  (206.835 - length_penalty - syllable_penalty).round(2)
end
forcast(text, language = 'en_us') click to toggle source
# File lib/textstat.rb, line 203
# FORCAST grade level: 20 - N / 10, where N is the number of one-syllable
# words among the first 150 whitespace-separated words of +text+.
#
# text     - String to analyse.
# language - hyphenation language passed through to syllable_count.
#
# Returns a Float. The division is done in floating point so the tenths of
# the score are kept instead of being truncated by integer division.
def self.forcast(text, language = 'en_us')
  sample = text.split(' ').first(150)
  monosyllabic = sample.count { |word| syllable_count(word, language) == 1 }
  20 - monosyllabic / 10.0
end
gunning_fog(text) click to toggle source
# File lib/textstat.rb, line 182
# Gunning fog grade of +text+, rounded to two decimal places:
# 0.4 * (average sentence length + percentage of difficult words + 5).
#
# NOTE(review): the "+ 5" offset is kept as found in this implementation; it
# is not part of the commonly quoted Gunning fog definition -- confirm intent.
#
# Returns 0.0 when the text contains no words. The guard is explicit because
# Float division by zero yields NaN/Infinity instead of raising
# ZeroDivisionError.
def self.gunning_fog(text)
  words = lexicon_count(text)
  return 0.0 if words.zero?

  per_diff_words = 100.0 * difficult_words(text) / words + 5
  (0.4 * (avg_sentence_length(text) + per_diff_words)).round(2)
end
lexicon_count(text, remove_punctuation = true) click to toggle source
# File lib/textstat.rb, line 11
# Number of whitespace-separated words in +text+.
#
# With +remove_punctuation+ (the default) every character that is not an
# ASCII letter or whitespace is deleted first, so tokens made only of digits
# or punctuation do not count as words.
def self.lexicon_count(text, remove_punctuation = true)
  cleaned = remove_punctuation ? text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ') : text
  cleaned.split(' ').length
end
linsear_write_formula(text) click to toggle source
# File lib/textstat.rb, line 128
# Linsear Write score computed on the first 100 whitespace-separated words
# of +text+.
#
# Every word with fewer than three syllables scores 1 point, every other word
# scores 3. The point total is divided by the number of sentences in the
# sample; results of 20 or less are reduced by 2, and the outcome is halved.
#
# Returns a Float (not rounded).
def self.linsear_write_formula(text)
  # first(100) takes exactly 100 words; a 0..100 range would take 101.
  sample = text.split(' ').first(100)
  hard, easy = sample.partition { |word| syllable_count(word) >= 3 }
  points = easy.length + hard.length * 3

  score = points.to_f / sentence_count(sample.join(' '))
  score -= 2 if score <= 20
  score / 2
end
lix(text) click to toggle source
# File lib/textstat.rb, line 191
# LIX readability score of +text+, rounded to two decimal places:
# (average sentence length) + (percentage of tokens longer than 6 characters).
#
# Tokens are whitespace-separated and their length includes any attached
# punctuation.
#
# Returns 0.0 for text without any tokens. The guard is explicit because
# Float division by zero yields NaN instead of raising ZeroDivisionError.
def self.lix(text)
  words = text.split(' ')
  return 0.0 if words.empty?

  long_words = words.count { |word| word.length > 6 }
  percent_long = 100.0 * long_words / words.length
  (avg_sentence_length(text) + percent_long).round(2)
end
polysyllab_count(text) click to toggle source
# File lib/textstat.rb, line 81
# Number of whitespace-separated words in +text+ for which syllable_count
# reports three or more syllables.
def self.polysyllab_count(text)
  text.split(' ').count { |word| syllable_count(word) >= 3 }
end
powers_sumner_kearl(text) click to toggle source
# File lib/textstat.rb, line 212
# Powers-Sumner-Kearl grade of +text+, rounded to two decimal places:
# 0.0778 * (average sentence length) + 0.0455 * (syllable count) - 2.2029.
#
# NOTE(review): the syllable term is the total syllable count of the whole
# text; the published formula is usually stated per 100-word sample --
# confirm that callers pass samples of that size.
def self.powers_sumner_kearl(text)
  length_term = 0.0778 * avg_sentence_length(text)
  syllable_term = 0.0455 * syllable_count(text)
  (length_term + syllable_term - 2.2029).round(2)
end
sentence_count(text) click to toggle source
# File lib/textstat.rb, line 31
# Estimates the number of sentences in +text+.
#
# A boundary is a '.', '?' or '!', optionally followed by closing quote or
# bracket characters, then a space, '|' or newline, then an upper-case ASCII
# letter. The result is the number of boundaries plus one, so it is never
# less than 1 (an empty string counts as one sentence).
def self.sentence_count(text)
  boundaries = text.scan(/[\.\?!][\'\\)\]]*[ |\n][A-Z]/)
  boundaries.length + 1
end
smog_index(text) click to toggle source
# File lib/textstat.rb, line 90
# SMOG index of +text+, rounded to one decimal place:
# 1.043 * sqrt(30 * polysyllables / sentences) + 3.1291.
#
# Returns 0.0 when the text has fewer than three sentences. No rescue is
# needed: past the guard the divisor is at least 3 and the division is done
# in floating point, which never raises ZeroDivisionError.
def self.smog_index(text)
  sentences = sentence_count(text)
  return 0.0 if sentences < 3

  polysyllables = polysyllab_count(text)
  (1.043 * Math.sqrt(30.0 * polysyllables / sentences) + 3.1291).round(1)
end
spache(text, language = 'en_us') click to toggle source
# File lib/textstat.rb, line 217
# Spache readability grade of +text+, rounded to two decimal places:
# 0.141 * (average sentence length) + 0.086 * (percentage of unfamiliar words) + 0.839.
#
# text     - String to analyse.
# language - dictionary language passed through to difficult_words.
#
# The unfamiliar-word share is computed in floating point as a percentage of
# the whitespace-separated word count; integer division would collapse it to
# 0 for almost every text.
#
# Returns 0.0 for text without any words instead of dividing by zero.
def self.spache(text, language = 'en_us')
  words = text.split(' ').count
  return 0.0 if words.zero?

  percent_unfamiliar = 100.0 * difficult_words(text, language) / words
  (0.141 * avg_sentence_length(text) + 0.086 * percent_unfamiliar + 0.839).round(2)
end
syllable_count(text, language = 'en_us') click to toggle source
# File lib/textstat.rb, line 17
# Counts the syllables in +text+ by hyphenating each word with Text::Hyphen
# and counting the resulting segments.
#
# text     - String to analyse.
# language - hyphenation language handed to Text::Hyphen (default 'en_us').
#
# The text is lower-cased and everything except letters and whitespace is
# removed before hyphenation, so punctuation and digits neither form words of
# their own nor reach the hyphenator. \p{L} is used rather than a-z so that
# accented letters of non-English languages are kept.
#
# Returns an Integer; 0 for empty text or text without any letters.
def self.syllable_count(text, language = 'en_us')
  return 0 if text.empty?

  words = text.downcase.gsub(/[^\p{L}\s]/, '').split(' ')
  return 0 if words.empty?

  dictionary = Text::Hyphen.new(language: language, left: 0, right: 0)
  words.sum { |word| dictionary.visualise(word).count('-') + 1 }
end
text_standard(text, float_output=nil) click to toggle source
# File lib/textstat.rb, line 224
# Estimates a consensus school grade for +text+ from several readability
# indices.
#
# Flesch-Kincaid, SMOG, Coleman-Liau, Automated Readability, Dale-Chall,
# Linsear Write and Gunning fog each contribute two votes (their rounded and
# their ceiling value). Flesch reading ease contributes the grade(s) of the
# band its score falls in. The grade with the most votes wins; on a tie the
# grade that was voted for first wins.
#
# text         - String to analyse.
# float_output - when truthy the grade is returned as a Float; otherwise a
#                String such as "7th and 8th grade" is returned.
def self.text_standard(text, float_output=nil)
  grades = []
  # Every numeric index votes with both its rounded and its ceiling value.
  vote_bounds = ->(value) { grades.push(value.round.to_i, value.ceil.to_i) }

  vote_bounds.call(flesch_kincaid_grade(text))

  # Flesch reading ease: higher scores mean easier text, so scores of 90 and
  # above (including scores over 100) map to the lowest grade.
  ease = flesch_reading_ease(text)
  ease_grades =
    if ease >= 90 then [5]
    elsif ease >= 80 then [6]
    elsif ease >= 70 then [7]
    elsif ease >= 60 then [8, 9]
    elsif ease >= 50 then [10]
    elsif ease >= 40 then [11]
    elsif ease >= 30 then [12]
    else [13]
    end
  grades.concat(ease_grades)

  vote_bounds.call(smog_index(text))
  vote_bounds.call(coleman_liau_index(text))
  vote_bounds.call(automated_readability_index(text))
  vote_bounds.call(dale_chall_readability_score(text))
  vote_bounds.call(linsear_write_formula(text))
  vote_bounds.call(gunning_fog(text))

  # Readability consensus: the most frequent grade. Hash#key returns the
  # first key holding the top count, and the hash keeps insertion order.
  votes = grades.each_with_object(Hash.new(0)) { |grade, counts| counts[grade] += 1 }
  score = votes.key(votes.values.max)

  if float_output
    score.to_f
  else
    "#{score.to_i - 1}th and #{score.to_i}th grade"
  end
end