class RakeText
Public Class Methods
FOX()
click to toggle source
# File lib/rake_text.rb, line 10
# Class-level reader: hands back whatever object is currently stored in the
# @@stoplist_fox class variable (it is assigned outside this excerpt).
# NOTE(review): presumably a stopword list meant to be passed to #analyse
# as +stoplist+ -- confirm against the class body.
def self.FOX
  @@stoplist_fox
end
SMART()
click to toggle source
# File lib/rake_text.rb, line 6
# Class-level reader: hands back whatever object is currently stored in the
# @@stoplist_smart class variable (it is assigned outside this excerpt).
# NOTE(review): presumably a stopword list meant to be passed to #analyse
# as +stoplist+ -- confirm against the class body.
def self.SMART
  @@stoplist_smart
end
new()
click to toggle source
# File lib/rake_text.rb, line 14
# Constructor: accepts no arguments and sets up no instance state.
def initialize; end
Public Instance Methods
analyse(text, stoplist, verbose=false)
click to toggle source
# File lib/rake_text.rb, line 17
# Runs the whole keyword-extraction pipeline over +text+:
#   1. compile +stoplist+ into a stopword pattern,
#   2. cut the text into fragments at punctuation,
#   3. turn the fragments into candidate phrases,
#   4. score every word, then every phrase.
#
# text     - String to analyse.
# stoplist - collection of stopwords handed to buildStopwordRegExPattern.
# verbose  - when exactly +true+, prints one "score - phrase" line per
#            candidate to stdout, highest score first.
#
# Returns the phrase => score Hash produced by
# generateCandidateKeywordScores.
def analyse(text, stoplist, verbose=false)
  stopword_regex = buildStopwordRegExPattern(stoplist)

  # Fragment delimiters: . ! ? , ; : tab, double quote, parentheses,
  # apostrophe, U+2019, U+2013 and a literal backslash.
  # NOTE(review): inside this character class `\\-\\` is a
  # backslash-to-backslash range, so a bare hyphen does NOT split the
  # text -- confirm that this is intended.
  fragments = text.split(/[.!?,;:\t\\-\\"\\(\\)\\\'\u2019\u2013]/u)

  candidate_phrases = generateCandidateKeywords(fragments, stopword_regex)
  per_word_scores = calculateWordScores(candidate_phrases)
  candidates = generateCandidateKeywordScores(candidate_phrases, per_word_scores)

  return candidates unless verbose == true

  ranked = candidates.sort_by { |_phrase, total| total }.reverse
  ranked.each { |word, score| puts sprintf('%.2f - %s', score, word) }

  candidates
end
Private Instance Methods
buildStopwordRegExPattern(words)
click to toggle source
Step 1: create the stopword pattern (a single regular expression that matches any of the given stopwords).
# File lib/rake_text.rb, line 38
# Builds one case-insensitive regular expression that matches any of the
# given stopwords as a whole word (each alternative is wrapped in \b...\b).
#
# words - Enumerable of stopwords. Entries are converted with #to_s and
#         stripped of surrounding whitespace; blank entries are skipped.
#
# Returns a Regexp. When no usable stopword is left, the returned pattern
# never matches anything.
def buildStopwordRegExPattern(words)
  alternatives = words.map { |word| word.to_s.strip }
                      .reject(&:empty?)
                      # Escape so characters such as "." or "+" in a stopword
                      # are matched literally rather than as regex syntax.
                      .map { |word| "\\b#{Regexp.escape(word)}\\b" }

  # An empty alternation would compile to //, which matches at every
  # position; substituting on it would put a separator between every
  # character of the text. (?!) is an empty negative lookahead and can
  # never match.
  return Regexp.new('(?!)', Regexp::IGNORECASE) if alternatives.empty?

  Regexp.new(alternatives.join('|'), Regexp::IGNORECASE)
end
calculateWordScores(phrases)
click to toggle source
Step 3: calculate the individual word scores.
# File lib/rake_text.rb, line 69
# Scores every word that occurs in +phrases+.
#
# For each word two tallies are kept: how many times it occurs, and the
# summed (phrase length - 1) of every phrase occurrence. The score is
# (summed degree + occurrences) / occurrences, always as a Float.
#
# phrases - Enumerable of phrase Strings; each is tokenised with
#           seperateWords.
#
# Returns a Hash of word => Float score whose default value is 0.
def calculateWordScores(phrases)
  occurrences = Hash.new(0)
  co_degree = Hash.new(0)

  phrases.each do |phrase|
    tokens = seperateWords(phrase)
    neighbours = tokens.length - 1
    tokens.each do |token|
      occurrences[token] += 1
      co_degree[token] += neighbours
    end
  end

  occurrences.each_with_object(Hash.new(0)) do |(token, count), scores|
    # Adding the word's own count to its degree before dividing; the
    # multiplication by 1.0 forces floating-point division.
    scores[token] = (co_degree[token] + count) / (count * 1.0)
  end
end
generateCandidateKeywordScores(phrases, scores)
click to toggle source
Step 4: generate the candidate keyword scores.
# File lib/rake_text.rb, line 99
# Scores each candidate phrase as the sum of the scores of its words.
#
# phrases - Enumerable of phrase Strings; each is tokenised with
#           seperateWords.
# scores  - word => score lookup, indexed with [] for every word.
#
# Returns a Hash of phrase => summed score whose default value is 0.
# A phrase that appears more than once is simply stored again under the
# same key.
def generateCandidateKeywordScores(phrases, scores)
  phrases.each_with_object(Hash.new(0)) do |phrase, totals|
    # Plain left-to-right addition starting from Integer 0.
    totals[phrase] = seperateWords(phrase).inject(0) do |running, word|
      running + scores[word]
    end
  end
end
generateCandidateKeywords(sentences, pattern)
click to toggle source
Step 2: generate the candidate keywords.
# File lib/rake_text.rb, line 48
# Cuts every sentence into candidate phrases at each match of +pattern+.
#
# sentences - Enumerable of Strings.
# pattern   - Regexp (or String) whose matches act as phrase boundaries.
#
# Each match is first replaced by "|" and the sentence is then split on
# "|", so a literal "|" already present in the text is a boundary too.
# Phrases are stripped and downcased; empty ones are discarded.
#
# Returns an Array of phrase Strings in order of appearance (duplicates
# are kept).
def generateCandidateKeywords(sentences, pattern)
  sentences.flat_map do |sentence|
    sentence.strip
            .gsub(pattern, '|')
            .split('|')
            .map { |part| part.strip.downcase }
            .reject(&:empty?)
  end
end
seperateWords(text)
click to toggle source
# File lib/rake_text.rb, line 114
# Splits +text+ into lowercase word tokens.
#
# A token is a maximal run of ASCII letters, digits, "_", "+" or "-";
# every other character (including a backslash) separates tokens.
# Tokens that Kernel#Float can parse as a number (e.g. "42", "-3", "1e5")
# are dropped.
#
# text - String to tokenise.
#
# Returns an Array of Strings in order of appearance.
def seperateWords(text)
  text.split(/[^a-zA-Z0-9_+\-]/)
      .map(&:downcase)
      # Float(..., exception: false) yields nil for non-numeric tokens, so
      # no exception has to be raised and rescued for every ordinary word.
      .reject { |token| token.empty? || Float(token, exception: false) }
end