class Yada::Markov
Constants
- START
- STOP
Attributes
tokens[R]
Public Class Methods
new(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
click to toggle source
# File lib/yada/markov.rb, line 10
#
# Builds an untrained model with empty counters.
#
# @param n [Integer] number of preceding tokens used as context for each
#   transition (train! iterates over windows of n + 1 tokens)
# @param tokenize [Regexp] pattern handed to +scan+ to split each training
#   text into tokens
# @param join [String] separator used to glue the context tokens into a
#   single hash key
def initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
  @n = n
  @tokenize = tokenize
  @join = join

  # Both counters default to 0 so unseen keys can be incremented directly.
  @ngram_count = Hash.new(0)
  @transition_count = Hash.new(0)

  # Every distinct token observed as the target of a transition.
  @tokens = Set.new
end
Public Instance Methods
train!(data)
click to toggle source
# File lib/yada/markov.rb, line 17
#
# Feeds training texts into the model, updating the transition counts,
# the context counts and the set of known tokens.
#
# Each text is tokenized with the configured pattern, padded with @n START
# markers in front and one STOP marker behind, and then walked in windows
# of @n + 1 items: the first @n items form the context, the last item is
# the token that followed it.
#
# @param data [#each] collection yielding the training texts; each text
#   must respond to +scan+ (presumably Strings -- confirm against callers)
# @return whatever +data.each+ returns
def train!(data)
  leading = [START] * @n
  trailing = [STOP]

  data.each do |text|
    words = text.scan(@tokenize)
    padded = Join.new(leading, words, trailing)

    padded.each_cons(@n + 1) do |*context, following|
      key = context.join(@join)

      @ngram_count[key] += 1
      @transition_count[[key, following]] += 1
      @tokens << following
    end
  end
end
transition_probability(ngram, token)
click to toggle source
# File lib/yada/markov.rb, line 31
#
# Relative frequency with which +token+ followed the context +ngram+ in
# the counts recorded so far.
#
# @param ngram [#join] the context tokens; joined with the configured
#   separator to form the lookup key
# @param token [Object] the candidate following token
# @return [Integer, Rational] the Integer 0 when the context has a count of
#   zero, otherwise the Rational transitions(ngram -> token) / count(ngram)
def transition_probability(ngram, token)
  key = ngram.join(@join)
  seen = @ngram_count[key]

  if seen == 0
    # Context has no recorded occurrences: avoid dividing by zero.
    0
  else
    Rational(@transition_count[[key, token]], seen)
  end
end