class Yada::Markov

Constants

START
STOP

Attributes

tokens[R]

Public Class Methods

new(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ') click to toggle source
# File lib/yada/markov.rb, line 10
# Sets up an untrained model with empty counters.
#
# n        - stored as @n; the other methods use it as the context length
#            (number of tokens preceding the predicted token).
# tokenize - pattern stored as @tokenize; handed to String#scan during training.
# join     - separator stored as @join; used to glue context tokens into a
#            single hash key.
def initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
  @n = n
  @tokenize = tokenize
  @join = join
  # Both counters default to 0 so unseen keys can be read and incremented
  # without an explicit existence check.
  @transition_count, @ngram_count = Hash.new(0), Hash.new(0)
  @tokens = Set.new
end

Public Instance Methods

train!(data) click to toggle source
# File lib/yada/markov.rb, line 17
# Updates the model's counters from a collection of texts.
#
# data - an object responding to #each that yields strings (each one is
#        tokenized with String#scan using @tokenize).
#
# Every text is padded with @n START markers in front and one STOP marker
# behind, then walked in windows of @n + 1 items: the first @n items form the
# context, the last one is the token that followed it.
#
# Returns whatever data.each returns.
def train!(data)
  leading = [START] * @n
  trailing = [STOP]

  data.each do |text|
    padded = Join.new(leading, text.scan(@tokenize), trailing)
    padded.each_cons(@n + 1) do |*context, following|
      key = context.join(@join)
      @tokens << following
      @ngram_count[key] += 1
      @transition_count[[key, following]] += 1
    end
  end
end
transition_probability(ngram, token) click to toggle source
# File lib/yada/markov.rb, line 31
# Relative frequency with which +token+ followed +ngram+ in the recorded counts.
#
# ngram - object responding to #join (joined with @join to form the lookup key).
# token - the candidate next token.
#
# Returns the Integer 0 when the context has a zero count, otherwise a
# Rational of (count of context followed by token) / (count of context).
def transition_probability(ngram, token)
  key = ngram.join(@join)
  seen = @ngram_count[key]

  if seen == 0
    0
  else
    Rational(@transition_count[[key, token]], seen)
  end
end