class String

Adds n-gram extraction helpers to String.

Public Instance Methods

each_ngram(n=3) { |join| ... } click to toggle source
# File lib/rlid/common.rb, line 115
# Yields every run of n consecutive characters (n-gram) of the receiver
# after it has been normalized and padded by preprocess(n).
#
# n:: size of each n-gram (defaults to 3).
#
# With a block, each n-gram is yielded as a String. Without a block, an
# Enumerator over the same n-grams is returned instead of raising
# LocalJumpError, so callers can chain to_a, map, etc.
def each_ngram(n = 3)
  return enum_for(:each_ngram, n) unless block_given?

  preprocess(n).chars.each_cons(n) { |gram| yield gram.join }
end
preprocess(n) click to toggle source

private

# File lib/rlid/common.rb, line 123
# Returns a normalized, padded copy of the receiver ready for n-gram
# extraction. The receiver itself is not modified.
#
# Steps, in order:
# 1. every character that is not alphabetic, an apostrophe or a newline
#    is replaced by a space;
# 2. leading and trailing whitespace (newlines included) is stripped;
# 3. each newline, together with any whitespace around it, becomes a
#    boundary marker of n-1 "|" characters;
# 4. remaining whitespace runs collapse to a single space;
# 5. the text is downcased and wrapped in "|" padding.
#
# n:: n-gram size; the padding and the line-boundary marker are n-1 "|".
def preprocess(n)
  boundary = '|' * (n - 1)

  # Blank out non-letters BEFORE stripping: a first or last line made only
  # of digits/punctuation then disappears entirely instead of leaving a
  # doubled boundary (e.g. "1\nabc" used to become "||||abc||").
  text = gsub(/[^[:alpha:]'\n]/, ' ').strip

  text = text.gsub(/\s*\n\s*/, boundary).gsub(/\s+/, ' ').downcase

  # Text shorter than n-1 characters is closed with a single space rather
  # than the full padding.
  # NOTE(review): the reason for this special case is not visible here;
  # it is preserved as-is from the earlier implementation.
  closing = text.size < n - 1 ? ' ' : boundary
  boundary + text + closing
end