class Textoken::Tokenizer
This is the last step in the process. It takes the findings objects and performs the final punctuation split. It does not split if base.dont_split is true.
Attributes
base[R]
col[R]
result[R]
Public Class Methods
new(base)
click to toggle source
# File lib/textoken/tokenizer.rb, line 8
# Builds a tokenizer around +base+.
#
# Stores +base+, starts with an empty result array and selects the
# collection to work on: base.findings when base.options.collection has
# at least one entry, base.text otherwise.
#
# @param base [Object] must respond to +options+ (whose +collection+
#   responds to +length+), +findings+ and +text+
def initialize(base)
  @base = base
  @result = []
  @col =
    if base.options.collection.length.zero?
      base.text
    else
      base.findings
    end
end
Public Instance Methods
tokens()
click to toggle source
# File lib/textoken/tokenizer.rb, line 14
# Returns the final list of tokens.
#
# When base.dont_split is truthy the collection (+col+) is returned
# untouched. Otherwise every entry of +col+ is run through
# split_punctuations and the accumulated result is returned.
#
# @return [Array] +col+ itself, or the punctuation-split result
def tokens
  return col if base.dont_split

  # Start from a clean slate so that calling #tokens more than once on the
  # same instance does not append the same tokens again.
  @result = []
  split_punctuations
  @result
end
Private Instance Methods
default_regexp()
click to toggle source
Regular expression used to find punctuation.
# File lib/textoken/tokenizer.rb, line 29
# Pattern handed to the scanner to locate punctuation.
#
# Matches one character that is neither a word character ([:word:]) nor
# a literal "+"; the plus sign sits inside the negated class, so it is
# not treated as punctuation.
#
# @return [Regexp]
def default_regexp
  %r{[^[:word:]+]}
end
split_punctuations()
click to toggle source
# File lib/textoken/tokenizer.rb, line 22
# Splits every entry of +col+ on punctuation and appends the pieces to
# @result.
#
# Each entry is passed to Scanner together with default_regexp; when the
# scanner's +result+ is nil (or false) the entry is kept as-is.
#
# The pieces are gathered with a single flat_map and appended once,
# instead of re-allocating @result for every entry.
#
# @return [Array] the updated @result
def split_punctuations
  regexp = default_regexp # same pattern for every entry, so look it up once
  pieces = col.flat_map { |word| Scanner.new(word, regexp).result || [word] }
  @result += pieces
end