class NlpToolz::Tokens
Constants
- FileInputStream
load java classes
- TokenizerME
- TokenizerModel
Attributes
input[RW]
lang[RW]
model[RW]
model_name[RW]
tokens[RW]
Public Class Methods
new(input, lang = nil)
click to toggle source
# File lib/nlp_toolz/tokens.rb, line 19
#
# Builds a tokenizer wrapper for the given text.
#
# Stores the input, settles on a language code, derives the model file
# name ("<lang>-token.bin") from it, and finally calls get_model.
#
# @param input the text to be tokenized later on
# @param lang  language code; when nil (or false) the language is obtained
#              from NlpToolz::Language.get_language(input)
def initialize(input, lang = nil)
  @input = input

  # An explicitly passed language wins; detection only runs as a fallback.
  @lang = lang
  @lang ||= NlpToolz::Language.get_language(input)

  @model_name = "#{@lang}-token.bin"

  get_model
end
Public Instance Methods
has_model?()
click to toggle source
# File lib/nlp_toolz/tokens.rb, line 30
#
# Reports whether a tokenizer model is available.
#
# @return the current value of @model as-is (not coerced to true/false):
#         truthy when a model has been stored, false or nil otherwise
def has_model?
  @model
end
tokenize()
click to toggle source
# File lib/nlp_toolz/tokens.rb, line 26
#
# Runs the tokenizer over the stored input and remembers the result.
#
# Delegates to @tokenizer.tokenize(@input); whatever that call returns is
# stored in @tokens and also returned to the caller.
#
# @return the tokenizer's result for @input
def tokenize
  result = @tokenizer.tokenize(@input)
  @tokens = result
end
Private Instance Methods
get_model()
click to toggle source
# File lib/nlp_toolz/tokens.rb, line 36
#
# Loads the tokenizer model for the current language, if one exists.
#
# Looks for "#{MODELS}/token/#{@model_name}". When the file is present,
# @model is set to a TokenizerModel read from it and @tokenizer to a
# TokenizerME built on that model. When it is absent, @model is set to
# false and @tokenizer is left untouched.
#
# @return the new tokenizer when a model was loaded, false otherwise
def get_model
  model_file = "#{MODELS}/token/#{@model_name}"

  # File.exists? was deprecated and later removed (Ruby 3.2); File.exist?
  # is the supported spelling.
  if File.exist?(model_file)
    stream = FileInputStream.new(model_file)
    begin
      @model = TokenizerModel.new(stream)
    ensure
      # The stream is only needed while the model is being read; close it
      # so the file handle is not leaked, even if loading raises.
      stream.close
    end
    @tokenizer = TokenizerME.new(@model)
  else
    @model = false
  end
end