class NlpToolz::Tokens

Constants

FileInputStream

Loads the Java classes.

TokenizerME
TokenizerModel

Attributes

input[RW]
lang[RW]
model[RW]
model_name[RW]
tokens[RW]

Public Class Methods

new(input, lang = nil) click to toggle source
# File lib/nlp_toolz/tokens.rb, line 19
# Builds a tokenizer wrapper for +input+.
#
# input - the text to be tokenized; stored as-is in @input.
# lang  - optional language code. When nil (or false) the language is
#         obtained from NlpToolz::Language.get_language(input).
#
# The model file name is derived from the language as "<lang>-token.bin",
# and get_model is then called to try to load it.
def initialize(input, lang = nil)
  @input = input

  # Fall back to language detection only when no language was supplied.
  language = lang
  language ||= NlpToolz::Language.get_language(input)
  @lang = language

  @model_name = "#{@lang}-token.bin"

  get_model
end

Public Instance Methods

has_model?() click to toggle source
# File lib/nlp_toolz/tokens.rb, line 30
# Reports whether a tokenizer model is available for this instance.
#
# Returns the raw value of @model rather than a strict boolean: the loaded
# model object (truthy) when get_model found the model file, or false when
# it did not. Intended to be used in a conditional.
def has_model?
  @model
end
tokenize() click to toggle source
# File lib/nlp_toolz/tokens.rb, line 26
# Runs the loaded tokenizer over @input.
#
# The result is stored in @tokens and also returned.
#
# NOTE: @tokenizer is only assigned when a model file was found, so calling
# this without a loaded model raises NoMethodError on nil; check has_model?
# first.
def tokenize
  token_list = @tokenizer.tokenize(@input)
  @tokens = token_list
end

Private Instance Methods

get_model() click to toggle source
# File lib/nlp_toolz/tokens.rb, line 36
# Loads the tokenizer model for the current language, if one is installed.
#
# The model is looked up at "#{MODELS}/token/#{@model_name}" (MODELS is a
# constant defined elsewhere in the project).
#
# When the file exists, @model is set to a TokenizerModel read from it and
# @tokenizer to a TokenizerME built on that model. When it does not exist,
# @model is set to false and @tokenizer is left untouched.
#
# Returns the new tokenizer on success, false otherwise.
def get_model
  model_file = "#{MODELS}/token/#{@model_name}"
  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? is the
  # supported spelling on every Ruby version.
  if File.exist?(model_file)
    stream = FileInputStream.new(model_file)
    begin
      @model = TokenizerModel.new(stream)
    ensure
      # The stream is only needed while the model is being read; close it so
      # the file handle is not leaked, even if loading raises.
      stream.close
    end
    @tokenizer = TokenizerME.new(@model)
  else
    @model = false
  end
end