class NlpToolz::PosTags
Constants
- FileInputStream
Load Java classes
- POSModel
- POSTaggerME
Attributes
input[RW]
lang[RW]
model[RW]
model_name[RW]
tokenized[RW]
Public Class Methods
new(input, lang = nil)
click to toggle source
# File lib/nlp_toolz/pos_tags.rb, line 20
# Sets up a tagger for the given text.
#
# input - the text to be tagged; stored as-is in @input.
# lang  - language code used to build the model file name. When nil (or
#         false) it is obtained from NlpToolz::Language.get_language(input).
#
# Derives @model_name as "<lang>-pos-maxent.bin" and then calls get_model.
def initialize(input, lang = nil)
  @input = input

  @lang = lang
  @lang ||= NlpToolz::Language.get_language(input)

  @model_name = "#{@lang}-pos-maxent.bin"
  get_model
end
Public Instance Methods
has_model?()
click to toggle source
# File lib/nlp_toolz/pos_tags.rb, line 41
# Reports whether a model is available by returning the current value of
# @model unchanged. Note that this is the stored value itself, not a
# strict true/false.
def has_model?
  @model
end
tokens()
click to toggle source
# File lib/nlp_toolz/pos_tags.rb, line 33
# Returns the value stored under :tokens in @tokenized.
# NOTE(review): @tokenized is populated outside this method; presumably it
# holds the hash built by tokenize_it - confirm against the caller.
def tokens
  tokenized = @tokenized
  tokenized[:tokens]
end
Private Instance Methods
get_model()
click to toggle source
# File lib/nlp_toolz/pos_tags.rb, line 47
# Loads the POS model named by @model_name from "#{MODELS}/pos/".
#
# When the model file exists, @model is set to a POSModel read from it and
# @tagger to a POSTaggerME built on that model; the tagger is returned.
# When the file is missing, @model is set to false (and false is returned)
# and @tagger is left untouched.
def get_model
  model_file = "#{MODELS}/pos/#{@model_name}"

  # File.exist? instead of File.exists?: the latter is deprecated and was
  # removed in Ruby 3.2.
  return @model = false unless File.exist?(model_file)

  stream = FileInputStream.new(model_file)
  begin
    @model = POSModel.new(stream)
  ensure
    # The stream is only needed while the model is being constructed;
    # close it so the file handle is not leaked.
    stream.close
  end

  @tagger = POSTaggerME.new(@model)
end
tokenize_it(stream)
click to toggle source
ToDo 2012-11-28: this is only a workaround until the OpenNLP tokenizer is implemented
# File lib/nlp_toolz/pos_tags.rb, line 58
# Splits a whitespace-separated string of "word/TAG" items into parallel
# token and tag lists.
#
# Each item is split on "/". The last piece is taken as the tag and every
# piece before it becomes its own token carrying that tag, so "a/b/NN"
# yields tokens "a" and "b", both tagged "NN". An item without any "/" has
# no leading pieces and therefore contributes nothing.
#
# Returns a hash of the form {tokens: [...], tags: [...]}.
def tokenize_it(stream)
  tokens = []
  tags = []

  stream.split.each do |item|
    *words, tag = item.split("/")
    words.each do |word|
      tokens << word
      tags << tag
    end
  end

  { tokens: tokens, tags: tags }
end