class Opener::POSTaggers::EnEs

Base POS tagger class for the various language-specific ones such as {Opener::POSTaggers::FR}.

@!attribute [r] args

@return [Array]

@!attribute [r] options

@return [Hash]

Constants

DEFAULT_OPTIONS

The default options to use.

@return [Hash]

VERSION

Attributes

args[R]
options[R]

Public Class Methods

new(options = {}) click to toggle source

@param [Hash] options

@option options [Array] :args

@option options [TrueClass|FalseClass] :enable_time When set to `true`

(default) dynamic timestamps will be added.
# File lib/opener/pos_taggers/en_es/en_es.rb, line 33
# Builds a tagger from an options hash.
#
# The hash is shallow-copied before `:args` is extracted, so the caller's
# hash is left untouched (and frozen hashes are accepted). Whatever remains
# after removing `:args` is merged over DEFAULT_OPTIONS.
#
# @param [Hash] options
# @option options [Array] :args Stored in `@args`; defaults to an empty
#   Array when missing or nil.
# @option options [TrueClass|FalseClass] :enable_time Kept in `@options`.
def initialize(options = {})
  # Work on a copy: Hash#delete would otherwise strip :args from the
  # hash owned by the caller.
  settings = options.dup

  @args    = settings.delete(:args) || []
  @options = DEFAULT_OPTIONS.merge(settings)
end

Public Instance Methods

run(input) click to toggle source

Runs the command and returns the resulting KAF document.

@param [String] input The input to tag. @return [String]

# File lib/opener/pos_taggers/en_es/en_es.rb, line 44
# Tags the given KAF input and returns the annotated document serialized
# via `to_string`.
#
# @param [String] input The KAF document to tag.
# @return [String]
def run(input)
  lang = language_from_kaf(input)

  # Hand the Ruby string to the Java side as a stream-backed reader.
  stream   = StringIO.new(input).to_inputstream
  reader   = Java::java.io.InputStreamReader.new(stream)
  document = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)

  # The annotator modifies `document` in place.
  new_annotator(lang).annotatePOSToKAF(document, lemmatizer(lang), lang)

  document.to_string
end

Protected Instance Methods

language_from_kaf(input) click to toggle source

Returns the language for the given KAF document.

@param [String] input @return [String]

# File lib/opener/pos_taggers/en_es/en_es.rb, line 88
# Reads the `xml:lang` attribute from the first `KAF` element of the input.
#
# @param [String] input Raw KAF XML.
# @return [String] value of the `xml:lang` attribute
def language_from_kaf(input)
  kaf_node = Nokogiri::XML(input).at('KAF')

  kaf_node.attr('xml:lang')
end
lemmatizer(language) click to toggle source

Returns the lemmatizer to use.

@param [String] language

# File lib/opener/pos_taggers/en_es/en_es.rb, line 78
# Obtains the Morfologik lemmatizer for the given language from the Java
# LemmatizerDispatcher.
#
# @param [String] language
def lemmatizer(language)
  dispatcher = Java::ehu.lemmatize.LemmatizerDispatcher

  dispatcher.obtainMorfologikLemmatizer(language)
end
new_annotator(language) click to toggle source

Creates and configures a new annotator instance.

@param [String] language @return [Java::ehu.pos.Annotate]

# File lib/opener/pos_taggers/en_es/en_es.rb, line 65
# Builds an annotator for the given language, turning timestamps off
# unless the `:enable_time` option is truthy.
#
# @param [String] language
# @return [Java::ehu.pos.Annotate]
def new_annotator(language)
  Java::ehu.pos.Annotate.new(language).tap do |instance|
    instance.disableTimestamp unless options[:enable_time]
  end
end