class Opener::POSTaggers::EnEs
Base POS tagger class for the various language-specific ones such as {Opener::POSTaggers::FR}.
@!attribute [r] args
@return [Array]
@!attribute [r] options
@return [Hash]
Constants
- DEFAULT_OPTIONS
The default options to use.
@return [Hash]
- VERSION
Attributes
args[R]
options[R]
Public Class Methods
new(options = {})
click to toggle source
@param [Hash] options
@option options [Array] :args
@option options [TrueClass|FalseClass] :enable_time When set to `true`
(default), dynamic timestamps will be added.
# File lib/opener/pos_taggers/en_es/en_es.rb, line 33
#
# Stores the extra arguments in @args and merges the remaining options over
# DEFAULT_OPTIONS into @options.
#
# @param [Hash] options
# @option options [Array] :args Kept in @args (an empty Array when the key is
#   absent or nil); it is not carried over into @options.
# @option options [TrueClass|FalseClass] :enable_time Merged into @options
#   like every other key, overriding the DEFAULT_OPTIONS entry.
def initialize(options = {})
  # Work on a shallow copy: removing :args must not alter (or fail on a
  # frozen) Hash owned by the caller.
  options = options.dup

  @args    = options.delete(:args) || []
  @options = DEFAULT_OPTIONS.merge(options)
end
Public Instance Methods
run(input)
click to toggle source
Runs the command and returns the resulting KAF document.
@param [String] input The input to tag. @return [String] The tagged KAF document.
# File lib/opener/pos_taggers/en_es/en_es.rb, line 44
#
# Parses the given KAF document, runs the POS annotator over it and returns
# the annotated document serialized via `to_string`.
#
# @param [String] input The KAF document to tag.
# @return [String]
def run(input)
  lang = language_from_kaf(input)

  # Wrap the Ruby String so the Java side can read it as a stream.
  reader   = Java::java.io.InputStreamReader.new(StringIO.new(input).to_inputstream)
  document = Java::ixa.kaflib.KAFDocument.create_from_stream(reader)

  # The annotator writes its results into `document` in place.
  new_annotator(lang).annotatePOSToKAF(document, lemmatizer(lang), lang)

  document.to_string
end
Protected Instance Methods
language_from_kaf(input)
click to toggle source
Returns the language for the given KAF document.
@param [String] input @return [String]
# File lib/opener/pos_taggers/en_es/en_es.rb, line 88
#
# Reads the `xml:lang` attribute from the `KAF` element of the document.
#
# @param [String] input The KAF document as XML.
# @return [String]
def language_from_kaf(input)
  kaf_node = Nokogiri::XML(input).at('KAF')

  kaf_node.attr('xml:lang')
end
lemmatizer(language)
click to toggle source
Returns the lemmatizer to use.
@param [String] language
# File lib/opener/pos_taggers/en_es/en_es.rb, line 78
#
# Asks the Java LemmatizerDispatcher for the Morfologik lemmatizer of the
# given language.
#
# @param [String] language
def lemmatizer(language)
  dispatcher = Java::ehu.lemmatize.LemmatizerDispatcher

  dispatcher.obtainMorfologikLemmatizer(language)
end
new_annotator(language)
click to toggle source
Creates and configures a new annotator instance.
@param [String] language @return [Java::ehu.pos.Annotate]
# File lib/opener/pos_taggers/en_es/en_es.rb, line 65
#
# Builds an annotator for the given language, disabling its timestamp unless
# the :enable_time option is truthy.
#
# @param [String] language
# @return [Java::ehu.pos.Annotate]
def new_annotator(language)
  Java::ehu.pos.Annotate.new(language).tap do |tagger|
    tagger.disableTimestamp unless options[:enable_time]
  end
end