module StanfordCoreNLP

Constants

VERSION

Attributes

custom_properties[RW]

Custom properties

language[RW]

Store the language currently being used.

model_files[RW]

The model file names for a given language.

model_path[RW]

The folder in which to look for models.

Public Class Methods

bind() click to toggle source

########################### #

Public API methods       #

########################### #

Calls superclass method
# File lib/stanford-core-nlp.rb, line 115
# Binds the Java classes and then installs the annotation bridge
# on every default class.
#
# When running on Windows, the JAR and model paths are rewritten in
# place to use backslashes before the superclass performs the binding.
#
# Returns whatever `default_classes.each` returns.
def self.bind
  # Windows needs backslash-separated paths; both strings are mutated in place.
  if self.running_on_windows?
    [self.jar_path, self.model_path].each { |path| path.gsub!('/', '\\') }
  end

  # Delegate the actual binding work to the superclass.
  super

  # Each entry's first element names the constant to look up and patch.
  self.default_classes.each do |class_info|
    self.inject_get_method(const_get(class_info.first))
  end
end
const_missing(const) click to toggle source

Hack in order not to break backwards compatibility.

Calls superclass method
# File lib/stanford-core-nlp.rb, line 192
# Backwards-compatibility shim for the removed Text constant.
#
# const - the Symbol name of the constant that could not be resolved.
#
# When const is :Text, prints a deprecation warning to standard output
# and returns Annotation. Any other name is passed on to the superclass
# implementation.
def self.const_missing(const)
  return super(const) unless const == :Text

  # The trailing space keeps the two sentences from running together.
  puts 'WARNING: StanfordCoreNLP::Text has been deprecated. ' \
       'Please use StanfordCoreNLP::Annotation instead.'
  Annotation
end
load(*annotators) click to toggle source

Load a StanfordCoreNLP pipeline with the specified JVM flags and StanfordCoreNLP properties.

# File lib/stanford-core-nlp.rb, line 137
# Builds the pipeline properties for the requested annotators and asks
# the AnnotationBridge for a pipeline configured with them.
#
# annotators - annotator names, given as Symbols or Strings. Names are
#              normalized with to_s before use, so :ner and 'ner' are
#              treated the same.
#
# Binds first (via bind) if that has not been done yet.
#
# Returns the result of AnnotationBridge.getPipelineWithProperties.
# Raises RuntimeError if a required model file is not readable.
def self.load(*annotators)
  self.bind unless self.bound

  names = annotators.map { |annotator| annotator.to_s }

  # Prepend the model path to every model file whose property key
  # mentions one of the requested annotators.
  properties = {}
  self.model_files.each do |key, file|
    next unless names.any? { |name| key.index(name) }

    path = self.model_path + file
    unless File.readable?(path)
      raise "Model file #{path} could not be found. " +
            "You may need to download this file manually " +
            "and/or set paths properly."
    end
    properties[key] = path
  end

  properties['annotators'] = names.join(', ')

  unless self.language == :english
    # Bug fix for French/German parsers.
    # Otherwise throws "IllegalArgumentException:
    # Unknown option: -retainTmpSubcategories"
    properties['parse.flags'] = ''
    # Bug fix for French/German parsers.
    # Otherwise throws java.lang.NullPointerException: null.
    properties['parse.buildgraphs'] = 'false'
  end

  # Bug fix for NER system. Otherwise throws:
  # Error initializing binder 1 at edu.stanford.
  # nlp.time.Options.<init>(Options.java:88)
  properties['sutime.binders'] = '0'

  # Manually include SUTime models. The check runs on the normalized
  # names so that a String 'ner' is honored just like the Symbol :ner.
  if names.include?('ner')
    properties['sutime.rules'] =
      self.model_path + 'sutime/defs.sutime.txt, ' +
      self.model_path + 'sutime/english.sutime.txt'
  end

  props = get_properties(properties)

  # Hack for Java7 compatibility.
  const_get(:AnnotationBridge).getPipelineWithProperties(props)
end
set_model(name, file) click to toggle source

Set a model file.

# File lib/stanford-core-nlp.rb, line 106
# Registers a model file under the given property name.
#
# name - property name such as "pos.model"; the part before the first
#        dot selects the folder from Config::ModelFolders.
# file - file name, appended to that folder.
#
# Returns the stored folder-plus-file value.
def self.set_model(name, file)
  folder_key = name.split('.').first.to_sym
  self.model_files[name] = Config::ModelFolders[folder_key] + file
end
use(language) click to toggle source

Use models for a given language. Language can be supplied as full-length, or ISO-639 2 or 3 letter code (e.g. :english, :eng or :en will work).

# File lib/stanford-core-nlp.rb, line 81
# Selects the language and rebuilds the model file table for it.
#
# language - any code listed for a language in Config::LanguageCodes.
#
# The third code of the matching entry becomes the current language; if
# several entries match, the last one wins, and if none match the
# language is set to nil. model_files is reset and then filled from
# Config::Models: a Hash entry yields one "<type>.<name>" key per model,
# a String entry yields a single "<type>.model" key, anything else is
# skipped.
#
# Returns whatever `Config::Models.each` returns.
def self.use(language)
  self.model_files = {}

  selected = nil
  Config::LanguageCodes.each do |_name, codes|
    selected = codes[2] if codes.include?(language)
  end
  self.language = selected

  Config::Models.each do |type, by_language|
    entry = by_language[selected]
    folder = Config::ModelFolders[type]
    case entry
    when Hash
      entry.each do |model_name, file|
        self.model_files["#{type}.#{model_name}"] = folder + file
      end
    when String
      self.model_files["#{type}.model"] = folder + entry
    end
  end
end

Private Class Methods

camel_case(s) click to toggle source

camel_case which also supports a dot as a separator

# File lib/stanford-core-nlp.rb, line 230
# Converts a name to CamelCase.
#
# s - anything responding to to_s.
#
# A slash becomes "::" and the character after it is upcased. Then the
# first character of each line, and any character following an
# underscore or a dot, is upcased, with the underscore or dot removed.
#
# Returns the converted String.
def self.camel_case(s)
  namespaced = s.to_s.gsub(/\/(.?)/) do
    "::#{Regexp.last_match(1).upcase}"
  end
  namespaced.gsub(/(?:^|_|\.)(.)/) do
    Regexp.last_match(1).upcase
  end
end
get_list(tokens) click to toggle source

Get a Java ArrayList binding to pass lists of tokens to the Stanford Core NLP process.

# File lib/stanford-core-nlp.rb, line 216
# Builds a StanfordCoreNLP::ArrayList holding one Word per token.
#
# tokens - a collection responding to each; every element is converted
#          with to_s before being wrapped in a Word.
#
# Returns the populated list.
def self.get_list(tokens)
  java_list = StanfordCoreNLP::ArrayList.new
  tokens.each { |token| java_list.add(Word.new(token.to_s)) }
  java_list
end
get_properties(properties) click to toggle source

Create a java.util.Properties object from a hash.

# File lib/stanford-core-nlp.rb, line 205
# Creates a Properties object from a hash.
#
# properties - Hash of settings. Entries in custom_properties are merged
#              on top, so they win on key collisions. The argument
#              itself is not modified.
#
# Keys and values are converted with to_s before being stored.
#
# Returns the populated Properties instance.
def self.get_properties(properties)
  merged = properties.merge(self.custom_properties)
  java_props = Properties.new
  merged.each_pair do |key, val|
    java_props.set_property(key.to_s, val.to_s)
  end
  java_props
end
running_on_windows?() click to toggle source

Returns true if we're running on Windows.

# File lib/stanford-core-nlp.rb, line 225
# Returns true if we're running on Windows.
#
# host_os - optional OS identifier to test instead of the current one;
#           defaults to RbConfig::CONFIG['host_os'].
#
# Matches any identifier containing "mswin" or "mingw" (case-insensitive),
# so MinGW and 64-bit MSVC builds are recognized in addition to the
# classic "mswin32". Comparing only the second dash-separated field of
# RUBY_PLATFORM against 'mswin32' missed those builds.
#
# Returns true or false.
def self.running_on_windows?(host_os = nil)
  require 'rbconfig'
  host_os ||= RbConfig::CONFIG['host_os']
  !(host_os.to_s =~ /mswin|mingw/i).nil?
end