module StanfordCoreNLP
Constants
- VERSION
Attributes
custom_properties[RW]
Custom properties
language[RW]
Store the language currently being used.
model_files[RW]
The model file names for a given language.
model_path[RW]
The folder in which to look for models.
Public Class Methods
bind()
click to toggle source
########################### #
Public API methods #
########################### #
Calls superclass method
# File lib/stanford-core-nlp.rb, line 115
#
# Create the bindings by delegating to the superclass implementation, then
# call inject_get_method on every class named in default_classes.
#
# When running_on_windows? is true, the forward slashes in jar_path and
# model_path are replaced by backslashes in place before binding.
def self.bind
  # Take care of Windows users: switch the path separators to backslashes.
  if running_on_windows?
    jar_path.gsub!('/', '\\')
    model_path.gsub!('/', '\\')
  end

  # Make the bindings.
  super

  # Bind annotation bridge.
  default_classes.each { |info| inject_get_method(const_get(info.first)) }
end
const_missing(const)
click to toggle source
Hack in order not to break backwards compatibility.
Calls superclass method
# File lib/stanford-core-nlp.rb, line 192
#
# Hack in order not to break backwards compatibility.
#
# Resolves the deprecated constant :Text to Annotation and prints a
# deprecation warning. Every other missing constant is passed on to the
# superclass implementation.
#
# const - Symbol name of the constant that could not be found.
#
# Returns Annotation for :Text, otherwise whatever the superclass returns
# (or raises).
def self.const_missing(const)
  return super(const) unless const == :Text

  # The two sentences need a separating space; without it the message
  # reads "deprecated.Please use ...".
  puts 'WARNING: StanfordCoreNLP::Text has been deprecated. ' \
       'Please use StanfordCoreNLP::Annotation instead.'
  Annotation
end
load(*annotators)
click to toggle source
Load a StanfordCoreNLP
pipeline for the specified annotators, configured with the matching model
files and StanfordCoreNLP properties.
# File lib/stanford-core-nlp.rb, line 137
#
# Load a StanfordCoreNLP pipeline for the given annotators.
#
# Calls bind first unless bound is already set. A properties hash is then
# assembled from the model files whose key contains one of the annotator
# names, the annotator list itself and a few parser/SUTime workarounds.
# The hash is converted with get_properties and handed to
# AnnotationBridge.getPipelineWithProperties, whose result is returned.
#
# annotators - annotator names (e.g. :ner); each is converted with to_s.
#
# Raises RuntimeError if a selected model file is not readable.
def self.load(*annotators)
  bind unless bound

  # Prepend the model path to the model files of the requested annotators.
  properties = {}
  model_files.each do |key, file|
    next unless annotators.any? { |annotator| key.include?(annotator.to_s) }

    path = model_path + file
    unless File.readable?(path)
      raise "Model file #{path} could not be found. " \
            'You may need to download this file manually ' \
            'and/or set paths properly.'
    end
    properties[key] = path
  end

  properties['annotators'] = annotators.map(&:to_s).join(', ')

  if language != :english
    # Bug fix for French/German parsers.
    # Otherwise throws "IllegalArgumentException:
    # Unknown option: -retainTmpSubcategories"
    properties['parse.flags'] = ''
    # Bug fix for French/German parsers.
    # Otherwise throws java.lang.NullPointerException: null.
    properties['parse.buildgraphs'] = 'false'
  end

  # Bug fix for NER system. Otherwise throws:
  # Error initializing binder 1 at edu.stanford.
  # nlp.time.Options.<init>(Options.java:88)
  properties['sutime.binders'] = '0'

  # Manually include SUTime models.
  if annotators.include?(:ner)
    sutime_rules = ['sutime/defs.sutime.txt', 'sutime/english.sutime.txt']
    properties['sutime.rules'] = sutime_rules.map { |rule| model_path + rule }.join(', ')
  end

  java_props = get_properties(properties)

  # Hack for Java7 compatibility.
  const_get(:AnnotationBridge).getPipelineWithProperties(java_props)
end
set_model(name, file)
click to toggle source
Set a model file.
# File lib/stanford-core-nlp.rb, line 106
#
# Set a model file.
#
# name - String property name; the part before the first dot, as a Symbol,
#        selects the folder in Config::ModelFolders.
# file - file name appended to that folder.
#
# The resulting path is stored in model_files under the full name.
def self.set_model(name, file)
  folder_key = name.split('.').first.to_sym
  model_files[name] = Config::ModelFolders[folder_key] + file
end
use(language)
click to toggle source
Use models for a given language. Language can be supplied as full-length, or ISO-639 2 or 3 letter code (e.g. :english, :eng or :en will work).
# File lib/stanford-core-nlp.rb, line 81
#
# Use models for a given language.
#
# language - value looked up in each code list of Config::LanguageCodes
#            (per the surrounding documentation: the full-length name or an
#            ISO-639 2 or 3 letter code).
#
# Resets model_files, stores the third element of the matching code list as
# the current language (nil when nothing matches; the last match wins), and
# registers the model files Config::Models lists for that language.
def self.use(language)
  self.model_files = {}

  lang = nil
  Config::LanguageCodes.each do |_key, codes|
    lang = codes[2] if codes.include?(language)
  end
  self.language = lang

  Config::Models.each do |name, languages|
    models = languages[lang]
    folder = Config::ModelFolders[name]

    case models
    when Hash
      # Several models for this annotator: one "<name>.<kind>" entry each.
      models.each { |kind, file| model_files["#{name}.#{kind}"] = folder + file }
    when String
      # A single model: stored under "<name>.model".
      model_files["#{name}.model"] = folder + models
    end
  end
end
Private Class Methods
camel_case(s)
click to toggle source
camel_case
variant that also supports a dot as a separator.
# File lib/stanford-core-nlp.rb, line 230
#
# Camel-case a name, treating underscores and dots as word separators and
# turning each "/" into "::".
#
# s - any object; it is converted with to_s first.
#
# Returns the converted String, e.g. "foo/bar_baz" becomes "Foo::BarBaz".
def self.camel_case(s)
  # "/x" becomes "::X" (a "/" with no following character becomes "::").
  namespaced = s.to_s.gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }

  # Upcase the character at a line start or after "_" / ".", dropping the separator.
  namespaced.gsub(/(?:^|_|\.)(.)/) { Regexp.last_match(1).upcase }
end
get_list(tokens)
click to toggle source
Get a Java ArrayList binding to pass lists of tokens to the Stanford Core NLP process.
# File lib/stanford-core-nlp.rb, line 216
#
# Get a Java ArrayList binding to pass lists of tokens to the Stanford Core
# NLP process.
#
# tokens - collection of tokens; each one is converted with to_s and
#          wrapped in a Word before being added.
#
# Returns the populated StanfordCoreNLP::ArrayList.
def self.get_list(tokens)
  java_list = StanfordCoreNLP::ArrayList.new
  tokens.each { |token| java_list.add(Word.new(token.to_s)) }
  java_list
end
get_properties(properties)
click to toggle source
Create a java.util.Properties object from a hash.
# File lib/stanford-core-nlp.rb, line 205
#
# Create a java.util.Properties object from a hash.
#
# properties - Hash of settings. Entries from custom_properties are merged
#              on top, so they override equal keys.
#
# Keys and values are converted with to_s before being set.
#
# Returns the populated Properties object.
def self.get_properties(properties)
  merged = properties.merge(custom_properties)

  Properties.new.tap do |java_props|
    merged.each { |key, value| java_props.set_property(key.to_s, value.to_s) }
  end
end
running_on_windows?()
click to toggle source
Returns true if we're running on Windows.
# File lib/stanford-core-nlp.rb, line 225
#
# Returns true if we're running on Windows.
#
# Matches both the MSVC ("mswin") and MinGW ("mingw") builds of Ruby.
# The previous check compared the second dash-separated component of
# RUBY_PLATFORM with exactly 'mswin32', so platforms such as "x64-mingw32"
# or "x64-mswin64_140" were not recognised. The pattern deliberately does
# not match a bare "win", which would also hit "darwin".
def self.running_on_windows?
  !(RUBY_PLATFORM =~ /mswin|mingw/).nil?
end