class NlpToolz::Parser
Constants
- FileInputStream
load java classes
- Leaf
- Node
Attributes
input[RW]
lang[RW]
model[RW]
model_name[RW]
parse_hash[RW]
parsed[R]
Public Class Methods
new(input, lang = nil)
click to toggle source
# File lib/nlp_toolz/parser.rb, line 18
# Sets up a parser for +input+.
#
# input - the text to parse; kept in @input.
# lang  - language code used to build the grammar file name. When it is nil
#         (or false) the code is taken from
#         NlpToolz::Language.get_language(input) instead.
#
# The grammar file name is "<lang>-sm5.gr"; get_model is called last to
# resolve that name and set @model.
def initialize(input, lang = nil)
  @input = input
  @lang = lang
  @lang ||= NlpToolz::Language.get_language(input)
  @model_name = "#{@lang}-sm5.gr"
  get_model
end
Public Instance Methods
has_model?()
click to toggle source
# File lib/nlp_toolz/parser.rb, line 41
# Tells whether a grammar model is available.
#
# Returns @model unchanged, so the result is whatever was stored there:
# get_model stores the model file path when the file exists and false when it
# does not. Use the result for its truthiness.
def has_model?
  @model
end
hash()
click to toggle source
# File lib/nlp_toolz/parser.rb, line 49
# Returns the stored parse result (@parse_hash), or nil when nothing has been
# assigned to it yet.
#
# NOTE(review): this method shadows Object#hash, which Ruby expects to return
# an Integer when an object is used as a Hash key (or in Array#uniq and
# similar). Instances of this class should therefore not be used as Hash keys.
# The name is kept because callers use it to read the parse result.
def hash
  @parse_hash
end
layer(level = nil)
click to toggle source
# File lib/nlp_toolz/parser.rb, line 45
# Returns @first_layer, the { tags: [...], tokens: [...] } hash that
# create_leafs fills while it builds the leaves (nil before that has run).
#
# level - accepted but not used by this implementation; every value yields
#         the same result.
#         NOTE(review): presumably intended to select a tree depth - confirm.
def layer(level = nil)
  @first_layer
end
parse_text()
click to toggle source
# File lib/nlp_toolz/parser.rb, line 25
# Runs the Berkeley parser jar on the cleaned-up input and stores the result.
#
# Does nothing and returns nil when has_model? is falsy. Otherwise the input
# is written to a temp file, an external `java` process is run with an output
# temp file, the output is read into @parsed (trailing newline removed) and
# parse_output_to_hash converts it into @parse_hash.
#
# Both temp files are removed even when a step raises.
#
# Returns the value of parse_output_to_hash (the parse hash), or nil when no
# model is available.
def parse_text
  return unless has_model?

  jar = "#{JARS}/BerkeleyParser-1.7.jar"
  in_file = nil
  out_file = nil
  begin
    in_file = make_tmp_file_from @input.clean_up
    # NOTE(review): called without an argument, as in the original; assumes the
    # helper then creates an empty temp file - confirm against its definition.
    out_file = make_tmp_file_from

    # Argument-array form: no shell is involved, so paths containing spaces or
    # shell metacharacters reach java unchanged. As before, the process's
    # stdout is read to the end and discarded; the result is in out_file.
    command = [
      "java", "-Xmx4g", "-jar", jar,
      "-gr", @model,
      "-inputFile", in_file.path,
      "-outputFile", out_file.path,
      "-tokenize",
      "-maxLength", "500"
    ]
    IO.popen(command, &:read)

    # File.read closes the handle and returns "" (not nil) for an empty file.
    @parsed = File.read(out_file.path).chomp
    parse_output_to_hash
  ensure
    delete_and_unlink_tmp_file in_file if in_file
    delete_and_unlink_tmp_file out_file if out_file
  end
end
Private Instance Methods
create_leafs(parsed)
click to toggle source
Merges tags and tokens and creates the leaves.
# File lib/nlp_toolz/parser.rb, line 89
# Merges "{TAG" / "token}" pairs into leaves and keeps the other markers.
#
# parsed - Array of String tokens as returned by split_parse_tree.
#
# A token that looks like an opening tag and is directly followed by a token
# that ends in "}" becomes Leaf.new(tag_symbol, token_text); the tag and token
# strings are also appended to @first_layer[:tags] / @first_layer[:tokens],
# which is reset on every call. The "token}" half of such a pair is dropped
# when the loop reaches it. Every other token (opening tags of inner nodes,
# bare "{" and "}") is kept as a String.
#
# Neighbouring leaves are grouped: when the previous entry is a Hash it is
# replaced by a two-element Array, and when it is already an Array the new
# leaf is appended to it. Leaf is defined elsewhere; whether its instances
# count as Hash (and so trigger this grouping) is not visible here.
#
# Returns the Array of markers, leaves and leaf groups.
def create_leafs(parsed)
  @first_layer = { tags: [], tokens: [] }
  nodes = []

  parsed.each_with_index do |part, i|
    following = parsed[i + 1]

    if part =~ /\{([\w\-]+|\$\p{P}|\p{P})/ && following =~ /([\p{L}\p{N}\-\.]+|\p{P})\}/
      tag = part.gsub("{", "")
      token = following.gsub("}", "")
      @first_layer[:tags] << tag
      @first_layer[:tokens] << token
      leaf = Leaf.new(tag.to_sym, token)

      case nodes.last
      when Hash  then nodes[-1] = [nodes.last, leaf]
      when Array then nodes.last << leaf
      else nodes << leaf
      end
    elsif part !~ /([\p{L}\p{N}\-]+|\p{P})\}/
      # Not the closing half of a leaf: keep it, normalising an opening tag to
      # a single leading "{".
      nodes << (part =~ /(\{)(.+)/ ? "{#{part.gsub("{", "")}" : part)
    end
  end

  nodes
end
get_model()
click to toggle source
Helper for initialize: looks up the model file that belongs to the model name.
# File lib/nlp_toolz/parser.rb, line 57
# Resolves the grammar file for @model_name.
#
# Looks for "<MODELS>/parser/<@model_name>" and stores the path in @model when
# the file exists, false otherwise.
#
# Returns the value stored in @model.
def get_model
  model_file = "#{MODELS}/parser/#{@model_name}"
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  @model = File.exist?(model_file) ? model_file : false
end
make_hash_hash(nodes)
click to toggle source
# File lib/nlp_toolz/parser.rb, line 121
# Folds the flat node list built by create_leafs into a nested hash.
#
# nodes - Array mixing String markers ("{TAG", a bare "{" and "}") with
#         non-String entries (leaves, arrays of leaves, hashes built by
#         earlier passes).
#
# Each pass takes the last "{TAG" marker that still has a "}" after it and
# replaces the span from that marker up to the first such "}" with
# { TAG: [entries in between] }. The list around the new hash is flattened.
# Passes repeat until at most three entries remain.
#
# Returns the entry at index 1 of the final list, i.e. the tree between an
# outer bare "{" ... "}" pair; nil when there is no such entry.
# Raises ArgumentError when more than three entries remain but nothing can be
# collapsed (this case used to recurse without end).
def make_hash_hash(nodes)
  loop do
    reduced = collapse_last_subtree(nodes)

    if reduced.nil?
      if nodes.length > 3
        raise ArgumentError, "unbalanced parse tree: nothing left to collapse"
      end
      # Kept from the original, which fell back to the reversed list's
      # second entry when no pass was possible.
      return nodes[-2]
    end

    return reduced[1] if reduced.length <= 3

    nodes = reduced
  end
end

# Collapses the last closable "{TAG ... }" span of +nodes+ into a one-key hash.
# Returns the new flattened list, or nil when no tag marker is followed by "}".
def collapse_last_subtree(nodes)
  (nodes.length - 1).downto(0) do |open_at|
    marker = nodes[open_at]
    # Only Strings can be markers. Checking the class first avoids calling =~
    # on hashes and arrays, which raises NoMethodError on Ruby 3.2+.
    tag = marker.is_a?(String) && marker[/\{(\w+)/, 1]
    next unless tag

    offset = nodes[open_at..-1].index("}")
    next unless offset

    close_at = open_at + offset
    subtree = { tag.to_sym => nodes[(open_at + 1)...close_at] }
    # Positive indices keep the tail empty when "}" is the last entry; the old
    # negative-index slice returned the whole list in that case.
    return [nodes[0...open_at], subtree, nodes[(close_at + 1)..-1]].flatten
  end
  nil
end
parse_output_to_hash()
click to toggle source
Converts the parse tree into a hash.
# File lib/nlp_toolz/parser.rb, line 67
# Converts the raw parser output into the nested hash form.
#
# Reads the text through the +parsed+ reader, then runs the three steps in
# order: split_parse_tree (string -> tokens), create_leafs (tokens -> nodes)
# and make_hash_hash (nodes -> hash).
#
# Stores the result in @parse_hash and returns it.
def parse_output_to_hash
  tokens = split_parse_tree(self.parsed)
  @parse_hash = make_hash_hash(create_leafs(tokens))
end
split_parse_tree(parsed)
click to toggle source
Splits the bracketed parse tree string into tokens.
# File lib/nlp_toolz/parser.rb, line 80
# Tokenizes a bracketed parse tree string.
#
# parsed - String such as "( (S (NP (DT The)) ) )".
#
# A space is inserted between every pair of adjacent ")" so that each closing
# bracket after the first becomes its own token; then "(" / ")" are turned
# into "{" / "}" and the string is split on whitespace. Opening brackets stay
# attached to the tag that follows them ("{NP") and the first closing bracket
# stays attached to its token ("The}").
#
# Returns an Array of String tokens.
def split_parse_tree(parsed)
  # The lookahead handles runs of any length in one pass; a literal "))"
  # replacement needs a second pass because its matches cannot overlap.
  parsed.gsub(/\)(?=\))/, ") ").tr("()", "{}").split
end