class EnjuAccessor
An instance of this class holds the parsing result of a natural language query as analyzed by Enju.
Public Class Methods
new(enju_cgi_url)
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 8
#
# Creates an accessor bound to one Enju CGI endpoint.
#
# enju_cgi_url:: URL passed to RestClient::Resource.new; every parse request
#                issued by get_parse goes through the resulting resource.
#
# Besides the HTTP resource, a TextSentencer is created for splitting texts
# into sentences, and all ID counters are reset to zero.
def initialize(enju_cgi_url)
  @enju_cgi = RestClient::Resource.new(enju_cgi_url)
  @sentencer = TextSentencer.new

  # Running offsets for token (T...) and relation (R...) IDs.
  @tid_base = 0
  @rid_base = 0

  # Running offset for the P.../B... IDs produced by tag_sentence. It must
  # start out as a number: tag_sentence adds to it without resetting it when
  # called in 'continue' mode, which would otherwise fail on nil.
  @id_base = 0
end
Public Instance Methods
get_parse(sentence)
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 14
#
# Sends one sentence to the Enju CGI server, asking for the 'so' output
# format, and returns whatever read_parse makes of the response.
#
# sentence:: the sentence to be parsed.
#
# Raises IOError when the request itself raises or when the response code is
# not 200, and RuntimeError ("Empty input.") when the server reports an empty
# line.
def get_parse(sentence)
  response =
    begin
      @enju_cgi.get :params => {:sentence=>sentence, :format=>'so'}
    rescue => e
      raise IOError, "Abnormal behavior of the Enju CGI server: #{e.message}."
    end

  # Anything other than 200 (success) is treated as a server-side failure.
  unless response.code == 200
    raise IOError, "Abnormal response from the Enju CGI server."
  end

  raise "Empty input." if response =~/^Empty line/

  # Re-label the response bytes as UTF-8 before handing them to the reader.
  so_text = response.encode("ASCII-8BIT").force_encoding("UTF-8").to_s
  read_parse(sentence, so_text)
end
parse_sentence(sentence, offset_base = 0, mode = '')
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 98
#
# Parses one sentence and converts the result into denotations (one per
# token, carrying its POS tag) and relations (one per resolvable
# predicate-argument link).
#
# sentence::    the sentence to be parsed.
# offset_base:: value added to every token span, i.e. where the sentence
#               starts within the enclosing text.
# mode::        'continue' keeps numbering IDs from where the previous call
#               stopped; any other value restarts the T/R counters at zero.
#
# Returns {:denotations => [...], :relations => [...]}.
# Propagates whatever get_parse raises.
def parse_sentence(sentence, offset_base = 0, mode = '')
  @tid_base, @rid_base = 0, 0 unless mode == 'continue'

  toks, cons = get_parse(sentence)

  # One denotation per token; remember how parser token IDs map to T-IDs.
  tid_mapping = {}
  denotations = toks.map do |tok_id, tok|
    tid = tid_mapping[tok_id] = "T#{tok[:idx] + @tid_base}"
    {id: tid, span: {begin: tok[:beg] + offset_base, end: tok[:end] + offset_base}, obj: tok[:pos]}
  end

  # For every constituent, follow the semantic-head chain down to a token
  # (token IDs start with 't', constituent IDs with 'c').
  cons.each_value do |con|
    thead = con[:sem_head]
    thead = cons[thead][:sem_head] until thead.start_with?('t')
    con[:thead] = thead
  end

  relations = []
  rid_num = @rid_base
  toks.each do |tok_id, tok|
    tok[:args].each do |type, arg|
      # An argument pointing at a constituent is represented by its head token.
      arg = cons[arg][:thead] if arg.start_with?('c')
      subj = tid_mapping[arg]
      # Arguments that do not resolve to a known token are dropped.
      next if subj.nil?

      relations << {id: "R#{rid_num}", subj: subj, obj: tid_mapping[tok_id], pred: "#{type.to_s.downcase}Of"}
      rid_num += 1
    end
  end

  # Advance the token counter past the highest index actually used. A parse
  # without tokens must not consume an ID, otherwise the numbering of the
  # following sentences gets a gap.
  highest_idx = toks.each_value.map { |tok| tok[:idx] }.max
  @tid_base += highest_idx + 1 unless highest_idx.nil?
  @rid_base = rid_num

  {:denotations => denotations, :relations => relations}
end
parse_text(text)
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 157
#
# Parses a whole text: the sentencer cuts it into segments, each segment is
# parsed with parse_sentence, and the per-sentence results are collected.
#
# text:: the text to be parsed.
#
# Returns {:text => text, :denotations => [...], :relations => [...]}.
def parse_text(text)
  denotations = []
  relations = []

  @sentencer.segment(text).each_with_index do |seg, i|
    # Only the first segment restarts ID numbering; later ones continue it.
    mode = i.zero? ? nil : 'continue'
    seg_begin = seg[0]
    seg_end = seg[1]

    annotation = parse_sentence(text[seg_begin...seg_end], seg_begin, mode)
    denotations.concat(annotation[:denotations])
    relations.concat(annotation[:relations])
  end

  {:text=> text, :denotations => denotations, :relations => relations}
end
read_parse(sentence, r)
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 33
#
# Reads a parsing result in SO format and splits it into tokens and
# constituents.
#
# sentence:: the sentence that was parsed; used to cut out each token's
#            surface form.
# r::        the parsing result: one tab-separated line per node, holding a
#            begin offset, an end offset and an attribute string.
#
# Returns [toks, cons], both hashes keyed by the node's id attribute:
# * toks[id] = {beg:, end:, word:, idx:, base:, pos:, cat:, args:}
# * cons[id] = {head:, sem_head:, cat:}
def read_parse(sentence, r)
  toks = {}
  cons = {}
  adjustment = 0 # running correction applied to the offsets read from r
  idx = 0        # position of the token within the sentence

  # POS tags that are punctuation marks; for these the base form is reported
  # instead. The closing-quote tag is listed both literally and in its
  # XML-escaped spelling, in case the entity reaches us undecoded.
  punct_tags = [',', '.', ':', '(', ')', '``', "''", '&apos;&apos;']

  r.split(/\r?\n/).each do |line|
    b, e, attr_str = line.split(/\t/)
    # Blank or truncated lines carry no attribute string; skip them.
    next if attr_str.nil?

    # The attribute string is parsed by wrapping it into a dummy element.
    node = Nokogiri::HTML.parse('<node ' + attr_str + '>')
    attrs = node.css('node').first.to_h

    if attrs['tok'] == ""
      base = attrs['base']

      # Each multi-byte character of the base form shifts the following
      # offsets by (1 - its byte size): the begin offset uses the correction
      # accumulated so far, the end offset includes this token's share.
      b = b.to_i + adjustment
      adjustment += base.length - base.bytesize
      e = e.to_i + adjustment

      pos = attrs['pos']
      pos = base if punct_tags.include?(pos)
      # Non-mutating sub: pos may be the very same string object as base.
      pos = pos.sub('$', '-DOLLAR-') unless pos.nil?
      pos = '-COLON-' if pos == 'HYPH'

      tok = {beg: b, end: e, word: sentence[b...e], idx: idx, base: base, pos: pos, cat: attrs['cat'], args: {}}
      %w[arg1 arg2 arg3 mod].each do |name|
        tok[:args][name.to_sym] = attrs[name] if attrs[name]
      end
      toks[attrs['id']] = tok
      idx += 1
    end

    if attrs['cons'] == ""
      cons[attrs['id']] = {head: attrs['head'], sem_head: attrs['sem_head'], cat: attrs['cat']}
    end
  end

  [toks, cons]
end
tag_sentence(sentence, offset_base = 0, mode = '')
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 139
#
# Parses one sentence and returns two denotations per token: one carrying
# the POS tag (ID prefix 'P') and one carrying the base form (ID prefix 'B').
#
# sentence::    the sentence to be tagged.
# offset_base:: value added to every token span, i.e. where the sentence
#               starts within the enclosing text.
# mode::        'continue' keeps numbering IDs from where the previous call
#               stopped; any other value restarts the counter at zero.
#
# Returns {:denotations => [...]}.
# Propagates whatever get_parse raises.
def tag_sentence(sentence, offset_base = 0, mode = '')
  # The counter also starts at zero when it has never been set, so that a
  # first call in 'continue' mode does not fail on nil.
  @id_base = 0 if @id_base.nil? || mode != 'continue'

  toks, _cons = get_parse(sentence)

  denotations = toks.flat_map do |_tok_id, tok|
    num = tok[:idx] + @id_base
    span_begin = tok[:beg] + offset_base
    span_end = tok[:end] + offset_base
    [
      {id: "P#{num}", span: {begin: span_begin, end: span_end}, obj: tok[:pos]},
      {id: "B#{num}", span: {begin: span_begin, end: span_end}, obj: tok[:base]}
    ]
  end

  # Advance the counter past the highest index actually used. A parse
  # without tokens must not consume an ID.
  highest_idx = toks.each_value.map { |tok| tok[:idx] }.max
  @id_base += highest_idx + 1 unless highest_idx.nil?

  {:denotations => denotations}
end
tag_text(text)
click to toggle source
# File lib/enju_accessor/enju_accessor.rb, line 171
#
# Tags a whole text: the sentencer cuts it into segments, each segment is
# tagged with tag_sentence, and the resulting denotations are collected in
# segment order.
#
# text:: the text to be tagged.
#
# Returns {:text => text, :denotations => [...]}.
def tag_text(text)
  segments = @sentencer.segment(text)

  denotations = segments.each_with_index.flat_map do |seg, i|
    # Only the first segment restarts ID numbering; later ones continue it.
    mode = i.zero? ? nil : 'continue'
    seg_begin = seg[0]
    seg_end = seg[1]
    tag_sentence(text[seg_begin...seg_end], seg_begin, mode)[:denotations]
  end

  {:text=> text, :denotations => denotations}
end