class PROIEL::Valency::Lexicon
Attributes
frames[R]
Public Class Methods
new()
click to toggle source
# File lib/proiel/valency/lexicon.rb, line 6
# Sets up an empty lexicon: no source ids, no source languages and no
# frames recorded yet.
def initialize
  # Two distinct, empty sets tracking which sources have been added.
  @source_ids, @source_languages = Set.new, Set.new

  # Nested hash filled by add_source!: lemma => part of speech => frame.
  @frames = {}
end
Public Instance Methods
add_source!(source)
click to toggle source
Adds a source to the valency lexicon, recording the argument frame of each verbal token found in its sentences. In practice all sources added should be in the same language, but this is not enforced. This makes it possible to generate a lexicon from sources in closely related languages or dialects.
# File lib/proiel/valency/lexicon.rb, line 16
# Adds the verbal tokens of +source+ to the lexicon.
#
# The source's id and language are remembered, and for every token returned
# by find_verbal_nodes the token id is filed under
# @frames[lemma][part_of_speech][frame], in the :r list when the token has
# an 'aux' dependent whose part of speech is 'Pk', and in the :a list
# otherwise.
#
# Returns the result of iterating over source.sentences.
def add_source!(source)
  @source_ids << source.id
  @source_languages << source.language

  source.sentences.each do |sentence|
    find_verbal_nodes(sentence).each do |verb|
      frame = PROIEL::Valency::Arguments.get_argument_frame(verb)

      has_pk_aux = verb.dependents.any? do |dependent|
        dependent.relation == 'aux' && dependent.part_of_speech == 'Pk'
      end
      partition = has_pk_aux ? :r : :a

      # Walk down the nested hash, creating each level on first use.
      by_part_of_speech = (@frames[verb.lemma] ||= {})
      by_frame = (by_part_of_speech[verb.part_of_speech] ||= {})
      token_ids = (by_frame[frame] ||= { a: [], r: [] })

      token_ids[partition] << verb.id
    end
  end
end
lookup(lemma, part_of_speech)
click to toggle source
# File lib/proiel/valency/lexicon.rb, line 40
# Looks up the frames recorded for +lemma+ with the given +part_of_speech+.
#
# Each recorded frame is turned into a hash of the form
# { arguments: <frame>, tokens: <token ids> } and the resulting list is
# handed to PROIEL::Valency::Obliqueness.sort_frames, whose result is
# returned.
#
# A lemma or part of speech that has never been recorded yields an empty
# frame list (passed through sort_frames as well) instead of raising
# NoMethodError on nil.
def lookup(lemma, part_of_speech)
  # fetch with an empty-hash default keeps unknown keys from blowing up.
  recorded = @frames.fetch(lemma, {}).fetch(part_of_speech, {})

  frames = recorded.map do |arguments, token_ids|
    { arguments: arguments, tokens: token_ids }
  end

  PROIEL::Valency::Obliqueness.sort_frames(frames)
end
Private Instance Methods
find_verbal_nodes(sentence)
click to toggle source
Find verbal nodes in a sentence
# File lib/proiel/valency/lexicon.rb, line 51
# Selects the verbal tokens of +sentence+: tokens whose part of speech
# matches /^V/, plus tokens whose empty_token_sort is 'V'.
def find_verbal_nodes(sentence)
  sentence.tokens.select do |token|
    # FIXME: is this test in the proiel library already?
    part_of_speech = token.part_of_speech
    verbal_part_of_speech = part_of_speech && part_of_speech[/^V/]

    verbal_part_of_speech || token.empty_token_sort == 'V'
  end
end