class PROIEL::Lemma

Attributes

dictionary[R]

@return [Dictionary] dictionary that the lemma belongs to

distribution[R]

@return [Hash{String => Integer}] distribution of the lemma across sources. The keys are IDs of sources, and the values give the frequency of the lemma in each source.

glosses[R]

@return [Hash{Symbol => String}] glosses for the current lemma. The keys are language tags and the values are the glosses.

homographs[R]

@return [Array<[String, String]>] identified homographs of this lemma. The array contains pairs of lemma form (which will be homographs of this lemma form under the orthographic conventions of the language) and part of speech.

n[R]
paradigm[R]
valency[R]

Public Class Methods

new(parent, xml = nil) click to toggle source

Creates a new lemma object.

# File lib/proiel/lemma.rb, line 31
# Creates a new lemma object.
#
# Every attribute starts out empty (or nil for +n+); when +xml+ is given,
# the attributes are then filled in by +from_xml+.
#
# @param parent [Object] stored as the lemma's +dictionary+
# @param xml [Object, nil] optional parsed lemma data handed to +from_xml+
def initialize(parent, xml = nil)
  @dictionary = parent
  @n = nil

  # Hash-valued attributes.
  @distribution = {}
  @glosses = {}
  @paradigm = {}

  # Array-valued attributes.
  @homographs = []
  @valency = []

  return unless xml

  from_xml(xml)
end

Private Instance Methods

from_xml(xml) click to toggle source
# File lib/proiel/lemma.rb, line 47
# Fills in the lemma's attributes from a parsed lemma object.
#
# All counts (+n+ values) go through +nullify+ with +:int+; see +nullify+
# for how nil or blank values are handled.
#
# @param xml [Object] object responding to +n+, +distribution+, +glosses+,
#   +homographs+, +paradigm+ and +valency+
# @return [Array<Hash>] the valency frames that were just assigned
def from_xml(xml)
  @n = nullify(xml.n, :int)

  # Source ID => frequency of the lemma in that source.
  distribution_pairs = xml.distribution.map do |entry|
    [entry.idref, nullify(entry.n, :int)]
  end
  @distribution = distribution_pairs.to_h

  # Language tag (as a symbol) => gloss.
  gloss_pairs = xml.glosses.map do |entry|
    [entry.language.to_sym, entry.gloss]
  end
  @glosses = gloss_pairs.to_h

  # Pairs of lemma form and part of speech.
  @homographs = xml.homographs.map do |entry|
    [entry.lemma, entry.part_of_speech]
  end

  # Morphology => { form => count }.
  paradigm_pairs = xml.paradigm.map do |outer|
    morphology = outer.morphology
    form_pairs = outer.slot2s.map do |inner|
      [inner.form, nullify(inner.n, :int)]
    end
    [morphology, form_pairs.to_h]
  end
  @paradigm = paradigm_pairs.to_h

  # One hash per frame, listing its arguments and the tokens attesting it.
  @valency = xml.valency.map do |frame|
    argument_list = frame.arguments.map do |argument|
      {
        relation: argument.relation,
        lemma: argument.lemma,
        part_of_speech: argument.part_of_speech,
        mood: argument.mood,
        case: argument.case,
      }
    end
    token_list = frame.tokens.map do |token|
      { flags: token.flags, idref: token.idref }
    end

    { arguments: argument_list, tokens: token_list }
  end
end
nullify(s, type = nil) click to toggle source
# File lib/proiel/lemma.rb, line 63
# Converts a raw value to an integer or a string, mapping nil and blank
# values to nil.
#
# A value counts as blank only when the whole string is empty or consists
# solely of whitespace. The pattern is anchored with \A and \z rather than
# ^ and $, because ^ and $ match at every line boundary and would treat a
# multi-line value containing an empty line (e.g. "\n5") as blank.
#
# @param s [Object, nil] the raw value
# @param type [Symbol, nil] +:int+ to convert with +to_i+; anything else
#   converts with +to_s+
# @return [Integer, String, nil] nil if +s+ is nil or blank, otherwise the
#   converted value
def nullify(s, type = nil)
  case s
  when nil, /\A\s*\z/
    nil
  else
    type == :int ? s.to_i : s.to_s
  end
end