class PROIEL::Token

A token object in a treebank.

Constants

MORPHOLOGY_POSITIONAL_TAG_SEQUENCE

FIXME: extract this from the header of the PROIEL XML file instead and subclass PositionalTag

NULL_PARTS_OF_SPEECH
POS_POSITIONAL_TAG_SEQUENCE

FIXME: extract this from the header of the PROIEL XML file instead and subclass PositionalTag

Attributes

alignment_id[R]

@return [nil, Integer] ID of the token that this token is aligned to

antecedent_id[R]

@return [nil, Integer] ID of antecedent token

citation_part[R]

@return [nil, String] citation part

contrast_group[R]

@return [nil, String] contrast group tag

empty_token_sort[R]

@return [nil, String] empty token sort tag

foreign_ids[R]

@return [nil, String] free-form foreign IDs

form[R]

@return [nil, String] token form

head_id[R]

@return [nil, Integer] ID of head token

id[R]

@return [Integer] ID of the token

information_status[R]

@return [nil, String] information status tag

lemma[R]

@return [nil, String] token lemma

morphology[R]

@return [nil, String] token morphological tag

part_of_speech[R]

@return [nil, String] token part of speech tag

pos[R]

@return [nil, String] token part of speech tag

presentation_after[R]

@return [nil, String] presentation material after form

presentation_before[R]

@return [nil, String] presentation material before form

relation[R]

@return [nil, String] token relation tag

sentence[RW]

@return [Sentence] parent sentence object

slashes[R]

@return [Array<Array<String,Integer>>] secondary edges as an array of pairs of relation tag and target token ID

Public Class Methods

new(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) click to toggle source

Creates a new token object.

# File lib/proiel/token.rb, line 70
# Creates a new token object.
#
# Every argument except +parent+ is type-checked in signature order, and the
# first failing check raises. String values are frozen before being stored.
#
# @param parent [Sentence] parent sentence object
# @param id [Integer] ID of the token
# @param head_id [nil, Integer] ID of head token
# @param form [nil, String] token form
# @param lemma [nil, String] token lemma
# @param part_of_speech [nil, String] token part of speech tag
# @param morphology [nil, String] token morphological tag
# @param relation [nil, String] token relation tag
# @param empty_token_sort [nil, String] empty token sort tag
# @param citation_part [nil, String] citation part
# @param presentation_before [nil, String] presentation material before form
# @param presentation_after [nil, String] presentation material after form
# @param antecedent_id [nil, Integer] ID of antecedent token
# @param information_status [nil, String] information status tag
# @param contrast_group [nil, String] contrast group tag
# @param foreign_ids [nil, String] free-form foreign IDs
# @param slashes [Array<#relation, #target_id>] secondary edges; each is
#   stored as a pair of relation tag and target token ID
# @param alignment_id [nil, Integer] ID of the aligned token
#
# @raise [ArgumentError] if an argument has an unexpected type
def initialize(parent, id, head_id, form, lemma, part_of_speech,
               morphology, relation, empty_token_sort, citation_part,
               presentation_before, presentation_after, antecedent_id,
               information_status, contrast_group, foreign_ids, slashes,
               alignment_id)
  # Passes nil or an Integer through unchanged; rejects anything else.
  optional_integer = lambda do |value|
    raise ArgumentError, 'integer or nil expected' unless value.nil? || value.is_a?(Integer)

    value
  end

  # Passes nil or a String through frozen; rejects anything else.
  optional_string = lambda do |value|
    raise ArgumentError, 'string or nil expected' unless value.nil? || value.is_a?(String)

    value.freeze
  end

  @sentence = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)

  @id = id
  @head_id = optional_integer.call(head_id)
  @form = optional_string.call(form)
  @lemma = optional_string.call(lemma)
  @part_of_speech = optional_string.call(part_of_speech)
  @morphology = optional_string.call(morphology)
  @relation = optional_string.call(relation)
  @empty_token_sort = optional_string.call(empty_token_sort)
  @citation_part = optional_string.call(citation_part)
  @presentation_before = optional_string.call(presentation_before)
  @presentation_after = optional_string.call(presentation_after)
  @antecedent_id = optional_integer.call(antecedent_id)
  @information_status = optional_string.call(information_status)
  @contrast_group = optional_string.call(contrast_group)
  @foreign_ids = optional_string.call(foreign_ids)

  raise ArgumentError, 'array expected' unless slashes.is_a?(Array)

  # Reduce each slash object to a plain [relation, target_id] pair.
  @slashes = slashes.map { |slash| [slash.relation.freeze, slash.target_id] }
  @alignment_id = optional_integer.call(alignment_id)
end

Public Instance Methods

alignment(aligned_source) click to toggle source

Returns the aligned token if any.

@return [Token, NilClass] aligned token

# File lib/proiel/token.rb, line 400
# Looks up the token that this token is aligned to.
#
# The lookup goes through the treebank of +aligned_source+ and uses this
# token's +alignment_id+.
#
# @param aligned_source [#treebank] source whose treebank is searched
#
# @return [Token, NilClass] aligned token, or +nil+ if +alignment_id+ is unset
def alignment(aligned_source)
  return nil unless alignment_id

  aligned_source.treebank.find_token(alignment_id)
end
ancestors() click to toggle source

Finds ancestors of this token in the dependency graph.

The ancestors are the ancestors of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned ancestors is as follows: the first ancestor is the head of this token, the next ancestor is the head of that ancestor, and so on up to the root.

@return [Array<Token>] ancestors

# File lib/proiel/token.rb, line 266
# Finds the ancestors of this token in the dependency graph.
#
# The result starts with this token's head and continues with the head of
# each successive ancestor until a root token is reached.
#
# @return [Array<Token>] ancestors, nearest first; empty for a root token
def ancestors
  return [] if is_root?

  # Resolve the head once: each call to #head performs a treebank lookup.
  nearest = head
  [nearest, *nearest.ancestors]
end
children()
Alias for: dependents
citation() click to toggle source

@return [nil, String] a complete citation for the token

# File lib/proiel/token.rb, line 152
# Builds the complete citation for the token.
#
# The citation is the source's citation part followed by the token's own
# citation part, separated by a space. A missing source citation part is
# skipped.
#
# @return [nil, String] a complete citation for the token, or +nil+ if the
#   token has no citation part
def citation
  return nil unless citation_part

  parts = [source.citation_part, citation_part]
  parts.compact.join(' ')
end
common_ancestors(other_token, inclusive: false) click to toggle source

Finds the common ancestors that this token and another token share in the dependency graph.

If `inclusive` is `false`, a common ancestor is defined strictly as a common ancestor of both tokens. If `inclusive` is `true`, one of the tokens can be a common ancestor of the other.

Ancestors are returned in the same order as {Token#ancestors}.

@example

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.common_ancestors(y, inclusive: false) # => [z, u]
x.common_ancestors(w, inclusive: false) # => [z, u]
x.common_ancestors(x, inclusive: false) # => [w, z, u]

x.common_ancestors(y, inclusive: true)  # => [z, u]
x.common_ancestors(w, inclusive: true)  # => [w, z, u]
x.common_ancestors(x, inclusive: true)  # => [x, w, z, u]

@see Token#first_common_ancestor @see Token#first_common_ancestor_path

@return [Array<Token>] common ancestors

# File lib/proiel/token.rb, line 358
# Finds the ancestors shared by this token and +other_token+.
#
# With +inclusive+ set, each token is also counted among its own ancestors,
# so one token may be reported as a common ancestor of the other.
#
# @param other_token [Token] token to compare with
# @param inclusive [true, false] whether the tokens themselves count
#
# @return [Array<Token>] common ancestors, in the order of this token's chain
def common_ancestors(other_token, inclusive: false)
  own_chain = ancestors
  own_chain = [self, *own_chain] if inclusive

  other_chain = other_token.ancestors
  other_chain = [other_token, *other_chain] if inclusive

  # Array intersection keeps the order of the left-hand operand.
  own_chain & other_chain
end
dependents() click to toggle source

Finds the dependents of this token in the dependency graph.

The dependents are the children of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned dependents is indeterminate.

@return [Array<Token>] dependents

# File lib/proiel/token.rb, line 248
# Finds the dependents of this token in the dependency graph.
#
# A dependent is any token of the parent sentence whose +head_id+ equals
# this token's ID.
#
# @return [Array<Token>] dependents
def dependents
  own_id = @id
  @sentence.tokens.select { |candidate| candidate.head_id == own_id }
end
Also aliased as: children
descendants()
Alias for: descendents
descendents() click to toggle source

Finds descendents of this token in the dependency graph.

The descendents are the dependents of this token, their dependents, and so on, in the tree that has tokens as nodes and primary relations as edges.

The order of the returned descendents is indeterminate.

@return [Array<Token>] descendents

# File lib/proiel/token.rb, line 284
# Finds the descendents of this token in the dependency graph.
#
# Each dependent is followed immediately by its own descendents. The order
# of the dependents themselves is whatever #dependents returns.
#
# @return [Array<Token>] descendents; empty if the token has no dependents
def descendents
  # flat_map concatenates the per-dependent arrays in one pass, so no nested
  # intermediate array has to be built and recursively flattened.
  dependents.flat_map { |dependent| [dependent, *dependent.descendents] }
end
Also aliased as: descendants
div() click to toggle source

@return [Div] parent div object

# File lib/proiel/token.rb, line 130
# Returns the div that the parent sentence belongs to.
#
# @return [Div] parent div object
def div
  sentence.div
end
first_common_ancestor(other_token, inclusive: false) click to toggle source

Finds the first common ancestor that this token and another token share in the dependency graph.

If `inclusive` is `false`, a common ancestor is defined strictly as a common ancestor of both tokens. If `inclusive` is `true`, one of the tokens can be a common ancestor of the other.

@example

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.first_common_ancestor(y, inclusive: false) # => z
x.first_common_ancestor(w, inclusive: false) # => z
x.first_common_ancestor(x, inclusive: false) # => w

x.first_common_ancestor(y, inclusive: true)  # => z
x.first_common_ancestor(w, inclusive: true)  # => w
x.first_common_ancestor(x, inclusive: true)  # => x

@see Token#common_ancestors @see Token#first_common_ancestor_path

@return [nil, Token] first common ancestor

# File lib/proiel/token.rb, line 393
# Finds the nearest ancestor shared by this token and +other_token+.
#
# This is the first element of #common_ancestors called with the same
# arguments.
#
# @param other_token [Token] token to compare with
# @param inclusive [true, false] whether the tokens themselves count
#
# @return [nil, Token] first common ancestor, or +nil+ if there is none
def first_common_ancestor(other_token, inclusive: false)
  shared = common_ancestors(other_token, inclusive: inclusive)
  shared.first
end
has_citation?() click to toggle source

Tests if the token has a citation.

A token has a citation if `citation_part` is not `nil`.

@return [true, false]

# File lib/proiel/token.rb, line 320
# Tests if the token has a citation.
#
# @return [true, false] +true+ unless +citation_part+ is +nil+
def has_citation?
  !citation_part.nil?
end
has_content?() click to toggle source

Tests if the token has content.

A token has content if it is not an empty token, i.e. if `empty_token_sort` is `nil`.

@see Token#is_empty?

@return [true, false]

# File lib/proiel/token.rb, line 311
# Tests if the token has content.
#
# This is the exact opposite of #is_empty?: a token has content when no
# +empty_token_sort+ is set.
#
# @see Token#is_empty?
#
# @return [true, false]
def has_content?
  !is_empty?
end
head() click to toggle source

Finds the head of this token.

The head is the parent of this token in the tree that has tokens as nodes and primary relations as edges.

@return [nil, Token] head, or `nil` if this token is a root

# File lib/proiel/token.rb, line 228
# Finds the head of this token.
#
# The head is looked up in the treebank by +head_id+.
#
# @return [nil, Token] head, or +nil+ if this token is a root
def head
  return nil if is_root?

  treebank.find_token(head_id)
end
Also aliased as: parent
is_empty?() click to toggle source

Tests if the token is empty.

A token is empty if it does not have a form, which is signalled by `empty_token_sort` being set. If the token is empty, {Token#empty_token_sort} explains its function.

@see Token#has_content?

@return [true, false]

# File lib/proiel/token.rb, line 300
# Tests if the token is empty.
#
# The check is made on +empty_token_sort+, not on +form+.
#
# @see Token#has_content?
#
# @return [true, false] +true+ unless +empty_token_sort+ is +nil+
def is_empty?
  !empty_token_sort.nil?
end
is_root?() click to toggle source

Checks if the token is the root of its dependency graph.

If the token belongs to a sentence that lacks dependency annotation, all tokens are treated as roots. If a sentence has partial or complete dependency annotation there may still be multiple root tokens.

@return [true, false]

# File lib/proiel/token.rb, line 218
# Checks if the token is the root of its dependency graph.
#
# @return [true, false] +true+ if +head_id+ is +nil+
def is_root?
  head_id.nil?
end
language() click to toggle source

@return [String] language of the token as an ISO 639-3 language tag

# File lib/proiel/token.rb, line 145
# Returns the language of the token, which is the language of its source.
#
# @return [String] language of the token as an ISO 639-3 language tag
def language
  source.language
end
morphology_hash() click to toggle source

@return [Hash<Symbol,String>] token morphology tag as a hash

# File lib/proiel/token.rb, line 201
# Expands the positional morphology tag into a hash.
#
# Each character of +morphology+ is paired with the field name at the same
# position in MORPHOLOGY_POSITIONAL_TAG_SEQUENCE. Fields whose character is
# '-' are left out.
#
# @return [Hash<Symbol,String>] token morphology tag as a hash; empty if the
#   token has no morphology tag
def morphology_hash
  return {} unless morphology

  fields = MORPHOLOGY_POSITIONAL_TAG_SEQUENCE.zip(morphology.split(''))
  fields.select { |_, value| value != '-' }.to_h
end
parent()
Alias for: head
part_of_speech_hash() click to toggle source

@return [Hash<Symbol,String>] token part of speech tag as a hash

# File lib/proiel/token.rb, line 178
# Expands the positional part of speech tag into a hash.
#
# Each character of +part_of_speech+ is paired with the field name at the
# same position in POS_POSITIONAL_TAG_SEQUENCE. Fields whose character is
# '-' are left out.
#
# @return [Hash<Symbol,String>] token part of speech tag as a hash; empty if
#   the token has no part of speech tag
def part_of_speech_hash
  return {} unless part_of_speech

  characters = part_of_speech.split('')
  POS_POSITIONAL_TAG_SEQUENCE.zip(characters).each_with_object({}) do |(field, value), tags|
    tags[field] = value unless value == '-'
  end
end
Also aliased as: pos_hash
part_of_speech_with_nulls() click to toggle source

Returns the part of speech tag if set, but also provides a suitable part of speech tag for empty elements.

@return [String] part of speech tag

# File lib/proiel/token.rb, line 194
# Returns the part of speech tag if set. Otherwise it returns the entry that
# NULL_PARTS_OF_SPEECH holds for the token's +empty_token_sort+.
#
# @return [String] part of speech tag
def part_of_speech_with_nulls
  explicit_tag = part_of_speech
  return explicit_tag if explicit_tag

  NULL_PARTS_OF_SPEECH[empty_token_sort]
end
Also aliased as: pos_with_nulls
pos_hash()
Alias for: part_of_speech_hash
pos_with_nulls()
Alias for: part_of_speech_with_nulls
printable_form(custom_token_formatter: nil) click to toggle source

Returns the printable form of the token with any presentation data.

@param custom_token_formatter [nil, #call] formatting function for tokens which is passed the token as its sole argument; if `nil`, the token form is used

@return [String] the printable form of the token

# File lib/proiel/token.rb, line 166
# Returns the printable form of the token with any presentation data.
#
# The result is +presentation_before+, the token text and
# +presentation_after+ joined together, skipping any part that is +nil+.
#
# @param custom_token_formatter [nil, #call] formatting function that is
#   passed the token as its sole argument; the token form is used if +nil+
#
# @return [String] the printable form of the token
def printable_form(custom_token_formatter: nil)
  rendered = custom_token_formatter ? custom_token_formatter.call(self) : form

  [presentation_before, rendered, presentation_after].compact.join
end
pro?() click to toggle source

Checks if the token is a PRO token.

@return [true, false]

# File lib/proiel/token.rb, line 327
# Checks if the token is a PRO token.
#
# @return [true, false] +true+ if +empty_token_sort+ is 'P'
def pro?
  empty_token_sort == 'P'
end
source() click to toggle source

@return [Source] parent source object

# File lib/proiel/token.rb, line 135
# Returns the source that the parent div belongs to.
#
# @return [Source] parent source object
def source
  div.source
end
treebank() click to toggle source

@return [Treebank] parent treebank object

# File lib/proiel/token.rb, line 140
# Returns the treebank that the parent source belongs to.
#
# @return [Treebank] parent treebank object
def treebank
  source.treebank
end