require "unicode_utils"

grammar ScientificNameClean

# Entry point: one scientific name with optional whitespace on both sides.
rule root
  space a:scientific_name_5 space {
    # Name string with whitespace runs collapsed and both ends trimmed.
    def value
      squeezed = a.value.gsub(/\s{2,}/, " ")
      squeezed.strip
    end

    # Canonical form, normalized the same way as value.
    def canonical
      squeezed = a.canonical.gsub(/\s{2,}/, " ")
      squeezed.strip
    end

    def pos
      a.pos
    end

    def hybrid
      a.hybrid
    end

    # Always an Array: a lone details object is wrapped in a one-element list.
    def details
      found = a.details
      return found if found.class == Array
      [found]
    end

    # Always 1 for this grammar.
    def parser_run
      1
    end
  }
end

# Tried in order: a hybrid formula whose second part is a bare species,
# a name followed by a taxon-concept marker and authorship, and finally
# whatever scientific_name_4 accepts.
rule scientific_name_5
  a:multinomial_name space_hard hybrid_character space_hard b:species {
    def value
      [a.value, b.value].join(" × ")
    end

    def canonical
      [a.canonical, b.canonical].join(" × ")
    end

    def pos
      a.pos.merge(b.pos)
    end

    def hybrid
      true
    end

    # Two entries; the second one borrows the genus of the first.
    def details
      first = a.details
      second = b.details.merge({:genus => first[:genus]})
      [first, second]
    end
  }
  /
  a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
    def value
      "#{a.value} #{b.apply(c)}"
    end

    # The taxon-concept part does not contribute to the canonical form.
    def canonical
      a.canonical
    end

    def pos
      a.pos.merge(c.pos)
    end

    def hybrid
      a.hybrid
    end

    def details
      concept = b.details(c)
      a.details.merge(concept)
    end
  }
  /
  scientific_name_4
end

# Hybrid formulas built from two full names, or from one name followed by a
# hybrid sign and an optional question mark; otherwise scientific_name_3.
rule scientific_name_4
  a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
    def value
      [a.value, b.value].join(" × ")
    end

    def canonical
      [a.canonical, b.canonical].join(" × ")
    end

    def pos
      a.pos.merge(b.pos)
    end

    def hybrid
      true
    end

    def details
      [a.details, b.details]
    end
  }
  /
  a:scientific_name_1 space hybrid_character space [\?]? {
    # The second parent is always rendered as a question mark.
    def value
      "#{a.value} × ?"
    end

    def canonical
      a.canonical
    end

    def pos
      a.pos
    end

    def hybrid
      true
    end

    def details
      [a.details, "?"]
    end
  }
  /
  scientific_name_3
end

# A name introduced by a leading hybrid sign; otherwise scientific_name_2.
rule scientific_name_3
  a:hybrid_character space b:scientific_name_2 {
    def value
      "#{a.value} #{b.value}"
    end

    # Canonical form, positions and details all come from the name itself.
    def canonical
      b.canonical
    end

    def pos
      b.pos
    end

    def hybrid
      true
    end

    def details
      b.details
    end
  }
  /
  scientific_name_2
end

# A name followed by one or more status words; otherwise scientific_name_1.
rule scientific_name_2
  a:scientific_name_1 space b:status_part {
    def value
      "#{a.value} #{b.value}"
    end

    def canonical
      a.canonical
    end

    def pos
      a.pos
    end

    # Falls back to false when the name node cannot report hybrid status.
    def hybrid
      a.hybrid
    rescue
      false
    end

    def details
      status = b.details
      a.details.merge(status)
    end
  }
  /
  scientific_name_1
end

# Basic name forms, tried in this order: multiuninomial_name, then
# multinomial_name, then uninomial_name. The first alternative that
# matches wins.
rule scientific_name_1
  multiuninomial_name
  /
  multinomial_name
  /
  uninomial_name
end

# One or more status words separated by optional whitespace.
rule status_part
  a:status_word space b:status_part {
    # All status words joined by single spaces.
    def value
      "#{a.value} #{b.value}"
    end

    def details
      {:status => value}
    end
  }
  /
  status_word
end

# A single status word: a latin_word immediately followed by a period.
rule status_word
  latin_word [\.] {
    # Matched text with surrounding whitespace removed.
    def value
      text_value.strip
    end
    # The status word under the :status key.
    def details
      {:status => value}
    end
  }
  # Disabled alternative: a latin_word without the trailing period.
  #/
  #latin_word
end

# Catch-all for text the grammar does not interpret: one or more arbitrary
# characters followed by optional whitespace.
rule unparsed
  .+ space {

    # Unparsed text contributes nothing to the name string.
    def value
      ""
    end

    # Nor to the canonical form.
    def canonical
      ""
    end

    def hybrid
      false
    end

    def pos
      span = ["unparsed", interval.end]
      {interval.begin => span}
    end

    # The raw matched text is preserved under :unparsed.
    def details
      {:unparsed => text_value}
    end
  }
end

# Genus-based names. Every alternative starts with a genus; the alternatives
# differ in whether a parenthesized infragenus, an identification annotation
# and infraspecific parts are present. Alternatives that require more parts
# come first, so the longer forms are tried before the shorter ones.
rule multinomial_name
  a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
    # Genus, infragenus, species and infraspecific parts; the optional
    # annotation is matched but not used by these methods.
    def value
      a.value + " " + b.value + " " + c.value + " " + d.value
    end

    # The infragenus is left out of the canonical form.
    def canonical
      a.canonical + " " + c.canonical + " " + d.canonical
    end

    def pos
      a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
    end

    def hybrid
      c.hybrid rescue false
    end

    def details
      a.details.merge(b.details).merge(c.details).merge(d.details)
    end
  }
  /
  a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
    # Same shape as the previous alternative, but with a mandatory annotation
    # between the species and the infraspecific parts.
    def value
      a.value + " " + b.value + " " + c.value + " " + d.value
    end

    def canonical
      a.canonical + " " + c.canonical + " " + d.canonical
    end

    def pos
      a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
    end

    def hybrid
      c.hybrid rescue false
    end

    def details
      a.details.merge(b.details).merge(c.details).merge(d.details)
    end
  }
  /
  a:genus space b:infragenus space aid:annotation_identification? space c:species {
    # When the optional annotation matched, aid responds to apply and the
    # annotation decides how the species is rendered; otherwise the species
    # is used directly.
    def value
      if defined? aid.apply
        a.value + " " + b.value + aid.apply(c)
      else
        a.value + " " + b.value + " " + c.value
      end
    end

    def canonical
      if defined? aid.apply
        a.canonical + aid.canonical(c)
      else
        a.canonical + " " + c.canonical
      end
    end

    def pos
      if defined? aid.apply
        a.pos.merge(b.pos).merge(aid.pos(c))
      else
        a.pos.merge(b.pos).merge(c.pos)
      end
    end

    def hybrid
      c.hybrid rescue false
    end

    def details
      if defined? aid.apply
        # aid.details returns a Hash; aid.apply returns a String and cannot
        # be merged into the details Hash.
        a.details.merge(b.details).merge(aid.details(c))
      else
        a.details.merge(b.details).merge(c.details)
      end
    end
  }
  /
  a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
    # Genus, species and infraspecific parts without an infragenus; the
    # optional annotation is matched but not used by these methods.
    def value
      a.value + " " + b.value + " " + c.value
    end

    def canonical
      a.canonical + " " + b.canonical + " " + c.canonical
    end

    def pos
      a.pos.merge(b.pos).merge(c.pos)
    end

    def hybrid
      b.hybrid rescue false
    end

    def details
      a.details.merge(b.details).merge(c.details)
    end
  }
  /
  a:genus space aid:annotation_identification? space b:species {
    # Plain binomial; an annotation, when present, decides how the species
    # is rendered.
    def value
      if defined? aid.apply
        a.value + aid.apply(b)
      else
        a.value + " " + b.value
      end
    end

    def canonical
      if defined? aid.apply
        a.canonical + aid.canonical(b)
      else
        a.canonical + " " + b.canonical
      end
    end

    def pos
      if defined? aid.apply
        a.pos.merge(aid.pos(b))
      else
        a.pos.merge(b.pos)
      end
    end

    def hybrid
      b.hybrid rescue false
    end

    def details
      if defined? aid.apply
        a.details.merge(aid.details(b))
      else
        a.details.merge(b.details)
      end
    end
  }
  /
  a:genus space aid:annotation_identification space b:unparsed {
    # Genus plus a mandatory annotation followed by uninterpreted text.
    def value
      a.value + aid.apply(b)
    end

    def canonical
      a.canonical + aid.canonical(b)
    end

    def pos
      a.pos.merge(aid.pos(b))
    end

    def hybrid
      false
    end

    def details
      a.details.merge(aid.details(b))
    end
  }
end

# Two uninomials joined by a rank marker, as listed in rank_uninomial.
rule multiuninomial_name
  a:uninomial_name space b:rank_uninomial space c:uninomial_name {

    def value
      [a.value, b.value, c.value].join(" ")
    end

    # Only the first uninomial forms the canonical name.
    def canonical
      a.canonical
    end

    def hybrid
      false
    end

    # The rank node reports positions for itself and the second uninomial.
    def pos
      ranked = b.pos(c)
      a.pos.merge(ranked)
    end

    def details
      ranked = b.details(c)
      a.details.merge(ranked)
    end
  }
end

# One or more infraspecific parts. The :infraspecies entry of details is
# always a list, one element per part.
rule infraspecies_mult
  a:infraspecies space b:infraspecies_mult {
    def value
      "#{a.value} #{b.value}"
    end

    def canonical
      "#{a.canonical} #{b.canonical}"
    end

    def pos
      a.pos.merge(b.pos)
    end

    # Concatenates the infraspecies entries of both sides into one list.
    def details
      listify = lambda do |entry|
        entry.class == Array ? entry : [entry]
      end
      head = a.details
      combined = listify.call(head[:infraspecies]) + listify.call(b.details[:infraspecies])
      head.merge({:infraspecies => combined})
    end
  }
  /
  infraspecies {
    # Wraps the single infraspecies entry in a list. When the underlying node
    # reports an identification annotation, the entry carries the annotation
    # and the ignored part instead.
    def details
      inherited = super
      if inherited[:annotation_identification]
        {:infraspecies => [{:annotation_identification => inherited[:annotation_identification], :ignored => inherited[:ignored]}]}
      else
        {:infraspecies => [inherited[:infraspecies]]}
      end
    end
  }
end

# An infraspecific epithet, optionally followed by its authorship.
rule infraspecies
  a:infraspecies_string space b:authorship {
    def value
      "#{a.value} #{b.value}"
    end

    # Authorship is not part of the canonical form.
    def canonical
      a.canonical
    end

    def pos
      a.pos.merge(b.pos)
    end

    # Authorship details are folded into the infraspecies entry.
    def details
      entry = a.details[:infraspecies].merge(b.details)
      {:infraspecies => entry}
    end
  }
  /
  infraspecies_string
end

# An infraspecific epithet in one of three forms: introduced by a rank
# marker, introduced by an identification annotation, or bare. The last two
# forms must not be followed by a period.
rule infraspecies_string
  sel:rank space a:species_word {
    # Rendering, positions and details are all delegated to the rank node.
    def value
      sel.apply(a)
    end
    def canonical
      sel.canonical(a)
    end

    def pos
      sel.pos(a)
    end

    def details
      sel.details(a)
    end
  }
  /
  aid:annotation_identification space a:species_word ![\.] {
    def value
      aid.apply(a)
    end

    def canonical
      aid.canonical(a)
    end

    def pos
      # The epithet node gets its own pos so the annotation can merge it in.
      # Inside this singleton method self is the epithet node, so its span
      # is interval.begin to interval.end.
      def a.pos
        {interval.begin => ["infraspecies", interval.end]}
      end
      aid.pos(a)
    end

    def details
      # Same technique: give the epithet node a details method so the
      # annotation can embed it.
      def a.details
        {:infraspecies => {:string => value, :rank => "n/a"}}
      end
      aid.details(a)
    end
  }
  /
  a:species_word ![\.] {
    def value
      a.value
    end

    def canonical
      value
    end

    def pos
      {interval.begin => ["infraspecies", interval.end]}
    end

    # A bare epithet carries no rank, reported as "n/a".
    def details
      {:infraspecies => {:string => value, :rank => "n/a"}}
    end
  }
end

# Taxon-concept marker: "sec." or "sensu."; both are reported as "sec.".
rule taxon_concept_rank
  ("sec."/"sensu.") {
    def value
      "sec."
    end
    # Marker followed by the value of the given node, with a leading space.
    def apply(a)
      " #{value} #{a.value}"
    end
    # Details of the given node under the :taxon_concept key.
    def details(a = nil)
      concept = a.details
      {:taxon_concept => concept}
    end
  }
end

# Infraspecific rank markers. Several markers are listed twice, once ending
# in a period and once ending in a blank.
rule rank
  ("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma."/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo "/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
  {
    def value
      text_value.strip
    end

    # Rank marker and epithet, each preceded by one space.
    def apply(a)
      " #{text_value.strip} #{a.value}"
    end

    # The rank marker itself is not part of the canonical form.
    def canonical(a)
      " #{a.value}"
    end

    # Positions of the marker and of the epithet; a trailing blank in the
    # marker is excluded from its span.
    def pos(a)
      marker_end = interval.end
      marker_end -= 1 if text_value[-1] == " "
      {interval.begin => ["infraspecific_type", marker_end], a.interval.begin => ["infraspecies", a.interval.end]}
    end

    # The epithet string is nil when the argument has no usable value.
    def details(a = nil)
      epithet = begin
        a.value
      rescue
        nil
      end
      {:infraspecies => {:string => epithet, :rank => text_value.strip}}
    end
  }
end

# Rank markers that may join two uninomials; each is listed with a trailing
# period and with a trailing blank.
rule rank_uninomial
  ("sect."/"sect "/"subsect."/"subsect "/"trib."/"trib "/"subtrib."/"subtrib "/"ser."/"ser "/"subgen."/"subgen "/"fam."/"fam "/"subfam."/"subfam "/"supertrib."/"supertrib ") {
    def value
      text_value.strip
    end

    # Positions of the marker itself and of the uninomial that follows it.
    def pos(uni)
      marker = {interval.begin => ["rank_uninomial", interval.end]}
      marker.merge({uni.interval.begin => ["uninomial", uni.interval.end]})
    end

    # The marker plus the uninomial entry of the node that follows it.
    def details(uni)
      second = uni.details[:uninomial]
      {:rank_uninomials => value, :uninomial2 => second}
    end
  }
end

# A species epithet, optionally followed by its authorship.
rule species
  a:species_string space b:authorship {
    def value
      "#{a.value} #{b.value}"
    end

    # Authorship is not part of the canonical form.
    def canonical
      a.canonical
    end

    # Falls back to false when the epithet node cannot report hybrid status.
    def hybrid
      a.hybrid
    rescue
      false
    end

    def pos
      a.pos.merge(b.pos)
    end

    # Authorship details are folded into the species entry.
    def details
      entry = a.details[:species].merge(b.details)
      {:species => entry}
    end
  }
  /
  species_string
end

# A species epithet without authorship: either a plain species_word or a
# species_word preceded by a hybrid marker (species_word_hybrid).
rule species_string
  species_word {
    # The canonical form of a plain epithet is its value.
    def canonical
      value
    end

    # Span of the whole epithet, tagged "species".
    def pos
      {interval.begin => ["species", interval.end]}
    end

    # A plain epithet is never a hybrid.
    def hybrid
      false
    end

    # The epithet string under :species.
    def details
      {:species => {:string => value}}
    end
  }
  /
  species_word_hybrid
end

# A parenthesized infrageneric name: a capitalized word or a capital letter
# followed by a period, with optional whitespace inside the parentheses.
rule infragenus
  left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
    # Rendered without inner whitespace.
    def value
      "(#{a.value})"
    end

    def canonical
      a.value
    end

    # Only the inner word is positioned, not the parentheses.
    def pos
      span = ["infragenus", a.interval.end]
      {a.interval.begin => span}
    end

    def details
      entry = {:string => a.value}
      {:infragenus => entry}
    end
  }
end

# A genus: an abbreviated genus or a uninomial string. The negative
# lookahead rejects the match when it is followed by whitespace, an author
# prefix word, whitespace and an author word.
rule genus
  a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
    # Value of the matched genus node.
    def value
      a.value
    end

    # Span of the genus node, tagged "genus".
    def pos
      {a.interval.begin => ["genus", a.interval.end]}
    end

    # The canonical form is the same as the value.
    def canonical
      a.value
    end

    # The genus string under :genus.
    def details
      {:genus => {:string => a.value}}
    end
  }
end

# An abbreviated genus: one capital letter, up to two lowercase letters,
# the character class written on the next line, then optional whitespace.
rule abbreviated_genus
  [A-Z] [a-z]? [a-z]? [\\.] space {
    # Matched text with the trailing whitespace removed.
    def value
      text_value.strip
    end

    # The canonical form is the same as the value.
    def canonical
      value
    end

    # Span of the whole match, which includes any trailing whitespace.
    def pos
      {interval.begin => ["abbreviated_genus", interval.end]}
    end

    # The abbreviation under :abbreviated_genus.
    def details
      {:abbreviated_genus => {:string => value}}
    end
  }
end

# A uninomial in four forms, tried in order: with infragenus and simple
# authorship, with infragenus only, with authorship only, or bare.
rule uninomial_name
  a:uninomial_string space b:infragenus space c:simple_authorship {
    def value
      [a.value, b.value, c.value].join(" ")
    end

    def canonical
      a.canonical
    end

    def pos
      a.pos.merge(b.pos).merge(c.pos)
    end

    def hybrid
      false
    end

    # Infragenus and authorship details are folded into the uninomial entry.
    def details
      entry = a.details[:uninomial].merge(b.details).merge(c.details)
      {:uninomial => entry}
    end
  }
  /
  a:uninomial_string space b:infragenus {
    def value
      "#{a.value} #{b.value}"
    end

    def canonical
      a.canonical
    end

    def pos
      a.pos.merge(b.pos)
    end

    def hybrid
      false
    end

    def details
      entry = a.details[:uninomial].merge(b.details)
      {:uninomial => entry}
    end
  }
  /
  a:uninomial_string space_hard b:authorship {
    def value
      "#{a.value} #{b.value}"
    end

    def canonical
      a.canonical
    end

    def pos
      a.pos.merge(b.pos)
    end

    def hybrid
      false
    end

    def details
      entry = a.details[:uninomial].merge(b.details)
      {:uninomial => entry}
    end
  }
  /
  uninomial_string
end

# The bare text of a uninomial: a hyphenated pair of capitalized words or a
# single capitalized word. The value method comes from the matched node.
rule uninomial_string
  (cap_latin_word_pair/cap_latin_word) {
    # The canonical form is the same as the value.
    def canonical
      value
    end

    # Span of the whole word, tagged "uninomial".
    def pos
      {interval.begin => ["uninomial", interval.end]}
    end

    # A uninomial is never a hybrid.
    def hybrid
      false
    end

    # The word under :uninomial.
    def details
      {:uninomial => {:string => value}}
    end
  }
end

# Full authorship in five forms, tried in order: parenthesized basionym
# authors with combination authors and an "ex" part, basionym with
# combination authors, basionym alone, simple authorship with an "ex" part,
# and simple authorship alone.
rule authorship
  a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
    def value
      [a.value, b.value, c.value].join(" ")
    end

    def pos
      a.pos.merge(b.pos).merge(c.pos)
    end

    # The simple authorship becomes the combination team, extended in place
    # with the ex-author details.
    def details
      combination = b.details[:basionymAuthorTeam]
      combination.merge!(c.details)
      {:authorship => text_value.strip, :combinationAuthorTeam => combination, :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
    end
  }
  /
  a:basionym_authorship_with_parenthesis space b:simple_authorship {
    def value
      "#{a.value} #{b.value}"
    end

    def pos
      a.pos.merge(b.pos)
    end

    def details
      combination = b.details[:basionymAuthorTeam]
      basionym = a.details[:basionymAuthorTeam]
      {:authorship => text_value.strip, :combinationAuthorTeam => combination, :basionymAuthorTeam => basionym}
    end
  }
  /
  basionym_authorship_with_parenthesis
  /
  a:simple_authorship ","? space b:ex_authorship {
    def value
      "#{a.value} #{b.value}"
    end

    def pos
      a.pos.merge(b.pos)
    end

    # Starts from the simple authorship details, replaces the authorship
    # text with the whole match and adds the ex-author details to the team.
    def details
      a.details.tap do |result|
        result[:authorship] = text_value.strip
        result[:basionymAuthorTeam].merge!(b.details)
      end
    end
  }
  /
  simple_authorship
end

# Basionym authorship written in parentheses, in four forms: author names
# in parentheses followed by a year outside them, simple authorship with an
# "ex" part, simple authorship alone, or a lone question mark.
rule basionym_authorship_with_parenthesis
  left_paren space a:authors_names space right_paren space [,]? space b:year {
    # Rendered with the year moved inside the parentheses.
    def value
      "(#{a.value} #{b.value})"
    end

    def pos
      a.pos.merge(b.pos)
    end

    # NOTE(review): this alternative stores the team text under :author_team,
    # while the last alternative of this rule uses :authorTeam. Confirm
    # which key consumers expect.
    def details
      team = {:author_team => text_value}.merge(a.details).merge(b.details)
      {:authorship => text_value, :basionymAuthorTeam => team}
    end
  }
  /
  left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
    def value
      "(#{a.value} #{b.value})"
    end

    def pos
      a.pos.merge(b.pos)
    end

    # Starts from the simple authorship details, adds the ex-author details
    # to the team and replaces the authorship text with the whole match.
    def details
      a.details.tap do |result|
        result[:basionymAuthorTeam].merge!(b.details)
        result[:authorship] = text_value.strip
      end
    end
  }
  /
  left_paren space a:simple_authorship space right_paren {
    def value
      "(#{a.value})"
    end

    def pos
      a.pos
    end

    # Same as the inner authorship, with the text including the parentheses.
    def details
      a.details.tap do |result|
        result[:authorship] = text_value
      end
    end
  }
  /
  left_paren space a:"?" space right_paren {
    def value
      "(?)"
    end

    def pos
      span = ["unknown_author", a.interval.end]
      {a.interval.begin => span}
    end

    def details
      team = {:authorTeam => text_value, :author => ["?"]}
      {:authorship => text_value, :basionymAuthorTeam => team}
    end
  }
end

# The "ex" part of an authorship: a separator followed by simple authorship.
rule ex_authorship
  ex_sep space b:simple_authorship {
    # Always rendered with " ex ", whichever separator was matched.
    def value
      " ex #{b.value}"
    end

    def pos
      b.pos
    end

    # Team text plus the author team details of the inner authorship, stored
    # under :exAuthorTeam.
    def details
      team = {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])
      {:exAuthorTeam => team}
    end
  }
end

# Authors with an optional year, in three forms: authors (and maybe a year)
# followed by a "non ..." clause, authors with a year, or authors alone.
rule simple_authorship
  a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
    # The year is optional in this alternative. When it is absent, b is an
    # empty node without value, pos or details, so each method checks for
    # the year before using it. The "non ..." clause is not rendered.
    def value
      b.respond_to?(:value) ? a.value + " " + b.value : a.value
    end

    def pos
      b.respond_to?(:pos) ? a.pos.merge(b.pos) : a.pos
    end

    def details
      details_with_arg(:basionymAuthorTeam)
    end

    # Builds the details hash with the author team stored under the given
    # key; year details are included only when a year was matched.
    def details_with_arg(authorTeamType = "basionymAuthorTeam")
      team = {:authorTeam => a.text_value.strip}.merge(a.details)
      team = team.merge(b.details) if b.respond_to?(:details)
      { :authorship => text_value,
        authorTeamType.to_sym => team
      }
    end
  }
  /
  a:authors_names space [,]? space b:year {
    def value
      a.value + " " + b.value
    end

    def pos
      a.pos.merge(b.pos)
    end

    def details
      details_with_arg(:basionymAuthorTeam)
    end

    # Builds the details hash with the author team stored under the given key.
    def details_with_arg(authorTeamType = "basionymAuthorTeam")
      { :authorship => text_value,
        authorTeamType.to_sym => {
          :authorTeam => a.text_value.strip
        }.merge(a.details).merge(b.details)
      }
    end
  }
  /
  authors_names {
    # Combines the team text with the details reported by the matched
    # authors_names node itself, reached through super.
    def details
      result = details_with_arg(:basionymAuthorTeam)
      result[:basionymAuthorTeam].merge!(super)
      result
    end

    # Builds the details hash with the author team stored under the given key.
    def details_with_arg(authorTeamType = "basionymAuthorTeam")
      { :authorship => text_value,
        authorTeamType.to_sym => {
          :authorTeam => text_value,
        }
      }
    end
  }
end

# One or more author names joined by separators, a single author name, or
# an unknown-author marker.
rule authors_names
  a:author_name space sep:author_separator space b:authors_names {
    # Rendering, positions and details are delegated to the separator node,
    # which receives both sides.
    def value
      sep.apply(a,b)
    end

    def pos
      sep.pos(a,b)
    end

    def details
      sep.details(a,b)
    end
  }
  /
  author_name
  /
  unknown_auth
end

# Markers that stand in for an author; the match is rejected when a
# latin_word follows immediately.
rule unknown_auth
  ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
    # The marker exactly as written.
    def value
      text_value
    end

    # Span of the marker, tagged "unknown_author".
    def pos
     {interval.begin => ["unknown_author", interval.end]}
    end

    # Every marker is reported as the single author "unknown".
    def details
      {:author => ["unknown"]}
    end
  }
end

# Separator before an ex-authorship: "ex" or "in", which must be followed
# by a whitespace character. The lookahead does not consume that character.
rule ex_sep
  ("ex"/"in") &[\s]
end

# Separator between two authors: an ampersand, a comma, "and" or "et".
# NOTE(review): the ampersand used to be listed twice, which made the second
# copy unreachable. If an HTML-escaped ampersand was intended, add it as a
# separate alternative before "&" and to the list in apply.
rule author_separator
  ("&"/","/"and"/"et") {
    # Joins two author values. The ampersand, "and" and "et" are all
    # rendered as " &"; a comma is kept as written.
    def apply(a,b)
      sep = text_value.strip
      sep = " &" if ["&", "and", "et"].include? sep
      a.value + sep + " " + b.value
    end

    def pos(a,b)
      a.pos.merge(b.pos)
    end

    # Concatenates the author lists of both sides.
    def details(a,b)
      {:author => a.details[:author] + b.details[:author]}
    end
  }
end

# An author name, optionally followed by a postfix word; the postfix form
# is rejected when a latin_word follows.
rule author_name
  space a:author_name_without_postfix space b:author_postfix_word space !latin_word {
    def value
      "#{a.value} #{b.value}"
    end

    def pos
      a.pos.merge(b.pos)
    end

    # Name and postfix together count as one author.
    def details
      {:author => [value]}
    end
  }
  /
  author_name_without_postfix
end

# An author name built recursively: a prefix word or an author word
# followed by more of the name, or a single author word.
rule author_name_without_postfix
  space a:author_prefix_word space b:author_name {
    def value
      "#{a.value} #{b.value}"
    end

    def pos
      a.pos.merge(b.pos)
    end

    # The whole multi-word name counts as one author.
    def details
      {:author => [value]}
    end
  }
  /
  a:author_word space b:author_name {
    def value
      "#{a.value} #{b.value}"
    end

    def pos
      a.pos.merge(b.pos)
    end

    def details
      {:author => [value]}
    end
  }
  /
  author_word
end

# One word of an author name: a hard-coded three-token name, a few fixed
# abbreviations, a word starting with a capital letter, or an author prefix
# word.
rule author_word
  "A S. Xu" {
    def value
      text_value.strip
    end

    # Three tokens at offsets 0, 2 and 5 of the literal. The ends are
    # absolute offsets, like every other pos in this grammar.
    def pos
      start = interval.begin
      {start => ["author_word", start + 1], (start + 2) => ["author_word", start + 4], (start + 5) => ["author_word", start + 7]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  ("arg."/"et al.\{\?\}"/"et al."/"et al") {
    def value
      text_value.strip
    end

    def pos
      #cheating because there are several words in some of them
      {interval.begin => ["author_word", interval.end]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
    # Runs of three or more uppercase letters are converted to title case.
    def value
      text_value.gsub(/([\p{Lu}]{3,})/) do |match|
        UnicodeUtils.titlecase(match)
      end
    end

    def pos
      {interval.begin => ["author_word", interval.end]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  "X" [^0-9\[\]\(\)\s&,]+ {
    # A word starting with "X" needs at least one more character.
    def value
      text_value
    end

    def pos
      {interval.begin => ["author_word", interval.end]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  author_prefix_word
end

# A lowercase name particle, optionally preceded by whitespace, that must
# be followed by whitespace. The lookahead does not consume that whitespace.
rule author_prefix_word
  space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
    # Matched text, including any leading whitespace this rule consumed.
    def value
      text_value
    end

    def pos
      #cheating because there are several words in some of them
      {interval.begin => ["author_word", interval.end]}
    end

    # This rule is also the last alternative of author_word, so a particle
    # can stand alone as an author and callers may ask it for details.
    def details
      {:author => [value.strip]}
    end
  }
end

# A word that may follow an author name: "f." or "filius".
rule author_postfix_word
  ("f."/"filius") {
    # Matched text with surrounding whitespace removed.
    def value
      text_value.strip
    end

    # Span of the word, tagged "author_word".
    def pos
      {interval.begin => ["author_word", interval.end]}
    end
  }
end

# Two capitalized words joined by a hyphen.
rule cap_latin_word_pair
  a:cap_latin_word "-" b:cap_latin_word {
    # The hyphen is dropped and the second word is lowercased.
    def value
      a.value + b.value.downcase
    end
  }
end

# A capitalized word: a capital letter or capital digraph followed by a
# latin_word (optionally with a trailing question mark, which is dropped),
# a word starting with "AE" or "OE", or one of a fixed list of two-letter
# words.
rule cap_latin_word
  a:([A-Z]/cap_digraph) b:latin_word "?" {
    # A plain capital letter has no value method, so its text is used.
    def value
      (a.value rescue a.text_value) + b.value
    end
  }
  /
  a:([A-Z]/cap_digraph) b:latin_word {
    def value
      (a.value rescue a.text_value) + b.value
    end
  }
  /
  a:("AE"/"OE") b:latin_word {
    # Keeps the first capital and lowercases the "E".
    def value
      a.text_value[0..0] + "e" + b.value
    end
  }
  /
  ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
    def value
      text_value
    end
  }
end

# A single capital letter followed by a period.
rule capped_dotted_char
  [A-Z] "." {
    # The matched text exactly as written.
    def value
      text_value
    end
  }
end

# A species epithet preceded by a hybrid marker: a multiplication sign, a
# capital "X", or a lowercase "x" that must be followed by whitespace.
# In all three forms only the epithet is positioned and reported.
rule species_word_hybrid
  a:multiplication_sign space b:species_word {
    def value
      "#{a.value} #{b.value}"
    end

    def canonical
      b.value
    end

    def hybrid
      true
    end

    def pos
      span = ["species", b.interval.end]
      {b.interval.begin => span}
    end

    def details
      entry = {:string => b.value}
      {:species => entry}
    end
  }
  /
  a:"X" space b:species_word {
    def value
      "× #{b.value}"
    end

    def canonical
      b.value
    end

    def hybrid
      true
    end

    def pos
      span = ["species", b.interval.end]
      {b.interval.begin => span}
    end

    def details
      entry = {:string => b.value}
      {:species => entry}
    end
  }
  /
  a:"x" space_hard b:species_word {
    def value
      "× #{b.value}"
    end

    def canonical
      b.value
    end

    def hybrid
      true
    end

    def pos
      span = ["species", b.interval.end]
      {b.interval.begin => span}
    end

    def details
      entry = {:string => b.value}
      {:species => entry}
    end
  }
end

# Identification annotations. The first group hides whatever follows it
# from the rendered and canonical name; the second group ("cf.") keeps the
# following part.
rule annotation_identification
  ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {

    def value
      text_value.strip
    end

    # The annotated part is dropped from the rendered name.
    def apply(sp)
      ""
    end

    # And from the canonical name.
    def canonical(sp)
      ""
    end

    # Span of the annotation only; a trailing blank in the matched text is
    # excluded, as in the rank rule.
    def pos(sp)
      interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
      {interval.begin => ["annotation_identification", interval_end]}
    end

    # The annotated part is kept under :ignored.
    def details(sp)
      {:annotation_identification => value, :ignored => sp.details}
    end
  }
  /
  ("cf."/"cf ") {
    def value
      text_value.strip
    end

    def apply(sp)
      " " + value + " " + sp.value
    end

    def canonical(sp)
      " " + sp.canonical
    end

    # Span of the annotation, with a trailing blank excluded, merged with
    # the positions of the annotated part.
    def pos(sp)
      interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
      {interval.begin => ["annotation_identification", interval_end]}.merge(sp.pos)
    end

    def details(sp)
      {:annotation_identification => value, :species => sp.details}
    end
  }
end

# A species epithet: a latin_word, optionally preceded by digits and an
# optional hyphen.
rule species_word
  a:[0-9]+ "-"? b:latin_word {
    # Numbers found in the table are replaced by the listed prefix; any
    # other number is kept and joined to the word with a hyphen.
    def value
      prefixes = {
        "1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri",
        "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo",
        "9" => "novem", "10" => "decem", "11" => "undecim", "12" => "duodec",
        "13" => "tredec", "14" => "quattuordec", "15" => "quinquadec",
        "16" => "hexadec", "17" => "septendec", "18" => "octodec",
        "19" => "novemdec", "20" => "viginti", "21" => "unviginti",
        "22" => "duodeviginti", "23" => "triviginti",
        "24" => "quattuorviginti", "25" => "quinquatviginti",
        "26" => "hexaviginti", "27" => "septenviginti",
        "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta",
        "38" => "trigintaocto", "100" => "centi"
      }
      digits = a.text_value
      leading = prefixes.fetch(digits) do
        digits + "-"
      end
      leading + b.value
    end
  }
  /
  latin_word
end

# A lowercase word in four forms: letters joined by a hyphen to another
# latin_word, two hard-coded epithets containing an apostrophe, or a
# single letter followed by one or more letters.
rule latin_word
  a:valid_name_letters "-" b:latin_word {
    # The hyphen is kept between the normalized parts.
    def value
      a.value + "-" + b.value
    end
  }
  /
  "o'donelli" {
    # Reported without the apostrophe.
    def value
      "odonelli"
    end
  }
  /
  "o'neili" {
    # Reported without the apostrophe.
    def value
      "oneili"
    end
  }
  /
  a:valid_name_letter b:valid_name_letters {
    # Both parts are already normalized by their own rules.
    def value
      a.value + b.value
    end
   }
end

# One or more lowercase letters, including three special characters that
# are normalized in value.
rule valid_name_letters
  [a-zëæœ]+ {
    # Ligatures are expanded to two letters. The e with diaeresis is legal
    # in the botanical code but is normalized to a plain e, which helps
    # reconciliation.
    def value
      normalized = text_value.split("").map do |letter|
        case letter
        when "æ" then "ae"
        when "œ" then "oe"
        when "ë" then "e"
        else letter
        end
      end
      normalized.join
    end
  }
end

# A single lowercase letter, normalized the same way as valid_name_letters.
rule valid_name_letter
  [a-zëæœ] {
    # Ligatures are expanded and the e with diaeresis becomes a plain e.
    def value
      case text_value
      when "æ" then "ae"
      when "œ" then "oe"
      when "ë" then "e"
      else text_value
      end
    end
  }
end

# A capital ligature, reported as a capital letter followed by "e".
rule cap_digraph
  "Æ" {
    # Expanded to two letters.
    def value
    "Ae"
    end
  }
  /
  "Œ" {
    # Expanded to two letters.
    def value
    "Oe"
    end
  }
end

# A year, optionally wrapped in parentheses, with or without a trailing
# letter.
rule year
  b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
    # Value, positions and details all come from the inner year node, so the
    # parentheses are not reported.
    def value
      a.value
    end

    def pos
      a.pos
    end

    def details
      a.details
    end
  }
  /
  year_number_with_character
  /
  year_number
end

# A year immediately followed by one ASCII letter.
rule year_number_with_character
  a:year_number [a-zA-Z] {
    # Only the year text is reported; the trailing letter is dropped.
    def value
      a.text_value
    end

    # The span covers the whole match, including the trailing letter.
    def pos
      {interval.begin => ["year", interval.end]}
    end

    # The year text under :year.
    def details
      {:year => value}
    end
  }
end

# A four-position year: 1 or 2, then one of 7, 8, 9 or 0, then a digit,
# then either a digit with an optional question mark or a question mark in
# place of the last digit.
rule year_number
  [12] [7890] [0-9] ([0-9] [\?]?/"?") {
    # The matched text exactly as written, question mark included.
    def value
      text_value
    end

    # Span of the whole match, tagged "year".
    def pos
      {interval.begin => ["year", interval.end]}
    end

    # The year text under :year.
    def details
      {:year => value}
    end
  }
end

# An opening parenthesis.
rule left_paren
  "("
end

# A closing parenthesis.
rule right_paren
  ")"
end

# A hybrid marker: the letter x in either case, or a multiplication sign.
rule hybrid_character
  ("x"/"X") {
    # The letter form is reported as the multiplication sign.
    def value
      "×"
    end
  }
  /
  multiplication_sign
end

# A multiplication sign or an asterisk used in its place.
rule multiplication_sign
  ("×"/"*") {
    # Both forms are reported as the multiplication sign.
    def value
      "×"
    end
  }
end

# Optional whitespace: zero or more whitespace characters.
rule space
  [\s]*
end

# Mandatory whitespace: one or more whitespace characters.
rule space_hard
  [\s]+
end

end