# Treetop grammar that layers extra rules on top of ScientificNameClean.
#
# Most rules add alternatives for irregular input (trailing garbage, doubled
# parentheses, bracketed or punctuated years, page numbers after a year) and
# then fall back to the included grammar's definition through `super`.
# The root node reports `parser_run` 2.
# NOTE(review): presumably this grammar is tried as a second pass after the
# clean grammar fails -- confirm against the code that drives the parsers.
grammar ScientificNameDirty
  include ScientificNameClean

  # Entry point: a name surrounded by optional whitespace. Runs of two or
  # more whitespace characters in value/canonical are collapsed to one space.
  rule root
    space a:scientific_name_5 space {
      def value
        a.value.gsub(/\s{2,}/, " ").strip
      end

      def canonical
        a.canonical.gsub(/\s{2,}/, " ").strip
      end

      def pos
        a.pos
      end

      def hybrid
        a.hybrid
      end

      # Always hands back an Array: a non-Array details value is wrapped.
      def details
        a.details.is_a?(Array) ? a.details : [a.details]
      end

      def parser_run
        2
      end
    }
  end

  # A scientific_name_4 followed by garbage; the garbage text contributes
  # nothing to value, canonical, pos or details.
  rule scientific_name_5
    a:scientific_name_4 garbage {
      def value
        a.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos
      end

      def details
        a.details
      end
    }
    /
    super
  end

  # Infraspecies string followed directly by a year, or by the literal
  # "corrig." and an authorship. The second node's details are merged into
  # the :infraspecies hash.
  rule infraspecies
    a:infraspecies_string space b:year {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:infraspecies => a.details[:infraspecies].merge(b.details)}
      end
    }
    /
    a:infraspecies_string space string_authorship_inconsistencies space b:authorship {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:infraspecies => a.details[:infraspecies].merge(b.details)}
      end
    }
    /
    super
  end

  # Species string followed directly by a year; the year details are merged
  # into the :species hash.
  rule species
    a:species_string space b:year {
      def value
        a.value + " " + b.value
      end

      def canonical
        a.canonical
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:species => a.details[:species].merge(b.details)}
      end
    }
    /
    super
  end

  # One leading letter plus valid_name_letters. The value expands the
  # ligatures to "ae"/"oe" and maps accented letters to plain ASCII.
  # NOTE(review): "ñ" and "š" each occur twice in the class and in the tr
  # source list; possibly two distinct code points were intended -- verify.
  rule latin_word
    a:[a-z\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž] b:valid_name_letters {
      def value
        expanded = text_value.gsub("æ", "ae").gsub("œ", "oe")
        expanded.tr("àâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž", "aaaaaacceeeiiinnnoooooouuurrrsssz")
      end
    }
  end

  # One or more name letters, transliterated the same way as latin_word.
  rule valid_name_letters
    [a-z\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]+ {
      def value
        expanded = text_value.gsub("æ", "ae").gsub("œ", "oe")
        expanded.tr("àâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž", "aaaaaacceeeiiinnnoooooouuurrrsssz")
      end
    }
  end

  # Accepts a doubled closing parenthesis, optionally separated by space.
  rule right_paren
    ")" space ")"
    /
    super
  end

  # Accepts a doubled opening parenthesis, optionally separated by space.
  rule left_paren
    "(" space "("
    /
    super
  end

  # Year variants, tried in order: year plus bracketed year, year plus page
  # number, year with trailing period, bracketed year alone, year range,
  # then the included grammar's definition.
  rule year
    a:year_number space b:approximate_year {
      def value
        a.value + " " + b.value
      end

      def pos
        a.pos.merge(b.pos)
      end

      def details
        {:year => a.value, :approximate_year => b.value}
      end
    }
    /
    a:year_number space page_number {
      # The page number is matched but left out of value, pos and details.
      def value
        a.text_value
      end

      def pos
        {a.interval.begin => ["year", a.interval.end]}
      end

      def details
        {:year => value}
      end
    }
    /
    year_number_with_punctuation
    /
    approximate_year
    /
    double_year
    /
    super
  end

  # A year in square brackets (one or more closing brackets accepted); the
  # value is the year text wrapped in round parentheses.
  rule approximate_year
    "[" space a:year_number space "]"+ {
      def value
        "(" + a.text_value + ")"
      end

      def pos
        {a.interval.begin => ["year", a.interval.end]}
      end

      def details
        {:approximate_year => value}
      end
    }
  end

  # A year number, a hyphen and more digits, with an optional letter and an
  # optional question mark; the whole matched text is kept as the year.
  rule double_year
    year_number "-" [0-9]+ [A-Za-z]? [\?]? {
      def value
        text_value
      end

      def pos
        {interval.begin => ["year", interval.end]}
      end

      def details
        {:year => value}
      end
    }
  end

  # A year number followed by a period. The value drops the period, while
  # pos uses the interval of the whole match, period included.
  # NOTE(review): confirm that the wider pos span is intended.
  rule year_number_with_punctuation
    a:year_number "." {
      def value
        a.text_value
      end

      def pos
        {interval.begin => ["year", interval.end]}
      end

      def details
        {:year => value}
      end
    }
  end

  # A colon followed by digits; value is deliberately empty and returns nil.
  rule page_number
    ":" space [\d]+ {
      def value
      end
    }
  end

  # Literal tokens tolerated between an infraspecies string and authorship.
  rule string_authorship_inconsistencies
    ("corrig.")
  end

  # Trailing text to discard: a quote or comma followed by anything, or hard
  # space followed by at least one character.
  # NOTE(review): the negated classes exclude a single Cyrillic letter each,
  # which looks like a stand-in for "any character" -- confirm.
  rule garbage
    space (["',]) space [^щ]*
    /
    space_hard [^ш]+
  end
end