class ScientificNameParser
# We can use these expressions when we are ready to parse virus names.
#
# Holds one regular expression per virus taxonomic rank. Each pattern is
# anchored at the start of a line and allows leading whitespace.
class VirusParser
  # Builds the rank patterns. Nothing in this class reads them yet.
  def initialize
    # "+" is a quantifier here; the earlier "\+" demanded a literal plus sign.
    @order = /^\s*[A-Z][a-z]+virales/i
    # Suffix alternatives are grouped so the second one cannot match on its
    # own anywhere in the string.
    @family = /^\s*[A-Z][a-z]+(?:viridae|viroidae)/i
    @subfamily = /^\s*[A-Z][a-z]+(?:virinae|viroinae)/i
    @genus = /^\s*[A-Z][a-z]+(?:virus|viroid)/i
    # Word characters are ASCII letters, digits, Greek U+0391..U+03C9,
    # square brackets and space.
    # NOTE(review): the trailing run is still required (one or more), so a
    # name that ends right after "virus" does not match -- confirm intent.
    @species = /^\s*[A-Za-z0-9\u0391-\u03C9\[\] ]+(?:virus|phage|viroid|satellite|prion)[A-Za-z0-9\u0391-\u03C9\[\] ]+/i
    @parsed = nil
  end
end
Constants
- FAILED_RESULT
Public Class Methods
fix_case(name_string)
click to toggle source
# File lib/biodiversity/parser.rb, line 119
# Normalizes the letter case of a name string.
#
# A single word has parentheses and braces removed and is capitalized; it
# yields nil when fewer than two characters remain. For longer names the
# first word is capitalized, the second word is lower-cased (or, when it
# starts with "(", capitalized inside a closed pair of parentheses), and
# every remaining word is lower-cased.
#
# name_string - the name to normalize; nil is treated as an empty string.
#
# Returns the normalized String, or nil for blank input and for one-word
# input that is too short.
def self.fix_case(name_string)
  # Strip first: splitting " Aus bus" on /\s+/ would otherwise yield an
  # empty first word and shift every other word by one position.
  words = name_string.to_s.strip.split(/\s+/)
  return nil if words.empty?

  if words.size == 1
    bare = words[0].gsub(/[\(\)\{\}]/, '')
    return nil unless bare.size > 1
    return UnicodeUtils.upcase(bare[0]) + UnicodeUtils.downcase(bare[1..-1])
  end

  first = words[0]
  word1 = if first.size > 1
            UnicodeUtils.upcase(first[0]) + UnicodeUtils.downcase(first[1..-1])
          else
            first
          end

  second = words[1]
  word2 = if second.match(/^\(/)
            # Guarantee exactly one closing parenthesis before re-casing.
            closed = second.gsub(/\)$/, '') + ')'
            closed[0] + UnicodeUtils.upcase(closed[1]) + UnicodeUtils.downcase(closed[2..-1])
          else
            UnicodeUtils.downcase(second)
          end

  rest = words[2..-1].map { |w| UnicodeUtils.downcase(w) }
  [word1, word2, *rest].join(' ').strip
end
new(opts = {})
click to toggle source
# File lib/biodiversity/parser.rb, line 153
# Prepares a parser instance.
#
# opts - options Hash. Only :canonical_with_rank is read; its value is
#        coerced to true or false.
def initialize(opts = {})
  @canonical_with_rank = opts[:canonical_with_rank] ? true : false

  # Nothing has been parsed yet.
  @verbatim = ''
  @parsed = nil

  # Parser objects used by #parse.
  @clean = ScientificNameCleanParser.new
  @dirty = ScientificNameDirtyParser.new
  @canonical = ScientificNameCanonicalParser.new
end
version()
click to toggle source
# File lib/biodiversity/parser.rb, line 115
# Reports the library version.
#
# Returns the value of the Biodiversity::VERSION constant.
def self.version
  Biodiversity::VERSION
end
Private Class Methods
add_rank_to_canonical(parsed)
click to toggle source
# File lib/biodiversity/parser.rb, line 265
# Rewrites parsed[:canonical] so that each infraspecific epithet is
# preceded by its rank.
#
# The first two words of the current canonical form are kept. One entry is
# then appended per element of parsed[:details][0][:infraspecies]:
# "<rank> <string>" when a rank is present and is not 'n/a', otherwise just
# the string.
#
# parsed - result Hash; modified in place.
#
# Returns the new canonical String.
def self.add_rank_to_canonical(parsed)
  leading_words = parsed[:canonical].split(' ').first(2)

  epithets = parsed[:details][0][:infraspecies].map do |entry|
    epithet = entry[:string]
    rank = entry[:rank]
    if !rank || rank == 'n/a'
      epithet
    else
      "#{rank} #{epithet}"
    end
  end

  parsed[:canonical] = (leading_words + epithets).join(' ')
end
surrogate?(parsed_data)
click to toggle source
# File lib/biodiversity/parser.rb, line 247
# Decides whether a parse result should be flagged as a surrogate name.
#
# parsed_data - result Hash; :parsed, :verbatim and :positions are read.
#
# Returns false when :parsed is falsy. Otherwise returns true when any of
# these holds, and false when none does:
# * an 'annotation_identification' entry exists in :positions and the
#   verbatim slice between its neighbouring position values starts with
#   spp, cf, sp or nr;
# * the verbatim string contains "BOLD:" or a run of five or more digits;
# * the verbatim string ends with spp, sp, nr or cf (optional dot).
def self.surrogate?(parsed_data)
  return false unless parsed_data[:parsed]

  verbatim = parsed_data[:verbatim]
  flat_positions = parsed_data[:positions].to_a.flatten

  marker_at = flat_positions.index('annotation_identification')
  if marker_at
    # The values on either side of the marker delimit the annotation text.
    from = flat_positions[marker_at - 1]
    to = flat_positions[marker_at + 1]
    annotation = verbatim[from..to]
    return true if annotation.match(/^(spp|cf|sp|nr)/)
  end

  return true if verbatim.match(/BOLD:|[\d]{5,}/i)

  verbatim.match(/\b(spp|sp|nr|cf)[\.]?[\s]*$/i) ? true : false
end
Public Instance Methods
parse(a_string)
click to toggle source
# File lib/biodiversity/parser.rb, line 178
# Parses a name string and returns the result as
# { scientificName: { ... } }.
#
# The stripped input is stored in @verbatim and the string is passed through
# PreProcessor::clean. Names recognized by virus? or unknown_placement? are
# not parsed; they become a plain Hash. Everything else goes to the clean
# parser, then the dirty parser, then a salvage attempt on a truncated
# string, then the canonical parser. Any error raised during those attempts
# is replaced by FAILED_RESULT.
#
# The stored result (also available through #parsed) gains the singleton
# methods verbatim=, all, pos_json and all_json.
#
# a_string - the name to parse.
#
# Returns the Hash built by @parsed.all.
def parse(a_string)
  @verbatim = a_string.strip
  a_string = PreProcessor::clean(a_string)

  if virus?(a_string)
    @parsed = { verbatim: a_string, virus: true }
  elsif unknown_placement?(a_string)
    @parsed = { verbatim: a_string }
  else
    begin
      @parsed = @clean.parse(a_string) || @dirty.parse(a_string)
      unless @parsed
        # Salvage: cut the string at the index reported by the parsers, drop
        # the last (presumably incomplete) word, and retry.
        index = @dirty.index || @clean.index
        salvage_match = a_string[0..index].split(/\s+/)[0..-2]
        salvage_string = salvage_match ? salvage_match.join(' ') : a_string
        @parsed = @dirty.parse(salvage_string) ||
                  @canonical.parse(a_string) ||
                  { verbatim: a_string }
      end
    rescue
      @parsed = FAILED_RESULT.(@verbatim)
    end
  end

  # Stores the verbatim string on the result object itself; inside these
  # singleton methods @verbatim belongs to @parsed, not to the parser.
  def @parsed.verbatim=(a_string)
    @verbatim = a_string
  end

  # Builds the output Hash. A plain-Hash result counts as "not parsed" and
  # is merged in as is.
  def @parsed.all(opts = {})
    canonical_with_rank = !!opts[:canonical_with_rank]
    parsed = self.class != Hash
    res = { parsed: parsed, parser_version: ScientificNameParser::version }
    if parsed
      hybrid = self.hybrid rescue false
      res.merge!({
        verbatim: @verbatim,
        normalized: self.value,
        canonical: self.canonical,
        hybrid: hybrid,
        details: self.details,
        parser_run: self.parser_run,
        positions: self.pos
      })
    else
      res.merge!(self)
    end

    # Read the canonical form and details from res rather than calling
    # `canonical` on self: a plain-Hash result has no such method, and has
    # no :canonical or :details entries either.
    details = res[:details]
    if canonical_with_rank &&
       res[:canonical].to_s.count(' ') > 1 &&
       details && details[0] && details[0][:infraspecies]
      ScientificNameParser.add_rank_to_canonical(res)
    end

    res[:surrogate] = true if ScientificNameParser.surrogate?(res)
    res = { :scientificName => res }
  end

  # JSON form of the positions; an empty string if serialization fails.
  def @parsed.pos_json
    self.pos.to_json rescue ''
  end

  # JSON form of the full result; an empty string if serialization fails.
  def @parsed.all_json
    self.all.to_json rescue ''
  end

  @parsed.verbatim = @verbatim
  @parsed.all(canonical_with_rank: @canonical_with_rank)
end
parsed()
click to toggle source
# File lib/biodiversity/parser.rb, line 174
# Reader for the result of the most recent #parse call.
#
# Returns the current value of @parsed (nil until it has been assigned).
def parsed
  @parsed
end
unknown_placement?(a_string)
click to toggle source
# File lib/biodiversity/parser.rb, line 170
# Tells whether the string contains "incertae sedis" or its abbreviation
# "inc. sed." (matched case-insensitively).
#
# a_string - the name string to inspect.
#
# Returns true or false.
def unknown_placement?(a_string)
  markers = [/incertae\s+sedis/i, /inc\.\s*sed\./i]
  markers.any? { |marker| a_string.match(marker) }
end
virus?(a_string)
click to toggle source
# File lib/biodiversity/parser.rb, line 162
# Tells whether the string looks like a virus-type name.
#
# The answer is true when the string ends with the token "ICTV", contains
# one of the whole words virus, phage, viroid, satellite or prion (singular
# or plural, any case), or contains a word ending in "virus" preceded by
# lower-case letters.
#
# a_string - the name string to inspect.
#
# Returns true or false.
def virus?(a_string)
  signatures = [
    /\sICTV\s*$/,
    /\b(virus|viruses| phage|phages|viroid|viroids| satellite|satellites|prion|prions)\b/ix,
    /[A-Z]?[a-z]+virus\b/
  ]
  signatures.any? { |signature| a_string.match(signature) }
end