class Taxonomy
Public Class Methods
new()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 16
# Opens the local NCBI Taxonomy SQLite database.
#
# The database file is looked up at "<home>/.ncbi_taxonomy/taxonomy.db".
# When SQLite reports that it cannot open the file, a hint plus the SQLite
# message are printed to STDERR and the process exits with status 1.
def initialize
  @home_dir = Dir.home
  @work_dir = "#{@home_dir}/.ncbi_taxonomy"
  @taxdb_release = "#{@work_dir}/taxonomy.db"
  @in_memory = false

  begin
    @db = SQLite3::Database.new(@taxdb_release)
  rescue SQLite3::CantOpenException => open_error
    STDERR.puts "Please download the NCBI Taxonomy database using 'ncbi_taxonomy update' command."
    STDERR.puts "[MSG]#{open_error.message}"
    exit 1
  end
end
Public Instance Methods
check_sqlite_version()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 43
# Reports whether the SQLite library behind @db is at least version 3.8.3.
#
# get_allrank_by_id uses the answer to decide whether it may issue a
# WITH RECURSIVE query.
#
# @return [Boolean]
def check_sqlite_version
  minimum = Gem::Version.new('3.8.3')
  version_row = @db.execute("SELECT SQLITE_VERSION()").first
  Gem::Version.new(version_row.first) >= minimum
end
get_all_names_by_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 68
# Fetches every name_txt stored in the names table for a taxon.
#
# @param id [#to_i] taxonomy id (coerced with to_i before it reaches the SQL)
# @return [Array<Array>] the result rows, one single-column row per name
def get_all_names_by_id(id)
  tax_id = id.to_i
  @db.execute("SELECT name_txt FROM names WHERE tax_id=#{tax_id}")
end
get_allrank_by_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 86
# Returns the lineage of a taxon as [rank, scientific name] pairs, ordered
# from the highest collected ancestor down to the taxon itself.
#
# When check_sqlite_version is true the lineage is read with one recursive
# query; otherwise it is collected by following parent_tax_id node by node
# until a 'superkingdom' node or the root is reached. If nothing is found,
# the id is resolved through get_missing_id and looked up again.
#
# @param id [#to_i] taxonomy id
# @return [Array<Array(String, String)>] empty when the id cannot be resolved
def get_allrank_by_id(id)
  id = id.to_i
  out = []

  if check_sqlite_version
    sql = "WITH RECURSIVE allrank (id, pid, rank, name) AS ( " \
          "VALUES (0, #{id}, 'no_rank', 'Homo sapiens javamintus') " \
          "UNION ALL " \
          "SELECT nodes.tax_id, nodes.parent_tax_id, nodes.rank, names.name_txt " \
          "FROM nodes, names, allrank " \
          "WHERE nodes.tax_id=allrank.pid AND names.tax_id = nodes.tax_id " \
          "AND names.name_class='scientific name' AND nodes.tax_id<>1) " \
          "SELECT * FROM allrank;"
    rs = @db.execute(sql)
    # Row 0 is the seed row from VALUES. The final row is dropped too
    # (presumably the top-most node above the superkingdom -- confirm).
    (rs[1..-2] || []).each { |row| out << [row[2], row[3]] }
  else
    current = id
    loop do
      row = get_rank_ptaxonid_scientificname_by_id(current)
      # Unknown id: stop here so the merged/deleted lookup below can run
      # instead of failing on a nil row.
      break if row.nil?
      # tax_id 1 is the node the recursive query excludes as well; stopping
      # here keeps lineages without a 'superkingdom' from looping forever.
      break if current == 1

      out << [row[1], row[2]]
      break if row[1] == 'superkingdom'

      parent = row[0].to_i
      break if parent == current # a self-parented node would never advance
      current = parent
    end
  end

  # The recursive call already returns top-down order; undo that once so the
  # final reverse below restores it.
  out = get_allrank_by_id(get_missing_id(id)).reverse if out.empty? && id > -1
  out.reverse
end
get_allrank_by_name(name)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 141
# Resolves a name to its taxonomy ids and returns one get_allrank_by_id
# lineage per id.
#
# @param name [String] name passed on to get_taxonids_by_name
# @return [Array<Array>] one lineage per matching id; empty when none match
def get_allrank_by_name(name)
  get_taxonids_by_name(name).map { |tax_id| get_allrank_by_id(tax_id) }
end
get_fixedrank_by_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 105
# Projects the lineage from get_allrank_by_id onto the eight ranks listed by
# tax_rank_fixed (superkingdom .. species, then strain).
#
# * A fixed rank missing between two present ones is filled with the
#   placeholder "@<alt_name>_<rank>", where alt_name is the most recently
#   remembered lineage name.
# * A 'no rank' node met right after the seven ranks up to species is
#   reported as the strain.
# * Fixed ranks never reached are appended with a nil name; when the lineage
#   stops exactly at species, the species name is reused for the strain.
# * Ranks unknown to tax_rank_all are ignored (they used to raise
#   ArgumentError from an Integer-with-nil comparison).
#
# @param id [#to_i] taxonomy id
# @return [Array<Array(String, String|nil)>] [rank, name] pairs
def get_fixedrank_by_id(id)
  id = id.to_i
  fixed = tax_rank_fixed.to_a # [[rank, number], ...] in fixed order
  all_numbers = tax_rank_all
  fixed_slot = {}
  fixed.each_with_index { |(fixed_rank, _number), slot| fixed_slot[fixed_rank] = slot }
  no_rank_number = 28 # tax_rank_all number of "no rank"
  ranks_before_strain = 7 # superkingdom .. species

  arr = []
  pos = 0
  alt_name = ''

  get_allrank_by_id(id).each do |rank, name|
    slot = fixed_slot[rank]
    rank_number = all_numbers[rank]

    if slot
      # Fill every fixed rank skipped since the previous one.
      (arr.size...slot).each do |missing|
        arr << [fixed[missing][0], "@#{alt_name}_#{fixed[missing][0]}"]
        pos += 1
      end
      arr << [rank, name]
      pos += 1
      alt_name = name
    elsif rank_number == no_rank_number
      arr << ['strain', name] if arr.size == ranks_before_strain
    elsif rank_number
      # pos - 1 is -1 before any fixed rank was placed, which selects the last
      # fixed entry ("strain"), so early intermediate ranks always qualify.
      alt_name = name if fixed[pos - 1][1] > rank_number
    end
  end

  if arr.size < ranks_before_strain
    (arr.size..ranks_before_strain).each { |slot| arr << [fixed[slot][0], nil] }
  elsif arr.size == ranks_before_strain
    arr << ['strain', arr[-1][1]]
  end
  arr
end
get_fixedrank_by_name(name)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 147
# Resolves a name to its taxonomy ids and returns one get_fixedrank_by_id
# result per id.
#
# @param name [String] name passed on to get_taxonids_by_name
# @return [Array<Array>] one fixed-rank list per matching id; empty when none match
def get_fixedrank_by_name(name)
  get_taxonids_by_name(name).map { |tax_id| get_fixedrank_by_id(tax_id) }
end
get_missing_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 153
# Resolves an id that is absent from the nodes table.
#
# The id is coerced with to_i, like in the other lookups of this class, so
# arbitrary text can never reach the SQL statements.
#
# @param id [#to_i] taxonomy id
# @return [Integer] the new_tax_id from the merged table when exactly one row
#   matches; -1 when the id is listed in delnodes or has no single merge target
def get_missing_id(id)
  id = id.to_i

  deleted = @db.execute("SELECT tax_id FROM delnodes WHERE tax_id='#{id}'")
  return -1 unless deleted.empty?

  merged = @db.execute("SELECT new_tax_id FROM merged WHERE old_tax_id=#{id}")
  merged.size == 1 ? merged[0][0].to_i : -1
end
get_names_by_taxonid(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 54
# Fetches the (name_class, name_txt) rows stored for a taxon.
#
# The id is coerced with to_i, like in the other lookups of this class, so
# arbitrary text can never reach the SQL statement.
#
# @param id [#to_i] taxonomy id
# @return [Array<Array>] rows of [name_class, name_txt]
def get_names_by_taxonid(id)
  id = id.to_i
  @db.execute("SELECT name_class, name_txt FROM names WHERE tax_id=#{id}")
end
get_rank_ptaxonid_by_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 74
# Looks up the parent id and rank of a node.
#
# @param id [#to_i] taxonomy id
# @return [Array, nil] the first [parent_tax_id, rank] row, or nil when the
#   query returns no rows
def get_rank_ptaxonid_by_id(id)
  tax_id = id.to_i
  rows = @db.execute("SELECT parent_tax_id, rank FROM nodes WHERE tax_id=#{tax_id}")
  rows.first
end
get_rank_ptaxonid_scientificname_by_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 80
# Looks up the parent id, rank and scientific name of a node in one query.
#
# @param id [#to_i] taxonomy id
# @return [Array, nil] the first [parent_tax_id, rank, name_txt] row, or nil
#   when the query returns no rows
def get_rank_ptaxonid_scientificname_by_id(id)
  tax_id = id.to_i
  rows = @db.execute("SELECT nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names WHERE nodes.tax_id=#{tax_id} AND names.tax_id=#{tax_id} AND names.name_class='scientific name'")
  rows.first
end
get_scientific_name_by_id(id)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 62
# Looks up the scientific name of a taxon.
#
# @param id [#to_i] taxonomy id
# @return [String, nil] the name_txt of the first matching row, or nil when
#   the id has no scientific name (previously this raised NoMethodError)
def get_scientific_name_by_id(id)
  id = id.to_i
  row = @db.execute("SELECT name_txt FROM names WHERE tax_id=#{id} AND name_class='scientific name'").first
  row && row[0]
end
get_scientific_name_by_names(names)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 58
# Picks the scientific name out of rows shaped like the result of
# get_names_by_taxonid ([name_class, name_txt]).
#
# @param names [Array<Array>] rows whose element 0 is the name class and
#   element 1 the name text
# @return [String, nil] the text of the first 'scientific name' row, or nil
#   when there is none (previously the whole input array leaked out as the
#   return value of each)
def get_scientific_name_by_names(names)
  match = names.find { |row| row[0] == 'scientific name' }
  match && match[1]
end
get_taxonids_by_name(name)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 48
# Finds the distinct taxonomy ids whose name_txt equals the given name.
#
# The name is passed through SQLite3::Database.quote before it is placed
# inside the SQL string literal.
#
# @param name [String] name to look up
# @return [Array] flattened list of tax_id values; empty when nothing matches
def get_taxonids_by_name(name)
  escaped = SQLite3::Database.quote(name)
  @db.execute("SELECT DISTINCT tax_id FROM names WHERE name_txt='#{escaped}'").flatten
end
memory()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 30
# Copies the currently open database into a fresh ':memory:' database using
# SQLite3::Backup and makes that copy the active connection.
#
# @return [true] the value assigned to @in_memory
def memory
  mem_db = SQLite3::Database.new(':memory:')
  copier = SQLite3::Backup.new(mem_db, 'main', @db, 'main')
  copier.step(-1)
  copier.finish
  @db = mem_db
  @in_memory = true
end
memory?()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 39
# Tells whether the in-memory flag is set; initialize sets it to false and
# memory sets it to true.
#
# @return [Boolean] the current value of @in_memory
def memory?
  @in_memory
end
mget_allrank_by_id(id_arr)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 183
# Runs get_allrank_by_id for every id through mrun.
#
# @param id_arr [Array] taxonomy ids
# @return [Array] whatever mrun returns for the 'get_allrank_by_id' command
def mget_allrank_by_id(id_arr)
  mrun('get_allrank_by_id', id_arr)
end
mget_allrank_by_name(name_arr)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 191
# Runs get_allrank_by_name for every name through mrun.
#
# @param name_arr [Array] names to look up
# @return [Array] whatever mrun returns for the 'get_allrank_by_name' command
def mget_allrank_by_name(name_arr)
  mrun('get_allrank_by_name', name_arr)
end
mget_fixedrank_by_id(id_arr)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 187
# Runs get_fixedrank_by_id for every id through mrun.
#
# @param id_arr [Array] taxonomy ids
# @return [Array] whatever mrun returns for the 'get_fixedrank_by_id' command
def mget_fixedrank_by_id(id_arr)
  mrun('get_fixedrank_by_id', id_arr)
end
mget_fixedrank_by_name(name_arr)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 195
# Runs get_fixedrank_by_name for every name through mrun.
#
# @param name_arr [Array] names to look up
# @return [Array] whatever mrun returns for the 'get_fixedrank_by_name' command
def mget_fixedrank_by_name(name_arr)
  mrun('get_fixedrank_by_name', name_arr)
end
mrun(cmd, arr)
click to toggle source
# File lib/ncbi_taxonomy.rb, line 167
# Calls the method named by cmd once per element of arr, each call in its
# own Thread, and gathers the results in input order.
#
# @param cmd [String, Symbol] name of the method to look up with #method
# @param arr [Array] arguments, one per call
# @return [Array] results, where slot i holds the result for arr[i]
def mrun(cmd, arr)
  out = Array.new(arr.size)
  workers = arr.each_with_index.map do |el, idx|
    # Element and slot are handed to the thread as arguments so every thread
    # works on its own copies of the loop variables.
    Thread.new(el, idx) { |myel, myidx| out[myidx] = method(cmd).call(myel) }
  end
  workers.each(&:join)
  out
end
tax_rank_all()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 12
# Maps every rank name known to this class to its position, from
# "superkingdom" (0) down to "no rank" (28).
#
# @return [Hash{String => Integer}] a new hash on every call
def tax_rank_all
  [
    "superkingdom", "kingdom", "subkingdom",
    "superphylum", "phylum", "subphylum",
    "superclass", "class", "infraclass", "subclass",
    "superorder", "order", "suborder", "infraorder", "parvorder",
    "superfamily", "family", "subfamily",
    "tribe", "subtribe",
    "genus", "subgenus",
    "species group", "species subgroup", "species", "subspecies",
    "varietas", "forma",
    "no rank"
  ].each_with_index.to_h
end
tax_rank_fixed()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 8
# Maps the ranks reported by get_fixedrank_by_id, in output order, to their
# position number in tax_rank_all. "strain" is not a tax_rank_all key; it
# shares number 28 with "no rank".
#
# "genus" is 20 here, matching tax_rank_all (21 is "subgenus" there).
#
# @return [Hash{String => Integer}] a new hash on every call
def tax_rank_fixed
  {
    "superkingdom" => 0,
    "phylum" => 4,
    "class" => 7,
    "order" => 11,
    "family" => 16,
    "genus" => 20,
    "species" => 24,
    "strain" => 28
  }
end
using_unique_name()
click to toggle source
# File lib/ncbi_taxonomy.rb, line 4
# Returns a lookup table with the keys "Ponticoccus" and "Bacillus", each
# mapped to 1.
# NOTE(review): presumably names that need special handling for uniqueness;
# no caller is visible here -- confirm.
#
# @return [Hash{String => Integer}] a new hash on every call
def using_unique_name
  %w[Ponticoccus Bacillus].each_with_object({}) { |genus, flags| flags[genus] = 1 }
end