class Taxonomy

Public Class Methods

new() click to toggle source
# File lib/ncbi_taxonomy.rb, line 16
# Opens the taxonomy database stored at ~/.ncbi_taxonomy/taxonomy.db.
#
# Sets @home_dir, @work_dir, @taxdb_release and @in_memory (false), then
# stores the SQLite handle in @db. If the database file cannot be opened,
# a hint and the driver's message are written to STDERR and the process
# exits with status 1.
def initialize
        @home_dir = Dir.home
        @work_dir = "#{@home_dir}/.ncbi_taxonomy"
        @taxdb_release = "#{@work_dir}/taxonomy.db"
        @in_memory = false
        begin
                @db = SQLite3::Database.new(@taxdb_release)
        rescue SQLite3::CantOpenException => open_error
                STDERR.puts "Please download the NCBI Taxonomy database using 'ncbi_taxonomy update' command."
                STDERR.puts "[MSG]#{open_error.message}"
                exit 1
        end
end

Public Instance Methods

check_sqlite_version() click to toggle source
# File lib/ncbi_taxonomy.rb, line 43
# Reports whether the SQLite engine behind @db is version 3.8.3 or newer.
# get_allrank_by_id uses the answer to decide whether it may issue a
# WITH RECURSIVE query.
#
# Returns true when SQLITE_VERSION() >= 3.8.3, false otherwise.
def check_sqlite_version
        minimum = Gem::Version.new('3.8.3')
        version_rows = @db.execute("SELECT SQLITE_VERSION()")
        Gem::Version.new(version_rows[0][0]) >= minimum
end
get_all_names_by_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 68
# Fetches every name_txt value recorded in the names table for a taxon.
#
# id - taxon id; coerced with to_i before being placed in the query.
#
# Returns whatever @db.execute yields for the query (one row per name).
def get_all_names_by_id id
        tax_id = id.to_i
        @db.execute "SELECT name_txt FROM names WHERE tax_id=#{tax_id}"
end
get_allrank_by_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 86
# Builds the lineage of a taxon as [rank, scientific name] pairs, ordered
# from the top-most ancestor down to the requested taxon.
#
# id - taxon id; coerced with to_i.
#
# When check_sqlite_version is true the lineage is read with a single
# WITH RECURSIVE query. Otherwise parents are followed one lookup at a time
# until a 'superkingdom' row is reached.
#
# If nothing is found for a non-negative id, the id is translated through
# get_missing_id and the lookup is repeated with the replacement id.
#
# Returns an Array of [rank, name] pairs; empty when the id cannot be resolved.
def get_allrank_by_id id
        id = id.to_i
        # The step-by-step walk below reassigns id, so remember what was asked for.
        requested_id = id
        out = Array.new
        if check_sqlite_version
                rs = @db.execute "WITH RECURSIVE allrank (id, pid, rank, name) AS ( VALUES (0, #{id}, 'no_rank', 'Homo sapiens javamintus') UNION ALL SELECT nodes.tax_id, nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names, allrank WHERE nodes.tax_id=allrank.pid AND names.tax_id = nodes.tax_id AND names.name_class='scientific name' AND nodes.tax_id<>1) SELECT * FROM allrank;"
                # Row 0 is the placeholder seed from VALUES; the final row is dropped as well.
                (rs[1..-2] || []).each {|x| out << [ x[2], x[3] ] }
        else
                # Stop at tax_id 1, the same node the recursive query filters out;
                # without this a lineage lacking a superkingdom never terminates.
                until id == 1
                        rs = self.get_rank_ptaxonid_scientificname_by_id id
                        # No row for this id: leave the loop so the merged-id fallback can run.
                        break if rs.nil?
                        out << [ rs[1], rs[2] ]
                        break if rs[1] == 'superkingdom'
                        id = rs[0].to_i
                end
        end

        # The recursive call already returns top-down order; reverse it once so
        # the final reverse below restores that order.
        out = self.get_allrank_by_id(self.get_missing_id requested_id).reverse  if out.size == 0 && requested_id > -1
        out.reverse
end
get_allrank_by_name(name) click to toggle source
# File lib/ncbi_taxonomy.rb, line 141
# Resolves a name to its taxon ids and returns the full lineage of each.
#
# name - name text handed to get_taxonids_by_name.
#
# Returns an Array with one get_allrank_by_id result per matching taxon id.
def get_allrank_by_name name
        get_taxonids_by_name(name).map {|tax_id| get_allrank_by_id(tax_id) }
end
get_fixedrank_by_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 105
# Projects the lineage of a taxon onto the eight ranks of tax_rank_fixed
# (superkingdom .. species, plus strain).
#
# id - taxon id; coerced with to_i.
#
# Fixed ranks that the lineage skips are filled with a placeholder of the
# form "@<alt_name>_<rank>", where alt_name is the most recently remembered
# name. A 'no rank' entry met once seven ranks are filled becomes the strain.
# If fewer than seven ranks were filled, the remaining ranks (through strain)
# are appended with a nil name; if exactly seven were filled, the strain
# reuses the last name.
#
# Ranks absent from tax_rank_all are ignored instead of raising on a
# nil comparison.
#
# Returns an Array of [rank, name] pairs.
def get_fixedrank_by_id id
        id = id.to_i
        ranks = self.get_allrank_by_id id
        # The rank tables are rebuilt on every call, so fetch them once.
        fixed = tax_rank_fixed
        fixed_list = fixed.to_a
        all = tax_rank_all
        arr = Array.new
        pos = 0
        alt_name = ''
        ranks.each do |rank, name|
                rank_fixed_no = fixed[rank]
                rank_all_no = all[rank]
                if rank_fixed_no != nil
                        this_rank_fixed = fixed_list.index [rank, rank_fixed_no]
                        if arr.size < this_rank_fixed
                                # Fill the fixed ranks that were skipped before this one.
                                (arr.size...this_rank_fixed).each do |x|
                                        arr << [ fixed_list[x][0], "@#{alt_name}_#{fixed_list[x][0]}" ]
                                        pos += 1
                                end
                        end
                        arr << [ rank, name ]
                        pos += 1
                        alt_name = name
                elsif arr.size == 7 && rank_all_no == 28
                        arr << [ 'strain', name ]
                elsif rank_all_no != nil && rank_all_no != 28
                        # With pos == 0 the index -1 selects the last fixed entry.
                        alt_name = name if fixed_list[pos-1][1] > rank_all_no
                end
        end
        if arr.size < 7
                # The range is evaluated once, so this appends through index 7 (strain).
                (arr.size..7).each do |x|
                        arr << [ fixed_list[x][0], nil ]
                end
        elsif arr.size == 7
                arr << [ 'strain', arr[-1][1] ]
        end
        arr
end
get_fixedrank_by_name(name) click to toggle source
# File lib/ncbi_taxonomy.rb, line 147
# Resolves a name to its taxon ids and returns the fixed-rank lineage of each.
#
# name - name text handed to get_taxonids_by_name.
#
# Returns an Array with one get_fixedrank_by_id result per matching taxon id.
def get_fixedrank_by_name name
        get_taxonids_by_name(name).map {|tax_id| get_fixedrank_by_id(tax_id) }
end
get_missing_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 153
# Translates a taxon id that has no node row into its replacement id.
#
# id - taxon id; coerced with to_i, like the other id-based lookups, so
#      arbitrary text can never reach the SQL statements.
#
# Returns -1 when the id is listed in delnodes, or when merged does not hold
# exactly one row for it; otherwise the new_tax_id from merged as an Integer.
def get_missing_id id
        id = id.to_i
        deleted = @db.execute "SELECT tax_id FROM delnodes WHERE tax_id='#{id}'"
        return -1 if deleted.size > 0

        merged = @db.execute "SELECT new_tax_id FROM merged WHERE old_tax_id=#{id}"
        merged.size == 1 ? merged[0][0].to_i : -1
end
get_names_by_taxonid(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 54
# Fetches the (name_class, name_txt) rows recorded for a taxon.
#
# id - taxon id; coerced with to_i, like the other id-based lookups, so
#      arbitrary text can never reach the SQL statement.
#
# Returns whatever @db.execute yields for the query.
def get_names_by_taxonid id
        id = id.to_i
        @db.execute "SELECT name_class, name_txt FROM names WHERE tax_id=#{id}"
end
get_rank_ptaxonid_by_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 74
# Looks up the parent taxon id and the rank of a taxon in the nodes table.
#
# id - taxon id; coerced with to_i.
#
# Returns the first result row (parent_tax_id, rank), or nil when the query
# yields no rows.
def get_rank_ptaxonid_by_id id
        rows = @db.execute "SELECT parent_tax_id, rank FROM nodes WHERE tax_id=#{id.to_i}"
        rows[0]
end
get_rank_ptaxonid_scientificname_by_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 80
# Looks up the parent taxon id, the rank and the scientific name of a taxon
# by joining nodes with names.
#
# id - taxon id; coerced with to_i.
#
# Returns the first result row (parent_tax_id, rank, name_txt), or nil when
# the query yields no rows.
def get_rank_ptaxonid_scientificname_by_id id
        tax_id = id.to_i
        sql = "SELECT nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names " \
              "WHERE nodes.tax_id=#{tax_id} AND names.tax_id=#{tax_id} " \
              "AND names.name_class='scientific name'"
        @db.execute(sql)[0]
end
get_scientific_name_by_id(id) click to toggle source
# File lib/ncbi_taxonomy.rb, line 62
# Fetches the scientific name of a taxon from the names table.
#
# id - taxon id; coerced with to_i.
#
# Returns the name_txt of the first matching row, or nil when the taxon has
# no 'scientific name' row (rather than raising on a missing row).
def get_scientific_name_by_id id
        id = id.to_i
        out = @db.execute "SELECT name_txt FROM names WHERE tax_id=#{id} AND name_class='scientific name'"
        first_row = out[0]
        first_row && first_row[0]
end
get_scientific_name_by_names(names) click to toggle source
# File lib/ncbi_taxonomy.rb, line 58
# Picks the scientific name out of a list of (name_class, name_txt) rows,
# such as the rows returned by get_names_by_taxonid.
#
# names - enumerable of rows where index 0 is the name class and index 1
#         the name text.
#
# Returns the name text of the first row whose class is 'scientific name',
# or nil when no such row exists (previously the whole input was returned).
def get_scientific_name_by_names names
        hit = names.find {|x| x[0] == 'scientific name' }
        hit && hit[1]
end
get_taxonids_by_name(name) click to toggle source
# File lib/ncbi_taxonomy.rb, line 48
# Finds the distinct taxon ids whose names table entry matches a name exactly.
#
# name - name text; escaped with SQLite3::Database.quote before being
#        embedded in the query.
#
# Returns a flat Array of the tax_id values from the result rows.
def get_taxonids_by_name name
        escaped = SQLite3::Database.quote(name)
        rows = @db.execute("SELECT DISTINCT tax_id FROM names WHERE name_txt='#{escaped}'")
        rows.flatten
end
memory() click to toggle source
# File lib/ncbi_taxonomy.rb, line 30
# Copies the database behind @db into a fresh ':memory:' database and makes
# that copy the active handle.
#
# Afterwards @db points at the in-memory database and @in_memory is true.
# Returns true (the value assigned to @in_memory).
def memory
        in_memory_db = SQLite3::Database.new(':memory:')
        copier = SQLite3::Backup.new(in_memory_db, 'main', @db, 'main')
        # NOTE(review): -1 presumably asks the backup API to copy every remaining
        # page in one step - confirm against the SQLite backup documentation.
        copier.step(-1)
        copier.finish
        @db = in_memory_db
        @in_memory = true
end
memory?() click to toggle source
# File lib/ncbi_taxonomy.rb, line 39
# Tells whether the database handle has been swapped for an in-memory copy.
# Returns the @in_memory flag: initialize sets it to false and memory sets
# it to true.
def memory?
        @in_memory
end
mget_allrank_by_id(id_arr) click to toggle source
# File lib/ncbi_taxonomy.rb, line 183
# Runs get_allrank_by_id for every id in id_arr through mrun.
#
# id_arr - collection of taxon ids.
#
# Returns the Array produced by mrun (one result per input, in input order).
def mget_allrank_by_id id_arr
        mrun('get_allrank_by_id', id_arr)
end
mget_allrank_by_name(name_arr) click to toggle source
# File lib/ncbi_taxonomy.rb, line 191
# Runs get_allrank_by_name for every name in name_arr through mrun.
#
# name_arr - collection of names.
#
# Returns the Array produced by mrun (one result per input, in input order).
def mget_allrank_by_name name_arr
        mrun('get_allrank_by_name', name_arr)
end
mget_fixedrank_by_id(id_arr) click to toggle source
# File lib/ncbi_taxonomy.rb, line 187
# Runs get_fixedrank_by_id for every id in id_arr through mrun.
#
# id_arr - collection of taxon ids.
#
# Returns the Array produced by mrun (one result per input, in input order).
def mget_fixedrank_by_id id_arr
        mrun('get_fixedrank_by_id', id_arr)
end
mget_fixedrank_by_name(name_arr) click to toggle source
# File lib/ncbi_taxonomy.rb, line 195
# Runs get_fixedrank_by_name for every name in name_arr through mrun.
#
# name_arr - collection of names.
#
# Returns the Array produced by mrun (one result per input, in input order).
def mget_fixedrank_by_name name_arr
        mrun('get_fixedrank_by_name', name_arr)
end
mrun(cmd, arr) click to toggle source
# File lib/ncbi_taxonomy.rb, line 167
# Calls the method named cmd once per element of arr, each call in its own
# thread, and gathers the results.
#
# cmd - name of the method to invoke on this object.
# arr - collection of arguments; one thread is started per element.
#
# Returns an Array of arr.size results where slot i holds the result for the
# i-th element; every thread is joined before returning.
def mrun cmd, arr
        out = Array.new(arr.size)

        workers = arr.each_with_index.map do |el, idx|
                # Hand the element and its slot to the thread as thread-local arguments.
                Thread.new(el, idx) do |myel, myidx|
                        out[myidx] = method(cmd).call(myel)
                end
        end

        workers.each(&:join)

        out
end
tax_rank_all() click to toggle source
# File lib/ncbi_taxonomy.rb, line 12
# Maps every rank name known to this class to its ordinal position, from
# "superkingdom" (0) down to "no rank" (28). A new Hash is built on each call.
def tax_rank_all
        {
                "superkingdom" => 0,
                "kingdom" => 1,
                "subkingdom" => 2,
                "superphylum" => 3,
                "phylum" => 4,
                "subphylum" => 5,
                "superclass" => 6,
                "class" => 7,
                "infraclass" => 8,
                "subclass" => 9,
                "superorder" => 10,
                "order" => 11,
                "suborder" => 12,
                "infraorder" => 13,
                "parvorder" => 14,
                "superfamily" => 15,
                "family" => 16,
                "subfamily" => 17,
                "tribe" => 18,
                "subtribe" => 19,
                "genus" => 20,
                "subgenus" => 21,
                "species group" => 22,
                "species subgroup" => 23,
                "species" => 24,
                "subspecies" => 25,
                "varietas" => 26,
                "forma" => 27,
                "no rank" => 28
        }
end
tax_rank_fixed() click to toggle source
# File lib/ncbi_taxonomy.rb, line 8
# Maps the eight ranks reported by get_fixedrank_by_id to an ordinal.
# Insertion order matters: get_fixedrank_by_id indexes into this hash's
# to_a form. A new Hash is built on each call.
#
# NOTE(review): "genus" is 21 here but 20 in tax_rank_all - confirm whether
# the difference is intentional.
def tax_rank_fixed
        {
                "superkingdom" => 0,
                "phylum" => 4,
                "class" => 7,
                "order" => 11,
                "family" => 16,
                "genus" => 21,
                "species" => 24,
                "strain" => 28
        }
end
using_unique_name() click to toggle source
# File lib/ncbi_taxonomy.rb, line 4
# Returns a Hash whose keys are the names "Ponticoccus" and "Bacillus", each
# mapped to 1. A new Hash is built on each call.
def using_unique_name
        {
                "Ponticoccus" => 1,
                "Bacillus" => 1
        }
end