class Update
Known bug (2014-04-05):
Error: citations.dmp.trim line 32659: expected 7 columns of data but found 8. This software does not actually use the citations table, so the error can be ignored for now; it should be fixed later.
Public Class Methods
new()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 9
# Prepares the working directory ($HOME/.ncbi_taxonomy), makes it the current
# directory, downloads the latest taxdump checksum and records in @status
# whether an update is needed:
#
# * true   - no previous checksum existed, or the new checksum differs
# * false  - the new checksum is identical to the previous one
# * String - the working path exists but is a regular file; the string is a
#            message for the user and nothing is downloaded
#
# Side effect: changes the process working directory to the work directory,
# because download_md5 writes its output to a relative path.
def initialize
  @home_dir = Dir.home
  @work_dir = @home_dir + "/.ncbi_taxonomy"
  md5_file = @work_dir + "/taxdump.tar.gz.md5"
  md5_old_file = @work_dir + "/taxdump.tar.gz.md5.old"
  @taxdb = @work_dir + "/taxonomy.db.prep"
  @taxdb_release = @work_dir + "/taxonomy.db"
  @status = nil

  unless File.exist?(@work_dir)
    # First run: create the working directory and fetch the checksum.
    Dir.mkdir(@work_dir)
    Dir.chdir(@work_dir)
    download_md5
    @status = true
    return
  end

  unless File.directory?(@work_dir)
    @status = "This software uses $HOME/.ncbi_taxonomy directory. However, in your home directory there is same name of file. We recommend you change that file name to another name."
    return
  end

  Dir.chdir(@work_dir)

  unless File.exist?(md5_file)
    download_md5
    @status = true
    return
  end

  # Keep the previous checksum for comparison. Plain File calls are used
  # instead of shelling out to `rm`/`diff`, so paths containing spaces or
  # shell metacharacters are handled correctly.
  File.delete(md5_old_file) if File.exist?(md5_old_file)
  File.rename(md5_file, md5_old_file)
  download_md5

  # A missing new checksum file counts as "different", matching the non-zero
  # exit status `diff` would give in that situation.
  unchanged = File.exist?(md5_file) &&
              File.binread(md5_file) == File.binread(md5_old_file)
  @status = !unchanged
  return
end
Public Instance Methods
do()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 87
# Runs the complete update pipeline, in order: download the dump, rewrite
# the dump files, load them into the database, and release the database.
# Returns whatever the final step (release) returns.
def do
  download_dump
  substitution
  load_db
  release
end
download_dump()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 59
# Streams taxdump.tar.gz from the NCBI server through curl straight into tar,
# which unpacks it into the current working directory. curl's stderr is
# discarded. Returns the (captured) standard output of the pipeline.
def download_dump
  fetch_and_unpack = "curl -s https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz 2>/dev/null | tar zxf - "
  %x(#{fetch_and_unpack})
end
download_md5()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 55
# Downloads the checksum of the taxonomy dump with curl and stores it as
# taxdump.tar.gz.md5 in the current working directory. curl's stderr is
# discarded. Returns the captured standard output of the command (the
# download itself is redirected into the file).
def download_md5
  fetch_checksum = "curl -s https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz.md5 > taxdump.tar.gz.md5 2>/dev/null"
  %x(#{fetch_checksum})
end
load_db()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 94
# Creates the taxonomy tables and indexes in the SQLite database at @taxdb
# and imports the tab-separated *.dmp.trim files into them.
#
# The script is written directly to the standard input of the sqlite3
# command-line tool. No shell is involved, so neither the database path nor
# the SQL text needs shell quoting.
#
# The .import commands use relative file names, so the *.dmp.trim files must
# be in the current working directory.
#
# Returns whatever sqlite3 printed on standard output (a String).
# Raises Errno::ENOENT if the sqlite3 executable cannot be found.
def load_db
  # The heredoc is interpolating on purpose: "\t" below becomes a real tab
  # character, which is what sqlite3 receives as the column separator.
  # Dot-commands are kept at the start of their lines.
  sql = <<EOF
PRAGMA page_size=4096;
PRAGMA main.locking_mode=EXCLUSIVE;
.separator '\t'
CREATE TABLE citations (
  cit_id BIGINT,
  cit_key VARCHAR(255),
  pubmed_id BIGINT,
  medline_id BIGINT,
  ur LONGTEXT,
  text LONGTEXT,
  taxid_list LONGTEXT
);
CREATE TABLE delnodes (
  tax_id BIGINT
);
CREATE TABLE division (
  division_id BIGINT,
  division_cde VARCHAR(255),
  division_name VARCHAR(255),
  comments VARCHAR(255)
);
CREATE TABLE gencode (
  genetic_code_id INT,
  abbreviation VARCHAR(255),
  name VARCHAR(255),
  cde LONGTEXT,
  starts LONGTEXT
);
CREATE TABLE merged (
  old_tax_id BIGINT,
  new_tax_id BIGINT
);
CREATE TABLE names (
  tax_id BIGINT,
  name_txt VARCHAR(255),
  unique_name VARCHAR(255),
  name_class VARCHAR(255)
);
CREATE TABLE nodes (
  tax_id BIGINT,
  parent_tax_id BIGINT,
  rank VARCHAR(64),
  embl_code VARCHAR(64),
  division_id INTEGER,
  inherited_div_flag BOOLEAN,
  genetic_code_id INTEGER,
  inherited_GC_flag BOOLEAN,
  mitochondrial_genetic_code_id INTEGER,
  inherited_MGC_flag BOOLEAN,
  GenBank_hidden_flag BOOLEAN,
  hidden_subtree_root_flag BOOLEAN,
  comments VARCHAR(255)
);
CREATE INDEX citations_idx ON citations(cit_id,cit_key,pubmed_id,medline_id,ur,text,taxid_list);
CREATE INDEX delnodes_idx ON delnodes(tax_id);
CREATE INDEX division_idx ON division(division_id,division_cde,division_name,comments);
CREATE INDEX gencode_idx ON gencode(genetic_code_id,abbreviation,name,cde,starts);
CREATE INDEX merged_idx ON merged(old_tax_id,new_tax_id);
CREATE INDEX names_idx ON names(tax_id,name_txt,unique_name,name_class);
CREATE INDEX nodes_idx ON nodes(tax_id,parent_tax_id,rank,embl_code,division_id,inherited_div_flag,genetic_code_id,inherited_GC_flag,mitochondrial_genetic_code_id,inherited_MGC_flag,GenBank_hidden_flag,hidden_subtree_root_flag,comments);
.import citations.dmp.trim citations
.import delnodes.dmp.trim delnodes
.import division.dmp.trim division
.import gencode.dmp.trim gencode
.import merged.dmp.trim merged
.import names.dmp.trim names
.import nodes.dmp.trim nodes
EOF

  # Array form of IO.popen runs sqlite3 directly (no /bin/sh), so a database
  # path containing spaces or shell metacharacters is passed through intact.
  IO.popen(["sqlite3", @taxdb], "r+") do |sqlite|
    sqlite.write(sql)
    sqlite.close_write # signal end of input so sqlite3 runs the script and exits
    sqlite.read
  end
end
release()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 75
# Promotes the freshly built database (@taxdb) to the released location
# (@taxdb_release), keeping the previously released database as "<name>.old".
#
# The two preparatory steps - deleting an older backup and moving the current
# release aside - are allowed to find nothing to act on (Errno::ENOENT is
# ignored for them). The final rename is not guarded: it raises if the
# freshly built database is missing.
def release
  backup = "#{@taxdb_release}.old"
  discard_backup = lambda { FileUtils.rm(backup) }
  retire_current = lambda { File.rename(@taxdb_release, backup) }

  [discard_backup, retire_current].each do |step|
    begin
      step.call
    rescue Errno::ENOENT
      next # nothing there yet (e.g. first release) - carry on
    end
  end

  File.rename(@taxdb, @taxdb_release)
end
status()
click to toggle source
# File lib/ncbi_taxonomy_update.rb, line 51
# Returns the value currently stored in @status.
def status
  @status
end
substitution()
click to toggle source
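Rewrites every dump file in the working directory whose name ends in "dmp" as "<name>.trim": converts the text from ISO-8859-1 to UTF-8, replaces each tab-pipe-tab field separator with a single tab, removes the trailing tab-pipe at the end of each line, and replaces double quotes with %22.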
# File lib/ncbi_taxonomy_update.rb, line 64
# Rewrites every entry of @work_dir whose name ends in "dmp" into a sibling
# file named "<entry>.trim":
#
# * the bytes are interpreted as ISO-8859-1 and converted to UTF-8
# * each "\t|\t" field separator becomes a single tab
# * the "\t|" at the end of each line is removed
# * every double quote is replaced by %22
#
# Returns the array of directory entries that was iterated.
def substitution
  Dir.entries(@work_dir).each do |file|
    next unless file =~ /dmp$/

    source_path = @work_dir + "/" + file
    # File.read closes the file when done; the previous File.open(...).read
    # left the handle open until garbage collection.
    raw = File.read(source_path)

    # NOTE(review): the first gsub, as written, replaces a space with a
    # space and therefore changes nothing. It may have been meant to match a
    # tab embedded inside a field - confirm against the original source
    # before changing or removing it.
    converted = raw.force_encoding('iso-8859-1').encode('utf-8')
                   .gsub(/([^|]) ([^|])/, '\1 \2')
                   .gsub(/\t\|\t/, "\t")
                   .gsub(/\t\|$/, "")
                   .gsub(/\"/, "%22")

    File.open(source_path + ".trim", "w") do |out|
      out << converted
    end
  end
end