class Object
Public Instance Methods
add_term2dictionary(dict, key, term)
click to toggle source
# File lib/DomFun/generalMethods.rb, line 85
# Appends +term+ to the list stored under +key+ in +dict+, creating the
# list when the key has no entry yet. +dict+ is modified in place.
#
# @param dict [Hash] dictionary mapping keys to arrays of terms
# @param key [Object] entry to append to
# @param term [Object] value to append
# @return [Array] the list now stored under +key+
def add_term2dictionary(dict, key, term)
  (dict[key] ||= []) << term
end
invert_hash(hash)
click to toggle source
# File lib/DomFun/generalMethods.rb, line 72
# Builds the inverse of +hash+: every distinct value becomes a key that maps
# to the list of original keys holding that value, in iteration order.
# The input hash is not modified.
#
# @param hash [Hash] mapping to invert
# @return [Hash{Object => Array}] value => list of keys that had that value
def invert_hash(hash)
  hash.each_with_object({}) do |(key, value), inverted|
    (inverted[value] ||= []) << key
  end
end
load_cafa_data(cafa_file)
click to toggle source
# File lib/DomFun/generalMethods.rb, line 94
# Reads a tab-separated CAFA file and collects, per gene name, the GO terms
# of its 'MF' rows.
#
# Rules applied to each line:
# * lines containing 'GO_Ont' are skipped (presumably the header row - confirm);
# * only rows whose second column equals 'MF' are kept;
# * the GO term is taken from the fifth column, the gene name from the seventh;
# * rows whose gene name is 'NA' are skipped.
#
# @param cafa_file [String] path of the file to read
# @return [Hash{String => Array<String>}] gene name => GO terms in file order
def load_cafa_data(cafa_file)
  cafa_data = {}
  # File.foreach closes the file when iteration ends; the previous
  # File.open(...).each left the handle open until garbage collection.
  File.foreach(cafa_file) do |line|
    next if line.include?('GO_Ont')
    cafa_info = line.chomp.split("\t")
    next unless cafa_info[1] == 'MF'
    go_term = cafa_info[4]
    gene_name = cafa_info[6]
    next if gene_name == 'NA'
    (cafa_data[gene_name] ||= []) << go_term
  end
  cafa_data
end
load_cath_data(file, category, dictionary_key='gene_name')
click to toggle source
# File lib/DomFun/generalMethods.rb, line 41
# Loads a tab-separated CATH table (first row is discarded as a header) and
# builds the domain annotations of every protein plus a protein => alternative
# name dictionary.
#
# Columns used (0-based): 0 protein id, 3 gene name, 4 entry name
# (UNIPROT entry_name), 5 superfamily id, 6 funfam id.
#
# @param file [String] path of the table
# @param category [String] 'superfamilyID' or 'funfamID'; selects the column
#   stored per protein. Any other value stores nil for every row.
# @param dictionary_key [String] 'gene_name' (column 3) or 'geneID' (column 4);
#   selects the alternative name kept in the protein dictionary
# @return [Array(Hash, Hash, Integer)] protein id => list of domain terms,
#   protein id => alternative name (spaces replaced by '_'), and the number of
#   proteins with domain data
# @raise [ArgumentError] if +dictionary_key+ is not one of the supported values
def load_cath_data(file, category, dictionary_key='gene_name')
  field = case dictionary_key
          when 'gene_name' then 3
          when 'geneID' then 4 # UNIPROT entry_name
          else
            # Previously this left the column index nil and failed later with
            # an unrelated TypeError on the first data row.
            raise ArgumentError, "unsupported dictionary_key: #{dictionary_key.inspect}"
          end
  cath_data = {}
  protein2gene_dict = {}
  # Options must be keywords: a positional options hash raises ArgumentError
  # on Ruby 3.x.
  rows = CSV.read(file, col_sep: "\t")
  rows.drop(1).each do |row|
    next if row.empty?
    # Fusions can only be detected in the CATH gene name column (3).
    # Empty CSV fields are parsed as nil, hence the to_s.
    next if row[3].to_s.include?('fusion')
    protein_id = row[0]
    term2save = case category
                when 'superfamilyID' then row[5]
                when 'funfamID' then row[6]
                end
    add_term2dictionary(cath_data, protein_id, term2save)
    alternative_name = row[field]
    # No mapping is stored for a missing (empty) or 'NULL' alternative name.
    next if alternative_name.nil? || alternative_name == 'NULL'
    protein2gene_dict[protein_id] = alternative_name.tr(' ', '_')
  end
  [cath_data, protein2gene_dict, cath_data.length]
end
load_proteins_file(file, annotation_types)
click to toggle source
# File lib/DomFun/generalMethods.rb, line 1
# Loads a tab-separated protein annotation table. The first line is skipped
# as a header; on every other line the first column is the protein id and the
# following columns hold, in order, the annotations for each entry of
# +annotation_types+ (several annotations per cell separated by ';' or ',').
# All spaces are removed from a line before it is parsed.
#
# For types whose name includes 'go', each annotation is reduced to its GO
# identifier ("GO:" followed by the text after 'GO:' with any ']' removed);
# annotations without a 'GO:' part are dropped.
#
# Lines with fewer columns than expected are treated as having empty cells
# for the missing columns, and blank lines are ignored (but still counted).
#
# @param file [String] path of the table
# @param annotation_types [Array<String>] annotation type per column
# @return [Array(Hash, Integer, Array)] type => { protein id => annotations },
#   the number of lines read (header included), and the unique ids of proteins
#   whose annotation columns are all empty
def load_proteins_file(file, annotation_types)
  protein_annotations = {}
  annotation_types.each { |type| protein_annotations[type] = {} } # initialize annotation hashes
  proteins_without_annotations = []
  fields_to_split = annotation_types.length
  counter = 0
  # File.foreach closes the file once iteration finishes.
  File.foreach(file) do |raw_line|
    counter += 1
    next if counter == 1 # header line
    line = raw_line.chomp.delete(' ')
    next if line.empty?
    # The limit keeps trailing empty columns, which split would otherwise drop.
    fields = line.split("\t", fields_to_split + 1)
    protID = fields.shift
    # Pad short lines so every annotation type has a (possibly empty) cell.
    cells = Array.new(fields_to_split) { |i| fields[i].to_s }
    annotation_types.each_with_index do |type, i|
      annotations = cells[i].split(/[;,]/)
      next if annotations.empty?
      if type.include?('go')
        protein_annotations[type][protID] = annotations.each_with_object([]) do |go_term, go_ids|
          _go_name, go_id = go_term.split('GO:')
          go_ids << "GO:#{go_id.delete(']')}" unless go_id.nil?
        end
      else
        protein_annotations[type][protID] = annotations
      end
    end
    # With no annotation types nothing is ever reported as unannotated.
    proteins_without_annotations << protID if fields_to_split > 0 && cells.all?(&:empty?)
  end
  [protein_annotations, counter, proteins_without_annotations.uniq]
end