class Bio::DTASelect::OutputFile
Public Class Methods
log()
click to toggle source
# File lib/dta_select_output.rb, line 9
#
# Logger used by the class-level methods of this class.
#
# Builds a fresh SelectedProtein and returns whatever its +log+ method
# returns.
def self.log
  protein = SelectedProtein.new
  protein.log
end
parse(io)
click to toggle source
# File lib/dta_select_output.rb, line 72
#
# Parse a tab-separated DTASelect output table read from +io+.
#
# +io+ must respond to +each_line+. Lines are skipped until the peptide
# column header (first field 'Unique', second field 'FileName') is found.
# After that, each line is either:
#
# * a protein line (non-empty first field and fewer than 12 fields), which
#   becomes a SelectedProtein; consecutive protein lines share the peptides
#   that follow them;
# * the footer line (empty first field, second field 'Proteins'), which
#   stops parsing; or
# * a peptide/spectrum line, which becomes a Peptide keyed by its second
#   field and is linked to every protein of the current group.
#
# Blank lines are ignored.
#
# Returns a Result whose +protein_name_to_object+ and
# +peptide_name_to_object+ hashes map identifiers to the parsed objects.
#
# Raises RuntimeError if the header's second field is not 'FileName', if a
# protein identifier occurs twice, or if a peptide identifier is 10
# characters or shorter.
def self.parse(io)
  # Look the logger up once instead of once per message; each call to +log+
  # instantiates a SelectedProtein. (Assumes every call yields an
  # equivalent logger.)
  logger = log

  result = Result.new
  # Hashes of identifiers to objects
  result.protein_name_to_object = {}
  result.peptide_name_to_object = {}

  reading_header = true
  current_proteins = []
  last_line_was_protein_name = false
  peptide_attribute_names = nil

  # Parse each line of the DTASelect file
  io.each_line do |line|
    splits = line.chomp.split("\t")
    logger.debug "Parsing line `#{line.chomp}'" if logger.debug?

    # A blank line splits to an empty array; without this guard it would be
    # mistaken for a protein line with a nil identifier.
    next if splits.empty?

    if reading_header
      logger.debug "reading header" if logger.debug?
      if splits[0] == 'Unique'
        reading_header = false
        # Current line describes the peptide attributes
        peptide_attribute_names = splits
        # This field has special importance, so be picky
        unless splits[1] == 'FileName'
          raise "Badly parsed file at this line: #{line.inspect}, expected 2nd field to be 'FileName', found #{splits[1]}"
        end
      end
      next
    end

    # OK, now we are reading the actual table, not the header
    if splits[0] != '' && splits[11].nil?
      ident = splits[0]
      unless last_line_was_protein_name
        # Sometimes several proteins are given all in the one header block;
        # only a protein line that follows a peptide line starts a new group.
        logger.debug "New protein now being parsed" if logger.debug?
        current_proteins = []
      end
      last_line_was_protein_name = true

      if result.protein_name_to_object[ident]
        raise "Unexpectedly found the same protein identifier twice: #{ident}, from line #{line.chomp}"
      end

      current_protein = SelectedProtein.new
      current_protein.identifier = ident
      current_protein.sequence_count = splits[1].to_i
      current_protein.spectrum_count = splits[2].to_i
      current_protein.sequence_coverage = splits[3].to_f
      current_protein.length = splits[4].to_i
      current_protein.molwt = splits[5].to_f
      current_protein.pi = splits[6].to_f
      # Keep the raw field: calling to_f on a non-numeric status code would
      # always give 0.0 and lose the value.
      current_protein.validation_status = splits[7]
      current_protein.descriptive_name = splits[8]

      current_proteins.push current_protein
      result.protein_name_to_object[ident] = current_protein
    elsif splits[1] == 'Proteins'
      # Done processing, except for the bits down the bottom which aren't
      # parsed (yet, at least)
      break
    else
      logger.debug "New spectra now being parsed" if logger.debug?
      last_line_was_protein_name = false

      # Record a spectra
      ident = splits[1]
      raise "Unexpected hits name `#{ident}', from line `#{line.chomp}'" unless ident.length > 10

      pep = result.peptide_name_to_object[ident]
      if pep.nil?
        pep = Peptide.new
        pep.identifier = ident
        pep.dtaselect_attributes ||= {}
        # Store every column of the row under its header name
        peptide_attribute_names.each_with_index do |attribute_name, i|
          pep.dtaselect_attributes[attribute_name] = splits[i]
        end
        result.peptide_name_to_object[ident] = pep
      end

      # Link the peptide to every protein of the current group, both ways
      current_proteins.each do |current_protein|
        pep.parent_proteins.push current_protein
        current_protein.peptides.push pep
      end
      logger.debug "Parsed this peptide #{pep.inspect}" if logger.debug?
    end
  end

  logger.debug "Proteins parsed: #{result.protein_name_to_object.inspect}" if logger.debug?
  result
end