class Mspire::Peaklist
a collection of Peak
objects. At a minimum, each peak should respond to :x and :y
Constants
- DEFAULT_MERGE
Public Class Methods
creates a new Mspire::Peaklist
and coerces each peak into an Mspire::Peak
. If your peaks already behave like peaks you should use .new
# File lib/mspire/peaklist.rb, line 44 def [](*peaks) self.new( peaks.map {|peak| Mspire::Peak.new(peak) } ) end
# File lib/mspire/peaklist.rb, line 48 def create_bins(peaklists, opts) min, max = min_max_x(peaklists) divisions = [] bin_width = opts[:bin_width] use_ppm = (opts[:bin_unit] == :ppm) puts "using bin width: #{bin_width}" if $VERBOSE puts "using ppm for bins: #{use_ppm}" if $VERBOSE current_x = min loop do if current_x >= max divisions << max break else divisions << current_x current_x += ( use_ppm ? current_x./(1e6).*(bin_width) : bin_width ) end end # make each bin exclusive so there is no overlap bins = divisions.each_cons(2).map {|pair| Mspire::Bin.new(*pair, true) } # make the last bin *inclusive* of the terminating value bins[-1] = Mspire::Bin.new(bins.last.begin, bins.last.end) bins end
returns a new peaklist which has been merged with the others. opts) and then segment according to monotonicity (sharing intensity between abutting points). The final m/z is the weighted averaged of all the m/z’s in each peak. Valid opts (with default listed first):
:bin_width => 5 :bin_unit => :ppm|:amu interpret bin_width as ppm or amu :bins => array of Mspire::Bin objects for custom bins (overides other bin options) :normalize => true if true, divides total intensity by number of spectra :return_data => false returns a parallel array containing the peaks associated with each returned peak :only_data => false only returns the binned peaks :split => :zero|:greedy_y|:share see Mspire::Peak#split :centroided => true treat the data as centroided
The binning algorithm is roughly the fastest possible algorithm that would allow for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
Assumes the peaklists are already sorted by m/z.
Note that the peaks themselves will be altered if using the :share split method.
# File lib/mspire/peaklist.rb, line 174 def merge(peaklists, opts={}) opts = DEFAULT_MERGE.merge(opts) (peaklist, returned_data) = if opts[:centroided] merge_centroids(peaklists, opts.dup) else raise NotImplementedError, "need to implement profile merging" end if opts[:only_data] returned_data elsif opts[:return_data] [peaklist, returned_data] else peaklist end end
takes an array of peaklists, merges them together (like merge), then returns an array of doublets. Each doublet is an m/z and an array of values (parallel to input) of intensities]
If you are already using tagged peak objects, then you should set have_tagged_peaks to false (default is true). The peaks will be deconvolved based on the sample_id in this case.
:have_tagged_peaks => false|true
# File lib/mspire/peaklist.rb, line 203 def merge_and_deconvolve(peaklists, opts={}) unless htp=opts.delete(:have_tagged_peaks) peaklists.each_with_index do |peaklist,i| peaklist.map! do |peak| TaggedPeak.new(peak, i) end end end # make sure we get the data back out opts[:return_data] = true opts[:only_data] = false sample_ids = peaklists.map {|pl| pl.first.sample_id } peaks, data = merge(peaklists, opts) data.zip(peaks).map do |bucket_of_peaks, meta_peak| signal_by_sample_id = Hash.new {|h,k| h[k] = 0.0 } bucket_of_peaks.each do |peak| signal_by_sample_id[peak.sample_id] += peak.y end [meta_peak.x, sample_ids.map {|sample_id| signal_by_sample_id[sample_id] } ] end end
# File lib/mspire/peaklist.rb, line 88 def merge_centroids(peaklists, opts={}) opts[:return_data] = true if opts[:only_data] # Create Mspire::Bin objects bins = opts[:bins] ? opts[:bins] : create_bins(peaklists, opts) puts "created #{bins.size} bins" if $VERBOSE peaklists.each do |peaklist| Mspire::Bin.bin(bins, peaklist, &:x) end pseudo_peaks = bins.map do |bin| Mspire::Peak.new( [bin, bin.data.reduce(0.0) {|sum,peak| sum + peak.y }] ) end pseudo_peaklist = Mspire::Peaklist.new(pseudo_peaks) separate_peaklists = pseudo_peaklist.split(opts[:split]) normalize_factor = opts[:normalize] ? peaklists.size : 1 return_data = [] final_peaklist = Mspire::Peaklist.new unless opts[:only_data] separate_peaklists.each do |pseudo_peaklist| data_peaklist = Mspire::Peaklist.new weight_x = 0.0 tot_intensity = pseudo_peaklist.inject(0.0) {|sum, bin_peak| sum + bin_peak.y } #puts "TOT INTENSITY:" #p tot_intensity calc_from_lil_bins = pseudo_peaklist.inject(0.0) {|sum, bin_peak| sum + bin_peak.x.data.map(&:y).reduce(:+) } #puts "LILBINS: " #p calc_from_lil_bins pseudo_peaklist.each do |bin_peak| # For the :share method, the psuedo_peak intensity may have been # adjusted, but the individual peaks were not. Correct this. if opts[:split] == :share post_scaled_y = bin_peak.y pre_scaled_y = bin_peak.x.data.reduce(0.0) {|sum,peak| sum + peak.last } #puts "PRESCALED Y:" #p pre_scaled_y if (post_scaled_y - pre_scaled_y).abs.round(10) != 0.0 correction = post_scaled_y / pre_scaled_y bin_peak.x.data.each {|peak| peak.y = (peak.y * correction) } end end unless opts[:only_data] bin_peak.x.data.each do |peak| weight_x += peak.x * ( peak.y.to_f / tot_intensity) end end (data_peaklist.push( *bin_peak.x.data )) if opts[:return_data] end final_peaklist << Mspire::Peak.new([weight_x, tot_intensity / normalize_factor]) unless opts[:only_data] return_data << data_peaklist if opts[:return_data] end [final_peaklist, return_data] end
# File lib/mspire/peaklist.rb, line 75 def min_max_x(peaklists) # find the min and max across all spectra first_peaklist = peaklists.first min = first_peaklist.first.x; max = first_peaklist.last.x peaklists.each do |peaklist| if peaklist.size > 0 min = peaklist.lo_x if peaklist.lo_x < min max = peaklist.hi_x if peaklist.hi_x > max end end [min, max] end
Public Instance Methods
# File lib/mspire/peaklist.rb, line 13 def hi_x self.last[0] if size > 0 end
# File lib/mspire/peaklist.rb, line 9 def lo_x self.first[0] if size > 0 end
returns an array with the indices outlining each peak. The first index is the start of the peak, the last index is the last of the peak. Interior indices represent local minima. So, peaks that have only two indices have no local minima.
# File lib/mspire/peaklist.rb, line 233 def peak_boundaries(gt=0.0) in_peak = false prev_y = gt prev_prev_y = gt peak_inds = [] self.each_with_index do |peak, index| curr_y = peak.y if curr_y > gt if !in_peak in_peak = true peak_inds << [index] else # if on_upslope if prev_y < curr_y # If we were previously on a downslope and we are now on an upslope # then the previous index is a local min # on_downslope(prev_previous_y, prev_y) if prev_prev_y > prev_y # We have found a local min peak_inds.last << (index - 1) end end # end if (upslope) end # end if !in_peak elsif in_peak peak_inds.last << (index - 1) in_peak = false end prev_prev_y = prev_y prev_y = curr_y end # add the last one to the last peak if it is a boundary if self[-1].y > gt peak_inds.last << (self.size-1) end peak_inds end
returns an Array
of peaklist objects. Splits run of 1 or more local minima into multiple peaklists. When a point is ‘shared’ between two adjacent hill-ish areas, the choice of how to resolve multi-hills (runs of data above zero) is one of:
:zero = only split on zeros :share = give each peak its rightful portion of shared peaks, dividing the intensity based on the intensity of adjacent peaks :greedy_y = give the point to the peak with highest point next to the point in question. tie goes lower.
Note that the peak surrounding a local_minima may be altered if using :share
assumes that a new peak can be made with an array containing the x value and the y value.
# File lib/mspire/peaklist.rb, line 339 def split(split_multipeaks_mthd=:zero) if split_multipeaks_mthd == :zero split_on_zeros else boundaries = peak_boundaries(0.0) no_lm_pklsts = [] boundaries.each do |indices| peak = self[indices.first..indices.last] if indices.size == 2 no_lm_pklsts << peak else # have local minima multipeak = Peaklist.new(peak) local_min_inds = indices[1..-2].map {|i| i-indices.first} peaklists = multipeak.split_contiguous(split_multipeaks_mthd, local_min_inds) no_lm_pklsts.push *peaklists end end #$stderr.puts "now #{no_lm_pklsts.size} peaks." if $VERBOSE no_lm_pklsts end end
returns an array of Peaklist
objects assumes that this is one connected list of peaks (i.e., no zeros/whitespace on the edges or internally)
/\ / \/\ / \
if there are no local minima, just returns self inside the array
# File lib/mspire/peaklist.rb, line 287 def split_contiguous(methd=:greedy_y, local_min_indices=nil) local_min_indices ||= ((pb=peak_boundaries.first) && pb.shift && pb.pop && pb) if local_min_indices.size == 0 self else peak_class = first.class prev_lm_i = 0 # <- don't worry, will be set to bumped to zero peaklists = [ self.class.new([self[0]]) ] local_min_indices.each do |lm_i| peaklists.last.push( *self[(prev_lm_i+1)..(lm_i-1)] ) case methd when :greedy_y if self[lm_i-1].y >= self[lm_i+1].y peaklists.last << self[lm_i] peaklists << self.class.new else peaklists << self.class.new( [self[lm_i]] ) end when :share # for each local min, two new peaks will be created, with # intensity shared between adjacent peaklists lm = self[lm_i] sum = self[lm_i-1].y + self[lm_i+1].y # push onto the last peaklist its portion of the local min peaklists.last << peak_class.new( [lm.x, lm.y * (self[lm_i-1].y.to_f/sum)] ) # create a new peaklist that contains its portion of the local min peaklists << self.class.new( [peak_class.new([lm.x, lm.y * (self[lm_i+1].y.to_f/sum)])] ) end prev_lm_i = lm_i end peaklists.last.push(*self[(prev_lm_i+1)...(self.size)] ) peaklists end end
returns an array of Peaklist
objects
# File lib/mspire/peaklist.rb, line 271 def split_on_zeros(given_peak_boundaries=nil) pk_bounds = given_peak_boundaries || peak_boundaries(0.0) pk_bounds.map do |indices| self.class.new self[indices.first..indices.last] end end
returns an Mspire::Spectrum
object
# File lib/mspire/peaklist.rb, line 362 def to_spectrum Mspire::Spectrum.new(self.transpose) end
for spectral peaks, this is the weighted m/z
# File lib/mspire/peaklist.rb, line 27 def weighted_x tot_intensity = self.inject(0.0) {|sum,peak| sum + peak.y } _weighted_x = 0.0 self.each do |peak| int = peak.y signal_by_sample_index[peak.sample_id] += int _weighted_x += (peak.first * (int/tot_intensity)) end _weighted_x end