class Mspire::Peaklist

a collection of Peak objects. At a minimum, each peak should respond to :x and :y

Constants

DEFAULT_MERGE

Public Class Methods

[](*peaks) click to toggle source

creates a new Mspire::Peaklist and coerces each peak into an Mspire::Peak. If your peaks already behave like peaks you should use .new

# File lib/mspire/peaklist.rb, line 44
def [](*peaks)
  self.new( peaks.map {|peak| Mspire::Peak.new(peak) } )
end
create_bins(peaklists, opts) click to toggle source
# File lib/mspire/peaklist.rb, line 48
def create_bins(peaklists, opts)
  min, max = min_max_x(peaklists)

  divisions = []
  bin_width = opts[:bin_width]
  use_ppm = (opts[:bin_unit] == :ppm)

  puts "using bin width: #{bin_width}" if $VERBOSE
  puts "using ppm for bins: #{use_ppm}" if $VERBOSE

  current_x = min
  loop do
    if current_x >= max
      divisions << max
      break
    else
      divisions << current_x
      current_x += ( use_ppm ? current_x./(1e6).*(bin_width) : bin_width )
    end
  end
  # make each bin exclusive so there is no overlap
  bins = divisions.each_cons(2).map {|pair| Mspire::Bin.new(*pair, true) }
  # make the last bin *inclusive* of the terminating value
  bins[-1] = Mspire::Bin.new(bins.last.begin, bins.last.end)
  bins
end
merge(peaklists, opts={}) click to toggle source

returns a new peaklist which has been merged with the others. opts) and then segment according to monotonicity (sharing intensity between abutting points). The final m/z is the weighted averaged of all the m/z’s in each peak. Valid opts (with default listed first):

:bin_width => 5 
:bin_unit => :ppm|:amu        interpret bin_width as ppm or amu
:bins => array of Mspire::Bin objects  for custom bins (overides other bin options)
:normalize => true              if true, divides total intensity by 
                                number of spectra
:return_data => false           returns a parallel array containing
                                the peaks associated with each returned peak
:only_data => false           only returns the binned peaks
:split => :zero|:greedy_y|:share  see Mspire::Peak#split
:centroided => true             treat the data as centroided

The binning algorithm is roughly the fastest possible algorithm that would allow for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))

Assumes the peaklists are already sorted by m/z.

Note that the peaks themselves will be altered if using the :share split method.

# File lib/mspire/peaklist.rb, line 174
def merge(peaklists, opts={})
  opts = DEFAULT_MERGE.merge(opts)

  (peaklist, returned_data) =  
    if opts[:centroided]
      merge_centroids(peaklists, opts.dup)
    else
      raise NotImplementedError, "need to implement profile merging"
    end

  if opts[:only_data]
    returned_data
  elsif opts[:return_data]
    [peaklist, returned_data]
  else
    peaklist
  end
end
merge_and_deconvolve(peaklists, opts={}) click to toggle source

takes an array of peaklists, merges them together (like merge), then returns an array of doublets. Each doublet is an m/z and an array of values (parallel to input) of intensities]

If you are already using tagged peak objects, then you should set have_tagged_peaks to false (default is true). The peaks will be deconvolved based on the sample_id in this case.

:have_tagged_peaks => false|true
# File lib/mspire/peaklist.rb, line 203
def merge_and_deconvolve(peaklists, opts={})
  unless htp=opts.delete(:have_tagged_peaks)
    peaklists.each_with_index do |peaklist,i|
      peaklist.map! do |peak|
        TaggedPeak.new(peak, i)
      end
    end
  end

  # make sure we get the data back out
  opts[:return_data] = true
  opts[:only_data] = false

  sample_ids = peaklists.map {|pl| pl.first.sample_id }
  peaks, data = merge(peaklists, opts)
  data.zip(peaks).map do |bucket_of_peaks, meta_peak|
    signal_by_sample_id = Hash.new {|h,k| h[k] = 0.0 }
    bucket_of_peaks.each do |peak|
      signal_by_sample_id[peak.sample_id] += peak.y
    end
    [meta_peak.x, sample_ids.map {|sample_id| signal_by_sample_id[sample_id] } ]
  end
end
merge_centroids(peaklists, opts={}) click to toggle source
# File lib/mspire/peaklist.rb, line 88
def merge_centroids(peaklists, opts={})
  opts[:return_data] = true if opts[:only_data]

  # Create Mspire::Bin objects
  bins = opts[:bins] ? opts[:bins] : create_bins(peaklists, opts)
  puts "created #{bins.size} bins" if $VERBOSE

  peaklists.each do |peaklist|
    Mspire::Bin.bin(bins, peaklist, &:x)
  end

  pseudo_peaks = bins.map do |bin|
    Mspire::Peak.new( [bin, bin.data.reduce(0.0) {|sum,peak| sum + peak.y }] )
  end

  pseudo_peaklist = Mspire::Peaklist.new(pseudo_peaks)

  separate_peaklists = pseudo_peaklist.split(opts[:split])

  normalize_factor = opts[:normalize] ? peaklists.size : 1

  return_data = []
  final_peaklist = Mspire::Peaklist.new unless opts[:only_data]

  separate_peaklists.each do |pseudo_peaklist|
    data_peaklist = Mspire::Peaklist.new
    weight_x = 0.0
    tot_intensity = pseudo_peaklist.inject(0.0) {|sum, bin_peak| sum + bin_peak.y }
    #puts "TOT INTENSITY:"
    #p tot_intensity
    calc_from_lil_bins = pseudo_peaklist.inject(0.0) {|sum, bin_peak| sum + bin_peak.x.data.map(&:y).reduce(:+) }
    #puts "LILBINS: "
    #p calc_from_lil_bins
    pseudo_peaklist.each do |bin_peak|

      # For the :share method, the psuedo_peak intensity may have been
      # adjusted, but the individual peaks were not.  Correct this.
      if opts[:split] == :share
        post_scaled_y = bin_peak.y
        pre_scaled_y = bin_peak.x.data.reduce(0.0) {|sum,peak| sum + peak.last }
        #puts "PRESCALED Y:"
        #p pre_scaled_y
        if (post_scaled_y - pre_scaled_y).abs.round(10) != 0.0
          correction = post_scaled_y / pre_scaled_y
          bin_peak.x.data.each {|peak| peak.y = (peak.y * correction) }
        end
      end

      unless opts[:only_data]
        bin_peak.x.data.each do |peak|
          weight_x += peak.x * ( peak.y.to_f / tot_intensity)
        end
      end
      (data_peaklist.push( *bin_peak.x.data )) if opts[:return_data]
    end
    final_peaklist << Mspire::Peak.new([weight_x, tot_intensity / normalize_factor]) unless opts[:only_data]
    return_data << data_peaklist if opts[:return_data]
  end
  [final_peaklist, return_data]
end
min_max_x(peaklists) click to toggle source
# File lib/mspire/peaklist.rb, line 75
def min_max_x(peaklists)
  # find the min and max across all spectra
  first_peaklist = peaklists.first
  min = first_peaklist.first.x; max = first_peaklist.last.x
  peaklists.each do |peaklist|
    if peaklist.size > 0
      min = peaklist.lo_x if peaklist.lo_x < min
      max = peaklist.hi_x if peaklist.hi_x > max
    end
  end
  [min, max]
end

Public Instance Methods

hi_x() click to toggle source
# File lib/mspire/peaklist.rb, line 13
def hi_x
  self.last[0] if size > 0
end
lo_x() click to toggle source
# File lib/mspire/peaklist.rb, line 9
def lo_x
  self.first[0] if size > 0
end
peak_boundaries(gt=0.0) click to toggle source

returns an array with the indices outlining each peak. The first index is the start of the peak, the last index is the last of the peak. Interior indices represent local minima. So, peaks that have only two indices have no local minima.

# File lib/mspire/peaklist.rb, line 233
def peak_boundaries(gt=0.0)
  in_peak = false
  prev_y = gt
  prev_prev_y = gt
  peak_inds = []
  self.each_with_index do |peak, index|
    curr_y = peak.y
    if curr_y > gt
      if !in_peak
        in_peak = true
        peak_inds << [index]
      else
        # if on_upslope
        if prev_y < curr_y
          # If we were previously on a downslope and we are now on an upslope
          # then the previous index is a local min
          # on_downslope(prev_previous_y, prev_y)
          if prev_prev_y > prev_y
            # We have found a local min
            peak_inds.last << (index - 1)
          end
        end # end if (upslope)
      end # end if !in_peak
    elsif in_peak
      peak_inds.last << (index - 1)
      in_peak = false
    end 
    prev_prev_y = prev_y
    prev_y = curr_y
  end
  # add the last one to the last peak if it is a boundary
  if self[-1].y > gt
    peak_inds.last << (self.size-1)
  end
  peak_inds
end
split(split_multipeaks_mthd=:zero) click to toggle source

returns an Array of peaklist objects. Splits run of 1 or more local minima into multiple peaklists. When a point is ‘shared’ between two adjacent hill-ish areas, the choice of how to resolve multi-hills (runs of data above zero) is one of:

:zero = only split on zeros
:share = give each peak its rightful portion of shared peaks, dividing the
          intensity based on the intensity of adjacent peaks
:greedy_y = give the point to the peak with highest point next to
             the point in question. tie goes lower.

Note that the peak surrounding a local_minima may be altered if using :share

assumes that a new peak can be made with an array containing the x value and the y value.

# File lib/mspire/peaklist.rb, line 339
def split(split_multipeaks_mthd=:zero)
  if split_multipeaks_mthd == :zero
    split_on_zeros
  else
    boundaries = peak_boundaries(0.0)
    no_lm_pklsts = []
    boundaries.each do |indices|
      peak = self[indices.first..indices.last]
      if indices.size == 2
        no_lm_pklsts << peak
      else # have local minima
        multipeak = Peaklist.new(peak)
        local_min_inds = indices[1..-2].map {|i| i-indices.first}
        peaklists = multipeak.split_contiguous(split_multipeaks_mthd, local_min_inds)
        no_lm_pklsts.push *peaklists
      end
    end
    #$stderr.puts "now #{no_lm_pklsts.size} peaks." if $VERBOSE
    no_lm_pklsts 
  end 
end
split_contiguous(methd=:greedy_y, local_min_indices=nil) click to toggle source

returns an array of Peaklist objects assumes that this is one connected list of peaks (i.e., no zeros/whitespace on the edges or internally)

  /\       
 /  \/\
/      \

if there are no local minima, just returns self inside the array

# File lib/mspire/peaklist.rb, line 287
def split_contiguous(methd=:greedy_y, local_min_indices=nil)
  local_min_indices ||= ((pb=peak_boundaries.first) && pb.shift && pb.pop && pb)

  if local_min_indices.size == 0
    self
  else
    peak_class = first.class
    prev_lm_i = 0  # <- don't worry, will be set to bumped to zero
    peaklists = [ self.class.new([self[0]]) ]
    local_min_indices.each do |lm_i|
      peaklists.last.push( *self[(prev_lm_i+1)..(lm_i-1)] )
      case methd
      when :greedy_y
        if self[lm_i-1].y >= self[lm_i+1].y
          peaklists.last << self[lm_i]
          peaklists << self.class.new
        else
          peaklists << self.class.new( [self[lm_i]] )
        end
      when :share
        # for each local min, two new peaks will be created, with
        # intensity shared between adjacent peaklists
        lm = self[lm_i]
        sum = self[lm_i-1].y + self[lm_i+1].y
        # push onto the last peaklist its portion of the local min
        peaklists.last << peak_class.new( [lm.x, lm.y * (self[lm_i-1].y.to_f/sum)] )
        # create a new peaklist that contains its portion of the local min
        peaklists << self.class.new( [peak_class.new([lm.x, lm.y * (self[lm_i+1].y.to_f/sum)])] )
      end
      prev_lm_i = lm_i
    end
    peaklists.last.push(*self[(prev_lm_i+1)...(self.size)] )
    peaklists
  end
end
split_on_zeros(given_peak_boundaries=nil) click to toggle source

returns an array of Peaklist objects

# File lib/mspire/peaklist.rb, line 271
def split_on_zeros(given_peak_boundaries=nil)
  pk_bounds = given_peak_boundaries || peak_boundaries(0.0)
  pk_bounds.map do |indices|
    self.class.new self[indices.first..indices.last]
  end
end
to_spectrum() click to toggle source

returns an Mspire::Spectrum object

# File lib/mspire/peaklist.rb, line 362
def to_spectrum
  Mspire::Spectrum.new(self.transpose)
end
weighted_x() click to toggle source

for spectral peaks, this is the weighted m/z

# File lib/mspire/peaklist.rb, line 27
def weighted_x
  tot_intensity = self.inject(0.0) {|sum,peak| sum + peak.y }
  _weighted_x = 0.0
  self.each do |peak|
    int = peak.y
    signal_by_sample_index[peak.sample_id] += int
    _weighted_x += (peak.first * (int/tot_intensity))
  end
  _weighted_x
end