class Mspire::Mzml

Reading an mzml file:

# Open the file and hand the parsed mzml object to the block.
Mspire::Mzml.open("somefile.mzML") do |mzml|
  # walk through every spectrum in the file
  mzml.each do |spectrum|
    scan = spectrum.scan
    spectrum.mzs                  # array of m/zs
    spectrum.intensities          # array of intensities
    # iterate over the spectrum as (m/z, intensity) pairs
    spectrum.peaks do |mz,intensity|
      puts "mz: #{mz} intensity: #{intensity}" 
    end

    spectrum.params  # list all the params associated with an object

    # true if key exists and no value, the value if present, or false
    if spectrum.fetch_by_acc('MS:1000128')
      puts "this is a profile spectrum!"
    end

    # only MS2 spectra are inspected for their first scan window
    if spectrum.ms_level == 2
      low_mz = spectrum.scan_list.first.scan_windows.first.to_i
      puts "begin scan at #{low_mz} m/z"
    end
  end

  # chromatograms are iterated separately from spectra
  mzml.each_chromatogram do |chrm|
    chrm.times
    chrm.intensities
  end
end

Note that the mzml object supports random spectrum access (even if the mzml was not indexed):

mzml[22]  # retrieve the spectrum at index 22 (index numbers start at zero)

Writing an mzml file from scratch:

# Build an MS1 profile spectrum with the id 'scan=1'.
spec1 = Mspire::Mzml::Spectrum.new('scan=1') do |spec|
  # profile and ms_level 1
  spec.describe_many!(['MS:1000128', ['MS:1000511', 1]])
  # two parallel data arrays (MS:1000514 = m/z array, MS:1000515 = intensity
  # array in the PSI-MS controlled vocabulary)
  spec.data_arrays = [
    Mspire::Mzml::DataArray[1,2,3].describe!('MS:1000514'),
    Mspire::Mzml::DataArray[4,5,6].describe!('MS:1000515')
  ]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    # the block parameter gets its own name so it does not shadow the local
    scan = Mspire::Mzml::Scan.new do |new_scan|
      # retention time of 40 seconds
      new_scan.describe! 'MS:1000016', 40.0, 'UO:0000010'
    end
    sl << scan
  end
end

# Build an MS2 centroid spectrum with the id 'scan=2' whose precursor is spec1.
spec2 = Mspire::Mzml::Spectrum.new('scan=2') do |spec|
  # centroid,  ms_level 2, MSn spectrum,
  spec.describe_many!(['MS:1000127', ['MS:1000511', 2], "MS:1000580"])
  # two parallel data arrays (MS:1000514 = m/z array, MS:1000515 = intensity
  # array in the PSI-MS controlled vocabulary)
  spec.data_arrays = [
    Mspire::Mzml::DataArray[1,2,3.5].describe!('MS:1000514'),
    Mspire::Mzml::DataArray[5,6,5].describe!('MS:1000515')
  ]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    # the block parameter gets its own name so it does not shadow the local
    scan = Mspire::Mzml::Scan.new do |new_scan|
      # retention time of 45 seconds
      new_scan.describe! 'MS:1000016', 45.0, 'UO:0000010'
    end
    sl << scan
  end
  # link this spectrum back to the spectrum it was fragmented from
  precursor = Mspire::Mzml::Precursor.new( spec1.id )
  si = Mspire::Mzml::SelectedIon.new
  # the selected ion m/z:
  si.describe! "MS:1000744", 2.0
  # the selected ion charge state
  si.describe! "MS:1000041", 2
  # the selected ion intensity
  si.describe! "MS:1000042", 5
  precursor.selected_ions = [si]
  spec.precursors = [precursor]
end

# Assemble the document: the block receives the new Mzml object so its
# attributes can be filled in before it is written.
mzml = Mspire::Mzml.new do |mzml|
  mzml.id = 'ms1_and_ms2'
  # cvs is required; to_xml raises if this list is empty
  mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
  mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
    fd.file_content = Mspire::Mzml::FileContent.new
    fd.source_files << Mspire::Mzml::SourceFile.new
  end
  # the same instrument configuration object is listed on the document and
  # passed to the run below
  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
  mzml.instrument_configurations << default_instrument_config
  software = Mspire::Mzml::Software.new
  mzml.software_list << software
  # likewise, the data processing object is listed here and handed to the
  # spectrum list
  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
  mzml.data_processing_list << default_data_processing
  mzml.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
    spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, [spec1, spec2])
    run.spectrum_list = spectrum_list
  end
end

# write is an alias for to_xml; with a filename it writes the file
mzml.write("writtenxml.mzML")

Attributes

accession[RW]

(optional) e.g. a PRIDE accession number

cvs[RW]

(required) an array of Mspire::Mzml::CV objects

data_processing_list[RW]

(required) an array of Mspire::Mzml::DataProcessing objects

encoding[RW]

xml file encoding

file_description[RW]
id[RW]

(optional) an id for accessing from external files

index_list[RW]

Mspire::Mzml::IndexList object associated with the file (only expected when reading mzml files at the moment)

instrument_configurations[RW]

(required) an array of Mspire::Mzml::InstrumentConfiguration objects

io[RW]

the io object of the mzml file

referenceable_param_groups[RW]

(optional) an array of CV::ReferenceableParamGroup objects

run[RW]

(required) an Mspire::Mzml::Run object

samples[RW]

(optional) an array of Mspire::Mzml::Sample objects

scan_settings_list[RW]

(optional) an array of Mspire::Mzml::ScanSettings objects

software_list[RW]

(required) an array of Mspire::Mzml::Software objects

version[RW]

(required) the Mzml document version

Public Class Methods

foreach(filename, &block) click to toggle source
# File lib/mspire/mzml.rb, line 121
# Opens filename and passes each element yielded by the opened object's
# #each to the given block.
#
# Without a block, returns an enumerator over the same call instead.
def foreach(filename, &block)
  if block
    open(filename) { |mzml| mzml.each(&block) }
  else
    to_enum(__method__, filename)
  end
end
new(arg=nil, &block) click to toggle source

arg must be an IO object for automatic index and header parsing to occur. If arg is a hash, then attributes are set. In addition (or alternatively), a block may be given; it is called with self to set up the object.

io must respond_to?(:size), giving the size of the io object in bytes which allows seeking. get_index_list is called to get or create the index list.

# File lib/mspire/mzml.rb, line 202
# Sets up a new object.
#
# The list attributes below always start out as empty arrays. An IO
# argument is then parsed via set_from_xml_io!, while a Hash argument is
# treated as attribute-name => value pairs and applied through the
# matching setters. Finally, a given block is called with self.
def initialize(arg=nil, &block)
  list_setters = [
    :cvs=, :software_list=, :instrument_configurations=,
    :samples=, :data_processing_list=
  ]
  list_setters.each { |setter| send(setter, []) }

  if arg.is_a?(IO)
    set_from_xml_io!(arg)
  elsif arg.is_a?(Hash)
    arg.each_pair { |name, value| send("#{name}=", value) }
  end

  block.call(self) if block
end
open(filename, &block) click to toggle source

read-only right now

# File lib/mspire/mzml.rb, line 115
# Opens filename for reading, builds a new object from the open file
# handle and passes that object to the block. The file is closed when the
# block returns; the block's value is returned.
def open(filename, &block)
  File.open(filename) do |file|
    mzml = self.new(file)
    block.call(mzml)
  end
end

Public Instance Methods

to_plms1(use_scan_nums=true) click to toggle source

will use scan numbers if use_scan_nums is true (typically start with one), otherwise it will use index numbers (starts with zero)

# File lib/mspire/mzml/plms1.rb, line 8
# Collects every MS1 spectrum into an Mspire::Plms1 object.
#
# use_scan_nums - when true, each spectrum is labelled with the entry of the
#                 spectrum index's scan-to-index keys at the spectrum's
#                 position; when false, with its position in the file.
#
# Returns Mspire::Plms1.new(nums, rts, spectra), three parallel arrays.
def to_plms1(use_scan_nums=true)
  # Only consult the index list when scan numbers are requested, so that
  # index-based numbering also works when no index list is present.
  # NOTE(review): assumes the keys of create_scan_to_index are ordered by
  # spectrum index -- confirm against the index implementation.
  scan_nums = index_list[:spectrum].create_scan_to_index.keys if use_scan_nums

  nums = []
  rts = []
  spectra = []

  each_with_index do |spec, index|
    next unless spec.ms_level == 1
    nums << (use_scan_nums ? scan_nums[index] : index)
    spectra << spec
    rts << spec.retention_time
  end

  Mspire::Plms1.new(nums, rts, spectra)
end
to_xml(filename=nil) click to toggle source

Because mzml files are often very large, we try to avoid storing the entire object tree in memory before writing.

Takes a filename and uses builder to write to it, returning self. If no filename is given, returns the XML as a string.

# File lib/mspire/mzml.rb, line 267
# Serializes the document as mzML through Builder::XmlMarkup.
#
# filename - path of the file to write; when nil the XML is built in a
#            StringIO instead.
#
# Returns self when a filename is given, otherwise the XML as a String.
# Raises RuntimeError when @cvs is empty.
def to_xml(filename=nil)
  # TODO: support indexed mzml files

  # Validate before opening the target so that a failure cannot leave a
  # truncated, half-written file behind.
  raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0

  io = filename ? File.open(filename, 'w') : StringIO.new
  begin
    xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
    xml.instruct!

    # start from a copy so the shared default namespace hash is not mutated
    mzml_atts = Default::NAMESPACE.dup
    mzml_atts[:version] = @version || Default::VERSION
    mzml_atts[:accession] = @accession if @accession
    mzml_atts[:id] = @id if @id

    xml.mzML(mzml_atts) do |mzml_n|
      # the 'if' guards mark the optional lists; everything else is always written
      Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
      @file_description.to_xml(mzml_n)
      if @referenceable_param_groups
        Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
      end
      if @samples && @samples.size > 0
        Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
      end
      Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
      if @scan_settings_list && @scan_settings_list.size > 0
        Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
      end
      Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
      Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
      @run.to_xml(mzml_n)
    end
  ensure
    # close the file even when serialization raises part-way through;
    # the StringIO stays open so its contents can be returned below
    io.close if filename
  end

  filename ? self : io.string
end
Also aliased as: write
write(filename=nil)
Alias for: to_xml