class Mspire::Mzml
Reading an mzml file:
# Open the file, walk every spectrum, then walk every chromatogram.
Mspire::Mzml.open("somefile.mzML") do |mzml|
  mzml.each do |spectrum|
    scan = spectrum.scan
    spectrum.mzs          # array of m/zs
    spectrum.intensities  # array of intensities

    # iterate over (m/z, intensity) pairs
    spectrum.peaks do |mz,intensity|
      puts "mz: #{mz} intensity: #{intensity}"
    end

    spectrum.params # list all the params associated with an object

    # fetch_by_acc gives true if the key exists with no value, the value
    # if one is present, or false
    puts "this is a profile spectrum!" if spectrum.fetch_by_acc('MS:1000128')

    if spectrum.ms_level == 2
      low_mz = spectrum.scan_list.first.scan_windows.first.to_i
      puts "begin scan at #{low_mz} m/z"
    end
  end

  mzml.each_chromatogram do |chrm|
    chrm.times
    chrm.intensities
  end
end
Note that the mzml object supports random spectrum access (even if the mzml was not indexed):
mzml[22] # retrieve spectrum at index 22
Writing an mzml file from scratch:
# First spectrum: an MS1 profile spectrum with one scan.
spec1 = Mspire::Mzml::Spectrum.new('scan=1') do |spec|
  # profile and ms_level 1
  spec.describe_many!(['MS:1000128', ['MS:1000511', 1]])
  spec.data_arrays = [
    Mspire::Mzml::DataArray[1,2,3].describe!('MS:1000514'),
    Mspire::Mzml::DataArray[4,5,6].describe!('MS:1000515')
  ]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    scan = Mspire::Mzml::Scan.new do |sc|
      # retention time of 40 seconds
      sc.describe! 'MS:1000016', 40.0, 'UO:0000010'
    end
    sl << scan
  end
end

# Second spectrum: an MS2 centroid spectrum whose precursor is spec1.
spec2 = Mspire::Mzml::Spectrum.new('scan=2') do |spec|
  # centroid, ms_level 2, MSn spectrum,
  spec.describe_many!(['MS:1000127', ['MS:1000511', 2], "MS:1000580"])
  spec.data_arrays = [
    Mspire::Mzml::DataArray[1,2,3.5].describe!('MS:1000514'),
    Mspire::Mzml::DataArray[5,6,5].describe!('MS:1000515')
  ]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    scan = Mspire::Mzml::Scan.new do |sc|
      # retention time of 45 seconds
      sc.describe! 'MS:1000016', 45.0, 'UO:0000010'
    end
    sl << scan
  end

  precursor = Mspire::Mzml::Precursor.new( spec1.id )
  si = Mspire::Mzml::SelectedIon.new
  # the selected ion m/z:
  si.describe! "MS:1000744", 2.0
  # the selected ion charge state
  si.describe! "MS:1000041", 2
  # the selected ion intensity
  si.describe! "MS:1000042", 5
  precursor.selected_ions = [si]
  spec.precursors = [precursor]
end

# Assemble the document: header sections first, then the run holding both spectra.
mzml = Mspire::Mzml.new do |doc|
  doc.id = 'ms1_and_ms2'
  doc.cvs = Mspire::Mzml::CV::DEFAULT_CVS
  doc.file_description = Mspire::Mzml::FileDescription.new do |fd|
    fd.file_content = Mspire::Mzml::FileContent.new
    fd.source_files << Mspire::Mzml::SourceFile.new
  end

  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
  doc.instrument_configurations << default_instrument_config

  software = Mspire::Mzml::Software.new
  doc.software_list << software

  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
  doc.data_processing_list << default_data_processing

  doc.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
    spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, [spec1, spec2])
    run.spectrum_list = spectrum_list
  end
end

mzml.write("writtenxml.mzML")
Attributes
(optional) e.g. a PRIDE accession number
(required) an array of Mspire::Mzml::CV objects
(required) an array of Mspire::Mzml::DataProcessing objects
xml file encoding
(required) an Mspire::Mzml::FileDescription
(optional) an id for accessing from external files
the Mspire::Mzml::IndexList object associated with the file (only expected when reading mzml files at the moment)
(required) an array of Mspire::Mzml::InstrumentConfiguration objects
the io object of the mzml file
(optional) an array of CV::ReferenceableParamGroup objects
(required) an Mspire::Mzml::Run object
(optional) an array of Mspire::Mzml::Sample objects
(optional) an array of Mspire::Mzml::ScanSettings objects
(required) an array of Mspire::Mzml::Software objects
(required) the Mzml document version
Public Class Methods
# File lib/mspire/mzml.rb, line 121
# Opens +filename+ with +open+ and forwards the given block to +each+ on
# the yielded object. Without a block, returns an enumerator for this
# same call instead of opening anything.
def foreach(filename, &block)
  if block
    open(filename) { |mzml| mzml.each(&block) }
  else
    to_enum(__method__, filename)
  end
end
arg must be an IO object for automatic index and header parsing to occur. If arg is a hash, then attributes are set from it. In addition (or alternatively), a block may be given; it is called with self so the object can be set up.
io must respond_to?(:size), giving the size of the io object in bytes which allows seeking. get_index_list is called to get or create the index list.
# File lib/mspire/mzml.rb, line 202
# Builds a new object. Five list attributes are first set to empty arrays.
# Then, depending on +arg+:
# * an IO is handed to +set_from_xml_io!+
# * a Hash has each key/value applied through the matching "key=" writer
# * anything else (including nil) is ignored
# Finally, if a block was given, it is called with the new object.
def initialize(arg=nil, &block)
  %w(cvs software_list instrument_configurations samples data_processing_list).each do |list_attr|
    send("#{list_attr}=", [])
  end

  if arg.is_a?(IO)
    set_from_xml_io!(arg)
  elsif arg.is_a?(Hash)
    arg.each_pair { |name, value| send("#{name}=", value) }
  end

  yield self if block
end
read-only right now
# File lib/mspire/mzml.rb, line 115
# Opens +filename+ for reading, builds a new object from the io, and
# passes that object to the block. The file is closed when the block
# finishes (File.open block form), so the object should only be used
# inside the block.
#
# Returns the value of the block.
# Raises ArgumentError if no block is given. This is checked before the
# file is opened, so nothing is opened only to fail on a nil block.
def open(filename, &block)
  raise ArgumentError, "a block is required" unless block

  File.open(filename) { |io| block.call(new(io)) }
end
Public Instance Methods
Uses scan numbers if use_scan_nums is true (these typically start at one); otherwise uses index numbers (which start at zero).
# File lib/mspire/mzml/plms1.rb, line 8
# Collects every spectrum whose ms_level is 1 and returns an
# Mspire::Plms1 built from three parallel arrays: identifying numbers,
# retention times, and the spectra themselves.
#
# use_scan_nums:: when true, a spectrum's number is looked up by its
#                 position in the keys of the spectrum index's
#                 scan-to-index map; when false, the position itself is used.
def to_plms1(use_scan_nums=true)
  spectrum_index = index_list[:spectrum]
  # NOTE(review): assumes the map's keys are ordered like the spectra - confirm
  scan_nums = use_scan_nums ? spectrum_index.create_scan_to_index.keys : nil

  ids = []
  times = []
  ms1_spectra = []

  each_with_index do |spec, position|
    if spec.ms_level == 1
      ids << (use_scan_nums ? scan_nums[position] : position)
      ms1_spectra << spec
      times << spec.retention_time
    end
  end

  Mspire::Plms1.new(ids, times, ms1_spectra)
end
Because mzml files are often very large, we try to avoid storing the entire object tree in memory before writing.
Takes a filename and uses Builder to write to it; if no filename is given, returns the XML as a string.
# File lib/mspire/mzml.rb, line 267
# Serializes this object as mzML using Builder::XmlMarkup.
#
# filename:: path to write to; when nil the XML is built in a StringIO.
#
# Returns self when a filename was given, otherwise the XML string.
# Raises RuntimeError if @cvs is empty. This is checked before the output
# file is opened, so a failed call does not create or truncate it.
def to_xml(filename=nil)
  # TODO: support indexed mzml files
  raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0

  io = filename ? File.open(filename, 'w') : StringIO.new
  begin
    xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
    xml.instruct!

    mzml_atts = Default::NAMESPACE.dup
    mzml_atts[:version] = @version || Default::VERSION
    mzml_atts[:accession] = @accession if @accession
    mzml_atts[:id] = @id if @id

    xml.mzML(mzml_atts) do |mzml_n|
      # the 'if' statements capture whether or not the list is required:
      # required sections are always written, optional ones only when non-empty
      Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
      @file_description.to_xml(mzml_n)
      # guarded on non-empty like the other optional lists below
      if @referenceable_param_groups && @referenceable_param_groups.size > 0
        Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
      end
      if @samples && @samples.size > 0
        Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
      end
      Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
      if @scan_settings_list && @scan_settings_list.size > 0
        Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
      end
      Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
      Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
      @run.to_xml(mzml_n)
    end
  ensure
    # close the file even when serialization raises; the StringIO is left
    # open because its contents are read below
    io.close if filename
  end

  filename ? self : io.string
end