module MiseqRunStats

Constants

AssemblyRunStats
AssemblySampleStats
ResequencingRunStats
ResequencingSampleStats

Public Instance Methods

parse_assembly_run_stats(xml_file, original_sample_names = nil)
# File lib/miseq_run_stats.rb, line 31
# Parses an assembly run-statistics XML file into an AssemblyRunStats object.
#
# Run-level totals are read from every //RunStats element (if several are
# present the last one wins). Per-sample contig figures come from the
# //AssemblyStatistics elements, which carry no sample name and are therefore
# paired with the //SampleStatistics elements by document order.
#
# @param xml_file [String] path of the XML file to read
# @param original_sample_names [Array, nil] when given, each reported sample
#   name is replaced by the first entry it contains (literal, case-sensitive
#   substring match); samples that contain none of the entries are skipped
# @return [AssemblyRunStats] run totals plus a sample_stats Hash keyed by
#   sample name, each value being an AssemblySampleStats
def parse_assembly_run_stats(xml_file, original_sample_names = nil)
  xml = Nokogiri::XML(File.read(xml_file))
  assembly_run_stats = AssemblyRunStats.new

  xml.search('//RunStats').each do |run_stats|
    # Yield is scaled down by 10^9 (presumably bases -> gigabases; confirm with callers).
    assembly_run_stats.number_of_bases = run_stats.search('YieldInBasesPF').text.to_f / 1_000_000_000
    assembly_run_stats.number_of_clusters = run_stats.search('NumberOfClustersPF').text.to_i
  end

  # Collect the un-named contig data in document order.
  assembly_stats = xml.search('//AssemblyStatistics').map do |assembly_sample_stats|
    {
      :number_of_contigs => assembly_sample_stats.search('NumberOfContigs').text.to_i,
      # The mean is truncated to a whole number.
      :mean_contig_size => assembly_sample_stats.search('MeanContigLength').text.to_f.to_i,
      :n50 => assembly_sample_stats.search('N50').text.to_i,
      :number_of_bases => assembly_sample_stats.search('BaseCount').text.to_i
    }
  end

  assembly_run_stats.sample_stats = {}
  xml.search('//SampleStatistics').each do |sample_stats|
    # Consume this sample's positional contig data BEFORE deciding whether to
    # skip it; otherwise a skipped sample would shift every later sample onto
    # the wrong contig figures. A missing entry leaves the contig fields nil
    # instead of raising NoMethodError.
    contig_stats = assembly_stats.shift || {}

    sample_name = sample_stats.search('SampleName').text
    unless original_sample_names.nil?
      # Map the reported name back to the original sample name. The match is a
      # literal substring test so names containing regex metacharacters
      # (".", "+", "(" ...) behave as plain text.
      sample_name = original_sample_names.find { |original_sample_name| sample_name.include?(original_sample_name.to_s) }
    end
    next if sample_name.nil?

    sample = AssemblySampleStats.new
    sample.sample_name = sample_name
    # Stored as the raw element text (a String), unlike the run-level count.
    sample.number_of_clusters = sample_stats.search('NumberOfClustersPF').text
    sample.number_of_contigs = contig_stats[:number_of_contigs]
    sample.mean_contig_size = contig_stats[:mean_contig_size]
    sample.n50 = contig_stats[:n50]
    sample.number_of_bases = contig_stats[:number_of_bases]
    assembly_run_stats.sample_stats[sample_name] = sample
  end

  assembly_run_stats
end
parse_resequencing_run_stats(xml_file, original_sample_names = nil)
# File lib/miseq_run_stats.rb, line 7
# Parses a resequencing run-statistics XML file into a ResequencingRunStats
# object.
#
# Run-level totals are read from every //RunStats element (if several are
# present the last one wins); per-sample figures are read from each
# //SummarizedSampleStatisics element.
#
# @param xml_file [String] path of the XML file to read
# @param original_sample_names [Array, nil] when given, each reported sample
#   name is replaced by the first entry it contains (literal, case-sensitive
#   substring match); samples that contain none of the entries are skipped
# @return [ResequencingRunStats] run totals plus a sample_stats Hash keyed by
#   sample name, each value being a ResequencingSampleStats whose fields hold
#   the raw element text (Strings)
def parse_resequencing_run_stats(xml_file, original_sample_names = nil)
  xml = Nokogiri::XML(File.read(xml_file))
  resequencing_run_stats = ResequencingRunStats.new

  xml.search('//RunStats').each do |run_stats|
    # Yield is scaled down by 10^9 (presumably bases -> gigabases; confirm with callers).
    resequencing_run_stats.number_of_bases = run_stats.search('YieldInBasesPF').text.to_f / 1_000_000_000
    resequencing_run_stats.number_of_clusters = run_stats.search('NumberOfClustersPF').text.to_i
  end

  resequencing_run_stats.sample_stats = {}
  # NOTE(review): the element name is spelled "Statisics" in this query;
  # confirm it matches the XML emitted by the instrument software before
  # "correcting" it.
  xml.search('//SummarizedSampleStatisics').each do |summarised_samples_stats|
    sample_name = summarised_samples_stats.search('SampleName').text
    unless original_sample_names.nil?
      # Map the reported name back to the original sample name. The match is a
      # literal substring test so names containing regex metacharacters
      # (".", "+", "(" ...) behave as plain text.
      sample_name = original_sample_names.find { |original_sample_name| sample_name.include?(original_sample_name.to_s) }
    end
    # Skip samples that are not in the supplied list instead of filing them
    # all under a nil key (where each would overwrite the previous one).
    next if sample_name.nil?

    sample = ResequencingSampleStats.new
    sample.sample_name = sample_name
    sample.number_of_clusters = summarised_samples_stats.search('NumberOfClustersPF').text
    sample.number_of_forward_reads_aligned = summarised_samples_stats.search('ClustersAlignedR1').text
    sample.number_of_reverse_reads_aligned = summarised_samples_stats.search('ClustersAlignedR2').text
    sample.coverage = summarised_samples_stats.search('WeightedCoverage').text
    sample.number_of_snps = summarised_samples_stats.search('NumberHomozygousSNPs').text
    resequencing_run_stats.sample_stats[sample_name] = sample
  end

  resequencing_run_stats
end