class Object

Public Instance Methods

calculate_read_length(filename) click to toggle source
# File lib/trim_and_correct.rb, line 70
# Derives a read length from the first sequence line of a file.
#
# The file is scanned from the top; the first line that begins with one of
# the letters G, A, T or C (either case) is taken as the sequence line.
#
# NOTE(review): the result is one less than the number of characters on that
# line (after the line terminator has been removed). This offset is kept
# exactly as originally written - confirm that it is intentional.
#
# @param filename [String] path of the file to inspect
# @return [Integer] length of the first sequence line minus one
# @raise [ArgumentError] when no line starts with G, A, T or C
def calculate_read_length(filename)
  sequence_line = File.open(filename) do |reads|
    # find stops reading as soon as the first sequence line is seen
    reads.each_line.find { |line| line =~ /^[GATCgatc]/ }
  end

  # Previously this case surfaced as "undefined method `-' for nil".
  raise ArgumentError, "No sequence line found in #{filename}" if sequence_line.nil?

  sequence_line.chomp.size - 1
end
delete_file_with_dependencies(file, *dependencies) click to toggle source
# File lib/trim_and_correct.rb, line 63
# Deletes +file+, but only when the file itself and every listed dependency
# exist. If anything is missing nothing is deleted and no error is raised.
#
# @param file [String] path of the file to delete
# @param dependencies [Array<String>] paths that must also exist before the
#   deletion is allowed
# @return [Integer, nil] the result of File.delete when the file was removed,
#   otherwise nil
def delete_file_with_dependencies(file, *dependencies)
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  required_paths = dependencies + [file]
  File.delete(file) if required_paths.all? { |path| File.exist?(path) }
end
extract_file_prefixes_and_sample_name(sample_map_file, directory) click to toggle source
# File lib/fastq-factory.rb, line 4
# Reads a tab-separated sample map and returns it as a Hash.
#
# Each line of "#{directory}/#{sample_map_file}" is split on tabs; the first
# field becomes the key (file prefix) and the second field the value (sample
# name). Any further fields are ignored. Lines without a first field (for
# example blank lines) are skipped, and both "\n" and "\r\n" line endings are
# accepted.
#
# @param sample_map_file [String] name of the sample map file
# @param directory [String] directory that contains the sample map file
# @return [Hash{String => String, nil}] file prefix => sample name
def extract_file_prefixes_and_sample_name(sample_map_file, directory)
  sample_map = {}
  File.foreach("#{directory}/#{sample_map_file}") do |raw_line|
    # chomp strips "\n" as well as "\r\n", so names never end in "\r"
    file_prefix, sample_name = raw_line.chomp.split("\t")
    # a blank line splits into an empty array; do not record nil => nil
    next if file_prefix.nil?
    sample_map[file_prefix] = sample_name
  end
  sample_map
end
file_exists?(directory, *filenames) click to toggle source
# File lib/fastq-factory.rb, line 13
# Aborts the program unless at least one of +filenames+ exists inside
# +directory+.
#
# @param directory [String] directory in which to look
# @param filenames [Array<String>] candidate file names, relative to directory
# @return [nil] when at least one file was found; otherwise the process is
#   terminated through Kernel#abort with an explanatory message
def file_exists?(directory, *filenames)
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  at_least_one_file_found = filenames.any? { |filename| File.exist?("#{directory}/#{filename}") }
  abort("You specified a file(s): #{filenames.join(", ")}. At least one of these must exist! Please check your sample map file") unless at_least_one_file_found
end
find_executable(executable_name, directory = nil) click to toggle source
# File lib/fastq-factory.rb, line 21
# Locates an executable and returns its path.
#
# When +directory+ is given, only "#{directory}/#{executable_name}" is
# considered. Otherwise the lookup order is: the result of which(), then
# /usr/local/bin/<name>, then /usr/local/<name>/<name>.
#
# @param executable_name [String] name of the program to find
# @param directory [String, nil] optional directory to restrict the lookup to
# @return [String, nil] path of the executable, or nil when none was found
def find_executable(executable_name, directory = nil)
  unless directory.nil?
    explicit_location = "#{directory}/#{executable_name}"
    return File.executable?(explicit_location) ? explicit_location : nil
  end

  on_search_path = which(executable_name)
  return on_search_path if on_search_path

  fallback_locations = [
    "/usr/local/bin/#{executable_name}",
    "/usr/local/#{executable_name}/#{executable_name}"
  ]
  fallback_locations.find { |location| File.executable?(location) }
end
generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff) click to toggle source
# File lib/generate_quality_metrics.rb, line 6
# Gathers quality statistics for every sample and writes them, tab-separated,
# to "#{directory}/summary_stats.txt".
#
# Run-level statistics come from ResequencingRunStatistics.xml or, failing
# that, AssemblyRunStatistics.xml inside +directory+; when neither exists an
# empty ResequencingRunStats with one ResequencingSampleStats per sample is
# used. For each sample, FASTQ statistics are then computed for the raw
# forward/reverse reads and for the processed reads: the ".trimmed.cor" files
# when the forward one exists, otherwise the ".trimmed" files.
#
# @param sample_map [Hash] read-file prefix => sample name
# @param directory [String] directory holding the read files and statistics;
#   its last path component is written out as the run name, so it must
#   contain a "/"
# @param forward_reads_suffix [String] suffix (including extension) appended
#   to a prefix to name the forward read file
# @param reverse_reads_suffix [String] same, for the reverse read file
# @param quality_scale passed through to generate_quality_stats_for_read
# @param quality_cutoff passed through to generate_quality_stats_for_read
# @return whatever the last statement of the File.open block returns; callers
#   should treat this method as being run for its side effects
def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
  # File.exist? is used throughout because File.exists? was removed in Ruby 3.2.
  if File.exist?("#{directory}/ResequencingRunStatistics.xml")
    puts "Assessing quality from Miseq run stats file"
    run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values)
  elsif File.exist?("#{directory}/AssemblyRunStatistics.xml")
    puts "Assessing quality from Miseq run stats file"
    run_stats = parse_assembly_run_stats("#{directory}/AssemblyRunStatistics.xml", sample_map.values)
  else
    # No run statistics file available: start from empty per-sample records.
    run_stats = ResequencingRunStats.new
    run_stats.sample_stats = Hash.new
    sample_map.values.each do |sample_name|
      run_stats.sample_stats[sample_name] = ResequencingSampleStats.new
    end
  end

  # Insert ".trimmed" / ".trimmed.cor" in front of the final extension.
  forward_reads_trimmed_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2')
  reverse_reads_trimmed_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2')

  forward_reads_trimmed_corrected_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2')
  reverse_reads_trimmed_corrected_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2')

  sample_map.each do |read_file_prefix, sample_name|
    puts "Assesing quality for #{sample_name}"
    sample_stats = run_stats.sample_stats[sample_name]
    sample_stats.fastq_stats = Hash.new

    raw_forward = "#{directory}/#{read_file_prefix}#{forward_reads_suffix}"
    raw_reverse = "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}"
    sample_stats.fastq_stats["forward"] = generate_quality_stats_for_read(raw_forward, quality_scale, quality_cutoff)
    sample_stats.fastq_stats["reverse"] = generate_quality_stats_for_read(raw_reverse, quality_scale, quality_cutoff)

    # The presence of the corrected forward file decides for both directions.
    if File.exist?("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}")
      processed_forward = "#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}"
      processed_reverse = "#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}"
    else
      processed_forward = "#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}"
      processed_reverse = "#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}"
    end

    [
      ["forward-trim_corrected", processed_forward, raw_forward],
      ["reverse-trim_corrected", processed_reverse, raw_reverse]
    ].each do |direction, processed_file, raw_file|
      sample_stats.fastq_stats[direction] = generate_quality_stats_for_read(processed_file, quality_scale, quality_cutoff)
      sample_stats.fastq_stats[direction].percentage_compared_to_raw = percentage_compared_to_raw(processed_file, raw_file)
    end
  end

  # print out data; the block form closes the file even if a row raises
  File.open("#{directory}/summary_stats.txt", "w") do |output_file|
    # print headers
    # NOTE(review): the header says "qual falls below 30" while the rows print
    # position_where_quality_lt_20 - confirm which threshold is intended.
    output_file.puts "run name\tnumber of bases(Gb)\tnumber of clusters\tsample name\tdirection\tnumber of clusters\tnumber of forward reads aligned\tnumber of reverse reads aligned\tcoverage\tnumber of snps\tnumber of contigs\tmean contig size\tn50\tnumber of bases\tmean quality\tread base where qual falls below 30\tpercent reduction compared to raw"
    output_file.puts "#{directory.match(/.*\/(.+?)$/).captures.first}\t#{run_stats.number_of_bases}\t#{run_stats.number_of_clusters}"
    run_stats.sample_stats.keys.sort.each do |sample_name|
      sample_stats = run_stats.sample_stats[sample_name]
      # One summary row per sample; the columns depend on the statistics type.
      if sample_stats.class == Struct::ResequencingSampleStats
        output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t#{sample_stats.number_of_forward_reads_aligned}\t#{sample_stats.number_of_reverse_reads_aligned}\t#{sample_stats.coverage}\t#{sample_stats.number_of_snps}"
      elsif sample_stats.class == Struct::AssemblySampleStats
        output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t\t\t\t\t#{sample_stats.number_of_contigs}\t#{sample_stats.mean_contig_size}\t#{sample_stats.n50}\t#{sample_stats.number_of_bases}"
      end
      # Followed by one row per read direction / processing stage.
      ["forward", "reverse", "forward-trim_corrected", "reverse-trim_corrected"].each do |direction|
        fastq_stats = sample_stats.fastq_stats[direction]
        output_file.puts "\t\t\t\t#{direction}\t\t\t\t\t\t\t\t\t\t#{fastq_stats.mean_quality}\t#{fastq_stats.position_where_quality_lt_20}\t#{fastq_stats.percentage_compared_to_raw}"
      end
    end
  end
end
trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff) click to toggle source
# File lib/trim_and_correct.rb, line 1
# Quality-trims and error-corrects the paired FASTQ files of every sample,
# then removes intermediate files and renames the final results.
#
# The process changes its working directory to +directory+. Per sample it
#   1. runs the quality trimmer on the forward and reverse reads (unless
#      trimmed or corrected output is already present) and then the
#      /tmp/fastq-remove-orphans.pl script on the trimmed pair,
#   2. runs quake on the "paired_" trimmed files,
#   3. runs the orphan-removal script on the "paired_" corrected files,
#   4. deletes intermediates and renames the surviving files so they end in
#      ".trimmed.cor.<ext>" (or ".trimmed.<ext>" when no corrected file exists).
# The "paired_" / "orphaned_" file name prefixes are presumably produced by
# the orphan-removal script - confirm against that script.
#
# NOTE(review): file names and tool paths are interpolated into shell command
# lines unescaped; names containing spaces or shell metacharacters will break.
#
# @param sample_map [Hash] sample file prefix => sample name
# @param directory [String] directory containing the read files; may be
#   relative to the current working directory
# @param forward_reads_suffix [String] suffix (without extension) of forward read files
# @param forward_reads_file_extension [String] extension of forward read files, without the dot
# @param reverse_reads_suffix [String] suffix (without extension) of reverse read files
# @param reverse_reads_file_extension [String] extension of reverse read files, without the dot
# @param quality_scale passed to the trimmer as -Q and to quake as -q
# @param fastq_quality_trimmer_path [String] command used to run the quality trimmer
# @param quake_path [String] command used to run quake
# @param trim_point_fraction [Numeric] multiplied by the read length to obtain
#   the value passed to the trimmer as -l
# @param trim_quality_cutoff passed to the trimmer as -t
# @return [Hash] sample_map (the value of the final each)
def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff)
  # Resolve the directory before changing into it; otherwise a relative
  # directory would make every "#{directory}/..." path below point to the
  # wrong place once the working directory has changed.
  directory = File.expand_path(directory)
  Dir.chdir(directory)

  # File.exist? is used throughout because File.exists? was removed in Ruby 3.2.

  # trimming
  sample_map.each do |sample_file_prefix, sample_name|
    forward_raw = "#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}"
    reverse_raw = "#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension}"
    forward_trimmed = "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}"
    reverse_trimmed = "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}"
    forward_corrected = "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}"

    next if File.exist?(forward_corrected) || File.exist?("paired_#{forward_trimmed}")
    puts "Trimming files for #{sample_name}"
    # determine read length
    read_length = calculate_read_length("#{directory}/#{forward_raw}")
    trim_point = (trim_point_fraction * read_length).to_i
    unless File.exist?(forward_trimmed)
      `#{fastq_quality_trimmer_path} -i #{directory}/#{forward_raw} -o #{directory}/#{forward_trimmed} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
      `#{fastq_quality_trimmer_path} -i #{directory}/#{reverse_raw} -o #{directory}/#{reverse_trimmed} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
    end
    `perl /tmp/fastq-remove-orphans.pl -1 #{forward_trimmed} -2 #{reverse_trimmed}`
  end

  # quake correction
  sample_map.each do |sample_file_prefix, sample_name|
    forward_trimmed = "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}"
    reverse_trimmed = "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}"
    forward_corrected = "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}"
    # The reverse file carries the reverse extension (the forward extension
    # was used here before, which only matched when both were identical).
    reverse_corrected = "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}"

    next if File.exist?(forward_corrected) || File.exist?("paired_#{reverse_corrected}")
    puts "Error correcting files for #{sample_name}"
    # write file for quake; it is rewritten for every sample
    File.open("quake_file_list.txt", "w") do |output_file|
      output_file.puts "paired_#{forward_trimmed} paired_#{reverse_trimmed}"
    end
    # run quake
    `#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}`
  end

  sample_map.each do |sample_file_prefix, sample_name|
    forward_corrected = "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}"
    reverse_corrected = "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}"

    next if File.exist?(forward_corrected)
    # remove orphans
    `perl /tmp/fastq-remove-orphans.pl -1 paired_#{forward_corrected} -2 paired_#{reverse_corrected}`
  end

  # cleanup and rename files
  sample_map.each do |sample_file_prefix, sample_name|
    forward_base = "#{sample_file_prefix}#{forward_reads_suffix}"
    reverse_base = "#{sample_file_prefix}#{reverse_reads_suffix}"
    forward_trimmed = "#{forward_base}.trimmed.#{forward_reads_file_extension}"
    reverse_trimmed = "#{reverse_base}.trimmed.#{reverse_reads_file_extension}"
    forward_corrected = "#{forward_base}.trimmed.cor.#{forward_reads_file_extension}"
    reverse_corrected = "#{reverse_base}.trimmed.cor.#{reverse_reads_file_extension}"

    # The trimmed files are only removed once their corrected counterparts exist.
    delete_file_with_dependencies(forward_trimmed, "paired_#{forward_corrected}")
    delete_file_with_dependencies(reverse_trimmed, "paired_#{reverse_corrected}")
    delete_file_with_dependencies("orphaned_#{forward_trimmed}")
    delete_file_with_dependencies("orphaned_#{reverse_trimmed}")

    if File.exist?("paired_#{forward_corrected}")
      # Correction succeeded: drop every intermediate, keep the paired_paired_ results.
      [
        "paired_#{forward_trimmed}",
        "paired_#{reverse_trimmed}",
        "error_model.paired_#{forward_base}.trimmed.txt",
        "error_model.paired_#{reverse_base}.trimmed.txt",
        "paired_#{forward_base}.trimmed.stats.txt",
        "paired_#{forward_base}.trimmed.cor_single.#{forward_reads_file_extension}",
        "paired_#{reverse_base}.trimmed.stats.txt",
        # reverse extension here as well (previously the forward extension)
        "paired_#{reverse_base}.trimmed.cor_single.#{reverse_reads_file_extension}",
        "paired_#{forward_corrected}",
        "paired_#{reverse_corrected}",
        "orphaned_paired_#{forward_corrected}",
        "orphaned_paired_#{reverse_corrected}"
      ].each do |intermediate_file|
        delete_file_with_dependencies(intermediate_file)
      end
      File.rename("paired_paired_#{forward_corrected}", forward_corrected) if File.exist?("paired_paired_#{forward_corrected}")
      File.rename("paired_paired_#{reverse_corrected}", reverse_corrected) if File.exist?("paired_paired_#{reverse_corrected}")
    else
      # No corrected output: the paired trimmed files become the final result.
      File.rename("paired_#{forward_trimmed}", forward_trimmed) if File.exist?("paired_#{forward_trimmed}")
      File.rename("paired_#{reverse_trimmed}", reverse_trimmed) if File.exist?("paired_#{reverse_trimmed}")
    end
  end
end
which(cmd) click to toggle source

Method to return the path to a command if it is on the PATH (also works on Windows). @param String cmd the name of the command

# File lib/fastq-factory.rb, line 43
# Searches the directories listed in ENV['PATH'] for an executable named
# +cmd+. When ENV['PATHEXT'] is set, each of its ';'-separated extensions is
# appended to the name in turn; otherwise the bare name is used.
#
# @param cmd [String] name of the command
# @return [String, nil] "<dir>/<cmd><ext>" for the first executable match in
#   PATH order, or nil when there is none
def which(cmd)
  suffixes = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
  search_dirs = ENV['PATH'].split(File::PATH_SEPARATOR)

  # Directory order takes precedence over extension order.
  candidates = search_dirs.flat_map do |dir|
    suffixes.map { |suffix| "#{dir}/#{cmd}#{suffix}" }
  end
  candidates.find { |candidate| File.executable?(candidate) }
end
write_out_fastq_trim_script() click to toggle source
# File lib/fastq-factory.rb, line 54
# Copies fastq-remove-orphans.pl from the directory of this source file into
# /tmp/ by shelling out to `cp`.
#
# @return [Boolean, nil] the result of Kernel#system for the cp command
def write_out_fastq_trim_script
  bundled_script = "#{File.dirname(__FILE__)}/fastq-remove-orphans.pl"
  system("cp #{bundled_script} /tmp/")
end