class Object
Public Instance Methods
calculate_read_length(filename)
click to toggle source
# File lib/trim_and_correct.rb, line 70 def calculate_read_length(filename) read_length = nil File.open(filename) do |f| f.each do |line| line.chomp! if line =~ /^[GATCgatc]/ read_length = line.size break end end end return read_length - 1 end
delete_file_with_dependencies(file, *dependencies)
click to toggle source
# File lib/trim_and_correct.rb, line 63 def delete_file_with_dependencies(file, *dependencies) dependencies << file if dependencies.all?{|dependency| File.exists?(dependency)} && File.exists?(file) File.delete(file) end end
extract_file_prefixes_and_sample_name(sample_map_file, directory)
click to toggle source
# File lib/fastq-factory.rb, line 4 def extract_file_prefixes_and_sample_name(sample_map_file, directory) sample_map = Hash.new File.read("#{directory}/#{sample_map_file}").split("\n").each do |sample_map_line| file_prefix, sample_name = sample_map_line.split("\t") sample_map[file_prefix] = sample_name end return sample_map end
file_exists?(directory, *filenames)
click to toggle source
# File lib/fastq-factory.rb, line 13 def file_exists?(directory, *filenames) at_least_one_file_found = false filenames.each do |filename| at_least_one_file_found = true if File.exists?("#{directory}/#{filename}") end abort("You specified a file(s): #{filenames.join(", ")}. At least one of these must exist! Please check your sample map file") unless at_least_one_file_found end
find_executable(executable_name, directory = nil)
click to toggle source
# File lib/fastq-factory.rb, line 21 def find_executable(executable_name, directory = nil) if directory.nil? if which(executable_name) return which(executable_name) elsif File.executable?("/usr/local/bin/#{executable_name}") return "/usr/local/bin/#{executable_name}" elsif File.executable?("/usr/local/#{executable_name}/#{executable_name}") return "/usr/local/#{executable_name}/#{executable_name}" else return nil end else if File.executable?("#{directory}/#{executable_name}") return "#{directory}/#{executable_name}" else return nil end end end
generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
click to toggle source
# File lib/generate_quality_metrics.rb, line 6 def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff) if File.exists?("#{directory}/ResequencingRunStatistics.xml") puts "Assessing quality from Miseq run stats file" run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values) elsif File.exists?("#{directory}/AssemblyRunStatistics.xml") puts "Assessing quality from Miseq run stats file" run_stats = parse_assembly_run_stats("#{directory}/AssemblyRunStatistics.xml", sample_map.values) else run_stats = ResequencingRunStats.new run_stats.sample_stats = Hash.new sample_map.values.each do |sample_name| run_stats.sample_stats[sample_name] = ResequencingSampleStats.new end end forward_reads_trimmed_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2') reverse_reads_trimmed_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2') forward_reads_trimmed_corrected_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2') reverse_reads_trimmed_corrected_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2') sample_map.each do |read_file_prefix, sample_name| puts "Assesing quality for #{sample_name}" run_stats.sample_stats[sample_name].fastq_stats = Hash.new run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff) if File.exists?("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}") run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}") run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}") else run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}") run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}") end end # print out data output_file = File.open("#{directory}/summary_stats.txt", "w") # print headers output_file.puts "run name\tnumber of bases(Gb)\tnumber of clusters\tsample name\tdirection\tnumber of clusters\tnumber of forward reads aligned\tnumber of reverse reads aligned\tcoverage\tnumber of snps\tnumber of contigs\tmean contig size\tn50\tnumber of bases\tmean quality\tread base where qual falls below 30\tpercent reduction compared to raw" output_file.puts "#{directory.match(/.*\/(.+?)$/).captures.first}\t#{run_stats.number_of_bases}\t#{run_stats.number_of_clusters}" run_stats.sample_stats.keys.sort.each do |sample_name| sample_stats = run_stats.sample_stats[sample_name] if sample_stats.class == Struct::ResequencingSampleStats output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t#{sample_stats.number_of_forward_reads_aligned}\t#{sample_stats.number_of_reverse_reads_aligned}\t#{sample_stats.coverage}\t#{sample_stats.number_of_snps}" elsif sample_stats.class == Struct::AssemblySampleStats output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t\t\t\t\t#{sample_stats.number_of_contigs}\t#{sample_stats.mean_contig_size}\t#{sample_stats.n50}\t#{sample_stats.number_of_bases}" end ["forward", "reverse", "forward-trim_corrected", "reverse-trim_corrected"].each do |direction| fastq_stats = run_stats.sample_stats[sample_name].fastq_stats[direction] output_file.puts "\t\t\t\t#{direction}\t\t\t\t\t\t\t\t\t\t#{fastq_stats.mean_quality}\t#{fastq_stats.position_where_quality_lt_20}\t#{fastq_stats.percentage_compared_to_raw}" end end output_file.close end
trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff)
click to toggle source
# File lib/trim_and_correct.rb, line 1 def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff) Dir.chdir(directory) # trimming sample_map.each do |sample_file_prefix, sample_name| next if File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") || File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") puts "Trimming files for #{sample_name}" #determine read length read_length = calculate_read_length("#{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}") trim_point = (trim_point_fraction * read_length).to_i unless File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v` `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v` end `perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}` end # quake correction # write file for quake sample_map.each do |sample_file_prefix, sample_name| next if File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") || File.exists?(("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")) puts "Error correcting files for #{sample_name}" output_file = File.open("quake_file_list.txt","w") output_file.puts "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}" output_file.close # run quake `#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}` end sample_map.each do |sample_file_prefix, sample_name| next if File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") # remove orphans `perl /tmp/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}` end # cleanup and rename files sample_map.each do |sample_file_prefix, sample_name| delete_file_with_dependencies("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}", "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}", "paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") delete_file_with_dependencies("orphaned_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") delete_file_with_dependencies("orphaned_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") if File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") delete_file_with_dependencies("error_model.paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.txt") delete_file_with_dependencies("error_model.paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.txt") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.stats.txt") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor_single.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.stats.txt") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor_single.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") delete_file_with_dependencies("orphaned_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("orphaned_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") File.rename("paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") if File.exists?("paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") File.rename("paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") if File.exists?("paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") else File.rename("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}", "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") if File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") File.rename("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}", "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") if File.exists?("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") end end end
which(cmd)
click to toggle source
meethod to return path to command if it is in the path (works in windows) @param String cmd the name of the command
# File lib/fastq-factory.rb, line 43 def which(cmd) exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : [''] ENV['PATH'].split(File::PATH_SEPARATOR).each do |path| exts.each { |ext| exe = "#{path}/#{cmd}#{ext}" return exe if File.executable? exe } end return nil end
write_out_fastq_trim_script()
click to toggle source
# File lib/fastq-factory.rb, line 54 def write_out_fastq_trim_script system("cp #{File.dirname(__FILE__)}/fastq-remove-orphans.pl /tmp/") end