class Mapping

Public Class Methods

new(user_options = {}) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 57
def initialize(user_options = {})

        options = {
                threads: 1,
                total_reads: [],
        }.merge!(user_options)
        @ref_fasta_path = options[:ref_fasta_path]
        @temp_folder = options[:temp_folder]
        @threads = options[:threads]

        @map_files = []
        @paired = []
        @idxstats = []
        @mpileups = []
        @coverage_results = {}
        @total_reads = options[:total_reads]
end

Public Instance Methods

do_map(user_options = {}) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 96
def do_map(user_options = {})
        options = {
                files: [],
                command: 'bowtie2 -p /THREADS/ -x /REFERENCE/',
                paired_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
                single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
                flag_single: '-U',
                flags_paired: ['-1', '-2'],
                additional_paired_flags: '',
                flag_output: '-S',
                output: File.join(@temp_folder, 'map_data'),
                log: File.join(@temp_folder, 'mapping_log'),
                force: false
        }
        options.merge!(user_options)
        options[:files].each_with_index do |read_files, map_process_id|
                cmd = options[:command].gsub('/THREADS/', @threads.to_s)
                cmd.gsub!('/REFERENCE/', @ref)
                if read_files.length == 1
                        cmd = cmd + " #{options[:flag_single]} #{read_files.first}"
                        @paired << false
                elsif read_files.length == 2
                        @paired << true
                        cmd = cmd + " #{options[:additional_paired_flags]} #{options[:flags_paired].first} #{read_files.first} #{options[:flags_paired].last} #{read_files.last}" 
                else
                        raise('Incorrect number of read files. Must be 1 (single) or 2 (paired).')
                end
                map_file = nil
                if options[:paired_pipe].nil? || options[:single_pipe].nil?
                        map_file = options[:output] + "_#{map_process_id}" + '.sam'
                        cmd = cmd + " #{options[:flag_output]} #{map_file} &> #{options[:log]}_#{map_process_id}"
                else
                        if @paired[map_process_id]
                                pipe = options[:paired_pipe]
                        else
                                pipe = options[:single_pipe]
                        end
                        map_file = options[:output] + "_#{map_process_id}" + '.bam'
                        cmd = cmd + " 2> #{options[:log]}_#{map_process_id} " + pipe.gsub('/OUTPUT/', map_file)
                end
                @map_files << map_file
                system(cmd) if options[:force] || !File.exists?(map_file)
                @total_reads << File.open("#{options[:log]}_#{map_process_id}").readlines.select{|line| /\d+ reads; of these:/ =~ line}.first.split(' ').first.to_i if File.exists?("#{options[:log]}_#{map_process_id}") && @total_reads[map_process_id].nil?
                raise('ERROR: The mapping process has failed, please check the map folder into the temp folder') if @total_reads[map_process_id].nil? || @total_reads[map_process_id] == 0
        end
end
do_ref(user_options = {}) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 75
def do_ref(user_options = {})
        options = {
                name: 'ref',
                command: 'bowtie2-build -f --threads /THREADS/ /REF_FASTA/ /OUTPUT/',
                log: 'reference_log',
                force: false
        }
        options.merge!(user_options)
        @ref = File.join(@temp_folder, options[:name])
        cmd = options[:command].gsub('/THREADS/', @threads.to_s)
        cmd.gsub!('/REF_FASTA/', @ref_fasta_path)
        cmd.gsub!('/OUTPUT/', @ref)
        cmd = cmd + " &> #{File.join(@temp_folder, options[:log])}"
        system(cmd) if options[:force] || Dir.glob(@ref+'*.bt2').length == 0 
end
do_samtools_ref() click to toggle source
# File lib/full_lengther_next/mapping.rb, line 91
def do_samtools_ref
        cmd = "samtools faidx #{@ref_fasta_path}"
        system(cmd) if !File.exists?(@ref_fasta_path + '.fai')
end
idxstats() click to toggle source
# File lib/full_lengther_next/mapping.rb, line 159
def idxstats
        @map_files.each_with_index do |map_file, map_process_id|
                prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
                file_path = File.join(@temp_folder, "#{prefix}_idxstats_#{map_process_id}.gz")
                cmd = "samtools idxstats #{map_file} | gzip - -f > #{file_path}"
                system(cmd) if !File.exists?(file_path)
                parse_idxstats(file_path)
        end
end
index(user_options = {}) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 143
def index(user_options = {})
        @map_files.each do |map_file|
                system("samtools index #{map_file}") if (map_file.include?('.bam') && !File.exists?(map_file+'.bai')) || user_options[:force]
        end
end
mpileup(user_options = {}) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 169
def mpileup(user_options = {})
        parse_options = {
                add_coverages: false, 
                normalize_coverages: false,
                cols: [1,2,4] # 1 based for cut
        }
        parse_options.merge!(user_options.delete(:parse_options)) if !user_options[:parse_options].nil?
        opts = []
        do_samtools_ref
        user_options.each do |flag, value|
                opts << [flag, value.to_s]
        end

        contig_list_file = File.join(@temp_folder, File.basename(@ref_fasta_path)+'.lst')
        system("grep '>' #{@ref_fasta_path} | sed 's/>//g' > #{contig_list_file}") if !File.exists?(contig_list_file)
        idxstats if @idxstats.empty?
        cut = nil
        cut = " |cut -f #{parse_options[:cols].join(',')}" if !parse_options[:cols].nil? && !parse_options[:cols].empty?
        mpileup_files = []
        @map_files.each_with_index do |map_file, map_process_id|
                prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
                file_path = File.join(@temp_folder, "#{prefix}_mpileup_#{map_process_id}.gz")
                mpileup_files << file_path
                cmd = "samtools mpileup -f #{@ref_fasta_path} #{opts.join(' ')} #{map_file}#{cut} | gzip - -f > #{file_path}" 
                system(cmd) if !File.exists?(file_path)
        end
        coverage_results = {}

        parse_mpileup(mpileup_files, contig_list_file) do |contig_name, contig_length, coverages|
                mapped_reads = @idxstats.map{|info| info[contig_name][:mapped]}.inject { |sum, n| sum + n }
                get_coverage_parameters(contig_name, contig_length, mapped_reads, coverages, parse_options, coverage_results)
        end
        return coverage_results
end
parse_idxstats(file_path) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 221
def parse_idxstats(file_path)
        stats = {}
        stats_file = ScbiZcatFile.new(file_path)
        while !stats_file.eof
        fields = stats_file.readline.chomp.split("\t")
        stats[fields[0]] = {length: fields[1].to_i, mapped: fields[2].to_i, unmmapped: fields[3].to_i}
    end
    stats_file.close
    stats.delete('*')
    @idxstats << stats
end
parse_mpileup(file_paths, contig_list_file) { |contig_name, contig_length, all_coverages| ... } click to toggle source
# File lib/full_lengther_next/mapping.rb, line 204
def parse_mpileup(file_paths, contig_list_file)
        last_contig = nil
        mpileup_files = file_paths.map{|file_path| Mpileup.new(file_path)}
        File.open(contig_list_file).each do |contig_name|
                contig_name.chomp!
                contig_length = @idxstats.first[contig_name][:length]
                all_coverages = []
                mpileup_files.each do |mpileup_file|
                        coverages = mpileup_file.read_contig(contig_name, contig_length)
                        all_coverages << coverages if !coverages.nil? && !coverages.empty?  
                end
                yield(contig_name, contig_length, all_coverages)
        end
        mpileup_files.map{|mf| mf.close}
end
report() click to toggle source
# File lib/full_lengther_next/mapping.rb, line 149
def report
        reports = []
        @map_files.each do |map_file|
                cmd = "samtools flagstat #{map_file}"
                report = %x[#{cmd}].split("\n")
                reports << report
        end
        return reports
end

Private Instance Methods

calculate_coverage_parameters(coverages, ref_length, mapped_reads, options) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 264
def calculate_coverage_parameters(coverages, ref_length, mapped_reads, options)
        n_mates = 1.0
        n_mates = 2.0 if @paired
        millions = @total_reads.inject { |sum, n| sum + n }/1.0e6
        mean_normalized_differences = 0
        mean_max = 0
        mean_coverage = 0
        proportion_sequence_mapped = 0
        fpkm = 0
        
        greater0 = coverages.select{|c| c > 0}
        coverages_greater0 = greater0.length
        if coverages_greater0 > 0
                fpkm = mapped_reads/n_mates/(ref_length/1000.0)/millions
                mean_coverage = coverages.inject { |sum, n| sum + n }.fdiv(ref_length)
                n_max = (coverages.length/10.0).ceil
                maximums = coverages.sort{|c1, c2| c2 <=> c1}[0..n_max-1]
                mean_max = maximums.inject { |sum, n| sum + n }.fdiv(n_max)

                mean_coverage_filtered =  greater0.inject { |sum, n| sum + n }.fdiv(coverages_greater0)
                normalized_differences = greater0.map{|c| (c - mean_coverage_filtered).abs/mean_coverage_filtered}
                mean_normalized_differences = normalized_differences.inject { |sum, n| sum + n } / normalized_differences.length
                proportion_sequence_mapped =  greater0.length.fdiv(ref_length)
        
                if options[:normalize_coverages]
                        max = coverages.max
                        coverages.map!{|cov| cov.fdiv(max) } 
                end
        end   
        return mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm
end
get_coverage_parameters(seq_name, contig_length, mapped_reads, mpileup_info, options, coverage_results) click to toggle source
# File lib/full_lengther_next/mapping.rb, line 236
def get_coverage_parameters(seq_name, contig_length, mapped_reads, mpileup_info, options, coverage_results)
        # begin
                mean_normalized_differences = 0
                mean_max = 0
                mean_coverage = 0
                proportion_sequence_mapped = 0
                fpkm = 0
                if mapped_reads > 0
                        if !mpileup_info.empty?
                                if mpileup_info.length == 1
                                        coverages = mpileup_info.first
                                else
                                        coverages = mpileup_info.transpose.map {|x| x.reduce(:+)}
                                end
                                mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm = calculate_coverage_parameters(coverages, contig_length, mapped_reads, options)
                        end
                end
                record = [mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm, mapped_reads]

                record << coverages if options[:add_coverages]
                coverage_results[seq_name] = record
        # rescue Exception => e
        #     puts "ERROR: The reference sequence: #{seq_name} has failed",
        #     e.message,
        #     e.backtrace.join("\n")
        # end
end