class UneLosHit

Attributes

final_hit[R]
full_prot[R]
is_ok[R]
msgs[R]
number_x[R]
output_seq[R]
q_index_start[R]

Public Class Methods

new(full_hit, query_fasta) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 9
# Builds a single merged hit out of the HSPs selected by hits_misma_id.
#
# full_hit    - collection of hits/HSPs, forwarded to hits_misma_id.
# query_fasta - query sequence, forwarded to hits_misma_id.
#
# Sets @mismas_ids_array, @msgs, @output_seq and @final_hit. Each merged HSP
# appends one entry to @msgs of the form [label, position, position].
# NOTE(review): a same-frame merge path (same_frame_hits_query) used to be
# wired in here but is disabled; only the overlap/separated paths run.
def initialize(full_hit, query_fasta)
  same_id_hits, query_fasta = hits_misma_id(full_hit, query_fasta)
  @mismas_ids_array = same_id_hits
  @msgs = []
  @output_seq = query_fasta

  # With zero or one HSP there is nothing to merge.
  if same_id_hits.count <= 1
    @final_hit = same_id_hits.shift
    return
  end

  same_id_hits.sort! { |a, b| a.q_beg <=> b.q_beg } # order the hits by query start
  @final_hit = same_id_hits.shift # the first HSP is the reference for the reconstruction

  same_id_hits.each do |hit|
    if overlapping_hits?(hit)
      label = @msgs.empty? ? 'OverlapHit' : 'AndOverlapHit'
      # The message is recorded before the merge changes @final_hit.q_end.
      @msgs << [label, @final_hit.q_end + 1, hit.q_beg + 1]
      overlapped_hits_query(hit)
    elsif separated_hits?(hit)
      label = @msgs.empty? ? 'SeparatedHit' : 'AndSeparatedHit'
      @msgs << [label, @final_hit.q_end + 1, hit.q_beg + 1]
      separated_hits(hit)
    end
  end
end

Public Instance Methods

ajust_nt(nt) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 196
# Returns how many nucleotides must be added to +nt+ so that the total
# becomes a multiple of 3 (keeps the ORF in frame).
#
# nt - a nucleotide count.
#
# Returns 0, 1 or 2.
def ajust_nt(nt)
  case nt % 3
  when 1 then 2
  when 2 then 1
  else 0
  end
end
hits_misma_id(full_hit, query_fasta_ori) click to toggle source

Creamos un array en el que estén solo los hits con la misma id.

# File lib/full_lengther_next/une_los_hit.rb, line 156
# Collects the hits that share the accession (+acc+) of the first hit.
#
# Whenever a selected hit has a negative query frame, the working query is
# replaced by reverse_seq(query_fasta_ori, hit) and the hit is flagged with
# reversed = true.
#
# full_hit        - collection of hits; each must respond to acc, q_frame
#                   and reversed=.
# query_fasta_ori - original query sequence; it is duplicated, not modified
#                   by this method.
#
# Returns a two-element array: [hits_with_same_acc, query_sequence].
def hits_misma_id(full_hit, query_fasta_ori)
  misma_id = []
  query_fasta = query_fasta_ori.dup

  full_hit.each do |h|
    next unless h.acc == full_hit.first.acc

    if h.q_frame < 0 # the sequence is on the reverse strand, so flip it
      query_fasta = reverse_seq(query_fasta_ori, h)
      # Lowercase literal: the TRUE constant is deprecated and is not
      # defined on current Ruby releases.
      h.reversed = true
    end
    misma_id << h
  end

  return misma_id, query_fasta
end
overlapped_hits(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 103
# Merges +hit+ into @final_hit when both HSPs overlap on the query, masking
# the overlapped region.
#
# The overlapped nucleotides of @output_seq are replaced by 'n' (plus the
# padding returned by ajust_nt(hit.q_frame - 1)), and the aligned q_seq/s_seq
# of both HSPs are joined with 'x' characters in between. Afterwards
# @final_hit.q_end, s_beg, s_end and align_len are updated to span both HSPs.
#
# hit - the HSP to merge; must respond to q_beg, q_end, s_beg, s_end, q_seq,
#       s_seq and q_frame.
#
# NOTE(review): the original comment warns that this breaks when HSPs lie on
# different parts of the query but match the same subject region, and says
# such HSPs are removed beforehand; that filtering is not visible here.
def overlapped_hits(hit)
  overlapped_aas = @final_hit.s_end - hit.s_beg + 1
  overlapped_nts = @final_hit.q_end - hit.q_beg + 1

  # The previous version also counted gaps in a slice of q_seq/s_seq, but never
  # used the result. With no subject overlap that slice started past the end
  # of the string, returned nil and raised NoMethodError, so the branch below
  # could never run. Those unused computations were removed.
  absolute_overlap = 1
  if overlapped_aas < 0 # no overlap on the subject
    overlapped_aas = overlapped_aas.abs
    # Query overlaps but subject does not: do not trim q_seq and s_seq.
    absolute_overlap = 0
  end

  add_nt = overlapped_nts + ajust_nt(hit.q_frame - 1)
  @output_seq = @output_seq[0..@final_hit.q_end - overlapped_nts] +
                'n' * add_nt +
                @output_seq[@final_hit.q_end + 1..@output_seq.length - 1]

  # q_seq and s_seq are amino-acid sequences.
  trimmed_aas = overlapped_aas * absolute_overlap
  final_hit_upper_bound = @final_hit.q_seq.length - 1 - trimmed_aas
  filler = 'x' * overlapped_aas
  @final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + filler +
                     hit.q_seq[trimmed_aas..hit.q_seq.length - 1]
  @final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + filler +
                     hit.s_seq[trimmed_aas..hit.s_seq.length - 1]

  @final_hit.q_end = hit.q_end

  @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
  @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
  @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg + 1
end
overlapped_hits_query(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 64
# Merges +hit+ into @final_hit when both overlap on the query, rebuilding the
# protein from the masked query instead of joining the aligned sequences.
#
# The overlapped nucleotides of @output_seq are replaced by 'n', plus the
# padding from ajust_nt(hit.q_frame - 1) to fix the frame shift. The merged
# q_seq is then obtained by calling translate on the query slice that spans
# both HSPs.
#
# hit - the HSP to merge; must respond to q_beg, q_end, s_beg, s_end and
#       q_frame.
def overlapped_hits_query(hit)
  merged = @final_hit
  overlap_len = merged.q_end - hit.q_beg + 1
  frame_fix = ajust_nt(hit.q_frame - 1) # fix frame-shift

  head = @output_seq[0..(merged.q_end - overlap_len)]
  tail = @output_seq[(merged.q_end + 1)..(@output_seq.length - 1)]
  @output_seq = head + 'n' * (overlap_len + frame_fix) + tail

  new_q_end = hit.q_end + frame_fix
  merged.q_seq = @output_seq[merged.q_beg..new_q_end].translate
  merged.q_end = new_q_end

  merged.s_beg = [merged.s_beg, hit.s_beg].min
  merged.s_end = [merged.s_end, hit.s_end].max
  merged.align_len = merged.s_end - merged.s_beg + 1
  merged.q_len = @output_seq.length
end
overlapping_hits?(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 178
# Tells whether +hit+ overlaps the end of @final_hit on the query and extends
# beyond it.
#
# hit - an HSP responding to q_beg and q_end.
#
# Returns true when @final_hit.q_end lies at or after hit.q_beg and before
# hit.q_end, false otherwise.
def overlapping_hits?(hit)
  # Lowercase literals are returned implicitly: the TRUE/FALSE constants are
  # deprecated and are not defined on current Ruby releases. The previously
  # duplicated `q_end < hit.q_end` test is now written once.
  @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end
end
same_frame_hits(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 92
# Appends +hit+ to @final_hit by joining the aligned sequences of both HSPs.
#
# The filler length in amino acids is (hit.s_beg - @final_hit.s_end) + 1; that
# many 'x' characters are inserted between the q_seq/s_seq of both HSPs, and
# three times as many 'n' are written into @output_seq. Afterwards
# @final_hit.q_end, s_end and align_len are moved to the end of +hit+.
#
# hit - the HSP to append; must respond to q_beg, q_end, s_beg, s_end, q_seq
#       and s_seq.
def same_frame_hits(hit)
  merged = @final_hit
  filler_aas = (hit.s_beg - merged.s_end) + 1
  filler_nts = filler_aas * 3
  aa_filler = 'x' * filler_aas

  merged.q_seq = merged.q_seq + aa_filler + hit.q_seq
  merged.s_seq = merged.s_seq + aa_filler + hit.s_seq

  head = @output_seq[0..(merged.q_end - filler_nts)]
  tail = @output_seq[(hit.q_beg + 1)..(@output_seq.length - 1)]
  @output_seq = head + 'n' * filler_nts + tail

  merged.q_end = hit.q_end
  merged.s_end = hit.s_end
  merged.align_len = hit.s_end - merged.s_beg + 1
end
same_frame_hits_query(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 56
# Extends @final_hit up to the end of +hit+, rebuilding q_seq by calling
# translate on the query slice.
#
# hit - the HSP to append; must respond to q_end and s_end.
#
# NOTE(review): q_seq is built from the slice that ends at the previous
# @final_hit.q_end, i.e. before q_end is moved to hit.q_end; confirm this is
# intended.
def same_frame_hits_query(hit)
  merged = @final_hit
  query_slice = @output_seq[merged.q_beg..merged.q_end]
  merged.q_seq = query_slice.translate

  merged.q_end = hit.q_end
  merged.s_end = hit.s_end
  merged.align_len = hit.s_end - merged.s_beg + 1
end
separated_hits(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 134
# Appends +hit+ to @final_hit when the two HSPs do not overlap on the query.
#
# The gap length is hit.q_beg - @final_hit.q_end - 1, rounded up to a
# multiple of 3 with ajust_nt. When it is positive, that many 'n' are inserted
# into @output_seq and a third as many 'x' are inserted between the aligned
# q_seq/s_seq of both HSPs. Afterwards @final_hit.q_end, s_end and align_len
# are moved to the end of +hit+.
#
# hit - the HSP to append; must respond to q_beg, q_end, s_end, q_seq and
#       s_seq.
def separated_hits(hit)
  merged = @final_hit
  gap_nts = hit.q_beg - merged.q_end - 1
  gap_nts += ajust_nt(gap_nts)

  nt_filler = gap_nts > 0 ? 'n' * gap_nts : ''
  aa_filler = gap_nts > 0 ? 'x' * (gap_nts / 3) : ''

  head = @output_seq[0..(merged.q_end - 1)]
  tail = @output_seq[(hit.q_beg - 1)..(@output_seq.length - 1)]
  @output_seq = head + nt_filler + tail

  merged.q_seq = merged.q_seq + aa_filler + hit.q_seq
  merged.s_seq = merged.s_seq + aa_filler + hit.s_seq

  merged.q_end = hit.q_end
  merged.s_end = hit.s_end
  merged.align_len = merged.s_end - merged.s_beg + 1
end
separated_hits?(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 186
# Tells whether +hit+ starts after the end of @final_hit on the query.
#
# hit - an HSP responding to q_beg and q_end.
#
# Returns true when both hit.q_beg and hit.q_end lie beyond @final_hit.q_end,
# false otherwise.
def separated_hits?(hit)
  # Lowercase literals are returned implicitly: the TRUE/FALSE constants are
  # deprecated and are not defined on current Ruby releases.
  @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
end
separated_hits_query(hit) click to toggle source
# File lib/full_lengther_next/une_los_hit.rb, line 79
# Appends +hit+ to @final_hit for HSPs that are separated on the query,
# rebuilding the protein from the masked query.
#
# separated length = hit.q_beg - @final_hit.q_end + 1. A run of 'n' of that
# length, plus the padding from ajust_nt to fix the frame shift, is written
# into @output_seq. The merged q_seq is then obtained by calling translate on
# the query slice that spans both HSPs.
#
# hit - the HSP to append; must respond to q_beg, q_end and s_end.
def separated_hits_query(hit)
  merged = @final_hit
  gap_len = hit.q_beg - merged.q_end + 1
  frame_fix = ajust_nt(gap_len) # fix frame-shift

  head = @output_seq[0..(merged.q_end - gap_len)]
  tail = @output_seq[(merged.q_end + 1)..(@output_seq.length - 1)]
  @output_seq = head + 'n' * (gap_len + frame_fix) + tail

  new_q_end = hit.q_end + frame_fix
  merged.q_seq = @output_seq[merged.q_beg..new_q_end].translate

  merged.q_end = new_q_end
  merged.s_end = hit.s_end
  merged.align_len = merged.s_end - merged.s_beg + 1
  merged.q_len = @output_seq.length
end