class TestCode

Public Class Methods

new(seq) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 9
        def initialize(seq)
                
                name=''
                t_code=''
                status=''
                ref_start=0
                ref_end=seq.seq_fasta.length
                ref_frame=''
                orf=''
                protein = ''
                p_long = 0
                
                if seq.fasta_length < 200                     
                        seq.type = UNKNOWN
                        seq.test_code(0.0)
                        seq.warnings('<200nt')
                        seq.hit = [ref_start, ref_end, 0] #Last element is ref_frame
                else
                        
# para probar tescode con toda la secuencia, en lugar de con los ORFs ----------------------------------------------------------------------
                        # sense_strand = seq.seq_fasta.upcase
                        # antisense_strand = sense_strand.complementary_dna
                        # (t_code,t_status) = testCode_exec(sense_strand)
                        # ref_frame = 1
                        # ref_msgs = ''
                        # (as_t_code,as_t_status) = testCode_exec(antisense_strand)
                        # # puts "#{seq.seq_name}: t_code: #{t_code}, t_status: #{t_status}, as_t_code: #{as_t_code}, as_t_status: #{as_t_status}"
                        # seq.annotate(:tcode,"#{seq.seq_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{t_status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
# --------------------------------------------------------------------------------------------------------------------------------


                        # see add_region filter
                        name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame = t_code(seq)
                        seq.test_code(t_code)
                        seq.warnings(ref_msgs)
                        seq.hit = [ref_start, ref_end, ref_frame]

                        if status == :unknown
                                seq.type = UNKNOWN
                        else
                                seq.type = CODING
                        end
                        
                        # if (ref_msgs.nil?)
                        #    ref_msgs = ''
                        # end
                        #
                        # if (stop_before_start)
                        #    ref_msgs += "There is a STOP codon before ATG. "
                        # end
                        #
                        # if (more_than_one_frame)
                        #    ref_msgs += "Possible frame error by an ins/del"
                        # end
                        #
                        # if (status.to_s =~ /^[CN]\_terminus/) || (status == :Internal)
                        #    tmp_status = "Putative #{status.to_s}"
                        # else
                        #    tmp_status = status.to_s
                        # end
                        #
                        #
                        # if (!orf.nil?) && (!more_than_one_frame)
                        #    protein = orf.translate
                        #    p_long = protein.length - 3
                        #    seq.annotate(:tcode,"#{name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{tmp_status}\t#{t_code}\t\t\t#{p_long}\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t#{protein}",true)
                        # else
                        #    seq.annotate(:tcode,"#{name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{tmp_status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
                        # end
                        
                end
        end

Public Instance Methods

add_region(orf_seq, orf_t_start, orf_t_end, orf_frame, orf_stop_codon, orf_type,seq) click to toggle source

to add regions over a determinated size

# File lib/full_lengther_next/test_code.rb, line 220
def add_region(orf_seq, orf_t_start, orf_t_end, orf_frame, orf_stop_codon, orf_type,seq)

        if (orf_seq.length >= 200)
                # puts "#{seq.seq_name}, #{orf_t_start} - #{orf_t_end}, #{orf_frame}, #{orf_type}"
                seq.add_orf(orf_seq, orf_t_start, orf_t_end, orf_frame, orf_stop_codon, orf_type)
        end

end
calcParam(valueOne,valueTwo,valueThree) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 777
def calcParam (valueOne,valueTwo,valueThree)
        paramArray = [valueOne,valueTwo,valueThree];
        paramArray = paramArray.sort#{|a,b| return a-b}#(compareNumbers);
        paramValue = paramArray[2] / (paramArray[0] + 1.0);
        # puts paramArray.to_json
        return paramValue;
end
compare_regions(this_strand,tc_fasta) click to toggle source

para escoger la region codificante mas grande, incluso solapando verios frames

# File lib/full_lengther_next/test_code.rb, line 205
def compare_regions(this_strand,tc_fasta)
        
        # select the largest ORF without frame repair-------------------------------- hay que comentar todo el bloque de arriba
        best_orf = nil
        best_orf = this_strand.sort{|x,y| y.seq.length <=> x.seq.length }[0]
        
        if !best_orf.nil?
                best_orf.type = best_orf.status
        end
        
        return best_orf

end
compare_regions_old(this_strand,tc_fasta) click to toggle source

no se usa para escoger la region codificante mas grande, incluso solapando verios frames

# File lib/full_lengther_next/test_code.rb, line 626
def compare_regions_old(this_strand,tc_fasta)

        # array_regions = []
        # array_regions.push this_strand.first.dup
        #
        # this_strand.each do |one_orf|
        #
        #     prev_orf = array_regions.last.dup # taking last orf before add actual orf
        #     array_regions.push one_orf.dup # add actual orf, el primer orf me va a salir duplicado
        #
        #     if prev_orf.overlaps?(one_orf)
        #             prev_orf.seq = tc_fasta[prev_orf.t_start..one_orf.t_end]
        #
        #             (t_code,t_status)=testCode_exec(prev_orf.seq)
        #
        #             prev_orf.score = t_code
        #             prev_orf.status = t_status
        #
        #             #t_start, frame and stop_codon are the same
        #             prev_orf.t_end = one_orf.t_end
        #             if (prev_orf.type != one_orf.type)
        #                     prev_orf.type = type_fusion(prev_orf.type,one_orf.type)
        #             end
        #             prev_orf.more_than_one_frame = true
        #
        #             # puts "overlaps:"
        #             # puts "#{one_orf.t_start} - #{one_orf.t_end}"
        #             # puts "#{array_regions.last.t_start} - #{array_regions.last.t_end}"
        #
        #             array_regions.push prev_orf # add overlapped orf,
        #     end
        #
        # end
        #
        # # select the largest ORF without frame repair-------------------------------- hay que comentar todo el bloque de arriba
        # # best_orf = nil
        # # best_orf = this_strand.sort{|x,y| y.seq.length <=> x.seq.length }[0]
        # # ---------------------------------------------------------------------------
        #
        # # select the largest ORF ----------------------------------------------------
        # # best_orf = nil
        # # best_orf = array_regions.sort{|x,y| y.seq.length <=> x.seq.length }[0]
        # # if !best_orf.nil?
        # #   puts "best_orf.status: #{best_orf.status}, best_orf.type: #{best_orf.type}"
        # # end
        #
        #
        # # ---------------------------------------------------------------------------
        #
        # # select the largest coding ORF ---------------------------------------------
        # best_orf = nil
        # array_regions.sort{|x,y| y.seq.length <=> x.seq.length }.each do |one_orf|
        #     # puts "#{one_orf.status}, #{one_orf.type}"
        #     if (one_orf.status == :coding)
        #             best_orf = one_orf
        #             break
        #     end
        # end
        # # ---------------------------------------------------------------------------
        #
        # # puts "\n\n\nstart"
        # # array_regions.sort{|x,y| y.seq.length <=> x.seq.length }.each do |one_orf|
        # #   puts one_orf.seq.length
        # # end
        # # puts "end\n\n\n"
        # # puts "best_orf: #{best_orf.seq.length}"
        #
        # # if !best_orf.nil?
        # #   best_orf.type = best_orf.status
        # # end
        #
        # return best_orf

end
countBases(valueOne,valueTwo,valueThree) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 785
def countBases (valueOne,valueTwo,valueThree)
        return valueOne + valueTwo + valueThree;
end
generate_uncomplete_orf(a,frame,seq) click to toggle source

se buscan las regiones Complete, N-terminus, C-terminus e Internal en la secuencia de cada frame

# File lib/full_lengther_next/test_code.rb, line 230
def generate_uncomplete_orf(a,frame,seq)
        
        my_atg = true
        atg_codon = false
        stop_codon = false
        any_stop = false
        
        orf =''
        t_start = 0
        t_end = 0
        
        a.each do |e|
                t_end += 3
                orf += e
                
                if (e == 'ATG') && (!atg_codon)
                        atg_codon = true
                        stop_codon = false
                        t_start = t_end - 3
                elsif (e == 'TAG') || (e == 'TGA') || (e == 'TAA')
                        orf_tmp = orf[t_start..t_end]
                        if (any_stop)
                                # case 1, complete orf
                                if (atg_codon)
                                        orf_tmp = orf[t_start..t_end]
                                        add_region(orf_tmp, t_start, t_end, frame, any_stop, :Complete,seq)
                                end
                        else
                                # case 2, C_terminus
                                if (my_atg)
                                        orf_tmp = orf[0..t_end]
                                        add_region(orf_tmp, 0, t_end, frame, any_stop, :C_terminus,seq)
                                        # case 3, putative complete, complete without stop codon before atg
                                        if (atg_codon)
                                                orf_tmp = orf[t_start..t_end]
                                                add_region(orf_tmp, t_start, t_end, frame, any_stop, :Complete, seq)
                                        end
                                end
                        end
                        
                        stop_codon = true
                        any_stop = true
                        my_atg = false
                        atg_codon = false
                        t_start += 3
                end
        end
        
        # case 4, N_terminus and case 6, putative N_terminus
        if (atg_codon) && (!stop_codon)
                orf_tmp = orf[t_start..t_end]
                add_region(orf_tmp, t_start, t_end, frame, any_stop, :N_terminus,seq)
        end
        
        # case 5, internal
        if (my_atg) && (!any_stop)
                orf_tmp = orf[0..t_end]
                add_region(orf_tmp, 0, t_end, frame, any_stop, :Internal,seq)
        end
        
end
getConclusion(testCode_value) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 866
def getConclusion (testCode_value)
        codeProb = "";
        if (testCode_value < 0.74)
                codeProb = :unknown;
        elsif (testCode_value >=0.74 and testCode_value < 0.95)
                codeProb = :putative_coding;
        elsif (testCode_value >=0.95)
                codeProb = :coding;
        end

        return codeProb;
end
orf_fusion(orfs_array,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) click to toggle source

no se usa

# File lib/full_lengther_next/test_code.rb, line 349
def orf_fusion(orfs_array,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs)

        lower_start = 9999
        higher_end = 0
        new_orf = false
        ref_name = name

        # vamos a coger el mejor de referencia (el coding mas largo)
        # y vamos a poner en el warning los coding que estan en el mismo sentido y no estan contenidos en el mejor
        orfs_array.sort! {|orf1,orf2| orf1[1] <=> orf2[1]}

        tmp_orf = ref_orf
        tmp_start = ref_start
        tmp_end = ref_end
        tmp_frame = ref_frame
        tmp_msg = ''

        # puts "\n\n#{name} ---- tmp_frame: #{tmp_frame} ------------------------ \n\n"

        orfs_array.each do |orf|

                (orf[1],orf[2]) = corrige_frame(orf[3],orf[1],orf[2])
                # (orf[1],orf[2]) = $an.corrige_frame(orf[3],orf[1],orf[2])

                # puts "*** name: #{name}, orf[1]: #{orf[1]}, orf[2]: #{orf[2]}, orf[3]: #{orf[3]} ref_start: #{ref_start}, ref_end: #{ref_end}, ref_frame: #{ref_frame} ***\n\n"

                if (orf[0] != tmp_orf)

                        if ((tmp_end >= orf[1]) && (tmp_end <= orf[2])) || ((tmp_start >= orf[1]) && (tmp_start <= orf[2])) # los ORFs solapan

                                # puts "SOLAPAN     frame: #{orf[3]}  tmp_start: #{tmp_start} tmp_end: #{tmp_end} orf_start: #{orf[1]} orf_end: #{orf[2]}"
                                if (tmp_frame > 0)
                                        tmp_msg = ", overlapping coding region (#{orf[1]},#{orf[2]})"
                                elsif (tmp_frame < 0)
                                        tmp_msg = ", overlapping coding region (-#{orf[1]},-#{orf[2]})"
                                end
                                new_orf = true

                        elsif (tmp_end < orf[1]) || (tmp_start > orf[2]) # los ORFs estan separados

                                # puts "#{name}  frame: #{orf[3]}    SEPARADOS --> tmp_start: #{tmp_start} tmp_end: #{tmp_end} orf_start: #{orf[1]} orf_end: #{orf[2]}"
                                if (tmp_frame > 0)
                                        tmp_msg = ", other coding region (#{orf[1]},#{orf[2]})"
                                elsif (tmp_frame < 0)
                                        tmp_msg = ", other coding region (-#{orf[1]},-#{orf[2]})"
                                end
                                new_orf = true

                        end

                        if (new_orf == true)

                                if (orf[1] < lower_start)
                                        lower_start = orf[1]
                                end

                                # if (orf[2] > higher_end)
                                # end

                                (tmp_code,tmp_status)=testCode_exec(tc_fasta[lower_start-1..higher_end-1])

                                if (tmp_status != 'unknown')
                                        # puts "#{name} ----------------------------------- FUSION!!!!!!!!!!!!!!!!\n\n"
                                        # tenemos varios ORFs q son codificantes al unirlos
                                        ref_msgs += tmp_msg
                                        new_orf = false
                                else
                                        # puts "#{name} ------------------------------------NO se unen\n\n"
                                end
                        end
                end
        end
        return[ref_status,ref_code,ref_name,ref_start,ref_end,ref_frame,ref_orf,ref_msgs]
end
t_code(seq) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 82
def t_code(seq)
        
        name = seq.seq_name
        tc_fasta = seq.seq_fasta
        
        # generamos todos los ORFs de cada secuencia
        uncomplete_orf_finder(seq)
        minus_strand = []
        plus_strand = []
        
        # puts "**************************************************"
        # puts "#{name} #{tc_fasta.length}"
        # ordenamos los ORFs empezando desde 5' a 3' y luego separamos los de cada hebra en un array distinto
        seq.orfs.sort{|x,y| x.t_start <=> y.t_start }.each do |one_orf|
                
                (t_code,t_status)=testCode_exec(one_orf.seq)
                one_orf.status = t_status
                one_orf.score = t_code
                
                if (one_orf.frame < 0)
                        minus_strand.push one_orf
                elsif (one_orf.frame > 0)
                        plus_strand.push one_orf
                end
                
        end
        #----------------------------------- Plus strand
        if (!plus_strand.empty?)
                # puts "--------------Plus strand:"
                best_plus_region = compare_regions(plus_strand,tc_fasta)
                if (!best_plus_region.nil?)
                        # puts "#{best_plus_region.seq.length}, #{best_plus_region.status}, #{best_plus_region.type}, #{best_plus_region.t_start} - #{best_plus_region.t_end}, #{best_plus_region.frame}"
                        # puts best_plus_region.seq
                end
        end
        #----------------------------------- Minus strand
        if (!minus_strand.empty?)
                # puts "--------------Minus strand:"
                best_minus_region = compare_regions(minus_strand,tc_fasta)
                if (!best_minus_region.nil?)
                        # puts "#{best_minus_region.seq.length}, #{best_minus_region.status}, #{best_minus_region.type}, #{best_minus_region.t_start} - #{best_minus_region.t_end}, #{best_minus_region.frame}"
                        # puts best_minus_region.seq
                end
        end
        # obtenemos la region codificante mas larga de ambas hebras
        best_region = nil
        if (!best_plus_region.nil?)
                if (!best_minus_region.nil?)
                        if (best_minus_region.seq.length > best_plus_region.seq.length)
                                best_region = best_minus_region
                        else
                                best_region = best_plus_region
                        end
                else
                        best_region = best_plus_region
                end
        elsif (!best_minus_region.nil?)
                best_region = best_minus_region
        end
        # puts "--------------Best region:"
        # puts " --------------------------------- #{best_region.seq.length}, #{best_region.status}, #{best_region.type}, #{best_region.t_start} - #{best_region.t_end}, #{best_region.frame}"
        # comprobamos el tipo de ORF segun si tiene un codon de parada antes del atg

        if (!best_region.nil?) && (best_region.seq.length >= 200)
                if (best_region.type == :Complete) && (!best_region.stop_codon)
                        best_region.type = 'Putative Complete'
                        ref_msgs = 'NO STOP codon before ATG. '
                elsif (best_region.type == :N_terminus) && (!best_region.stop_codon)
                        ref_msgs = 'NO STOP codon before ATG. '
                end
                return [name, best_region.score, best_region.type, best_region.t_start, best_region.t_end, best_region.frame, best_region.seq, ref_msgs, best_region.stop_codon, best_region.more_than_one_frame]
        else
                ref_score = 0.0
                ref_start = 0
                ref_end = 0
                ref_frame = 0
                ref_orf = ''
                ref_type = :unknown
                ref_msgs = 'Non coding ORF found >= 200 nt '
                return [name, ref_score, ref_type, ref_start, ref_end, ref_frame, ref_orf, ref_msgs, false, false]
        end
end
t_code_old(seq) click to toggle source

no se usa

# File lib/full_lengther_next/test_code.rb, line 424
        def t_code_old(seq)

                ref_code = 0.0
                ref_name = ''
                ref_start = 0
                ref_end = 0
                ref_frame = 0
                ref_status = ''
                ref_orf = ''
                ref_msgs = ''

                name = seq.seq_name
                tc_fasta = seq.seq_fasta
                
                # generamos todos los ORFs mayores de 200pb de cada secuencia, en fl_string_utils
                orfs_array = tc_fasta.orf_finder
                
                if (orfs_array[0].nil?)
                        ref_name = name
                        ref_code = 0.0
                        ref_start = 0
                        ref_end = 0
                        ref_frame = 0
                        ref_status = 'unknown'
                        ref_orf = ''
                        ref_msgs = 'ORF length < 200 nt'
                        # ref_msgs = 'Your sequence has not an ORF longer than 200 nt'

                else
                        one_good_orf_minus = false
                        more_than_one_minus = false
                        one_good_orf_plus = false
                        more_than_one_plus = false

                        good_orfs_minus = []
                        good_orfs_plus = []

                        one_coding = false
                        one_putative = false

                        # orfs_array.sort! {|orf1,orf2| (orf1[2] - orf1[1]) <=> (orf2[2] - orf2[1])}
                        orfs_array.each do |orf|

                                # long = orf.length - 1
                                if (orf[0])
                                # if (long >= 200)
                                        (t_code,t_status)=testCode_exec(orf[0])
#                                               puts "name: #{name},t_status: #{t_status}, t_code: #{t_code}, stop_codon: #{orf[4]}, length: #{tc_fasta.length}, start: #{orf[1]}, end: #{orf[2]}, frame: #{orf[3]}\n\n"

                                        if (t_status != 'unknown')
                                                if (orf[3].to_i < 0)
                                                        if (one_good_orf_minus == true)
                                                                more_than_one_minus = true
                                                                orf.push t_code
                                                                orf.push t_status
                                                                good_orfs_minus.push orf
                                                        else
                                                                one_good_orf_minus = true
                                                                orf.push t_code #orf[5]
                                                                orf.push t_status #orf[6]
                                                                good_orfs_minus.push orf
                                                        end
                                                elsif (orf[3].to_i > 0)
                                                        if (one_good_orf_plus == true)
                                                                more_than_one_plus = true
                                                                orf.push t_code
                                                                orf.push t_status
                                                                good_orfs_plus.push orf
                                                        else
                                                                one_good_orf_plus = true
                                                                orf.push t_code #orf[5]
                                                                orf.push t_status #orf[6]
                                                                good_orfs_plus.push orf
                                                        end
                                                end
                                        end

                                        # if (t_code.to_f > ref_code) # cogemos el de mejor testcode
                                        # puts "name: #{name}, orf[0].length: #{orf[0].length}, ref_orf.length: #{ref_orf.length}, t_status: #{t_status}, one_coding: #{one_coding}\n\n"
                                        if (orf[0].length > ref_orf.length) and (t_status == 'coding') # cogemos el mayor de los coding
                                                # puts "compleeeeeeeeeeeeeeetttttttttttaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                                                ref_code = t_code.to_f
                                                ref_name = name
                                                ref_orf = orf[0]
                                                ref_start = orf[1]
                                                ref_end = orf[2]
                                                ref_frame = orf[3]

                                                one_coding = true
                                                (ref_status,ref_msgs) = checking_beginning(orf[4],t_status,orf[1],tc_fasta,orf[2])

                                        elsif (one_coding == false) and (orf[0].length > ref_orf.length) and (t_status == 'putative_coding')
                                                # puts "putaaaaaaaaaaaaaaaaaaativeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
                                                ref_code = t_code.to_f
                                                ref_name = name
                                                ref_orf = orf[0]
                                                ref_start = orf[1]
                                                ref_end = orf[2]
                                                ref_frame = orf[3]

                                                one_putative = true
                                                (ref_status,ref_msgs) = checking_beginning(orf[4],t_status,orf[1],tc_fasta,orf[2])

                                        elsif (one_coding == false) and (one_putative == false)
                                                # puts "unknoooooooooooooooooooooooooooooooooooooooooooooooooooooooooooown"
                                                ref_status = t_status
                                                ref_code = t_code.to_f
                                                ref_name = name
                                                ref_orf = orf[0]
                                                ref_start = orf[1]
                                                ref_end = orf[2]
                                                ref_frame = orf[3]

                                                ref_msgs = 'Bad testcode score'
                                                # ref_msgs = 'test code did not find a coding region in your sequence'
                                        end
                                        # puts "**name: #{name},ref_status: #{ref_status}, ref_code: #{ref_code}, length: #{tc_fasta.length}, ref_start: #{ref_start}, ref_end: #{ref_end}, ref_frame: #{ref_frame}\n\n"
                                        # puts "t_code: #{t_code}, t_status: #{t_status}"
                                end
                        end

                        # vamos a preparar el mejor ORF y devolverlo
                        (ref_start,ref_end) = corrige_frame(ref_frame,ref_start,ref_end)
                        # puts "**name: #{name}, ref_start: #{ref_start}, ref_end: #{ref_end}, ref_frame: #{ref_frame}\n\n"
                        
                        # si encontramos más de un orf valido
                        if (more_than_one_plus) or (more_than_one_minus)
                                if (ref_frame > 0)
                                        if (more_than_one_plus)
                                                (ref_status,ref_code,ref_name,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) = orf_fusion(good_orfs_plus,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs)
                                                # if (ref_frame == 0)
                                                #         ref_frame = 7
                                                # end
                                                # puts "merging ORFs with orf_fusion!!!!"
                                        end
                                elsif (ref_frame < 0)
                                        if (more_than_one_minus)
                                                (ref_status,ref_code,ref_name,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) = orf_fusion(good_orfs_minus,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs)
                                                # if (ref_frame == 0)
                                                #         ref_frame = -7
                                                # end
                                                # puts "merging ORFs with orf_fusion!!!!"
                                        end
                                end
                        end

                        # puts "name: #{name},t_status: #{ref_status}, t_code: #{ref_code}, frame: #{ref_frame},start: #{ref_start}, end: #{ref_end}\n\n"

                        if (ref_frame < 0)
                                (kk1,kk2,ref_start2,ref_end2) = reverse_seq(tc_fasta,ref_frame,ref_start,ref_end)

                                ref_start_ok = "#{ref_start2} (-#{ref_start})"
                                ref_end_ok = "#{ref_end2} (-#{ref_end})"
                        else
                                ref_start_ok = ref_start
                                ref_end_ok = ref_end
                        end
                end

                        return [ref_name,ref_code,ref_status,ref_start_ok,ref_end_ok,ref_frame,ref_orf,ref_msgs]

        end
testCode_exec(sequence) click to toggle source

test code program functions #########

# File lib/full_lengther_next/test_code.rb, line 703
def testCode_exec (sequence)

        sequence.downcase!
        basesOne = [0,0,0,0];
        basesTwo = [0,0,0,0];
        basesThree = [0,0,0,0];

        #for (j = 0; j < sequence.length; j = j + 3)

        0.step(sequence.length-1,3) do |j|
                if (sequence[j].chr == "g")
                        basesOne[0] = basesOne[0] + 1;
                elsif (sequence[j].chr == "a")
                        basesOne[1] = basesOne[1] + 1;
                elsif (sequence[j].chr == "t")
                        basesOne[2] = basesOne[2] + 1;
                elsif (sequence[j].chr == "c")
                        basesOne[3] = basesOne[3] + 1;
                else
                end
        end

                                        #for (j = 1; j < sequence.length; j = j + 3)
        1.step(sequence.length-1,3) do |j|
                if (sequence[j].chr == "g")
                        basesTwo[0] = basesTwo[0] + 1;
                elsif (sequence[j].chr == "a")
                        basesTwo[1] = basesTwo[1] + 1;
                elsif (sequence[j].chr == "t")
                        basesTwo[2] = basesTwo[2] + 1;
                elsif (sequence[j].chr == "c")
                        basesTwo[3] = basesTwo[3] + 1;
                else
                end
        end


        #for (j = 2; j < sequence.length; j = j + 3)
        2.step(sequence.length-1,3) do |j|
                if (sequence[j].chr == "g")
                        basesThree[0] = basesThree[0] + 1;
                elsif (sequence[j].chr == "a")
                        basesThree[1] = basesThree[1] + 1;
                elsif (sequence[j].chr == "t")
                        basesThree[2] = basesThree[2] + 1;
                elsif (sequence[j].chr == "c")
                        basesThree[3] = basesThree[3] + 1;
                else
                end
        end

        paramG = calcParam(basesOne[0],basesTwo[0],basesThree[0]);
        contentG = countBases(basesOne[0],basesTwo[0],basesThree[0]) / sequence.length.to_f;
        posProbG = usePosParam(paramG,"g");
        contProbG = useContParam(contentG,"g");
        paramA = calcParam(basesOne[1],basesTwo[1],basesThree[1]);
        contentA = countBases(basesOne[1],basesTwo[1],basesThree[1]) / sequence.length.to_f;
        posProbA = usePosParam(paramA,"a");
        contProbA = useContParam(contentA,"a");
        paramT = calcParam(basesOne[2],basesTwo[2],basesThree[2]);
        contentT = countBases(basesOne[2],basesTwo[2],basesThree[2]) / sequence.length.to_f;
        posProbT = usePosParam(paramT,"t");
        contProbT = useContParam(contentT,"t");
        paramC = calcParam(basesOne[3],basesTwo[3],basesThree[3]);
        contentC = countBases(basesOne[3],basesTwo[3],basesThree[3]) / sequence.length.to_f;
        posProbC = usePosParam(paramC,"c");
        contProbC = useContParam(contentC,"c");
        valueY = posProbG * 0.31 + contProbG * 0.15 + posProbA * 0.26 + contProbA * 0.11 + posProbT * 0.33 + contProbT * 0.14 + posProbC * 0.18 + contProbC * 0.12;
        valueY = ((valueY*1000.0).round/1000.0);

        # return 'The TestCode value is <b>' + valueY.to_s + '</b>, which indicates that the sequence ' + getConclusion(valueY) + '.';
        return [valueY, getConclusion(valueY)]
end
type_fusion(prev_type,one_type) click to toggle source

cuando se unen dos regiones de diferentes frames, nos dice de que tipo es la union, Complete, Internal, N-terminus…

# File lib/full_lengther_next/test_code.rb, line 166
def type_fusion(prev_type,one_type)
        res_type = :Internal
        if (prev_type == :C_terminus) #-------------- C-terminus
                        if (one_type == :N_terminus)
                                res_type = :Internal
                        elsif (one_type == :Internal)
                                res_type = :Internal
                        elsif (one_type == :Complete)
                                res_type = :C_terminus
                        end
        elsif (prev_type == :N_terminus) #-------------- N-terminus
                if (one_type == :C_terminus)
                        res_type = :Complete
                elsif (one_type == :Internal)
                        res_type = :N_terminus
                elsif (one_type == :Complete)
                        res_type = :Complete
                end
        elsif (prev_type == :Internal) #-------------- Internal
                if (one_type == :C_terminus)
                        res_type = :C_terminus
                elsif (one_type == :N_terminus)
                        res_type = :Internal
                elsif (one_type == :Complete)
                        res_type = :C_terminus
                end
        elsif (prev_type == :Complete) #-------------- Complete
                if (one_type == :C_terminus)
                        res_type = :Complete
                elsif (one_type == :N_terminus)
                        res_type = :N_terminus
                elsif (one_type == :Internal)
                        res_type = :N_terminus
                end
        end
        return res_type
end
uncomplete_orf_finder(seq) click to toggle source

recorre cada uno de los frames y los pasa a generate_uncomplete_orf

# File lib/full_lengther_next/test_code.rb, line 293
def uncomplete_orf_finder(seq)
        
        s = seq.seq_fasta.upcase
        f1 = s.split('').each_slice(3).map{|e| e.join}
        generate_uncomplete_orf(f1,1,seq)
        
        s.sub!(/^./,'')
        f2 = s.split('').each_slice(3).map{|e| e.join}
        generate_uncomplete_orf(f2,2,seq)
        
        s.sub!(/^./,'')
        f3 = s.split('').each_slice(3).map{|e| e.join}
        generate_uncomplete_orf(f3,3,seq)
        
        # vamos a por los ORFs de la cadena complementaria
        s = seq.seq_fasta.upcase
        s = s.complementary_dna
        
        f4 = s.split('').each_slice(3).map{|e| e.join}
        generate_uncomplete_orf(f4,-1,seq)
        
        s.sub!(/^./,'')
        f5 = s.split('').each_slice(3).map{|e| e.join}
        generate_uncomplete_orf(f5,-2,seq)
        
        s.sub!(/^./,'')
        f6 = s.split('').each_slice(3).map{|e| e.join}
        generate_uncomplete_orf(f6,-3,seq)
        
end
useContParam(paramValue,base) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 828
def useContParam (paramValue,base)
        arrayOfCodingProb = [];
        codeProb = 0;
        if (base == "g")
                arrayOfCodingProb = [0.29,0.33,0.41,0.41,0.73,0.64,0.64,0.47,0.54,0.40]
        elsif (base == "a")
                arrayOfCodingProb = [0.21,0.81,0.65,0.67,0.49,0.62,0.55,0.44,0.49,0.28]
        elsif (base == "t")
                arrayOfCodingProb = [0.58,0.51,0.69,0.56,0.75,0.55,0.40,0.39,0.24,0.28]
        elsif (base == "c")
                arrayOfCodingProb = [0.31,0.39,0.44,0.43,0.59,0.59,0.64,0.51,0.64,0.82]
        end

        if (paramValue >= 0 and paramValue < 0.17)
                codeProb = arrayOfCodingProb[0];
        elsif (paramValue >=0.17 and paramValue < 0.19)
                codeProb = arrayOfCodingProb[1];
        elsif (paramValue >=0.19 and paramValue < 0.21)
                codeProb = arrayOfCodingProb[2];
        elsif (paramValue >=0.21 and paramValue < 0.23)
                codeProb = arrayOfCodingProb[3];
        elsif (paramValue >=0.23 and paramValue < 0.25)
                codeProb = arrayOfCodingProb[4];
        elsif (paramValue >=0.25 and paramValue < 0.27)
                codeProb = arrayOfCodingProb[5];
        elsif (paramValue >=0.27 and paramValue < 0.29)
                codeProb = arrayOfCodingProb[6];
        elsif (paramValue >=0.29 and paramValue < 0.31)
                codeProb = arrayOfCodingProb[7];
        elsif (paramValue >=0.31 and paramValue < 0.33)
                codeProb = arrayOfCodingProb[8];
        elsif (paramValue >=0.33)
                codeProb = arrayOfCodingProb[9];
        end

        return codeProb;
end
usePosParam(paramValue,base) click to toggle source
# File lib/full_lengther_next/test_code.rb, line 789
def usePosParam (paramValue,base)
        arrayOfCodingProb = [];
        codeProb = 0;
        if (base == "g")
                arrayOfCodingProb = [0.08,0.08,0.16,0.27,0.48,0.53,0.64,0.74,0.88,0.90]
        elsif (base == "a")
                arrayOfCodingProb = [0.22,0.20,0.34,0.45,0.68,0.58,0.93,0.84,0.68,0.94]
        elsif (base == "t")
                arrayOfCodingProb = [0.09,0.09,0.20,0.54,0.44,0.69,0.68,0.91,0.97,0.97]
        elsif (base == "c")
                arrayOfCodingProb = [0.23,0.30,0.33,0.51,0.48,0.66,0.81,0.70,0.70,0.80]
        end


        if (paramValue >= 0 and paramValue < 1.1)
                codeProb = arrayOfCodingProb[0];
        elsif (paramValue >=1.1 and paramValue < 1.2)
                codeProb = arrayOfCodingProb[1];
        elsif (paramValue >=1.2 and paramValue < 1.3)
                codeProb = arrayOfCodingProb[2];
        elsif (paramValue >=1.3 and paramValue < 1.4)
                codeProb = arrayOfCodingProb[3];
        elsif (paramValue >=1.4 and paramValue < 1.5)
                codeProb = arrayOfCodingProb[4];
        elsif (paramValue >=1.5 and paramValue < 1.6)
                codeProb = arrayOfCodingProb[5];
        elsif (paramValue >=1.6 and paramValue < 1.7)
                codeProb = arrayOfCodingProb[6];
        elsif (paramValue >=1.7 and paramValue < 1.8)
                codeProb = arrayOfCodingProb[7];
        elsif (paramValue >=1.8 and paramValue < 1.9)
                codeProb = arrayOfCodingProb[8];
        elsif (paramValue >=1.9)
                codeProb = arrayOfCodingProb[9];
        end

        return codeProb;
end
window_walking(seq) click to toggle source

no se usa gnuplot must be installed

# File lib/full_lengther_next/test_code.rb, line 588
def window_walking(seq)
        
        tcode_array = []
        y1=[]
        y2=[]
        
        s = seq.seq_fasta.upcase
        (s.length-200).times do |i|
                (t_code,t_status)=testCode_exec(s[i..i+199])
                y1.push t_code
        end
        
        tcode_array_rc = []
        
        src = s.complementary_dna
        (src.length-200).times do |i|
                (t_code,t_status)=testCode_exec(src[i..i+199])
                # puts "#{i}-#{i+199}"
                # puts t_status
                # puts t_code
                puts src[i..i+199]
                y2.push t_code
        end
        
        # Create lines plot
  p=ScbiPlot::Lines.new('lines.png','title')
  x=(1..src.length-200).entries
  p.add_x(x)
        
        # puts "x_length: #{src.length-200} #{x}"
        # puts "x: #{x.length}, y: #{y1.length}"
        p.add_series('serie0', y1)

        p.do_graph
        
end