class Contig

Attributes

completed[RW]
hits[RW]
length[RW]
mod_coord[RW]
name[RW]
q_frameshift[RW]
s_frameshift[RW]
seq[RW]
stops[RW]
type[RW]

Public Class Methods

new(name) click to toggle source
# File lib/gene_assembler/contig.rb, line 8
# Builds an empty contig identified by +name+.
#
# name - identifier stored in @name.
#
# All collections start empty; @seq, @length and @completed start as empty
# strings and @type as nil until a caller assigns them.
def initialize(name)
  @name = name
  @seq = ''
  @type = nil
  @length = ''
  @hits = []
  @snps = []
  @gos = []
  @completed = ''
  @localization = []
  @q_frameshift = []
  @s_frameshift = []
  @stops = []
  # Records whether the contig coordinates have already been shifted.
  # Uses the `false` literal: the FALSE constant no longer exists in Ruby 3.2+.
  @mod_coord = false
end

Public Instance Methods

add_go(go,name,obsolete) click to toggle source
# File lib/gene_assembler/contig.rb, line 85
# Wraps the given values in a new GO object and appends it to @gos.
#
# go, name, obsolete - forwarded unchanged to GO.new.
#
# Returns the newly created GO instance.
def add_go(go,name,obsolete)
  term = GO.new(go, name, obsolete)
  @gos.push(term)
  term
end
add_hit(hit_name, s_length, reversed, type) click to toggle source
# File lib/gene_assembler/contig.rb, line 103
# Creates a Hit from the given values and appends it to @hits.
#
# hit_name, s_length, reversed, type - forwarded unchanged to Hit.new.
#
# Returns the newly created Hit instance.
def add_hit(hit_name, s_length, reversed, type)
  created = Hit.new(hit_name, s_length, reversed, type)
  @hits.push(created)
  created
end
add_localization(localization) click to toggle source
# File lib/gene_assembler/contig.rb, line 57
# Appends +localization+ to the contig's localization list.
#
# Returns the @localization array itself.
def add_localization(localization)
  @localization.push(localization)
end
add_snp(position) click to toggle source
# File lib/gene_assembler/contig.rb, line 97
# Creates an SNP for +position+ and appends it to @snps.
#
# position - forwarded unchanged to SNP.new.
#
# Returns the newly created SNP instance.
def add_snp(position)
  created = SNP.new(position)
  @snps.push(created)
  created
end
compare(contig) click to toggle source
# File lib/gene_assembler/contig.rb, line 357
   # Aligns this contig against +contig+ using the subject coordinates of
   # their HSPs (per the original author's note).
   #
   # Every HSP of this contig is scored against the HSPs of +contig+ with
   # the HSP's own #compare, whose result is treated as a coverage value.
   #
   # contig - object responding to each_hit; its hits respond to each_hsp.
   #
   # Returns a two-element array [exon_match, exones]:
   #   exon_match - running index (across all hits of +contig+) of the HSP
   #                with the best coverage seen, or -1 when none scored
   #                above zero.
   #   exones     - number of comparisons whose coverage exceeded 0.2.
   def compare(contig)
     exon_match = -1
     exones = 0
     # Literal booleans: the TRUE/FALSE constants were removed in Ruby 3.2.
     match_found = false
     each_hit do |self_hit|
       break if match_found # stop once a definitive match has been found
       self_hit.each_hsp do |self_hsp|
         break if match_found
         # Best coverage and running index restart for every HSP of self.
         last = 0
         contig_hsp_count = 0
         contig.each_hit do |contig_hit|
           contig_hit.each_hsp do |contig_hsp|
             coverage = self_hsp.compare(contig_hsp)
             if coverage > last # keep this match while waiting for a better one
               exon_match = contig_hsp_count
               last = coverage
             end
             exones += 1 if coverage > 0.2 # exon count
             # A match already exists and this HSP does not overlap at all:
             # the remaining HSPs will not match either, so stop searching.
             if coverage == 0 && exon_match > -1
               match_found = true
               break
             end
             contig_hsp_count += 1
           end
           break if match_found
         end
       end
     end
     return exon_match, exones
   end
coor_intrones() click to toggle source
# File lib/gene_assembler/contig.rb, line 507
# Collects the intron boundaries of the first hit on the query.
#
# For every pair of consecutive HSPs the pair
# [previous HSP q_end, next HSP q_beg] is recorded.
#
# Returns an array of two-element arrays; empty when the first hit has
# one HSP or none.
def coor_intrones
  boundaries = []
  return boundaries unless first_hit.hsp_count > 1

  previous = nil
  first_hit.each_hsp_with_index do |hsp, idx|
    boundaries.push([previous.q_end, hsp.q_beg]) if idx > 0
    previous = hsp
  end
  boundaries
end
correct_hsps(blast_coor_type) click to toggle source
# File lib/gene_assembler/contig.rb, line 350
# Asks every hit of the contig to correct its HSPs.
#
# blast_coor_type - forwarded unchanged to each hit's #correct_hsps.
#
# Returns whatever each_hit returns.
def correct_hsps(blast_coor_type)
  each_hit { |entry| entry.correct_hsps(blast_coor_type) }
end
draw() click to toggle source
# File lib/gene_assembler/contig.rb, line 180
# Prints a one-line text sketch of the first hit laid out along the
# subject (per the original author's note).
#
# Before each HSP a separator is printed according to the distance between
# its s_beg and the previous HSP's s_end:
#   '/'         distance 0..2 (HSPs follow one another, normal exon layout)
#   '-' * n '|' distance n > 2
#   '&'         negative distance (overlap)
# followed by the 1-based HSP number centred in a field sized from the HSP
# span. The line ends with '|-' plus one '-' per subject position left after
# the last HSP.
#
# NOTE(review): once an overlap is seen its (negative) size keeps shrinking
# the label width of every later HSP as well — confirm this is intended.
def draw
  previous_end = 0
  shrink = 0
  first_hit.each_hsp_with_index do |hsp, c|
    dif = hsp.s_beg - previous_end
    if dif < 0
      print '&'
      shrink = dif
    elsif dif <= 2
      print '/'
    else
      print '-' * dif
      print '|'
    end
    width = hsp.s_end - hsp.s_beg + shrink - 1
    print "#{(c + 1).to_s.center(width)}"
    previous_end = hsp.s_end
  end
  print '|-'
  remaining = first_hit.s_length - previous_end
  remaining = 0 if remaining < 0
  print '-' * remaining
  puts "\n"
end
each_go() { |go| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 91
# Yields every element of @gos in order.
#
# Returns the @gos array (the value of Array#each).
def each_go
  @gos.each { |term| yield term }
end
each_hit() { |hit| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 117
# Yields every element of @hits in order.
#
# Returns the @hits array (the value of Array#each).
def each_hit
  @hits.each { |entry| yield entry }
end
each_hit_with_index() { |hit,i| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 123
# Yields every element of @hits together with its zero-based position.
#
# Returns the @hits array (the value of Array#each_with_index).
def each_hit_with_index
  @hits.each_with_index { |entry, pos| yield entry, pos }
end
each_localization() { |localization| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 61
# Yields every element of @localization in order.
#
# Returns the @localization array (the value of Array#each).
def each_localization
  @localization.each { |place| yield place }
end
each_localization_with_index() { |localization,i| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 67
# Yields every element of @localization together with its zero-based
# position.
#
# Returns the @localization array (the value of Array#each_with_index).
def each_localization_with_index
  @localization.each_with_index { |place, pos| yield place, pos }
end
each_q_frameshift() { |qfs| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 174
# Yields every element of @q_frameshift in order.
#
# Returns the @q_frameshift array (the value of Array#each).
def each_q_frameshift
  @q_frameshift.each { |shift| yield shift }
end
each_snp() { |snp| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 134
# Yields every element of @snps in order.
#
# Returns the @snps array (the value of Array#each).
def each_snp
  @snps.each { |variant| yield variant }
end
each_snp_with_index() { |snp,i| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 140
# Yields every element of @snps together with its zero-based position.
#
# Returns the @snps array (the value of Array#each_with_index).
def each_snp_with_index
  @snps.each_with_index { |variant, pos| yield variant, pos }
end
each_stop() { |stop| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 73
# Yields every element of @stops in order.
#
# Returns the @stops array (the value of Array#each).
def each_stop
  @stops.each { |codon_pos| yield codon_pos }
end
each_stop_with_index() { |stop,i| ... } click to toggle source
# File lib/gene_assembler/contig.rb, line 79
# Yields every element of @stops together with its zero-based position.
#
# Returns the @stops array (the value of Array#each_with_index).
def each_stop_with_index
  @stops.each_with_index { |codon_pos, pos| yield codon_pos, pos }
end
exon_acumulative() click to toggle source
# File lib/gene_assembler/contig.rb, line 263
# Adds up the lengths of all exons, i.e. every value returned by exones_q.
#
# Returns the total; 0 when exones_q is empty.
def exon_acumulative
  exones_q.inject(0) { |total, size| total + size }
end
exones_q() click to toggle source
# File lib/gene_assembler/contig.rb, line 229
# Lists the size of every HSP/exon measured on the query.
#
# The size is the absolute difference between q_end and q_beg, taken for
# each HSP of each hit in iteration order.
#
# Returns an array of sizes.
def exones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp { |hsp| sizes.push((hsp.q_end - hsp.q_beg).abs) }
  end
  sizes
end
exones_s() click to toggle source
# File lib/gene_assembler/contig.rb, line 218
# Lists the size of every HSP/exon measured on the subject.
#
# The size is the absolute difference between s_end and s_beg, taken for
# each HSP of each hit in iteration order.
#
# Returns an array of sizes.
def exones_s
  sizes = []
  each_hit do |hit|
    hit.each_hsp { |hsp| sizes.push((hsp.s_end - hsp.s_beg).abs) }
  end
  sizes
end
first_hit() click to toggle source
# File lib/gene_assembler/contig.rb, line 32
# Fetches the first hit produced by each_hit.
#
# Returns that hit, or nil when each_hit yields nothing.
def first_hit
  each_hit { |hit| return hit } # leave on the very first element
  nil
end
frameshift_modified_coordenates(add) click to toggle source
# File lib/gene_assembler/contig.rb, line 168
# Shifts every stored query frameshift position by +add+, in place.
#
# add - amount added to each element of @q_frameshift.
#
# Returns the number of elements in @q_frameshift.
def frameshift_modified_coordenates(add)
  @q_frameshift.map! { |pos| pos + add }
  @q_frameshift.length
end
gff(id,parent,add) click to toggle source
# File lib/gene_assembler/contig.rb, line 571
        # Renders the HSPs of the first hit as GFF "exon" lines (exons in
        # genomic coordinates, per the original author's note).
        #
        # id     - value written in the first GFF column.
        # parent - name used for the ID, Parent and Name attributes.
        # add    - offset added to each HSP's q_beg and q_end.
        #
        # Returns an array with one tab-separated line per HSP. The strand
        # column is always '+'; the original kept a disabled snippet that
        # would have flipped coordinates for reversed HSPs.
        def gff(id,parent,add)
          lines = []
          first_hit.each_hsp do |hsp|
            lines.push("#{id}\t.\texon\t#{hsp.q_beg+add}\t#{hsp.q_end+add}\t.\t+\t.\tID=#{parent}_exon;Parent=#{parent};Name=#{parent}_exon")
          end
          lines
        end
gff_prot(id,prot_name) click to toggle source
# File lib/gene_assembler/contig.rb, line 585
# Renders the HSPs of the first hit as GFF "protein_match" lines (exons
# in protein coordinates, per the original author's note).
#
# id        - value written in the first GFF column and in the Name
#             attribute.
# prot_name - name used for the ID and Parent attributes.
#
# Returns an array with one tab-separated line per HSP, using the HSP's
# s_beg and s_end as the coordinates.
def gff_prot(id,prot_name)
  lines = []
  first_hit.each_hsp do |hsp|
    lines.push("#{id}\t.\tprotein_match\t#{hsp.s_beg}\t#{hsp.s_end}\t.\t+\t.\tID=#{prot_name}_prot;Parent=#{prot_name};Name=#{id}_prot")
  end
  lines
end
has_hit?() click to toggle source
# File lib/gene_assembler/contig.rb, line 109
# Reports whether the contig holds at least one hit.
#
# Returns true when @hits is non-empty, false otherwise.
def has_hit?
  # Plain boolean expression: the TRUE/FALSE constants the method used to
  # return were removed in Ruby 3.2.
  !@hits.empty?
end
hit_count() click to toggle source
# File lib/gene_assembler/contig.rb, line 49
# Counts the hits produced by each_hit.
#
# Returns the number of hits as an Integer.
def hit_count
  total = 0
  each_hit { |_hit| total += 1 }
  total
end
hits_sort!() click to toggle source
# File lib/gene_assembler/contig.rb, line 146
# Asks every hit to sort its own HSPs via #hsps_sort!.
# The order of the hits themselves is left untouched.
#
# Returns whatever each_hit returns.
def hits_sort!
  each_hit(&:hsps_sort!)
end
hsp_at(position) click to toggle source
# File lib/gene_assembler/contig.rb, line 623
# Looks up an HSP by its running position across all hits.
#
# HSPs are numbered from 0 in iteration order: all HSPs of the first hit,
# then those of the second hit, and so on.
#
# position - zero-based running index of the wanted HSP.
#
# Returns the HSP at that position, or nil when there is none.
def hsp_at(position)
  index = 0
  each_hit do |hit|
    hit.each_hsp do |hsp|
      # Returning directly replaces the former found-flag, which relied on
      # the TRUE/FALSE constants removed in Ruby 3.2.
      return hsp if position == index
      index += 1
    end
  end
  nil
end
hsp_minor_than?(hsp_length) click to toggle source
# File lib/gene_assembler/contig.rb, line 337
# Reports whether any hit answers true to #hsp_minor_than? for the given
# length (expressed in nt, per the original author's note).
#
# hsp_length - threshold forwarded unchanged to each hit.
#
# Returns true as soon as one hit reports a shorter HSP, false otherwise.
def hsp_minor_than?(hsp_length)
  each_hit_with_index do |hit, i|
    # NOTE(review): this guard compares the hit index with the hit's HSP
    # count and, being an OR, only skips a first hit that has no HSPs.
    # It is kept as written; confirm the intended condition.
    next unless i > 0 || i < hit.hsp_count
    # Early return replaces the former TRUE/FALSE flag (constants removed
    # in Ruby 3.2).
    return true if hit.hsp_minor_than?(hsp_length)
  end
  false
end
indices() click to toggle source
# File lib/gene_assembler/contig.rb, line 210
# Prints the subject and query coordinates of every HSP in the contig.
#
# One tab-separated line is written per HSP: "<hit>.<hsp>)" (both
# 1-based), q_beg, q_end, s_beg, s_end, then the contig's @name, @length
# and @mod_coord.
#
# Returns whatever each_hit_with_index returns.
def indices
  each_hit_with_index do |hit, ind|
    hit.each_hsp_with_index do |hsp, i|
      row = "#{ind+1}.#{i+1})\t#{hsp.q_beg}\t#{hsp.q_end}\t#{hsp.s_beg}\t#{hsp.s_end}\t#{@name}\t#{@length}\t#{@mod_coord}"
      puts row
    end
  end
end
intrones_q() click to toggle source
# File lib/gene_assembler/contig.rb, line 240
# Lists the size of every intron measured on the query.
#
# For the HSP at position ind, the intron size is the absolute distance
# between its q_end and the q_beg of the first hit's HSP at ind + 1.
# Iteration over a hit stops as soon as the first hit has no HSP at
# ind + 1.
#
# Returns an array of sizes; empty when the contig has no hits. The
# original read first_hit.hsp_count into an unused local, which raised
# NoMethodError on a contig without hits.
def intrones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp_with_index do |hsp, ind|
      # Look the following HSP up once instead of twice.
      following = first_hit.hsp_at(ind + 1)
      break unless following

      sizes << (following.q_beg - hsp.q_end).abs
    end
  end
  sizes
end
is_gapped?() click to toggle source
# File lib/gene_assembler/contig.rb, line 304
# Checks the first hit for internal gaps between consecutive HSPs on the
# subject. Per the original author's note, such a gap in the gene
# structure mapped onto the protein may signal a partially or completely
# missing exon.
#
# A gap is reported when an HSP's s_beg exceeds the previous HSP's s_end
# by more than 3 (maximum allowed gap, in aa; must be at least 1).
# Overlapping coordinates give a negative distance and never count.
#
# Returns true when such a gap exists, false otherwise.
def is_gapped?
  gap = 3
  s_end_last = 0
  @hits.first.hsps.each do |hsp|
    # The comparison is skipped while no positive s_end has been seen.
    # Early return replaces the TRUE/FALSE flag (constants removed in
    # Ruby 3.2).
    return true if s_end_last > 0 && hsp.s_beg - s_end_last > gap

    s_end_last = hsp.s_end
  end
  false
end
is_one_hsp?() click to toggle source
# File lib/gene_assembler/contig.rb, line 296
# Reports whether the first hit is made of exactly one HSP.
#
# Returns true when first_hit.hsp_count equals 1, false otherwise.
def is_one_hsp?
  # Direct comparison: the TRUE/FALSE constants formerly returned here
  # were removed in Ruby 3.2.
  first_hit.hsp_count == 1
end
is_truncated?() click to toggle source
# File lib/gene_assembler/contig.rb, line 321
        # Checks whether the exons at the edges of the contig look
        # truncated/interrupted (per the original author's note).
        #
        # Start: the first HSP of the first hit begins at query position 1
        #        while its subject start is beyond 1.
        # End:   the last HSP of the first hit ends exactly at @length.
        #
        # Returns true when either condition holds, false otherwise.
        def is_truncated?
          hit = first_hit
          # Both checks are evaluated, as in the original. Boolean
          # expressions replace the TRUE/FALSE constants removed in
          # Ruby 3.2.
          cut_at_start = hit.first_hsp.s_beg > 1 && hit.first_hsp.q_beg == 1
          cut_at_end = hit.last_hsp.q_end == @length
          cut_at_start || cut_at_end
        end
last_hit() click to toggle source
# File lib/gene_assembler/contig.rb, line 41
# Fetches the last hit produced by each_hit.
#
# Returns that hit, or nil when each_hit yields nothing.
def last_hit
  final = nil
  each_hit { |hit| final = hit } # the last assignment wins
  final
end
length=(length) click to toggle source
# File lib/gene_assembler/contig.rb, line 24
# Stores the contig length.
#
# length - a String is converted with to_i before being stored; any
#          other value is stored untouched.
def length=(length)
  @length = length.instance_of?(String) ? length.to_i : length
end
mixed?() click to toggle source

Funciones para comprobar validez de los contig

# File lib/gene_assembler/contig.rb, line 273
# Checks whether the HSPs of the first hit are out of order on the query.
#
# Walks the HSPs tracking the direction of each step between consecutive
# q_beg values (1 when it does not decrease, 0 when it decreases). A
# direction change counts only from the third HSP on, so the very first
# step just fixes the direction.
#
# Returns true when the direction changes, false otherwise.
def mixed?
  previous_beg = nil
  previous_sign = 0
  @hits.first.hsps.each_with_index do |hsp, c|
    unless previous_beg.nil?
      sign = hsp.q_beg - previous_beg >= 0 ? 1 : 0
      # Early return replaces the TRUE/FALSE flag (constants removed in
      # Ruby 3.2).
      return true if sign != previous_sign && c > 1

      previous_sign = sign
    end
    previous_beg = hsp.q_beg
  end
  false
end
modified_coordenates(add) click to toggle source
# File lib/gene_assembler/contig.rb, line 152
# Shifts every coordinate stored in the contig by +add+ and marks the
# contig as modified.
#
# add - offset forwarded to each hit's #modified_coordenates and applied
#       to the stop and query-frameshift positions.
#
# The stop and frameshift lists belong to the contig, not to a hit, so
# they are shifted exactly once. The original shifted them inside the
# per-hit loop, moving them by add * (number of hits) on multi-hit contigs.
#
# Returns the q_end of the last HSP of the last hit.
def modified_coordenates(add)
  @mod_coord = true # `true` literal: the TRUE constant was removed in Ruby 3.2
  each_hit { |hit| hit.modified_coordenates(add) }
  stop_modified_coordenates(add)
  frameshift_modified_coordenates(add)
  last_hit.last_hsp.q_end
end
n_hits?() click to toggle source
# File lib/gene_assembler/contig.rb, line 129
# Number of hits stored in the contig.
#
# Despite the trailing '?', this returns an Integer, not a boolean.
def n_hits?
  @hits.size
end
n_intron() click to toggle source
# File lib/gene_assembler/contig.rb, line 255
# Counts one unit per hit produced by each_hit.
#
# NOTE(review): despite the name, the value is the number of hits, not a
# count derived from HSP boundaries — confirm against callers.
#
# Returns the count as an Integer.
def n_intron
  total = 0
  each_hit { |_hit| total += 1 }
  total
end
overlap() click to toggle source
# File lib/gene_assembler/contig.rb, line 606
# Gathers the overlap values of the contig.
#
# For each hit, its #hsp_overlap result is collected; from the second hit
# on, the value of #overlap_with against the previous hit is added too
# when it is negative.
#
# Returns a single flat array with all collected values.
def overlap
  found = []
  previous = nil
  each_hit_with_index do |hit, idx|
    found << hit.hsp_overlap
    if idx > 0
      distance = hit.overlap_with(previous)
      found << distance if distance < 0
    end
    previous = hit
  end
  found.flatten!
  found
end
rev_comp() click to toggle source
# File lib/gene_assembler/contig.rb, line 407
      # Replaces @seq with its reverse complement.
      #
      # Every character is upcased; A/T and G/C are swapped, any other
      # character is kept (upcased) as is; the result is then reversed.
      #
      # Returns the new value of @seq.
      def rev_comp
        pairs = { 'A' => 'T', 'T' => 'A', 'G' => 'C', 'C' => 'G' }
        complemented = @seq.each_char.map do |base|
          upper = base.upcase
          pairs.fetch(upper, upper)
        end
        @seq = complemented.reverse.join
      end
rev_comp_if_hit() click to toggle source
# File lib/gene_assembler/contig.rb, line 400
# Reverse-complements the contig sequence when the first hit is flagged
# as reversed, then clears that flag so the operation is not repeated.
#
# Returns false after a reversal, nil when nothing was done.
def rev_comp_if_hit
  return unless first_hit.reversed

  rev_comp
  # `false` literal: the FALSE constant was removed in Ruby 3.2.
  first_hit.reversed = false
end
rev_coord() click to toggle source
# File lib/gene_assembler/contig.rb, line 428
# Asks every hit to reverse its coordinates relative to the contig length
# and then to re-sort its HSPs.
#
# Returns whatever each_hit returns.
def rev_coord
  each_hit do |entry|
    entry.rev_coord(@length)
    entry.hsps_sort!
  end
end
stop_codon(codon,ends,*beg) click to toggle source
# File lib/gene_assembler/contig.rb, line 481
# Searches @seq for the first occurrence of +codon+ that is in frame with
# a reference position (search for possible stop codons).
#
# codon - string to look for.
# ends  - with no extra argument: index where the search starts; the
#         frame reference is ends - 1.
# beg   - optional; when given, +ends+ becomes the frame reference and
#         the first extra argument is the index where the search starts.
#
# An occurrence is in frame when its distance to the reference is a
# multiple of 3.
#
# Returns the index of the first in-frame occurrence, or nil.
def stop_codon(codon,ends,*beg)
  reference = ends - 1
  unless beg.empty?
    reference = ends
    ends = beg.first
  end
  # Loop until String#index finds nothing; this replaces the `follow`
  # flag that depended on the TRUE/FALSE constants removed in Ruby 3.2.
  while (pos = @seq.index(codon, ends))
    return pos if (pos - reference).abs.modulo(3) == 0

    ends = pos + 1 # out of frame: resume right after this occurrence
  end
  nil
end
stop_modified_coordenates(add) click to toggle source
# File lib/gene_assembler/contig.rb, line 162
# Shifts every stored stop position by +add+, in place.
#
# add - amount added to each element of @stops.
#
# Returns the number of elements in @stops.
def stop_modified_coordenates(add)
  @stops.map! { |pos| pos + add }
  @stops.length
end
transfer_contig_hits(contig) click to toggle source
# File lib/gene_assembler/contig.rb, line 596
# Copies every hit of +contig+ into this contig through transfer_hit.
#
# contig - object responding to each_hit.
#
# Returns whatever contig.each_hit returns.
def transfer_contig_hits(contig)
  contig.each_hit do |incoming|
    transfer_hit(incoming)
  end
end
transfer_hit(hit) click to toggle source
# File lib/gene_assembler/contig.rb, line 602
# Appends an existing hit object to this contig's hit list.
#
# Returns the @hits array itself.
def transfer_hit(hit)
  @hits.push(hit)
end