class Contig
Attributes
completed[RW]
hits[RW]
length[RW]
mod_coord[RW]
name[RW]
q_frameshift[RW]
s_frameshift[RW]
seq[RW]
stops[RW]
type[RW]
Public Class Methods
new(name)
click to toggle source
# File lib/gene_assembler/contig.rb, line 8
# Creates an empty contig called +name+.
#
# Every collection (hits, SNPs, GO terms, localizations, frameshifts, stops)
# starts empty; +seq+, +length+ and +completed+ start as empty strings and
# +type+ as nil.
#
# name:: identifier stored in @name.
def initialize(name)
  @name = name
  @seq = ''
  @type = nil
  @length = ''
  @hits = []
  @snps = []
  @gos = []
  @completed = ''
  @localization = []
  @q_frameshift = []
  @s_frameshift = []
  @stops = []
  # Records whether the contig coordinates have already been altered.
  # Uses the +false+ literal: the FALSE constant is deprecated in Ruby.
  @mod_coord = false
end
Public Instance Methods
add_go(go,name,obsolete)
click to toggle source
# File lib/gene_assembler/contig.rb, line 85
# Builds a GO object from the given arguments, appends it to @gos and
# returns the new object.
#
# go, name, obsolete:: forwarded unchanged to GO.new.
def add_go(go, name, obsolete)
  term = GO.new(go, name, obsolete)
  @gos.push(term)
  term
end
add_hit(hit_name, s_length, reversed, type)
click to toggle source
# File lib/gene_assembler/contig.rb, line 103
# Builds a Hit from the given arguments, appends it to @hits and returns it.
#
# hit_name, s_length, reversed, type:: forwarded unchanged to Hit.new.
def add_hit(hit_name, s_length, reversed, type)
  new_hit = Hit.new(hit_name, s_length, reversed, type)
  @hits.push(new_hit)
  new_hit
end
add_localization(localization)
click to toggle source
# File lib/gene_assembler/contig.rb, line 57
# Appends +localization+ to the contig's localization list and returns
# that list.
def add_localization(localization)
  @localization.push(localization)
end
add_snp(position)
click to toggle source
# File lib/gene_assembler/contig.rb, line 97
# Builds a SNP for +position+, appends it to @snps and returns it.
def add_snp(position)
  variant = SNP.new(position)
  @snps.push(variant)
  variant
end
compare(contig)
click to toggle source
# File lib/gene_assembler/contig.rb, line 357
# Aligns this contig against +contig+ based on subject coordinates.
#
# Every HSP of this contig is compared (via Hsp#compare, which yields a
# coverage value) against the HSPs of +contig+, which are numbered
# consecutively across all of its hits.
#
# contig:: object responding to each_hit whose hits respond to each_hsp.
#
# Returns a two-element array [exon_match, exones]:
# exon_match:: index of the HSP of +contig+ with the best coverage seen, or
#              -1 when nothing overlapped.
# exones::     number of comparisons whose coverage exceeded 0.2.
def compare(contig)
  exon_match = -1
  exones = 0
  match_found = false # +true+/+false+ literals; the TRUE/FALSE constants are deprecated
  each_hit do |self_hit|
    # Stop as soon as a definitive match has been found.
    break if match_found

    self_hit.each_hsp do |self_hsp|
      break if match_found

      # Best coverage and running HSP index restart for every HSP of self.
      last = 0
      contig_hsp_count = 0
      contig.each_hit do |contig_hit|
        contig_hit.each_hsp do |contig_hsp|
          coverage = self_hsp.compare(contig_hsp)
          if coverage > last
            # Keep this candidate while waiting for a better one.
            exon_match = contig_hsp_count
            last = coverage
          end
          # Exon count.
          exones += 1 if coverage > 0.2
          if coverage == 0 && exon_match > -1
            # A match was already recorded and this exon does not overlap:
            # treat the recorded match as final.
            match_found = true
            break
          end
          contig_hsp_count += 1
        end
        break if match_found
      end
    end
  end
  return exon_match, exones
end
coor_intrones()
click to toggle source
# File lib/gene_assembler/contig.rb, line 507
# Determines the intron borders of the first hit.
#
# Returns an array of [q_end of previous HSP, q_beg of current HSP] pairs,
# one per pair of consecutive HSPs; empty when the first hit has a single
# HSP.
def coor_intrones
  borders = []
  return borders unless first_hit.hsp_count > 1

  previous = nil
  first_hit.each_hsp_with_index do |hsp, idx|
    borders << [previous.q_end, hsp.q_beg] if idx > 0
    previous = hsp
  end
  borders
end
correct_hsps(blast_coor_type)
click to toggle source
# File lib/gene_assembler/contig.rb, line 350
# Delegates to Hit#correct_hsps on every hit of the contig.
#
# blast_coor_type:: passed through unchanged to each hit.
def correct_hsps(blast_coor_type)
  each_hit do |hit|
    hit.correct_hsps(blast_coor_type)
  end
end
draw()
click to toggle source
# File lib/gene_assembler/contig.rb, line 180
# Prints a one-line text representation of the contig at subject level,
# using the HSPs of the first hit.
#
# Before each HSP a separator is printed depending on the distance between
# its s_beg and the previous HSP's s_end: '/' for 0..2 (HSPs following one
# another, the normal exon layout), a run of '-' plus '|' for a larger
# distance, '&' when the HSPs overlap. The 1-based HSP number is then
# printed centred in a field sized from the HSP's subject span.
def draw
  last_hsp_end = 0
  # NOTE(review): once set by an overlapping HSP this value is never reset,
  # so it also narrows the fields of later HSPs - confirm this is intended.
  overlap = 0
  first_hit.each_hsp_with_index do |hsp, c|
    dif = hsp.s_beg - last_hsp_end
    if dif < 0
      print '&'
      overlap = dif
    elsif dif <= 2
      print '/'
    else
      print '-' * dif
      print '|'
    end
    label = (c + 1).to_s # HSP number
    print label.center(hsp.s_end - hsp.s_beg + overlap - 1)
    last_hsp_end = hsp.s_end
  end
  print '|-'
  # Pad up to the subject length; never a negative repeat count.
  ending = first_hit.s_length - last_hsp_end
  ending = 0 if ending < 0
  print '-' * ending
  puts "\n"
end
each_go() { |go| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 91
# Yields every GO object stored in the contig, in insertion order.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator over the same objects instead of raising LocalJumpError.
def each_go(&block)
  @gos.each(&block)
end
each_hit() { |hit| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 117
# Yields every hit stored in the contig, in insertion order.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator over the same objects instead of raising LocalJumpError.
def each_hit(&block)
  @hits.each(&block)
end
each_hit_with_index() { |hit,i| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 123
# Yields every hit together with its 0-based position in @hits.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator of [hit, index] pairs instead of raising LocalJumpError.
def each_hit_with_index(&block)
  @hits.each_with_index(&block)
end
each_localization() { |localization| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 61
# Yields every localization stored in the contig, in insertion order.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator over the same objects instead of raising LocalJumpError.
def each_localization(&block)
  @localization.each(&block)
end
each_localization_with_index() { |localization,i| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 67
# Yields every localization together with its 0-based position.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator of [localization, index] pairs instead of raising
# LocalJumpError.
def each_localization_with_index(&block)
  @localization.each_with_index(&block)
end
each_q_frameshift() { |qfs| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 174
# Yields every entry of @q_frameshift, in insertion order.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator over the same entries instead of raising LocalJumpError.
def each_q_frameshift(&block)
  @q_frameshift.each(&block)
end
each_snp() { |snp| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 134
# Yields every SNP stored in the contig, in insertion order.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator over the same objects instead of raising LocalJumpError.
def each_snp(&block)
  @snps.each(&block)
end
each_snp_with_index() { |snp,i| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 140
# Yields every SNP together with its 0-based position in @snps.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator of [snp, index] pairs instead of raising LocalJumpError.
def each_snp_with_index(&block)
  @snps.each_with_index(&block)
end
each_stop() { |stop| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 73
# Yields every entry of @stops, in insertion order.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator over the same entries instead of raising LocalJumpError.
def each_stop(&block)
  @stops.each(&block)
end
each_stop_with_index() { |stop,i| ... }
click to toggle source
# File lib/gene_assembler/contig.rb, line 79
# Yields every entry of @stops together with its 0-based position.
#
# With a block, returns the underlying array. Without a block, returns an
# Enumerator of [stop, index] pairs instead of raising LocalJumpError.
def each_stop_with_index(&block)
  @stops.each_with_index(&block)
end
exon_acumulative()
click to toggle source
# File lib/gene_assembler/contig.rb, line 263
# Adds up the lengths of all exons.
#
# Returns the sum of the values returned by exones_q (0 when that list is
# empty).
def exon_acumulative
  exones_q.inject(0) { |total, exon_length| total + exon_length }
end
exones_q()
click to toggle source
# File lib/gene_assembler/contig.rb, line 229
# Returns an array with the size of every HSP/exon at query level.
#
# The size of an HSP is |q_end - q_beg|; HSPs are visited hit by hit in
# iteration order.
def exones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp do |hsp|
      sizes << (hsp.q_end - hsp.q_beg).abs
    end
  end
  sizes
end
exones_s()
click to toggle source
# File lib/gene_assembler/contig.rb, line 218
# Returns an array with the size of every HSP/exon at subject level.
#
# The size of an HSP is |s_end - s_beg|; HSPs are visited hit by hit in
# iteration order.
def exones_s
  sizes = []
  each_hit do |hit|
    hit.each_hsp do |hsp|
      sizes << (hsp.s_end - hsp.s_beg).abs
    end
  end
  sizes
end
first_hit()
click to toggle source
# File lib/gene_assembler/contig.rb, line 32
# Returns the first hit of the contig, or nil when it has no hits.
def first_hit
  @hits.first
end
frameshift_modified_coordenates(add)
click to toggle source
# File lib/gene_assembler/contig.rb, line 168
# Shifts every entry of @q_frameshift by +add+, in place.
#
# Returns the number of entries in @q_frameshift.
def frameshift_modified_coordenates(add)
  @q_frameshift.map! { |position| position + add }
  @q_frameshift.length
end
gff(id,parent,add)
click to toggle source
# File lib/gene_assembler/contig.rb, line 571
# Returns the exons in genomic coordinates as GFF-formatted lines.
#
# One tab-separated "exon" line is produced per HSP of the first hit, with
# q_beg/q_end shifted by +add+. The strand column is always '+'; the
# original source carries a disabled adjustment for reversed HSPs.
#
# id::     value for the first column.
# parent:: name used for the ID, Parent and Name attributes.
# add::    offset added to both query coordinates.
#
# Returns an array of strings.
def gff(id, parent, add)
  lines = []
  first_hit.each_hsp do |hsp|
    exon_start = hsp.q_beg + add
    exon_end = hsp.q_end + add
    lines << "#{id}\t.\texon\t#{exon_start}\t#{exon_end}\t.\t+\t.\tID=#{parent}_exon;Parent=#{parent};Name=#{parent}_exon"
  end
  lines
end
gff_prot(id,prot_name)
click to toggle source
# File lib/gene_assembler/contig.rb, line 585
# Returns the exons in protein coordinates as GFF-formatted lines.
#
# One tab-separated "protein_match" line is produced per HSP of the first
# hit, using its s_beg/s_end. The strand column is always '+'.
#
# id::        value for the first column and the Name attribute prefix.
# prot_name:: name used for the ID and Parent attributes.
#
# Returns an array of strings.
def gff_prot(id, prot_name)
  lines = []
  first_hit.each_hsp do |hsp|
    match_start = hsp.s_beg
    match_end = hsp.s_end
    lines << "#{id}\t.\tprotein_match\t#{match_start}\t#{match_end}\t.\t+\t.\tID=#{prot_name}_prot;Parent=#{prot_name};Name=#{id}_prot"
  end
  lines
end
has_hit?()
click to toggle source
# File lib/gene_assembler/contig.rb, line 109
# Returns true when the contig holds at least one hit, false otherwise.
def has_hit?
  !@hits.empty?
end
hit_count()
click to toggle source
# File lib/gene_assembler/contig.rb, line 49
# Returns the number of hits stored in the contig.
def hit_count
  @hits.length
end
hits_sort!()
click to toggle source
# File lib/gene_assembler/contig.rb, line 146
# Calls hsps_sort! on every hit of the contig.
#
# Only the HSPs inside each hit are sorted; the order of the hits
# themselves is not touched.
def hits_sort!
  each_hit(&:hsps_sort!)
end
hsp_at(position)
click to toggle source
# File lib/gene_assembler/contig.rb, line 623
# Returns the HSP found at +position+ when the HSPs of all hits are
# numbered consecutively from 0 in iteration order.
#
# position:: 0-based global HSP index.
#
# Returns the HSP, or nil when +position+ is out of range.
def hsp_at(position)
  index = 0
  each_hit do |hit|
    hit.each_hsp do |hsp|
      return hsp if index == position

      index += 1
    end
  end
  nil
end
hsp_minor_than?(hsp_length)
click to toggle source
# File lib/gene_assembler/contig.rb, line 337
# Reports whether any hit answers true to Hit#hsp_minor_than?(hsp_length).
#
# hsp_length:: threshold, in nucleotides, forwarded to each hit.
#
# Returns true or false.
def hsp_minor_than?(hsp_length)
  each_hit_with_index do |hit, i|
    # NOTE(review): this guard is only false for the first hit when it has
    # no HSPs; it is kept exactly as in the original - confirm the intent.
    next unless i > 0 || i < hit.hsp_count

    return true if hit.hsp_minor_than?(hsp_length)
  end
  false
end
indices()
click to toggle source
# File lib/gene_assembler/contig.rb, line 210
# Prints the query and subject coordinates of every HSP of the contig.
#
# Each output line is tab-separated: "<hit no>.<hsp no>)", q_beg, q_end,
# s_beg, s_end, contig name, contig length and the mod_coord flag. Hit and
# HSP numbers are 1-based.
def indices
  each_hit_with_index do |hit, ind|
    hit.each_hsp_with_index do |hsp, i|
      puts "#{ind+1}.#{i+1})\t#{hsp.q_beg}\t#{hsp.q_end}\t#{hsp.s_beg}\t#{hsp.s_end}\t#{@name}\t#{@length}\t#{@mod_coord}"
    end
  end
end
intrones_q()
click to toggle source
# File lib/gene_assembler/contig.rb, line 240
# Returns an array with the size of every intron at query level.
#
# For each HSP the following HSP is looked up in the first hit; the intron
# size is |next.q_beg - hsp.q_end|. Iteration over a hit stops as soon as
# there is no following HSP. A contig without hits yields an empty array.
#
# NOTE(review): the following HSP is always taken from the first hit, even
# while iterating over later hits - confirm against callers that use
# contigs with more than one hit.
def intrones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp_with_index do |hsp, ind|
      next_hsp = first_hit.hsp_at(ind + 1)
      break unless next_hsp

      sizes << (next_hsp.q_beg - hsp.q_end).abs
    end
  end
  sizes
end
is_gapped?()
click to toggle source
# File lib/gene_assembler/contig.rb, line 304
# Examines whether there are internal gaps in the gene structure mapped on
# the protein that could point to a partially or completely missing exon.
#
# Consecutive HSPs of the first hit are compared: a gap exists when an
# HSP's s_beg exceeds the previous HSP's s_end by more than +gap+.
# Overlapping coordinates give a negative difference and never count.
#
# gap:: maximum allowed gap, measured in amino acids (at least 1).
#       Defaults to 3, the value that used to be hard-coded.
#
# Returns true or false.
def is_gapped?(gap = 3)
  s_end_last = 0
  @hits.first.hsps.each do |hsp|
    # A previous end of 0 means there is nothing to compare against yet.
    return true if s_end_last > 0 && hsp.s_beg - s_end_last > gap

    s_end_last = hsp.s_end
  end
  false
end
is_one_hsp?()
click to toggle source
# File lib/gene_assembler/contig.rb, line 296
# Examines whether the first hit is made of a single HSP.
#
# Returns true when first_hit.hsp_count is exactly 1, false otherwise.
def is_one_hsp?
  first_hit.hsp_count == 1
end
is_truncated?()
click to toggle source
# File lib/gene_assembler/contig.rb, line 321
# Examines whether the exons at the contig borders are
# truncated/interrupted.
#
# The contig counts as truncated at the start when the first HSP of the
# first hit begins at query position 1 but after subject position 1, and
# at the end when the last HSP's q_end equals the contig length.
#
# Returns true or false.
def is_truncated?
  hit = first_hit
  truncated_at_start = hit.first_hsp.s_beg > 1 && hit.first_hsp.q_beg == 1
  truncated_at_end = hit.last_hsp.q_end == @length
  truncated_at_start || truncated_at_end
end
last_hit()
click to toggle source
# File lib/gene_assembler/contig.rb, line 41
# Returns the last hit of the contig, or nil when it has no hits.
def last_hit
  @hits.last
end
length=(length)
click to toggle source
# File lib/gene_assembler/contig.rb, line 24
# Sets the contig length.
#
# A String (or any String subclass) is converted with to_i; every other
# value is stored as given.
def length=(length)
  @length = length.is_a?(String) ? length.to_i : length
end
mixed?()
click to toggle source
Funciones para comprobar la validez de los contigs
# File lib/gene_assembler/contig.rb, line 273
# Examines whether the HSPs of the first hit are out of order.
#
# The direction between consecutive HSPs is derived from their q_beg
# values (1 when q_beg does not decrease, 0 otherwise). From the third HSP
# on, a direction different from the previous one marks the hit as mixed.
#
# Returns true or false.
def mixed?
  previous_beg = nil
  previous_direction = 0
  @hits.first.hsps.each_with_index do |hsp, c|
    unless previous_beg.nil?
      direction = hsp.q_beg - previous_beg >= 0 ? 1 : 0
      # The first pair only establishes the direction; it is never flagged.
      return true if direction != previous_direction && c > 1

      previous_direction = direction
    end
    previous_beg = hsp.q_beg
  end
  false
end
modified_coordenates(add)
click to toggle source
# File lib/gene_assembler/contig.rb, line 152
# Shifts all coordinates of the contig by +add+ and marks the contig as
# modified (@mod_coord).
#
# Every hit is shifted through Hit#modified_coordenates. The contig-level
# stop and query-frameshift positions are shifted exactly once, regardless
# of the number of hits (they used to be shifted once per hit).
#
# add:: offset applied to the coordinates.
#
# Returns the q_end of the last HSP of the last hit.
def modified_coordenates(add)
  @mod_coord = true
  each_hit { |hit| hit.modified_coordenates(add) }
  stop_modified_coordenates(add)
  frameshift_modified_coordenates(add)
  last_hit.last_hsp.q_end
end
n_hits?()
click to toggle source
# File lib/gene_assembler/contig.rb, line 129
# Returns the number of hits stored in the contig (an Integer, despite the
# trailing question mark in the method name).
def n_hits?
  @hits.length
end
n_intron()
click to toggle source
# File lib/gene_assembler/contig.rb, line 255
# Returns the number of hits stored in the contig.
#
# NOTE(review): despite its name this method counts one unit per hit, not
# per intron; the result is kept as in the original - confirm with callers
# before changing it.
def n_intron
  @hits.length
end
overlap()
click to toggle source
# File lib/gene_assembler/contig.rb, line 606
# Collects the overlap values of the contig.
#
# For every hit the result of Hit#hsp_overlap is gathered; from the second
# hit on, the value of Hit#overlap_with(previous hit) is gathered too when
# it is negative. The collection is flattened before it is returned.
def overlap
  collected = []
  previous_hit = nil
  each_hit_with_index do |hit, idx|
    collected << hit.hsp_overlap
    if idx > 0
      difference = hit.overlap_with(previous_hit)
      collected << difference if difference < 0
    end
    previous_hit = hit
  end
  collected.flatten!
  collected
end
rev_comp()
click to toggle source
# File lib/gene_assembler/contig.rb, line 407
# Replaces @seq with its reverse complement.
#
# The sequence is upper-cased, A/T and G/C are swapped, every other
# character is kept as is, and the result is reversed.
#
# Returns the new sequence.
def rev_comp
  @seq = @seq.upcase.tr('ATGC', 'TACG').reverse
end
rev_comp_if_hit()
click to toggle source
# File lib/gene_assembler/contig.rb, line 400
# Reverse-complements the contig sequence when the first hit is flagged as
# reversed, then clears that flag.
#
# Returns nil when the first hit is not reversed, false otherwise.
def rev_comp_if_hit
  return unless first_hit.reversed

  rev_comp
  first_hit.reversed = false
end
rev_coord()
click to toggle source
# File lib/gene_assembler/contig.rb, line 428
# For every hit, calls Hit#rev_coord with the contig length and then
# re-sorts the hit's HSPs with hsps_sort!.
def rev_coord
  each_hit do |hit|
    hit.rev_coord(@length)
    hit.hsps_sort!
  end
end
start_codon_search()
click to toggle source
# File lib/gene_assembler/contig.rb, line 435
# Searches for a start codon and, once found, modifies the coordinates of
# the first HSP of the first hit to accommodate it.
#
# The search only runs when that HSP starts within the first 10 positions
# of the subject (s_beg <= 10). 'ATG' occurrences in @seq are scanned from
# the left; positions are handled 1-based:
# * an ATG exactly at q_beg is accepted and ends the search;
# * an ATG beyond q_beg + 2 ends the search;
# * an earlier ATG is remembered when its distance to q_end is a multiple
#   of 3, and the scan continues.
# The search also ends when @seq holds no further ATG (this case used to
# raise NoMethodError because nil was incremented before being checked).
#
# Returns true when a codon was accepted (s_beg is then set to 1 and q_beg
# to the codon position), false otherwise.
def start_codon_search
  first_hsp = @hits.first.hsps.first
  # Look for a codon only if the protein lacks at most its first 10 aa.
  return false unless first_hsp.s_beg <= 10

  q_beg = first_hsp.q_beg
  q_end = first_hsp.q_end
  index = 0
  temp_index = 0
  loop do
    find = temp_index == 0 ? @seq.index('ATG') : @seq.index('ATG', temp_index + 1)
    break if find.nil?

    find += 1 # String#index is 0-based; coordinates are 1-based
    if find == q_beg
      index = find
      break
    end
    break if find > (q_beg - 1 + 3)

    index = find if (find - q_end).modulo(3) == 0
    # Offsets are kept exactly as in the original scan.
    temp_index = temp_index == 0 ? find + 1 : find
  end
  return false unless index > 0

  first_hsp.s_beg = 1
  first_hsp.q_beg = index
  true
end
stop_codon(codon,ends,*beg)
click to toggle source
# File lib/gene_assembler/contig.rb, line 481
# Searches @seq for the first occurrence of +codon+ that lies in frame
# with a reference position.
#
# codon:: string to look for.
# ends::  with no extra argument, the 0-based offset where the search
#         starts; the frame reference is then ends - 1.
# beg::   optional; when given, its first value is the search start offset
#         and +ends+ itself becomes the frame reference.
#
# An occurrence is in frame when its distance to the reference is a
# multiple of 3.
#
# Returns the 0-based position of the codon, or nil when none is in frame.
def stop_codon(codon, ends, *beg)
  reference = ends - 1
  unless beg.empty?
    reference = ends
    ends = beg.first
  end
  loop do
    pos = @seq.index(codon, ends)
    return nil if pos.nil?
    return pos if (pos - reference).abs.modulo(3) == 0

    # Out of frame: resume the search just after this occurrence.
    ends = pos + 1
  end
end
stop_codon_search()
click to toggle source
# File lib/gene_assembler/contig.rb, line 523
# Searches for stop codons inside the homology region of the first hit.
#
# @seq is read codon by codon from the q_beg of the first HSP to the q_end
# of the last HSP (1-based positions), skipping positions strictly inside
# the intron borders returned by coor_intrones. The 1-based position of the
# first base of every TAG, TAA or TGA codon is appended to @stops.
#
# Returns true when at least one stop codon was found, false otherwise.
def stop_codon_search
  exists = false
  homology_start = first_hit.first_hsp.q_beg
  homology_end = first_hit.last_hsp.q_end
  codon = ''
  position = nil
  intrones = coor_intrones
  @seq.each_char.with_index(1) do |base, n|
    # Start comparing at the beginning of the first exon.
    next if n < homology_start
    # Stop comparing past the end of the last exon.
    break if n > homology_end
    # Skip introns; exon borders themselves are not skipped.
    next if intrones.any? { |int| n > int[0] && n < int[1] }

    codon += base
    if codon.length == 3
      if %w[TAG TAA TGA].include?(codon)
        @stops << position
        exists = true
      end
      codon = ''
    elsif codon.length == 1
      # Remember where the codon being assembled starts.
      position = n
    end
  end
  exists
end
stop_modified_coordenates(add)
click to toggle source
# File lib/gene_assembler/contig.rb, line 162
# Shifts every entry of @stops by +add+, in place.
#
# Returns the number of entries in @stops.
def stop_modified_coordenates(add)
  @stops.map! { |position| position + add }
  @stops.length
end
transfer_contig_hits(contig)
click to toggle source
# File lib/gene_assembler/contig.rb, line 596
# Adds every hit of +contig+ to this contig through transfer_hit.
#
# contig:: object responding to each_hit.
def transfer_contig_hits(contig)
  contig.each_hit do |hit|
    transfer_hit(hit)
  end
end
transfer_hit(hit)
click to toggle source
# File lib/gene_assembler/contig.rb, line 602
# Appends an existing +hit+ object to the contig's hit list and returns
# that list.
def transfer_hit(hit)
  @hits.push(hit)
end