class PluginLowQuality
Inherit: Plugin
Inherit: Plugin
Public Class Methods
check_params(params)
click to toggle source
Returns an array with the errors due to parameters are missing
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 165 def self.check_params(params) errors=[] comment='Minimum quality value for every nucleotide' default_value = 20 params.check_param(errors,'min_quality','Integer',default_value,comment) #comment='Quality window for scanning low quality segments' #default_value = 15 #params.check_param(errors,'window_width','Integer',default_value,comment) comment='Minimum length of a bad quality segment inside the sequence' default_value = 8 params.check_param(errors,'min_length_inside_seq','Integer',default_value,comment) comment='Maximum consecutive good-quality bases between two bad quality regions' default_value = 2 params.check_param(errors,'max_consecutive_good_bases','Integer',default_value,comment) return errors end
Public Instance Methods
add_action_after_high_qual(p_begin,p_end,actions,seq)
click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 237 def add_action_after_high_qual(p_begin,p_end,actions,seq) action_size = seq.insert_end-p_end if action_size>=(@window/2) # puts "action_SIZE2 #{action_size} > #{@window/2}" if ((p_end<seq.seq_fasta.size-1) && (action_size>0) ) #if there is action before the high qual part # it's created an action before of the high quality part a = seq.new_action(p_end-seq.insert_start+1,seq.seq_fasta.size-1,"ActionLowQuality") # adds the ActionInsert to the sequence before adding the actionMid actions.push a end end end
add_action_before_high_qual(p_begin,p_end,actions,seq,start)
click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 220 def add_action_before_high_qual(p_begin,p_end,actions,seq,start) action_size = p_begin-1 if action_size>=(@window/2) # puts "action_SIZE1 #{action_size} > #{@window/2}" if ( (p_begin>0) && (action_size>0) ) #if there is action before the high qual part # it's created an action before of the high quality part a = seq.new_action(start ,p_begin-1,"ActionLowQuality") # adds the ActionInsert to the sequence before adding the actionMid # puts " new low qual start: #{start} = #{a.start_pos} end: #{p_begin} -1 = #{a.end_pos}" actions.push a end end end
create_sum_window(qual,ini,index_window_end)
click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 16 def create_sum_window(qual,ini,index_window_end) # puts "--------index w #{index_window_end}" sum=[] i=ini # puts "#{i} #{index_window_end}" while (i<=index_window_end) # initialize sum sum[i]=0 i += 1 end # puts " contenido de sum" + sum.join.to_s + " i index_window_end window #{i} #{index_window_end} #{@window}" i=ini while (i<ini+@window) sum[ini] += qual[i] i+=1 end i=ini+1 while (i<=index_window_end) sum[i]=sum[i-1]-qual[i-1]+qual[i+@window-1] i+=1 end # puts '2____' + sum.join(',') + 'pos sum' + ini.to_s return sum end
cut_fine_bounds(qual,new_start,new_end)
click to toggle source
cuts fine the high quality bounds
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 138 def cut_fine_bounds(qual,new_start,new_end) # puts " ççççççççççççççç #{new_start+@window} >= #{new_end} " # puts " #{new_start} #{new_end} .o.o.o.o.o.o.o.o1" # cut it fine one_ok = 0 i=@window-1 # puts " qual[new_start+i] new_start #{new_start} i #{i} = #{new_start+i} qual.size #{qual.size}" while (i>=0) if (qual[new_start+i] < @low) break if one_ok else one_ok = 1 end i-=1 end new_start += i+1 oneOk = 0 i=0 while (i<@window) if (qual[new_end+i] < @low) break if oneOk else oneOk = 1 end i+=1 end new_end += i-1 # puts "6b new_start #{new_start} new-end #{new_end}" # puts " #{new_start} #{new_end} .o.o.o.o.o.o.o.o2" return new_start, new_end end
cut_fine_bounds_short(qual,new_start,new_end)
click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 108 def cut_fine_bounds_short(qual,new_start,new_end) i=0 # puts " qual[new_start+i] new_start #{new_start} i #{i} = #{new_start+i} qual.size #{qual.size}" while (i<@window) if (qual[new_start+i]>=@low) break end i+=1 end new_start +=i # puts "#{new_start} ***********" i=@window -1 while (i>=0) if (qual[new_end+i]>=@low) break end i-=1 end new_end += i # puts "6a new_start #{new_start} new-end #{new_end}" # puts " #{new_start} #{new_end} .o.o.o.o.o.o.o.o2 short" return new_start, new_end end
exec_seq(seq,blast_query)
click to toggle source
Begins the plugin1's execution whit the sequence “seq” Creates an action by each subsequence with low quality to eliminate it A subsequence has low quality if (the add of all its qualitis < subsequence_size*20)
Creates the qualities windows from the sequence, looks for the subsequence with high quality and mark, with an action, the before part to the High Quality Subsequence like a low quality part Finally mark, with an action, the after part to the High Quality Subsequence like a low quality part
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 118 def exec_seq(seq,blast_query) if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? ) $LOG.debug " Quality File haven't been provided. It's impossible to execute " + self.class.to_s elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual').to_s=='true')) $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence" min_quality=@params.get_param('min_quality').to_i min_length_inside_seq=@params.get_param('min_length_inside_seq').to_i max_consecutive_good_bases=@params.get_param('max_consecutive_good_bases').to_i type='ActionLowQuality' actions=[] regions=get_low_qual_regions(seq.seq_qual,min_quality,min_length_inside_seq,max_consecutive_good_bases) regions.each do |r| low_qual_size=r.last-r.first+1 # puts "(#{low_qual_size}) = [#{r.first},#{r.last}]: #{a[r.first..r.last].map{|e| ("%2d" % e.to_s)}.join(' ')}" add_stats('low_qual',low_qual_size) # create action a = seq.new_action(r.first,r.last,type) # adds the correspondent action to the sequence actions.push a end # add quals seq.add_actions(actions) end end
find_bounds_high_quality(sum,ini,index_window_end)
click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 51 def find_bounds_high_quality(sum,ini,index_window_end) new_start = -1 new_end = -1 # puts " ini #{ini} iwe #{index_window_end}" # puts "ini #{ini} index_window_end #{index_window_end} sum[ini] #{sum[ini]} cut_off #{@cut_off} suma #{sum.size} " if (ini>index_window_end) temp_start= ini # new_start, new_end = temp_start, index_window_end new_end = index_window_end # para que no crea que no hay alta calidad, sino que hemos sobrepasado el indice final de la ventana # new_start, new_end = index_window_end, index_window_end end # puts " temp_start #{temp_start}" if (ini>index_window_end) temp_start=((ini<=index_window_end) && (sum[ini]>=@cut_off))? ini : -1 i=ini+1 while (i<=index_window_end) if (sum[i]>=@cut_off) if (temp_start<0) temp_start=i #just in! # puts "just in ---- #{sum[i]}>= cut off #{@cut_off} pos #{temp_start}" end else # puts "sum #{sum[i]} < cut off " if(temp_start>=0) #just out! # puts "update #{sum[i]}< cut off #{@cut_off} pos #{i}.if #{i-1} - #{temp_start} > #{new_end} - #{new_start}" if (((i-1-temp_start)>=(new_end-new_start))) new_start,new_end=temp_start,i-1 # puts "just out ---- new start,new_end = #{temp_start}, #{i-1} index_window_end = #{index_window_end}" end temp_start= -1 end end i+=1 end # puts "4 temp_start #{temp_start} new_start #{new_start} new-end #{new_end}" if (temp_start != -1) # finished while ok # puts "4 #{index_window_end} - #{temp_start} > #{new_end} - #{new_start}" if ((index_window_end- temp_start) >= (new_end-new_start)) #put the end of the window at the end of sequence new_start, new_end = temp_start, index_window_end #-1 end end # puts "5 temp_start #{temp_start} new_start #{new_start} new-end #{new_end}" # puts " newstart #{new_start} newend #{new_end}" return new_start,new_end end
get_low_qual_regions(quals,min_value, min_region_size,max_good_quals=2)
click to toggle source
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 72 def get_low_qual_regions(quals,min_value, min_region_size,max_good_quals=2) # the initial region is the whole array left=0 right=quals.length-1 # puts quals.map{|e| ("%2d" % e.to_s)}.join(' ') # puts "[#{left},#{right}]" i = 0 from_pos=0 regions =[] # get all new regions begin rstart, rend = next_low_qual_region(quals,from_pos,min_value,max_good_quals) if !rstart.nil? from_pos= rend+1 if valid_low_qual_region?(quals,rstart,rend,min_region_size) regions << [rstart,rend] end end end while !rstart.nil? return regions end
next_low_qual_region(quals,from_pos,min_value,max_good_quals=2)
click to toggle source
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 16 def next_low_qual_region(quals,from_pos,min_value,max_good_quals=2) rstart=nil rend=nil i=from_pos good_q=0 # skip good values while (i< quals.length) && (quals[i]>=min_value) i +=1 end # now we have found a bad quality, or end of sequence if i < quals.length rstart=i len=0 # puts " - [#{rstart},#{len}]" # continue growing while region of lowqual until more than 2 bases of good qual are found begin q=quals[i] if q<min_value len += 1 # puts "BAD #{q}<#{min_value}" len += good_q good_q=0 else good_q+=1 end # puts "#{q} - q[#{rstart},#{rend}], #{good_q}" i+=1 end while (i < quals.length) && (good_q <= max_good_quals) rend = rstart + len -1 # puts "#{q} - q[#{rstart},#{rend}], #{good_q}" end return [rstart,rend] end
valid_low_qual_region?(quals,rstart,rend,min_region_size)
click to toggle source
A region is valid if it starts in 0, ends in seq.length or is big enought
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 62 def valid_low_qual_region?(quals,rstart,rend,min_region_size) # puts [rstart,rend,0,quals.length,(rend-rstart+1)].join(';') # res =((rstart==0) || (rend==quals.length-1) || ((rend-rstart+1)>=min_region_size)) # if res # puts "VALID" # end return ((rstart==0) || (rend==quals.length-1) || ((rend-rstart+1)>=min_region_size)) end
Private Instance Methods
find_high_quality(qual,ini=0)
click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 174 def find_high_quality(qual,ini=0) # puts qual.class.to_s + qual.size.to_s + 'size,' + @window.to_s + ' window, '+ qual.join(',') + 'size' + qual.size.to_s update=false # if @window>qual.length-ini #search in the last window although has a low size # @window=qual.length-ini # # puts ' UPDATE WINDOW Y CUT OFF ' + @window.to_s # @cut_off=@window*@low # update=true # end if (ini==0 or update) #index_window_start = ini @index_window_end = qual.size- @window #don't sub 1, or will lost the last nucleotide of the sequence -1; #TODO En seqtrim de Juan iwe, que en nuestro seqtrim se llama index_window_end, está perdiendo 2 nucleótidos de la última ventana calculada @sum = create_sum_window(qual,ini,@index_window_end) # puts "SUMA #{@sum.join(' ')}" end new_start, new_end = find_bounds_high_quality(@sum,ini,@index_window_end) # puts " #{new_start} #{new_end} .o.o.o.o.o.o.o.o1" if (new_start>=0) if (new_start+@window >= new_end) # puts "cfs" new_start, new_end = cut_fine_bounds_short(qual,new_start,new_end) # puts "cfs" else # puts "cf" new_start, new_end = cut_fine_bounds(qual,new_start,new_end) # puts "cf" end end # puts " #{new_start} #{new_end} .o.o.o.o.o.o.o.o2" return new_start,new_end #+1 end