class PluginLowQuality

Inherit: Plugin

Inherit: Plugin

Public Class Methods

check_params(params) click to toggle source

Returns an array with the errors caused by missing parameters.

# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 165
# Declares the parameters this plugin relies on by calling
# +params.check_param+ once per parameter, always with the type 'Integer'.
#
# @param params [#check_param] parameter store; receives the shared errors
#   array, the parameter name, its type, its default value and a description
# @return [Array] the errors array that was handed to every +check_param+ call
def self.check_params(params)
  errors = []

  # [name, default value, description]
  # A former 'window_width' parameter (default 15, 'Quality window for
  # scanning low quality segments') is left unregistered.
  [
    ['min_quality', 20, 'Minimum quality value for every nucleotide'],
    ['min_length_inside_seq', 8, 'Minimum length of a bad quality segment inside the sequence'],
    ['max_consecutive_good_bases', 2, 'Maximum consecutive good-quality bases between two bad quality regions']
  ].each do |name, default_value, comment|
    params.check_param(errors, name, 'Integer', default_value, comment)
  end

  errors
end

Public Instance Methods

add_action_after_high_qual(p_begin,p_end,actions,seq) click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 237
# Pushes an "ActionLowQuality" action onto +actions+ for the stretch that
# follows the high quality part of the sequence.
#
# The action is only created when the distance from +p_end+ to
# +seq.insert_end+ is positive and at least @window / 2, and +p_end+ lies
# before the last index of +seq.seq_fasta+.
#
# @param p_begin [Integer] not used by this method
# @param p_end [Integer] position where the high quality part ends
# @param actions [Array] collector that receives the new action
# @param seq [Object] must respond to +insert_start+, +insert_end+,
#   +seq_fasta+ and +new_action+
# @return [Array, nil] +actions+ when an action was pushed, +nil+ otherwise
def add_action_after_high_qual(p_begin, p_end, actions, seq)
  tail_size = seq.insert_end - p_end
  return unless tail_size >= (@window / 2)
  return unless (p_end < seq.seq_fasta.size - 1) && (tail_size > 0)

  # The action runs from just after p_end (shifted by insert_start) to the
  # last index of the fasta string.
  low_qual = seq.new_action(p_end - seq.insert_start + 1, seq.seq_fasta.size - 1, "ActionLowQuality")
  actions.push(low_qual)
end
add_action_before_high_qual(p_begin,p_end,actions,seq,start) click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 220
# Pushes an "ActionLowQuality" action onto +actions+ for the stretch that
# precedes the high quality part of the sequence.
#
# The action spans +start+ .. +p_begin - 1+ and is only created when
# +p_begin - 1+ is positive and at least @window / 2.
#
# @param p_begin [Integer] position where the high quality part begins
# @param p_end [Integer] not used by this method
# @param actions [Array] collector that receives the new action
# @param seq [Object] must respond to +new_action+
# @param start [Integer] first position of the low quality action
# @return [Array, nil] +actions+ when an action was pushed, +nil+ otherwise
def add_action_before_high_qual(p_begin, p_end, actions, seq, start)
  head_size = p_begin - 1
  return unless head_size >= (@window / 2)
  return unless (p_begin > 0) && (head_size > 0)

  low_qual = seq.new_action(start, p_begin - 1, "ActionLowQuality")
  actions.push(low_qual)
end
create_sum_window(qual,ini,index_window_end) click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 16
# Builds the sliding-window quality sums: entry +k+ of the result is the sum
# of the @window consecutive values +qual[k] .. qual[k + @window - 1]+, for
# every +k+ in +ini..index_window_end+. Entries below +ini+ are left as nil.
#
# When no window fits (+ini > index_window_end+, e.g. the sequence is shorter
# than @window) an empty array is returned instead of failing on a nil sum.
#
# @param qual [Array<Numeric>] quality values
# @param ini [Integer] index of the first window
# @param index_window_end [Integer] index of the last window start
# @return [Array] window sums indexed by window start position
def create_sum_window(qual, ini, index_window_end)
  sum = []
  return sum if ini > index_window_end

  # First window: add up its @window values explicitly.
  sum[ini] = (ini...(ini + @window)).reduce(0) { |acc, pos| acc + qual[pos] }

  # Every following window is derived from the previous one by dropping the
  # value that leaves the window and adding the one that enters it.
  ((ini + 1)..index_window_end).each do |pos|
    sum[pos] = sum[pos - 1] - qual[pos - 1] + qual[pos + @window - 1]
  end

  sum
end
cut_fine_bounds(qual,new_start,new_end) click to toggle source

Fine-tunes the bounds of the high quality region.

# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 138
# Refines the bounds of a high quality region base by base.
#
# Start: the window beginning at +new_start+ is scanned from its last base
# back to its first. Low quality bases (below @low) are skipped until at
# least one good base has been seen; the first low quality base found after
# that stops the scan, and the start is moved just past it.
#
# End: the window beginning at +new_end+ is scanned forwards with the same
# rule, and the end is moved to the base just before the stopping point.
#
# The "good base seen" flags are real booleans. They used to be 0/1 integers,
# and since 0 is truthy in Ruby both scans stopped at the very first low
# quality base, which made the flag meaningless.
#
# @param qual [Array<Numeric>] quality values
# @param new_start [Integer] index of the first window of the region
# @param new_end [Integer] index of the last window of the region
# @return [Array(Integer, Integer)] refined start and end positions
def cut_fine_bounds(qual, new_start, new_end)
  seen_ok = false
  i = @window - 1
  while i >= 0
    if qual[new_start + i] < @low
      break if seen_ok
    else
      seen_ok = true
    end
    i -= 1
  end
  # i is the index of the stopping base, or -1 when the whole window passed.
  new_start += i + 1

  seen_ok = false
  i = 0
  while i < @window
    if qual[new_end + i] < @low
      break if seen_ok
    else
      seen_ok = true
    end
    i += 1
  end
  # i is the index of the stopping base, or @window when the whole window passed.
  new_end += i - 1

  [new_start, new_end]
end
cut_fine_bounds_short(qual,new_start,new_end) click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 108
# Refines the bounds of a high quality region using a simpler rule than
# cut_fine_bounds: the start is advanced to the first base of its window
# whose quality is at least @low, and the end is set to the last base of its
# window whose quality is at least @low.
#
# If no base of the start window qualifies, the start moves forward by
# @window; if no base of the end window qualifies, the end moves back by one.
#
# @param qual [Array<Numeric>] quality values
# @param new_start [Integer] index of the first window of the region
# @param new_end [Integer] index of the last window of the region
# @return [Array(Integer, Integer)] refined start and end positions
def cut_fine_bounds_short(qual, new_start, new_end)
  # Walk forwards over the leading low quality bases of the start window.
  lead = 0
  lead += 1 while lead < @window && qual[new_start + lead] < @low

  # Walk backwards over the trailing low quality bases of the end window.
  tail = @window - 1
  tail -= 1 while tail >= 0 && qual[new_end + tail] < @low

  [new_start + lead, new_end + tail]
end
exec_seq(seq,blast_query) click to toggle source

Begins the plugin's execution with the sequence “seq”. Creates an action for each low quality subsequence so that it can be eliminated. A subsequence has low quality if the sum of all its qualities is < subsequence_size*20.

Creates the quality windows from the sequence, looks for the high quality subsequence and marks, with an action, the part before the high quality subsequence as a low quality part. Finally, it marks, with an action, the part after the high quality subsequence as a low quality part.

# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 118
# Runs the low quality detection on +seq+ and attaches one "ActionLowQuality"
# action per low quality region that get_low_qual_regions reports.
#
# Nothing is analysed when this object is exactly a PluginLowQuality and the
# sequence has no qualities (a debug message is logged instead), when the
# quality list is empty, or when the 'use_qual' parameter is not 'true'.
#
# The size of every region is also recorded through add_stats('low_qual', ...).
#
# @param seq [Object] sequence; must respond to +seq_qual+, +seq_name+,
#   +new_action+ and +add_actions+
# @param blast_query [Object] not used by this method
# @return [Object, nil] result of the last call performed (logger or
#   +seq.add_actions+), or +nil+ when nothing was done
def exec_seq(seq, blast_query)
  if (self.class.to_s == 'PluginLowQuality') && seq.seq_qual.nil?
    $LOG.debug " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
  elsif (seq.seq_qual.size > 0) && (@params.get_param('use_qual').to_s == 'true')
    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"

    min_quality = @params.get_param('min_quality').to_i
    min_length_inside_seq = @params.get_param('min_length_inside_seq').to_i
    max_consecutive_good_bases = @params.get_param('max_consecutive_good_bases').to_i

    regions = get_low_qual_regions(seq.seq_qual, min_quality, min_length_inside_seq, max_consecutive_good_bases)

    # One action per region; the statistic is recorded before the action is built.
    actions = regions.map do |region|
      add_stats('low_qual', region.last - region.first + 1)
      seq.new_action(region.first, region.last, 'ActionLowQuality')
    end

    seq.add_actions(actions)
  end
end
find_bounds_high_quality(sum,ini,index_window_end) click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 51
# Finds the longest run of consecutive windows whose sum reaches @cut_off.
#
# Windows +ini..index_window_end+ of +sum+ are inspected. When two runs have
# the same length, the later one wins.
#
# @param sum [Array<Numeric>] window sums indexed by window start position
# @param ini [Integer] first window index to inspect
# @param index_window_end [Integer] last window index to inspect
# @return [Array(Integer, Integer)] first and last window index of the chosen
#   run; [-1, -1] when no window reaches @cut_off; [-1, index_window_end]
#   when +ini+ is already past +index_window_end+
def find_bounds_high_quality(sum, ini, index_window_end)
  past_the_end = ini > index_window_end

  best_start = -1
  # When ini is past the last window, report index_window_end as the end so
  # the result reads "ran past the final window" rather than "no high quality".
  best_end = past_the_end ? index_window_end : -1

  # Start of the run currently being followed, or -1 when outside any run.
  run_start = (!past_the_end && sum[ini] >= @cut_off) ? ini : -1

  ((ini + 1)..index_window_end).each do |pos|
    if sum[pos] >= @cut_off
      run_start = pos if run_start < 0 # just entered a run
    elsif run_start >= 0
      # Just left a run: keep it when it is at least as long as the best so far.
      if (pos - 1 - run_start) >= (best_end - best_start)
        best_start = run_start
        best_end = pos - 1
      end
      run_start = -1
    end
  end

  # A run still open after the last window ends at index_window_end.
  if run_start != -1 && (index_window_end - run_start) >= (best_end - best_start)
    best_start = run_start
    best_end = index_window_end
  end

  [best_start, best_end]
end
get_low_qual_regions(quals,min_value, min_region_size,max_good_quals=2) click to toggle source
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 72
# Collects every low quality region of +quals+ that valid_low_qual_region?
# accepts.
#
# Regions are requested one after another from next_low_qual_region, each
# search starting right after the end of the previous region, until it
# reports that no region is left (nil start).
#
# @param quals [Array<Numeric>] quality values
# @param min_value [Numeric] threshold forwarded to next_low_qual_region
# @param min_region_size [Integer] size forwarded to valid_low_qual_region?
# @param max_good_quals [Integer] tolerance forwarded to next_low_qual_region
# @return [Array<Array(Integer, Integer)>] accepted [start, end] pairs in
#   the order they were found
def get_low_qual_regions(quals, min_value, min_region_size, max_good_quals = 2)
  regions = []
  from_pos = 0

  loop do
    rstart, rend = next_low_qual_region(quals, from_pos, min_value, max_good_quals)
    break if rstart.nil?

    from_pos = rend + 1
    regions << [rstart, rend] if valid_low_qual_region?(quals, rstart, rend, min_region_size)
  end

  regions
end
next_low_qual_region(quals,from_pos,min_value,max_good_quals=2) click to toggle source
# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 16
# Locates the next low quality region at or after +from_pos+.
#
# The region starts at the first value below +min_value+. It then grows over
# the following values: good values (>= +min_value+) are counted but only
# added to the region once another bad value follows them. Growth stops at
# the end of +quals+ or as soon as more than +max_good_quals+ consecutive
# good values have been seen, so the region always ends on a bad value.
#
# @param quals [Array<Numeric>] quality values
# @param from_pos [Integer] index where the search starts
# @param min_value [Numeric] values below it are considered bad
# @param max_good_quals [Integer] good values tolerated between bad ones
# @return [Array] [start, end] of the region, or [nil, nil] when no value
#   below +min_value+ exists from +from_pos+ onwards
def next_low_qual_region(quals, from_pos, min_value, max_good_quals = 2)
  # Skip the good values in front of the region.
  first_bad = from_pos
  first_bad += 1 while (first_bad < quals.length) && (quals[first_bad] >= min_value)
  return [nil, nil] unless first_bad < quals.length

  region_len = 0
  pending_good = 0 # good values seen since the last bad one
  pos = first_bad

  loop do
    if quals[pos] < min_value
      # A bad value absorbs the good ones that separated it from the region.
      region_len += 1 + pending_good
      pending_good = 0
    else
      pending_good += 1
    end

    pos += 1
    break unless (pos < quals.length) && (pending_good <= max_good_quals)
  end

  [first_bad, first_bad + region_len - 1]
end
valid_low_qual_region?(quals,rstart,rend,min_region_size) click to toggle source

A region is valid if it starts at position 0, ends at the last position of the sequence (seq.length - 1), or is big enough.

# File lib/seqtrimnext/plugins/plugin_low_quality.rb, line 62
# Tells whether a low quality region should be kept.
#
# A region is kept when it starts at index 0, when it ends at the last index
# of +quals+, or when it spans at least +min_region_size+ positions.
#
# @param quals [Array] quality values (only its length is read)
# @param rstart [Integer] first index of the region
# @param rend [Integer] last index of the region
# @param min_region_size [Integer] minimum size required for inner regions
# @return [Boolean]
def valid_low_qual_region?(quals, rstart, rend, min_region_size)
  return true if rstart == 0
  return true if rend == quals.length - 1

  (rend - rstart + 1) >= min_region_size
end

Private Instance Methods

find_high_quality(qual,ini=0) click to toggle source
# File lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb, line 174
# Finds the bounds of the high quality region of +qual+.
#
# When +ini+ is 0 the window sums are (re)computed and cached in @sum, along
# with @index_window_end; for any other +ini+ the cached values are reused.
# The window-level bounds returned by find_bounds_high_quality are then
# refined with cut_fine_bounds_short (when the region is at most one window
# long) or with cut_fine_bounds.
#
# @param qual [Array<Numeric>] quality values
# @param ini [Integer] index where the search starts
# @return [Array(Integer, Integer)] start and end of the region; the values
#   from find_bounds_high_quality are returned untouched when its start is
#   negative
def find_high_quality(qual, ini = 0)
  if ini == 0
    # Do not subtract 1 here, or the last nucleotide of the sequence is lost.
    # TODO: in Juan's seqtrim, iwe (called index_window_end here) loses
    # 2 nucleotides of the last computed window.
    @index_window_end = qual.size - @window
    @sum = create_sum_window(qual, ini, @index_window_end)
  end

  new_start, new_end = find_bounds_high_quality(@sum, ini, @index_window_end)

  if new_start >= 0
    new_start, new_end =
      if new_start + @window >= new_end
        cut_fine_bounds_short(qual, new_start, new_end)
      else
        cut_fine_bounds(qual, new_start, new_end)
      end
  end

  [new_start, new_end]
end