class PluginIndeterminations

Constants

MAX_RUBBISH

Public Class Methods

check_params(params) click to toggle source

Returns an array with the errors caused by missing parameters

# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 191
# Checks every parameter this plugin relies on.
#
# For each parameter, +params.check_param+ is called with the shared +errors+
# array, the parameter name, its expected type name, a default value and a
# descriptive comment. The checks run in the order listed below.
#
# params:: object responding to +check_param(errors, name, type, default, comment)+
#
# Returns the +errors+ array that was handed to every +check_param+ call.
def self.check_params(params)
  errors = []

  # [name, expected type, default value, description]
  specs = [
    ['poly_n_length', 'Integer', 15,
     'Minimum number of Ns within the sequence to be rejected by having an internal segment of indeterminations. Indeterminations at the end of the sequence will be removed regardless of their size and without rejecting the sequence'],
    ['poly_n_percent', 'Integer', 80,
     'Minimum percent of Ns in a segment to be considered a valid indetermination'],
    ['poly_n_max_to_end', 'Integer', 15,
     'Maximum distance to the end of the sequence to be considered an internal segment'],
    ['middle_indetermination_rejects', 'String', 'true',
     'Rejects sequences with indeterminations in the middle']
  ]

  specs.each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  errors
end

Public Instance Methods

check_poly_length(poly_start,poly_end) click to toggle source
# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 120
# Tells whether the segment delimited by +poly_start+ and +poly_end+ (both
# inclusive) is at least as long as the 'poly_n_length' parameter.
#
# poly_start:: index of the first position of the segment
# poly_end::   index of the last position of the segment
#
# Returns true or false.
def check_poly_length(poly_start,poly_end)
  segment_length = poly_end - poly_start + 1
  min_length = @params.get_param('poly_n_length').to_i
  segment_length >= min_length
end
check_poly_percent(poly,poly_base) click to toggle source
# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 125
# Tells whether the share of +poly_base+ characters (either case) inside
# poly['found'] reaches the 'poly_n_percent' parameter.
#
# poly::      hash whose 'found' entry holds the text of the segment
# poly_base:: the base to count, e.g. 'N'
#
# Returns true or false. For an empty segment the ratio is NaN, which never
# satisfies the comparison, so the result is false.
def check_poly_percent(poly,poly_base)
  segment = poly['found']
  # String#count takes a character set: both cases of the base are counted.
  base_set = poly_base.downcase + poly_base.upcase
  hits = segment.count(base_set)
  percent = (hits.to_f / segment.size.to_f) * 100

  percent >= @params.get_param('poly_n_percent').to_i
end
exec_seq(seq,blast_query) click to toggle source
# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 145
# Registers indetermination actions for +seq+ in three passes:
# a leading run of N/n, a trailing run of N/n, and the segments reported by
# find_polys. Each pass hands its actions to seq.add_actions before the next
# pass reads seq.seq_fasta again, so the order of the passes is kept as is.
#
# seq::         sequence object (uses seq_name, seq_fasta, new_action, add_actions)
# blast_query:: not used by this method
#
# Returns whatever the final seq.add_actions call returns.
def exec_seq(seq,blast_query)
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"

  # Pass 1: run of indeterminations at the beginning of the sequence
  leading = seq.seq_fasta.match(/^[nN]+/)
  if leading
    run_length = leading[0].length
    trim = seq.new_action(0, run_length - 1, 'ActionIndetermination')
    seq.add_actions([trim])
    add_stats('indetermination_size', run_length)
  end

  # Pass 2: run of indeterminations at the end of the sequence
  trailing = seq.seq_fasta.match(/[nN]+$/)
  if trailing
    run_length = trailing[0].length
    # NOTE(review): this action ends at seq_fasta.length, while the leading
    # action ends at an inclusive index (run_length - 1) -- confirm which
    # convention new_action expects.
    trim = seq.new_action(seq.seq_fasta.length - run_length, seq.seq_fasta.length, 'ActionIndetermination')
    trim.right_action = true
    seq.add_actions([trim])
    add_stats('indetermination_size', run_length)
  end

  # Pass 3: remaining segments; find_polys fills the array it is given
  pending = []
  find_polys('[N]', seq, pending)
  seq.add_actions(pending)
end
find_polys(ta,seq,actions) click to toggle source

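Looks for segments of indeterminations (N's) in the sequence. Segments that are not near the end of the sequence must satisfy the params poly_n_length and poly_n_percent to be reported.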

# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 28
# Finds segments of indeterminations in seq.seq_fasta and appends one action
# per qualifying segment to +actions+.
#
# A match is: one or more +ta+, up to three arbitrary characters, then one or
# more +ta+ (case-insensitive). Matches for which overlap() returns an existing
# segment extend that segment; all others start a new one.
#
# For every segment:
# * if poly_near_end says it is close to the end, an action is appended;
# * otherwise an action is appended only when check_poly_length and
#   check_poly_percent both pass, and the sequence is additionally marked as
#   rejected when the 'middle_indetermination_rejects' param is 'true'.
# Each appended action has right_action set to true (this is also the case for
# internal segments) and its size is recorded through add_stats('size', ...).
#
# ta::      regexp fragment describing the base to look for, e.g. '[N]'
# seq::     sequence object (uses seq_fasta, new_action, seq_rejected=,
#           seq_rejected_by_message=)
# actions:: array that receives the new actions (modified in place)
#
# Returns the array of segment hashes ('begin', 'end', 'found').
def find_polys(ta,seq,actions)
  type = 'ActionIndetermination'
  poly_base = 'N'

  re2 = /((#{ta}{1,})(.{0,3})(#{ta}{1,}))/i

  # NOTE(review): global_match is not a core Regexp method; presumably a
  # project extension. Each result is used through #match (a MatchData) and
  # #offset (added to the match positions) -- confirm the meaning of the
  # second argument against its definition.
  matches = re2.global_match(seq.seq_fasta,3)

  polys = []

  matches.each do |pattern2|
    m_start = pattern2.match.begin(0) + pattern2.offset
    m_end = pattern2.match.end(0) + pattern2.offset - 1 # inclusive end

    existing = overlap(polys, m_start, m_end)
    if existing
      # Extend the segment already recorded up to the end of this match.
      existing['end'] = m_end
      existing['found'] = seq.seq_fasta.slice(existing['begin'], existing['end'] - existing['begin'] + 1)
    else
      fresh = {}
      fresh['begin'] = m_start
      fresh['end'] = m_end
      fresh['found'] = seq.seq_fasta.slice(fresh['begin'], fresh['end'] - fresh['begin'] + 1)
      polys.push fresh
    end
  end

  polys.each do |region|
    near_end = poly_near_end(region['end'], seq.seq_fasta)

    # Internal segments must be long enough and rich enough in N to count.
    unless near_end
      next unless check_poly_length(region['begin'], region['end']) && check_poly_percent(region, poly_base)
    end

    a = seq.new_action(region['begin'], region['end'], type)
    a.right_action = true
    actions.push a

    if !near_end && @params.get_param('middle_indetermination_rejects').to_s == 'true'
      seq.seq_rejected = true
      seq.seq_rejected_by_message = 'Indeterminations in middle of sequence'
    end

    add_stats('size', region['end'] - region['begin'] + 1)
  end
end
overlap(polys,mi_start,mi_end) click to toggle source
# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 14
# Looks up the first segment in +polys+ that overlapX? reports as overlapping
# the range mi_start..mi_end.
#
# polys::    array of segment hashes with 'begin' and 'end' entries
# mi_start:: start of the range to test
# mi_end::   end of the range to test
#
# Returns the matching segment hash, or nil when there is none.
def overlap(polys,mi_start,mi_end)
  polys.find do |candidate|
    overlapX?(mi_start, mi_end, candidate['begin'], candidate['end'])
  end
end
poly_near_end(pos,seq_fasta) click to toggle source
# File lib/seqtrimnext/plugins/plugin_indeterminations.rb, line 136
# Tells whether +pos+ lies within 'poly_n_max_to_end' positions of the end of
# +seq_fasta+, i.e. pos >= seq_fasta.length - poly_n_max_to_end.
#
# pos::       position to test
# seq_fasta:: the sequence text (only its length is used)
#
# Returns true or false.
def poly_near_end(pos,seq_fasta)
  tail_window = @params.get_param('poly_n_max_to_end').to_i
  pos >= seq_fasta.length - tail_window
end