class SeqtrimWorker

Public Instance Methods

add_output_data(obj) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 107
# Writes every sequence held by +obj+ to the output buffers and collects the
# text form of each sequence in +obj.output_text+.
#
# When @tuple_size is greater than 1 the sequences are consumed in slices of
# that size and each slice is handed to write_seq_to_files_tuple; otherwise
# every sequence goes through write_seq_to_files_normal on its own.
#
# obj - sequence group; must respond to each / each_slice, output_files,
#       stats, output_text, output_text= and remove_all_seqs.
#
# Returns whatever obj.remove_all_seqs returns.
def add_output_data(obj)
  obj.output_text = []

  if @tuple_size > 1
    obj.each_slice(@tuple_size) do |group|
      write_seq_to_files_tuple(obj.output_files, group, obj.stats)
      group.each { |member| obj.output_text << member.to_text }
    end
  else
    obj.each do |single|
      write_seq_to_files_normal(obj.output_files, single, obj.stats)
      obj.output_text << single.to_text
    end
  end

  # The sequences are not needed any more once the output has been produced.
  obj.remove_all_seqs
end
add_stat(stats,key,subkey,value,count=1) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 131
# Increments the counter stored at stats[key][subkey][value] by +count+,
# creating the intermediate hashes and a zero counter when they are missing.
#
# stats  - nested hash of counters (mutated in place).
# key    - first-level key.
# subkey - second-level key.
# value  - third-level key whose counter is incremented.
# count  - increment to apply (defaults to 1).
#
# Returns the updated counter.
def add_stat(stats, key, subkey, value, count = 1)
  stats[key] ||= {}
  stats[key][subkey] ||= {}
  stats[key][subkey][value] ||= 0

  stats[key][subkey][value] += count
end
closing_worker() click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 102
# Intentionally empty: this worker performs no work here and returns nil.
# NOTE(review): presumably a shutdown hook called by the worker framework --
# confirm against the caller.
def closing_worker; end
get_file(files,fn) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 425
# Looks up the entry stored under +fn+ in +files+, registering a new empty
# array first when nothing is stored there yet.
#
# files - hash keyed by file name (mutated when the key is missing).
# fn    - file name used as key.
#
# Returns the object stored under +fn+.
def get_file(files, fn)
  files[fn] ||= []
end
get_mid_message(mid) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 256
# Builds the identifier and message pair describing a MID action.
#
# mid - object responding to tag_id and message, or nil.
#
# Returns [mid_id, mid_message]:
# * ['no_MID', ' No MID found'] when +mid+ is nil or its message is 'no_MID';
# * otherwise [mid.tag_id, ' ' + mid.message], except that the message part
#   is the empty string when tag_id equals ''.
def get_mid_message(mid)
  return ['no_MID', ' No MID found'] if mid.nil? || mid.message == 'no_MID'

  tag = mid.tag_id
  note = tag != '' ? ' ' + mid.message : ''
  [tag, note]
end
json_file(files) click to toggle source

ACCESS TO FILES

# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 387
# Fetches, through get_file, the entry of +files+ keyed by the path of
# 'results.json' under OUTPUT_PATH.
def json_file(files)
  json_path = File.join(OUTPUT_PATH, 'results.json')
  get_file(files, json_path)
end
low_complexity_file(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 413
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/low_complexity_<file_name>.fastq.
def low_complexity_file(files, dir_name, file_name)
  base_name = 'low_complexity_' + file_name + '.fastq'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
low_sffinfo_file(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 421
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/low_complexity_sff_info_<file_name>.txt.
def low_sffinfo_file(files, dir_name, file_name)
  base_name = 'low_complexity_sff_info_' + file_name + '.txt'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
paired_file(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 400
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/paired_<file_name>.fastq.
def paired_file(files, dir_name, file_name)
  base_name = 'paired_' + file_name + '.fastq'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
paired_file_ilu1(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 404
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/paired_1_<file_name>.fastq.
def paired_file_ilu1(files, dir_name, file_name)
  base_name = 'paired_1_' + file_name + '.fastq'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
paired_file_ilu2(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 408
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/paired_2_<file_name>.fastq.
def paired_file_ilu2(files, dir_name, file_name)
  base_name = 'paired_2_' + file_name + '.fastq'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
process_object(obj) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 56
# Processes one unit of work: wraps the flattened +obj+ in a SequenceGroup,
# runs the plugins held by @plugin_manager over it and then produces the
# output data through add_output_data.
#
# obj - object responding to flatten.
#
# Returns the SequenceGroup that was processed.
def process_object(obj)
  SequenceGroup.new(obj.flatten).tap do |group|
    @plugin_manager.execute_plugins(group)
    add_output_data(group)
  end
end
receive_initial_config(obj) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 69
# Stores the received parameters object in @params and caches the
# 'tuple_size', 'use_qual' and 'use_json' parameters in instance variables.
#
# obj - parameters object responding to get_param(name).
#
# Returns the value of the 'use_json' parameter.
def receive_initial_config(obj)
  $WORKER_LOG.info('Params received')

  @params = obj

  @tuple_size = obj.get_param('tuple_size')
  @use_qual = obj.get_param('use_qual')
  @use_json = obj.get_param('use_json')
end
rejected_output_file(files) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 391
# Fetches, through get_file, the entry of +files+ keyed by the path of
# 'rejected.txt' under OUTPUT_PATH.
def rejected_output_file(files)
  rejected_path = File.join(OUTPUT_PATH, 'rejected.txt')
  get_file(files, rejected_path)
end
save_empty_insert(files,seq, stats) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 243
# Handles a sequence that has no inserts: flags it as rejected with the
# reason 'short insert', appends a '>name No valid inserts found' line to the
# rejected output and updates the rejection counters in +stats+.
#
# files - hash of output buffers.
# seq   - sequence; must respond to seq_rejected=, seq_rejected_by_message=
#         and seq_name.
# stats - statistics hash updated through add_stat.
def save_empty_insert(files, seq, stats)
  seq.seq_rejected = true
  seq.seq_rejected_by_message = 'short insert'

  reason = 'No valid inserts found'

  rejected_sink = rejected_output_file(files)
  rejected_sink << ('>' + seq.seq_name + ' ' + reason)

  add_stat(stats, 'sequences', 'rejected', reason)
  add_stat(stats, 'sequences', 'count', 'rejected')
end
save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 299
# Saves a sequence that has exactly one insert.
#
# The FASTQ record and the insert coordinates go to the regular
# sequence / sff_info buffers, or to their low-complexity counterparts when
# the sequence carries at least one ActionLowComplexity action. The matching
# counter in +stats+ is incremented.
#
# files        - hash of output buffers.
# seq          - sequence; must respond to get_actions, seq_name and
#                get_comment_line.
# stats        - statistics hash updated through add_stat.
# inserts      - inserts of the sequence; only the first one is written.
# qual_inserts - qualities per insert; only read when @use_qual is set.
# mid          - MID action (or nil) used to build the comment suffix.
# dir_name     - output sub-directory.
# file_name    - base name used to build the output file names.
def save_one_insert(files, seq, stats, inserts, qual_inserts, mid, dir_name, file_name)
  # Only the message part is needed here; the id is ignored.
  _mid_id, mid_note = get_mid_message(mid)

  if seq.get_actions(ActionLowComplexity).empty?
    add_stat(stats, 'sequences', 'count', 'output_seqs')

    fastq_out = sequence_file(files, dir_name, file_name)
    coords_out = sffinfo_file(files, dir_name, file_name)
  else
    add_stat(stats, 'sequences', 'count', 'output_seqs_low_complexity')

    fastq_out = low_complexity_file(files, dir_name, file_name)
    coords_out = low_sffinfo_file(files, dir_name, file_name)
  end

  quals = @use_qual ? qual_inserts[0] : []
  name = seq.seq_name

  # The sequence's own comment line, when not blank, precedes the MID note.
  own_comment = seq.get_comment_line
  comment = own_comment.strip.empty? ? mid_note : own_comment + mid_note

  fastq_out << FastqFile.to_fastq(name, inserts[0], quals, comment)

  # Coordinates are written 1-based (stored positions plus one).
  first_insert = seq.get_actions(ActionInsert)[0]
  coords_out << "#{name} #{first_insert.start_pos + 1} #{first_insert.end_pos + 1}"
end
save_rejected_empty_or_single(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 222
# Dispatches a sequence to the matching saver:
# * rejected sequences go to save_rejected_seq;
# * sequences without inserts go to save_empty_insert;
# * sequences with exactly one insert go to save_one_insert.
# Any other case (two or more inserts) is left untouched and yields nil.
#
# Returns the result of the saver that was called, or nil.
def save_rejected_empty_or_single(files, seq, stats, inserts, qual_inserts, mid, dir_name, file_name)
  return save_rejected_seq(files, seq, stats) if seq.seq_rejected
  return save_empty_insert(files, seq, stats) if inserts.empty?
  return unless inserts.count == 1

  save_one_insert(files, seq, stats, inserts, qual_inserts, mid, dir_name, file_name)
end
save_rejected_seq(files,seq, stats) click to toggle source

SAVE NORMAL ===============================

# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 234
# Records a rejected sequence: appends a '>name comment' line (built from the
# sequence's comment line) to the rejected output and updates the rejection
# counters in +stats+, keyed by the sequence's rejection message.
#
# files - hash of output buffers.
# seq   - sequence; must respond to get_comment_line, seq_name and
#         seq_rejected_by_message.
# stats - statistics hash updated through add_stat.
def save_rejected_seq(files, seq, stats)
  comment = seq.get_comment_line

  rejected_sink = rejected_output_file(files)
  rejected_sink << ('>' + seq.seq_name + ' ' + comment)

  add_stat(stats, 'sequences', 'rejected', seq.seq_rejected_by_message)
  add_stat(stats, 'sequences', 'count', 'rejected')
end
save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 270
# Saves a sequence with two inserts as a pair of reads in the paired buffer.
#
# The first insert is written reversed and base-complemented (a<->t, c<->g,
# both cases) as "<name>_left" with dir=R; the second insert is written as is
# as "<name>_right" with dir=F. Qualities are only written when @use_qual is
# set (the left ones reversed); otherwise an empty list is used.
#
# files        - hash of output buffers.
# seq          - sequence; must respond to seq_name and get_comment_line.
# stats        - statistics hash updated through add_stat.
# inserts      - the two inserts of the sequence.
# qual_inserts - qualities per insert; only read when @use_qual is set.
# mid          - MID action (or nil); its id is written as the library.
# dir_name     - output sub-directory.
# file_name    - base name used to build the output file name.
def save_two_inserts(files, seq, stats, inserts, qual_inserts, mid, dir_name, file_name)
  add_stat(stats, 'sequences', 'count', 'output_seqs_paired')

  # Only the id is needed here; the message part is ignored.
  library, _mid_note = get_mid_message(mid)

  # Left read: reverse complement of the first insert.
  left_name = "#{seq.seq_name}_left"
  left_comment = "template=#{seq.seq_name} dir=R library=#{library} #{seq.get_comment_line}"
  left_bases = inserts[0].reverse.tr('actgACTG', 'tgacTGAC')
  left_quals = @use_qual ? qual_inserts[0].reverse : []

  paired_file(files, dir_name, file_name) << FastqFile.to_fastq(left_name, left_bases, left_quals, left_comment)

  # Right read: second insert, untouched.
  right_name = "#{seq.seq_name}_right"
  right_comment = "template=#{seq.seq_name} dir=F library=#{library}  #{seq.get_comment_line}"
  right_bases = inserts[1]
  right_quals = @use_qual ? qual_inserts[1] : []

  paired_file(files, dir_name, file_name) << FastqFile.to_fastq(right_name, right_bases, right_quals, right_comment)
end
save_two_inserts_tuple(files,seq1,seq2, stats,inserts1,inserts2,qual_inserts1,qual_inserts2,mid,dir_name,file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 190
# Saves the two mates of a tuple, each with its first insert, into the
# paired_1 / paired_2 buffers, and counts two paired output sequences.
#
# Unlike save_two_inserts, nothing is reversed or complemented here and the
# comment written for each mate is its own comment line. Qualities are only
# written when @use_qual is set; otherwise an empty list is used.
#
# files         - hash of output buffers.
# seq1, seq2    - the two mates; must respond to seq_name and
#                 get_comment_line.
# stats         - statistics hash updated through add_stat.
# inserts1/2    - inserts of each mate; only the first one is written.
# qual_inserts1/2 - qualities per insert; only read when @use_qual is set.
# mid           - kept for interface compatibility; not used, because the
#                 comments no longer embed the MID (the previous lookup of
#                 its id/message produced values that were never read).
# dir_name      - output sub-directory.
# file_name     - base name used to build the output file names.
def save_two_inserts_tuple(files, seq1, seq2, stats, inserts1, inserts2, qual_inserts1, qual_inserts2, mid, dir_name, file_name)
  # One paired output sequence is counted per mate.
  add_stat(stats, 'sequences', 'count', 'output_seqs_paired', 2)

  # First mate.
  quals1 = @use_qual ? qual_inserts1[0] : []
  record1 = FastqFile.to_fastq(seq1.seq_name.to_s, inserts1[0], quals1, seq1.get_comment_line)
  paired_file_ilu1(files, dir_name, file_name) << record1

  # Second mate.
  quals2 = @use_qual ? qual_inserts2[0] : []
  record2 = FastqFile.to_fastq(seq2.seq_name.to_s, inserts2[0], quals2, seq2.get_comment_line)
  paired_file_ilu2(files, dir_name, file_name) << record2
end
sequence_file(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 396
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/sequences_<file_name>.fastq.
def sequence_file(files, dir_name, file_name)
  base_name = 'sequences_' + file_name + '.fastq'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
sffinfo_file(files, dir_name, file_name) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 417
# Fetches, through get_file, the entry of +files+ keyed by
# OUTPUT_PATH/dir_name/sff_info_<file_name>.txt.
def sffinfo_file(files, dir_name, file_name)
  base_name = 'sff_info_' + file_name + '.txt'
  full_path = File.join(OUTPUT_PATH, dir_name, base_name)
  get_file(files, full_path)
end
starting_worker() click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 81
# Prepares the worker: sets the log level to INFO, creates the
# ActionManager and builds the PluginManager from the 'plugin_list'
# parameter of @params.
#
# Failures while loading are reported on standard output (message followed
# by the backtrace) and do not propagate, so the method returns nil in that
# case. Only StandardError and ScriptError (e.g. LoadError / SyntaxError
# raised while loading code) are handled this way; signals, exit requests and
# out-of-memory conditions are no longer swallowed.
#
# Returns the PluginManager on success, nil when loading failed.
def starting_worker
  $WORKER_LOG.level = Logger::INFO
  $WORKER_LOG.info 'Loading actions'

  @action_manager = ActionManager.new

  $WORKER_LOG.info 'Loading plugins'
  @plugin_list = @params.get_param('plugin_list')
  # Interpolated rather than concatenated so that a non-String plugin list
  # (the original comment describes it as an array) does not raise TypeError
  # and abort the set-up; a String list is logged exactly as before.
  $WORKER_LOG.info "PLUGIN LIST:#{@plugin_list}"

  # The plugin manager stores the plugins and loads them.
  @plugin_manager = PluginManager.new(@plugin_list, @params)
rescue StandardError, ScriptError => e
  # Message and backtrace are separated by a newline so both stay readable.
  puts "#{e.message}\n#{Array(e.backtrace).join("\n")}"
end
write_seq_to_files_normal(files,seq, stats) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 342
# Writes a single (non-tuple) sequence to the output buffers.
#
# The JSON form of the sequence is appended to the JSON buffer when
# @use_json is set. The sequence is then dispatched:
# * rejected            -> save_rejected_seq
# * no inserts          -> save_empty_insert
# * exactly two inserts -> save_two_inserts (paired sequences)
# * exactly one insert  -> save_one_insert
# Sequences with more than two inserts are not written by this method.
#
# files - hash of output buffers.
# seq   - sequence to write.
# stats - statistics hash forwarded to the savers.
#
# Returns the result of the saver that was called, or nil.
def write_seq_to_files_normal(files, seq, stats)
  dir_name, file_name, _priority = seq.get_file_tag_path

  current_inserts = seq.get_inserts

  # Qualities are optional; nil is forwarded when they are disabled.
  current_quals = @use_qual ? seq.get_qual_inserts : nil

  json_file(files) << seq.to_json if @use_json

  # First MID action of the sequence, if any.
  mid = seq.get_actions(ActionMid).first

  return save_rejected_seq(files, seq, stats) if seq.seq_rejected
  return save_empty_insert(files, seq, stats) if current_inserts.empty?

  case current_inserts.count
  when 2
    save_two_inserts(files, seq, stats, current_inserts, current_quals, mid, dir_name, file_name)
  when 1
    save_one_insert(files, seq, stats, current_inserts, current_quals, mid, dir_name, file_name)
  end
end
write_seq_to_files_tuple(files,seqs, stats) click to toggle source
# File lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb, line 140
# Writes a tuple (pair) of sequences to the output buffers.
#
# Only the first two elements of +seqs+ are considered. Both mates must end
# up in the same file: when their file tags differ, the tag of the second
# mate wins if its priority is higher, otherwise the first mate's tag is
# used.
#
# When both mates have inserts they are saved together through
# save_two_inserts_tuple; otherwise each mate is handled on its own by
# save_rejected_empty_or_single.
#
# An incomplete tuple (a single sequence, which happens for the last slice
# when the number of sequences is not a multiple of the tuple size) no longer
# raises NoMethodError on the missing mate: the lone sequence is handled by
# save_rejected_empty_or_single. An empty +seqs+ is a no-op.
#
# files - hash of output buffers.
# seqs  - indexable collection holding the mates.
# stats - statistics hash forwarded to the savers.
def write_seq_to_files_tuple(files, seqs, stats)
  seq1 = seqs[0]
  seq2 = seqs[1]

  # Nothing to write for an empty slice.
  return if seq1.nil?

  dir_name, file_name, priority = seq1.get_file_tag_path

  if seq2
    dir_name2, file_name2, priority2 = seq2.get_file_tag_path

    # Both paired sequences must go in the same file; the higher priority wins.
    tags_differ = (dir_name != dir_name2) || (file_name != file_name2)
    if tags_differ && priority2 > priority
      dir_name = dir_name2
      file_name = file_name2
    end
  end

  # Current inserts; a missing mate counts as having none.
  inserts1 = seq1.get_inserts
  inserts2 = seq2 ? seq2.get_inserts : []

  # Qualities are optional.
  if @use_qual
    qual_inserts1 = seq1.get_qual_inserts
    qual_inserts2 = seq2.get_qual_inserts if seq2
  end

  # Save JSON if necessary.
  if @use_json
    json_file(files) << seq1.to_json
    json_file(files) << seq2.to_json if seq2
  end

  # First MID action of each mate, if any.
  mid1 = seq1.get_actions(ActionMid).first
  mid2 = seq2.get_actions(ActionMid).first if seq2

  if seq2 && !inserts1.empty? && !inserts2.empty?
    # Both mates have inserts: save them as a pair (the first mate's MID is
    # the one forwarded).
    save_two_inserts_tuple(files, seq1, seq2, stats, inserts1, inserts2, qual_inserts1, qual_inserts2, mid1, dir_name, file_name)
  else
    save_rejected_empty_or_single(files, seq1, stats, inserts1, qual_inserts1, mid1, dir_name, file_name)
    save_rejected_empty_or_single(files, seq2, stats, inserts2, qual_inserts2, mid2, dir_name, file_name) if seq2
  end
end