class Bio::SGE

Attributes

command[RW]
count[RW]
error_dir[RW]
input_dir[RW]
log_dir[RW]
output_dir[RW]
query[RW]
sge_opts[RW]
target[RW]
task_max[RW]
task_min[RW]
task_step[RW]
work_dir[RW]

Public Class Methods

new(query = nil, target = nil, command = nil, sge_opts = nil) { |self| ... } click to toggle source
# File lib/bio-sge.rb, line 341
# Builds a job description rooted at the current working directory.
#
# query::    query file name; stored as "<work_dir>/<query>"
# target::   target name; stored as "<work_dir>/<target>"
# command::  command string later substituted into the generated script
# sge_opts:: extra options string passed through to qsub
#
# If a block is given, the new object is yielded to it. The directory and
# file-name defaults are assigned BEFORE the block runs, so values the block
# sets through the accessors (log_dir, input_dir, output_dir, error_dir) are
# kept instead of being overwritten afterwards.
def initialize(query = nil, target = nil, command = nil, sge_opts = nil)
  @work_dir = Dir.pwd
  @query = "#{@work_dir}/#{query}"
  @target = "#{@work_dir}/#{target}"
  @command = command
  @sge_opts = sge_opts

  # Defaults, relative to the directory the job is run from.
  @log_dir = "log"
  @input_dir = "input"
  @output_dir = "output"
  @error_dir = "error"
  @script_file = "script.rb"
  @count_file = "count.txt"

  yield(self) if block_given?
end

Public Instance Methods

clean() click to toggle source
# File lib/bio-sge.rb, line 399
# Deletes the extraction artifacts: the count file first, then the input
# directory. Returns nil.
def clean
  [@count_file, @input_dir].each { |path| rmtree(path) }
  nil
end
clear() click to toggle source
# File lib/bio-sge.rb, line 392
# Deletes the generated script file and the output, error and log
# directories, in that order. Returns nil.
def clear
  [@script_file, @output_dir, @error_dir, @log_dir].each { |path| rmtree(path) }
  nil
end
distclean() click to toggle source
# File lib/bio-sge.rb, line 404
# Full cleanup: runs #clear and then #clean. Returns nil.
def distclean
  [:clear, :clean].each { |phase| send(phase) }
  nil
end
extract() click to toggle source
# File lib/bio-sge.rb, line 441
# Splits the query flat file into one file per entry.
#
# Does nothing if the count file already exists. Otherwise every entry read
# from @query is numbered from 1 (the running number is kept in @count).
# Entries whose number falls below @task_min or above @task_max (when those
# are set) are skipped. Each remaining entry is written as raw text to
# "<input_dir>/<slice>/<number>", and a "<number>\t<entry_id>" line is
# appended to the count file. Progress is reported on $stderr.
#
# The slice number groups entries into sub-directories of @@slice entries
# each (@@slice is defined outside this method).
def extract
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  return if File.exist?(@count_file)

  slice = slice_old = nil
  @count = 0
  File.open(@count_file, "a") do |count_file|
    Bio::FlatFile.auto(@query) do |ff|
      ff.each do |entry|
        @count += 1
        $stderr.print "Extracting ... #{@count} (#{entry.entry_id}) "

        if (@task_min && @count < @task_min) || (@task_max && @count > @task_max)
          $stderr.puts "skip."
          next
        end

        slice_old = slice
        slice = (@count - 1) / @@slice + 1
        slice_dir = "#{@input_dir}/#{slice}"
        # Only create the directory when the slice number changes.
        mkpath(slice_dir) if slice_old != slice
        File.open("#{slice_dir}/#{@count}", "w") do |file|
          file.puts ff.entry_raw
        end
        count_file.puts [@count, entry.entry_id].join("\t")
        $stderr.puts "done."
      end
    end
  end
end
mkpath(dir) click to toggle source
# File lib/bio-sge.rb, line 409
# Creates +dir+ (including missing parents) unless it is already a
# directory, reporting what happened on $stderr.
def mkpath(dir)
  $stderr.print "Creating #{dir} ... "
  already_there = File.directory?(dir)
  FileUtils.mkdir_p(dir) unless already_there
  $stderr.puts(already_there ? "skip (already exists)." : "done.")
end
prepare() click to toggle source
# File lib/bio-sge.rb, line 358
# Runs the preparation steps in order: #setup, #script, then #extract.
# Returns whatever #extract returns.
def prepare
  [:setup, :script].each { |stage| send(stage) }
  extract
end
rmtree(file) click to toggle source
# File lib/bio-sge.rb, line 386
# Recursively deletes +file+ (a file or a directory tree), announcing the
# deletion on $stderr before and after.
def rmtree(file)
  $stderr.print("Deleting #{file} ... ")
  FileUtils.rm_rf(file) # FileUtils.rmtree is an alias of rm_rf
  $stderr.puts("done.")
end
script() click to toggle source
# File lib/bio-sge.rb, line 426
# Writes the job script to @script_file.
#
# A copy of @@template (defined outside this method) has its %NAME%
# placeholders replaced, in the order listed below, by the matching
# instance values and by @@slice.
#
# Raises TypeError if any substituted value is nil (for example when no
# command was given).
def script
  substitutions = [
    ['%WORK_DIR%', @work_dir],
    ['%INPUT_DIR%', @input_dir],
    ['%OUTPUT_DIR%', @output_dir],
    ['%ERROR_DIR%', @error_dir],
    ['%TARGET%', @target],
    ['%COMMAND%', @command],
    ['%SLICE%', @@slice.to_s]
  ]

  sge_script = @@template.dup
  substitutions.each do |placeholder, value|
    raise TypeError, "no value set for #{placeholder}" if value.nil?

    # Block form: the value is inserted literally. A string replacement
    # argument would interpret backslash sequences such as \\ or \1 and
    # corrupt commands or paths that contain backslashes.
    sge_script.gsub!(placeholder) { value }
  end

  File.open(@script_file, "w") do |file|
    file.puts sge_script
  end
end
setup() click to toggle source
# File lib/bio-sge.rb, line 419
# Creates the log, input, output and error directories, in that order.
# Returns nil.
def setup
  [@log_dir, @input_dir, @output_dir, @error_dir].each { |dir| mkpath(dir) }
  nil
end
submit() click to toggle source
# File lib/bio-sge.rb, line 364
# Submits the prepared script to the queue with qsub as array jobs.
#
# If @count is not set, it is read from the leading number on the last
# line of the count file (0 when the file is empty). The task range runs
# from @task_min (default 1) to @task_max (default @count) with a stride of
# @task_step (default 1000), and is split into chunks of at most +limit+
# task ids, one qsub call per chunk.
def submit
  unless @count
    $stderr.puts "Reading #{@count_file} ..."
    last_line = File.readlines(@count_file).last
    # An empty count file means nothing was extracted: treat it as zero
    # entries instead of failing on nil.
    @count = last_line ? last_line[/^\d+/].to_i : 0
    $stderr.puts "done."
  end

  task_min = @task_min || 1
  task_max = @task_max || @count
  task_step = @task_step || 1000

  # system upper limit is 75000
  limit = 50000
  task_min.step(task_max, limit) do |offset|
    # The "-t" range is inclusive, so a chunk ends one id before the next
    # chunk's offset; otherwise the boundary task would be submitted twice.
    chunk_end = [offset + limit - 1, task_max].min
    opts = "#{@sge_opts} -o #{@log_dir} -e #{@log_dir} -cwd"
    span = "-t #{offset}-#{chunk_end}:#{task_step}"
    qsub = "qsub #{opts} #{span} #{@script_file}"
    $stderr.puts "Submitting ... #{qsub}"
    system(qsub)
  end
end