class WorkflowManager::FGCZCluster
Public Instance Methods
cluster_nodes()
click to toggle source
# File lib/workflow_manager/cluster.rb, line 218
# Static table of cluster nodes.
#
# Returns a new Hash on every call. Each key is a display label of the form
# "<host>: cpu <n>,mem <size>,scr <size>" and each value is the bare host
# name that the label describes.
def cluster_nodes
  {
    'fgcz-c-043: cpu 24,mem 23 GB,scr 11T' => 'fgcz-c-043',
    'fgcz-c-044: cpu 16,mem 128 GB,scr 500G' => 'fgcz-c-044',
    'fgcz-c-045: cpu 64,mem 504 GB,scr 15T' => 'fgcz-c-045',
    'fgcz-c-046: cpu 64,mem 504 GB,scr 11T' => 'fgcz-c-046',
    'fgcz-c-047: cpu 32,mem 1 TB,scr 28T' => 'fgcz-c-047',
    'fgcz-c-048: cpu 48,mem 252 GB,scr 3.5T' => 'fgcz-c-048',
    'fgcz-c-049: cpu 8,mem 63 GB,scr 1.7T' => 'fgcz-c-049',
    'fgcz-c-051: cpu 8,mem 31 GB,scr 800G' => 'fgcz-c-051',
    'fgcz-c-052: cpu 8,mem 31 GB,scr 800G' => 'fgcz-c-052',
    'fgcz-c-053: cpu 8,mem 31 GB,scr 800G' => 'fgcz-c-053',
    'fgcz-c-054: cpu 8,mem 31 GB,scr 800G' => 'fgcz-c-054',
    'fgcz-c-055: cpu 8,mem 31 GB,scr 800G' => 'fgcz-c-055',
    'fgcz-c-057: cpu 8,mem 31 GB,scr 200G' => 'fgcz-c-057',
    'fgcz-c-058: cpu 8,mem 31 GB,scr 200G' => 'fgcz-c-058',
    'fgcz-c-059: cpu 8,mem 31 GB,scr 200G' => 'fgcz-c-059',
    'fgcz-c-061: cpu 8,mem 31 GB,scr 200G' => 'fgcz-c-061',
    'fgcz-c-063: cpu 12,mem 70 GB,scr 450G' => 'fgcz-c-063',
    'fgcz-c-065: cpu 24,mem 70 GB,scr 197G' => 'fgcz-c-065',
    'fgcz-h-004: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-004',
    'fgcz-h-009: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-009',
    'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
  }
end
copy_commands(org_dir, dest_parent_dir, now=nil)
click to toggle source
# File lib/workflow_manager/cluster.rb, line 202
# Builds the g-req command line that copies org_dir under dest_parent_dir.
#
# org_dir         - source path, interpolated into the command as-is.
# dest_parent_dir - destination parent path, interpolated as-is.
# now             - selects the g-req mode:
#                   "force"          -> "g-req copynow -f ..."
#                   any other truthy -> "g-req copynow ..."
#                   nil/false        -> "g-req -w copy ..."
#
# Returns a one-element Array holding the command String. Nothing is
# executed here.
# NOTE(review): the paths are not shell-escaped; callers presumably pass
# trusted paths without whitespace - confirm before feeding user input.
def copy_commands(org_dir, dest_parent_dir, now=nil)
  if now == "force"
    ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
  elsif now
    ["g-req copynow #{org_dir} #{dest_parent_dir}"]
  else
    ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
  end
end
delete_command(target)
click to toggle source
# File lib/workflow_manager/cluster.rb, line 215
# Builds (but does not run) the g-req command that removes target.
#
# target - interpolated into the command string as-is.
#
# Returns the command String.
def delete_command(target)
  "g-req remove #{target}"
end
job_ends?(log_file)
click to toggle source
# File lib/workflow_manager/cluster.rb, line 177
# Reports whether the end-of-script marker "__SCRIPT END__" appears within
# the last 10 lines of log_file.
#
# log_file - path of the job's log file.
#
# Returns true when the marker is found; false otherwise, including when
# the file is missing or unreadable (tail's error output is discarded and
# it then produces no lines).
def job_ends?(log_file)
  # Argument-vector form: tail is executed directly, without a shell, so a
  # path containing spaces or shell metacharacters is passed verbatim. The
  # "--" keeps a path starting with "-" from being parsed as an option, and
  # to_s keeps a nil path from turning into "tail reads stdin".
  tail_command = ['tail', '-n', '10', '--', log_file.to_s]
  IO.popen(tail_command, :err => File::NULL) do |io|
    io.each_line.any? { |line| line.include?('__SCRIPT END__') }
  end
end
job_pending?(job_id)
click to toggle source
# File lib/workflow_manager/cluster.rb, line 189
# Reports whether job_id is listed by `qstat -u "*"` with a state that
# contains "qw".
#
# job_id - job identifier; compared as a String, so an Integer id works too.
#
# Returns true or false.
def job_pending?(job_id)
  wanted = job_id.to_s
  IO.popen('qstat -u "*"') do |io|
    io.each_line.any? do |line|
      # Columns used: 1st = job id, 5th = state. A blank line yields nil for
      # both, which simply fails the comparison instead of raising.
      jobid, _prior, _name, _user, state = line.split
      (jobid == wanted) && (state =~ /qw/)
    end
  end
end
job_running?(job_id)
click to toggle source
# File lib/workflow_manager/cluster.rb, line 164
# Reports whether job_id is listed by `qstat -u "*"` with state exactly "r".
#
# job_id - job identifier; compared as a String, so an Integer id works too.
#
# Returns true or false.
def job_running?(job_id)
  wanted = job_id.to_s
  IO.popen('qstat -u "*"') do |io|
    io.each_line.any? do |line|
      # Columns used: 1st = job id, 5th = state. A blank line yields nil for
      # both, which simply fails the comparison instead of raising.
      jobid, _prior, _name, _user, state = line.split
      jobid == wanted && state == 'r'
    end
  end
end
kill_command(job_id)
click to toggle source
# File lib/workflow_manager/cluster.rb, line 212
# Builds (but does not run) the qdel command for job_id.
#
# job_id - interpolated into the command string as-is.
#
# Returns the command String.
def kill_command(job_id)
  "qdel #{job_id}"
end
node_list()
click to toggle source
# File lib/workflow_manager/cluster.rb, line 243 def node_list node2scr = {} command = "qhost -F scratch" keep = nil IO.popen(command) do |out| while line=out.gets hostname, arch, ncpu, loading, memtot, memuse, *others = line.split if hostname =~ /fgcz/ keep = hostname elsif scratch_ = line.chomp.split.last and scratch = scratch_.split('=').last node2scr[keep] = scratch.to_i keep = nil end end end list = {} keep = nil command = 'qhost -q' IO.popen(command) do |out| while line=out.gets # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS hostname, arch, ncpu, loading, memtot, memuse, *others = line.split if hostname =~ /fgcz/ #puts [hostname, ncpu, loading, memtot, memuse].join("\t") mem = memtot.gsub(/G/, '').to_i keep = [hostname, ncpu, "#{mem}G"] elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/ hostname = keep.shift keep[0] = cores if scr = node2scr[hostname] and scr >= 1000 scr = "%.1f" % (scr.to_f / 1000) scr << "T" else scr = scr.to_s + "G" end keep << scr list[hostname] = keep keep = nil end end end # reformat nodes = {} list.each do |hostname, specs| # 20190823 masa tentatively off use f47 unless hostname =~ /fgcz-c-047/ cores, ram, scr = specs key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}" value = hostname nodes[key] = value end end nodes end
submit_job(script_file, script_content, option='')
click to toggle source
# File lib/workflow_manager/cluster.rb, line 147
# Submits a job script through g-sub.
#
# script_file    - path or name of the script; its basename must contain ".sh".
# script_content - passed to generate_new_job_script together with the
#                  normalised script name.
# option         - extra text inserted into the g-sub command line as-is.
#
# Returns [job_id, log_file, command]: the job id (String) parsed from
# g-sub's "Your job <n> (" output, the stdout log path under @log_dir, and
# the command line that was run.
#
# Raises RuntimeError when the script name does not contain ".sh", or when
# no job id can be found in g-sub's output (previously the latter surfaced
# as a NoMethodError on nil).
def submit_job(script_file, script_content, option='')
  script_name = File.basename(script_file)
  unless script_name =~ /\.sh/
    err_msg = "FGCZCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
    warn err_msg
    raise err_msg
  end

  # Keep only the part before the first ".sh" and re-append the extension,
  # dropping anything that follows it.
  script_name = script_name.split(/\.sh/).first + ".sh"
  new_job_script = generate_new_job_script(script_name, script_content)
  new_job_script_base = File.basename(new_job_script)
  log_file = File.join(@log_dir, new_job_script_base + "_o.log")
  err_file = File.join(@log_dir, new_job_script_base + "_e.log")
  command = "g-sub -o #{log_file} -e #{err_file} #{option} #{new_job_script}"
  output = `#{command}`

  matched = output.match(/Your job (\d+) \(/)
  unless matched
    # Submission failed or g-sub printed something unexpected: report what
    # was actually returned instead of failing on nil[1].
    err_msg = "FGCZCluster#submit_job, ERROR: could not parse job id from g-sub output: #{output.inspect}"
    warn err_msg
    raise err_msg
  end

  [matched[1], log_file, command]
end