class Cagnut::Configuration::Checks::Datasets

Attributes

config[RW]

Public Class Methods

new(config) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 8
# Wraps an already-loaded configuration so the check methods can read and
# update it.
#
# @param config [Object] configuration indexed by string keys such as
#   'samples' and 'info' elsewhere in this class; stored by reference
def initialize(config)
  @config = config
end

Public Instance Methods

check(config_name) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 12
# Prepares the analysis tree for every configured sample.
#
# Stores "<Cagnut.prefix_name>_<config_name>" under 'prefix_name', creates the
# analysis folder through create_analysis_folder, then passes each entry of
# @config['samples'] (with its index) to setup_requirements.
#
# @param config_name [#to_s] suffix appended to Cagnut.prefix_name
# @return the updated @config
def check(config_name)
  prefix = "#{Cagnut.prefix_name}_#{config_name}"
  @config['prefix_name'] = prefix
  analysis_folder = create_analysis_folder(config)
  @config['samples'].each.with_index do |sample, index|
    setup_requirements(sample, index, analysis_folder)
  end
  @config
end
check_datatype(qseq_dir, fastq_dir) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 133
# Records, for every sample, the sequence file(s) selected by fetch_seqs for
# the configured data type.
#
# Only the 'TILESQSEQ' and 'TILESFASTQ' data types define a glob and a file
# name pattern. For any other data type there is nothing to look up, so the
# samples are left untouched (previously this fell through to Dir[nil] and
# raised a TypeError).
#
# @param qseq_dir [String] directory globbed for "*.txt*" when TILESQSEQ
# @param fastq_dir [String] directory globbed for "*.fastq*" when TILESFASTQ
# @return the (possibly updated) @config
def check_datatype(qseq_dir, fastq_dir)
  # The data type does not depend on the sample, so resolve it once.
  file, pattern, file_end =
    case @config['info']['data_type']
    when 'TILESQSEQ'
      ["#{qseq_dir}/*.txt*", '.*s_\d+_1_(\d+).*', '.fastq']
    when 'TILESFASTQ'
      ["#{fastq_dir}/*.fastq*", '(.*_R1_.*).fastq.*+', '.fastq']
    end
  return @config unless file

  @config['samples'].each_index do |index|
    @config['samples'][index]['seqs_path'] = fetch_seqs(Dir[file], file_end, pattern)
  end
  @config
end
check_file_size(flist, previous_size=0) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 105
# Aborts the program as soon as one of the listed files has size zero.
#
# @param flist [Array<String>] paths whose size is read with File.size
# @param previous_size [Integer] accepted for callers but not used here
# @return [Array<String>] flist itself when every file is non-empty
def check_file_size(flist, previous_size = 0)
  flist.each do |path|
    abort "#{path} is empty." if File.size(path) <= 0
  end
end
check_ln_file(sample, qseq_path, fastq_path) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 51
# Aborts when neither the fastq folder ("*.fastq*") nor the qseq folder
# (any entry) contains anything for the sample.
#
# @param sample [Hash] sample entry; only 'name' is read, for the message
# @param qseq_path [String] folder expected to hold qseq files
# @param fastq_path [String] folder expected to hold fastq files
# @return [nil] when at least one file was found
def check_ln_file(sample, qseq_path, fastq_path)
  linked = Dir.glob("#{fastq_path}/*.fastq*") + Dir.glob("#{qseq_path}/*")
  abort "Not found #{sample['name']} files in fastq and qseq" if linked.empty?
end
check_pu(index) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 129
# Gives the sample at the given position a 'pu' value of 'NA' when it has
# none (nil or false); an existing value is kept.
#
# @param index [Integer] position of the sample inside @config['samples']
# @return the resulting 'pu' value
def check_pu(index)
  sample = @config['samples'][index]
  sample['pu'] ||= 'NA'
end
create_analysis_folder(config) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 21
# Creates "<output_data_dir>/<prefix_name>" (including missing parents) and
# returns that path.
#
# The output directory is read from @config['cagnut']['output_data_dir'] with
# one trailing slash removed by dir_rm_slash; the prefix comes from the
# config argument.
#
# @param config [Hash] object providing 'prefix_name'
# @return [String] path of the analysis folder
def create_analysis_folder(config)
  base_dir = dir_rm_slash(@config['cagnut']['output_data_dir'])
  "#{base_dir}/#{config['prefix_name']}".tap do |folder|
    FileUtils.mkdir_p(folder)
  end
end
dir_present?(dataset) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 64
# Stops the program when the given data directory does not exist.
#
# The failure is reported with abort, like the other checks in this class:
# the message goes to stderr, names the directory that was actually tested,
# and the process exits with a non-zero status.
#
# @param dataset [String] directory path to test
# @return [nil] when the directory exists
# @raise [SystemExit] when the directory is missing
def dir_present?(dataset)
  return if Dir.exist?(dataset)

  abort "Error: Missing data directory #{dataset}"
end
dir_rm_slash(dir) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 70
# Returns a copy of the path without its final "/" (at most one is removed).
#
# @param dir [String] directory path
# @return [String] new string; the argument is not modified
def dir_rm_slash(dir)
  trailing_slash = %r{/\z}
  dir.sub(trailing_slash, '')
end
fetch_flist(dir) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 84
# Lists the entries of a directory that match "*.fastq*" and aborts when
# there are none.
#
# @param dir [String] directory to search (not recursive)
# @return [Array<String>] matching paths, never empty
def fetch_flist(dir)
  found = Dir.glob("#{dir}/*.fastq*")
  abort "No fastq found in #{dir}" if found.empty?
  found
end
fetch_seqs(files_path, file_end, pattern) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 150
# Picks the first path whose base name (with file_end stripped) matches the
# given pattern.
#
# NOTE(review): the result is a single String on a hit but an empty Array on
# a miss. The plural name suggests a list of all matches may have been
# intended; confirm with the consumers of 'seqs_path' before changing it.
#
# @param files_path [Array<String>] candidate paths, checked in order
# @param file_end [String] suffix handed to File.basename
# @param pattern [String] regular expression source
# @return [String, Array] first matching path, or [] when nothing matches
def fetch_seqs(files_path, file_end, pattern)
  first_hit = files_path.find do |candidate|
    File.basename(candidate, file_end).match(/#{pattern}/)
  end
  first_hit || []
end
files_to_much?(flist) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 120
# Prints a notice when more than one fastq file is present, naming the
# configured data type and listing the files. Nothing is printed for zero or
# one file.
#
# @param flist [Array<String>] fastq paths found for a sample
# @return [nil]
def files_to_much?(flist)
  return unless flist.size > 1

  # Layout (leading blank line, indentation, trailing spaces) is part of the
  # printed message and is kept as is.
  notice = [
    '',
    "    DATA_TYPE = #{@config['info']['data_type']} but more than one fastq found.",
    '    Only the first would be processed.',
    "    #{flist.inspect}",
    '  '
  ]
  puts notice.join("\n")
end
ln_fastq_file(sample, flist, fastq_dir) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 90
# Symlinks a sample's fastq files into fastq_dir.
#
# For the single-file data types (ONEFASTQ, ONEFASTQSE) only the first file
# is linked, under the name produced by link_name (defined outside this
# view). For every other data type each file whose path matches the sample
# name is linked into fastq_dir under its own name.
#
# Links are created by running `ln -s` with an argument vector instead of a
# shell string, so paths containing spaces or shell metacharacters are passed
# through unchanged. As before, ln's error output is discarded and a failed
# link (for example one that already exists) is ignored.
#
# @param sample [Hash] sample entry; 'name' is used as the match pattern
# @param flist [Array<String>] fastq paths; every file must be non-empty
# @param fastq_dir [String] directory receiving the links
# @return not meaningful; callers should not rely on it
def ln_fastq_file(sample, flist, fastq_dir)
  check_file_size flist
  if %w(ONEFASTQ ONEFASTQSE).include? @config['info']['data_type']
    files_to_much? flist
    file_type = link_name flist, sample['name']
    seq_file = "#{fastq_dir}/#{file_type}"
    if flist[0].match sample['name']
      system('ln', '-s', '--', flist[0], seq_file, err: File::NULL)
    end
  else
    flist.each do |file|
      next unless file.match sample['name']
      system('ln', '-s', '--', file, fastq_dir, err: File::NULL)
    end
  end
end
ln_seq_files(sample, seq_txt, fastq_file) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 58
# Links one sample's raw sequence files into its working folders.
#
# Steps, in order: verify that sample['path'] exists (dir_present?), link the
# sequence/qseq text files (ln_seq_txt_file), then link the fastq files found
# by fetch_flist (ln_fastq_file).
#
# @param sample [Hash] sample entry providing 'path'
# @param seq_txt [String] destination passed to ln_seq_txt_file
# @param fastq_file [String] destination passed to ln_fastq_file
# @return whatever ln_fastq_file returns
def ln_seq_files(sample, seq_txt, fastq_file)
  dir_present?(sample['path'])
  ln_seq_txt_file(sample, seq_txt)
  fastq_list = fetch_flist(sample['path'])
  ln_fastq_file(sample, fastq_list, fastq_file)
end
ln_seq_files_to_folder(sample, qseq_path, fastq_path) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 45
# Links a sample's sequence files and then validates the result.
#
# Runs ln_seq_files, check_datatype and check_ln_file in that order.
#
# @param sample [Hash] sample entry
# @param qseq_path [String] folder for qseq links
# @param fastq_path [String] folder for fastq links
# @return whatever check_ln_file returns
def ln_seq_files_to_folder(sample, qseq_path, fastq_path)
  ln_seq_files(sample, qseq_path, fastq_path)
  check_datatype(qseq_path, fastq_path)
  check_ln_file(sample, qseq_path, fastq_path)
end
ln_seq_txt_file(sample, qseq_dir) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 74
# Symlinks a sample's "*_sequence.txt*" and "*_qseq.txt*" files into qseq_dir.
#
# All files found under sample['path'] are size-checked first; only those
# whose path matches the sample name are linked.
#
# Links are created by running `ln -s` with an argument vector instead of a
# shell string, so paths containing spaces or shell metacharacters are passed
# through unchanged. As before, ln's error output is discarded and a failed
# link (for example one that already exists) is ignored.
#
# @param sample [Hash] sample entry providing 'path' and 'name'
# @param qseq_dir [String] directory receiving the links
# @return [Array<String>] every file found, linked or not
def ln_seq_txt_file(sample, qseq_dir)
  files =
    Dir.glob("#{sample['path']}/*_sequence.txt*") + Dir.glob("#{sample['path']}/*_qseq.txt*")
  check_file_size files
  files.each do |file|
    next unless file.match sample['name']
    system('ln', '-s', '--', file, qseq_dir, err: File::NULL)
  end
end
make_required_folders(config, sample, index, analysis_folder) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 42
# Does nothing here; setup_requirements calls it once per sample after the
# sample's 'jobs' and 'tmp' folders have been created.
# NOTE(review): presumably an extension point redefined elsewhere — confirm.
#
# @param config [Object] the configuration (unused)
# @param sample [Hash] sample entry (unused)
# @param index [Integer] sample position (unused)
# @param analysis_folder [String] folder of the sample (unused)
# @return [nil]
def make_required_folders(config, sample, index, analysis_folder)
end
setup_requirements(sample, index, analysis_folder) click to toggle source
# File lib/cagnut/configuration/checks/datasets.rb, line 28
# Creates the working folders of one sample and records them in the config.
#
# Prints the sample's source path, creates "<analysis_folder>/<name>" with
# its "jobs" and "tmp" sub-folders, stores the slash-trimmed source path and
# both sub-folder paths on @config['samples'][index], then calls
# make_required_folders and check_pu.
#
# @param sample [Hash] sample entry providing 'name' and 'path'
# @param index [Integer] position of the sample inside @config['samples']
# @param analysis_folder [String] parent folder for all samples
# @return whatever check_pu returns
def setup_requirements(sample, index, analysis_folder)
  puts "Dataset : #{sample['path']}"
  sample_dir = "#{analysis_folder}/#{sample['name']}"
  FileUtils.mkdir_p(sample_dir) unless Dir.exist?(sample_dir)
  @config['samples'][index]['path'] = dir_rm_slash(sample['path'])
  %w(jobs tmp).each do |subdir|
    FileUtils.mkdir_p "#{analysis_folder}/#{sample['name']}/#{subdir}"
    @config['samples'][index][subdir] = "#{analysis_folder}/#{sample['name']}/#{subdir}"
  end
  # mysql_insert if options[:mysql]
  make_required_folders(@config, sample, index, sample_dir)
  check_pu(index)
end