#!/usr/bin/tcsh -f
# dcmsplit

# Source $FREESURFER_HOME/sources.csh when that file exists; the script
# simply carries on without it otherwise.
if(-e $FREESURFER_HOME/sources.csh) then
  source $FREESURFER_HOME/sources.csh
endif

# ---- Identification ----
set VERSION = 'dcmsplit mockbuild-local'
set scriptname = `basename $0`

# ---- Option defaults; parse_args overrides them from the command line ----
set dcmdir = ()       # --dcm : folder holding the input files
set outdir = ()       # --o : folder that receives the per-UID folders
set dcmtag = (20 d)   # Study UID
set UseSeriesNo = 0   # --series+ sets this to 1
set DoLink = 1        # 1 means symlink the files, 0 means copy them (--cp)
set LF = ()           # log file; when left empty it becomes dcmsplit.log in outdir
set tmpdir = ()
set cleanup = 1

# ---- Requests answered before any real parsing ----
set inputargs = ($argv)
set PrintHelp = 0
if($#argv == 0) goto usage_exit

# Any argument containing -help prints the usage followed by the help text
set n = `echo $argv | grep -c -e -help`
if($n != 0) then
  set PrintHelp = 1
  goto usage_exit
endif

# Any argument containing -version prints the version string and stops
set n = `echo $argv | grep -c -e -version`
if($n != 0) then
  echo $VERSION
  exit 0
endif

# ---- Parse, then validate; each section jumps back to its return label ----
goto parse_args
parse_args_return:
goto check_params
check_params_return:

# Remember when the run started: once for display, once as epoch seconds
# for the run-time report at the end.
set StartTime = `date`
set tSecStart = `date '+%s'`

# Date pieces (referenced only by the commented-out time-stamped log name below)
set year  = `date +%Y`
set month = `date +%m`
set day   = `date +%d`
set hour  = `date +%H`
set min   = `date +%M`

# Create the output folder and swap outdir for its absolute path
mkdir -p $outdir
pushd $outdir > /dev/null
set outdir = `pwd`
popd > /dev/null

# if($#tmpdir == 0) then
#   set tmpdir = `fs_temp_dir --scratch --base $outdir`
# endif
#mkdir -p $tmpdir

# Pick the log file (dcmsplit.log in the output folder unless --log or
# --nolog was given) and start it afresh.
#if($#LF == 0) set LF = $outdir/dcmsplit.Y$year.M$month.D$day.H$hour.M$min.log
if($#LF == 0) then
  set LF = "$outdir"/dcmsplit.log
endif
if("$LF" != /dev/null) then
  rm -f "$LF"
endif

# Log header: date, working folder, command line, build stamp, version,
# host and process id.
echo "Log file for dcmsplit" >> "$LF"
date | tee -a "$LF"
echo "" | tee -a "$LF"
echo "cd `pwd`" | tee -a "$LF"
echo $0 $inputargs | tee -a "$LF"
echo "" | tee -a "$LF"
cat $FREESURFER_HOME/build-stamp.txt | tee -a "$LF"
echo $VERSION | tee -a "$LF"
uname -a | tee -a "$LF"
echo "pid $$" | tee -a "$LF"
# The job id goes to the log only, and only when PBS_JOBID is set
if($?PBS_JOBID) then
  echo "pbsjob $PBS_JOBID" >> "$LF"
endif

#========================================================
# Build the file list: one line per file that mri_probedicom can read,
# holding its path (relative to dcmdir), the value of the split tag and,
# with --series+, the series number.
set tmpfile = /tmp/dcmsplit.$$.tmp
set flistfile = "$outdir"/flist.txt
rm -f $flistfile
cd "$dcmdir"
#set flist = (`ls -d *`)
# Name the starting folder explicitly. GNU find assumes the current folder
# when none is given, but find on BSD/macOS refuses to run without one.
# The output on GNU is the same either way.
set flist = (`find . -print`)
echo "Found $#flist files, checking"
foreach f ($flist)
  # find also lists folders; only plain files are probed
  if(! -f $f) continue
  # The tag value goes through tmpfile so that the exit status of
  # mri_probedicom can be checked before the value is used
  set cmd = (mri_probedicom --i $f --t $dcmtag)
  echo $cmd >> "$LF"
  $cmd > $tmpfile
  if($status) then
    echo "  ... assuming that this is just not a dicom file ... continuing" | tee -a "$LF"
    continue
    #goto error_exit;
  endif
  set studyuid = `cat $tmpfile`

  # With --series+ the series number (tag 20 11) is recorded as well
  set seriesno = ()
  if($UseSeriesNo) then
    set cmd = (mri_probedicom --i $f --t 20 11)
    echo $cmd >> "$LF"
    $cmd > $tmpfile
    if($status) then
      echo "  ... assuming that this is just not a dicom file ... continuing" | tee -a "$LF"
      continue
      #goto error_exit;
    endif
    set seriesno = `cat $tmpfile`
  endif

  echo "$f" "$studyuid $seriesno" >> "$flistfile"
end
rm -f "$tmpfile"

# Reduce the file list to the distinct keys. A key is column 2 of the
# file list, or column 2 and column 3 joined by a slash with --series+.
echo "Getting unique UIDs (UseSeriesNo=$UseSeriesNo)" | tee -a "$LF"
if($UseSeriesNo) then
  set uidlist = (`awk '{print $2"/"$3}' "$flistfile" | sort | uniq`)
else
  set uidlist = (`awk '{print $2}' "$flistfile" | sort | uniq`)
endif
echo uidlist $uidlist | tee -a "$LF"

#set nfiles = `wc -l "$flistfile"`

# Give every key its own folder under outdir and fill it with links to
# (or copies of) the files recorded for that key.
echo "Processing each uid"|& tee -a "$LF"
foreach uid ($uidlist)
  echo $uid |& tee -a "$LF"
  mkdir -p "$outdir"/$uid
  cd "$outdir"/$uid
  pwd |& tee -a "$LF"

  # Paths in the file list whose key matches this one
  if($UseSeriesNo) then
    set flist = (`awk -v uid=$uid '$2"/"$3==uid {print $1}' "$flistfile"`)
  else
    set flist = (`awk -v uid=$uid '$2==uid {print $1}' "$flistfile"`)
  endif

  foreach f ($flist)
    set bf = `basename $f`
    #echo $f $bf
    # Files from different subfolders can share a base name; warn, then
    # let the newer one replace the older
    if(-e $bf) then
      echo "WARNING: $bf already exists"
      echo "  There may be a name collision, consider using --series+"
    endif
    rm -f $bf

    if(! $DoLink) then
      set cmd = (cp -p "$dcmdir"/$f .)
    else
      set cmd = (ln -fs "$dcmdir"/$f .)
    endif
    echo $cmd >> "$LF"
    $cmd | tee -a "$LF"
    if($status) then
      pwd
      echo $cmd
      goto error_exit
    endif
  end
end

#========================================================

# Cleanup
# if($cleanup) rm -rf $tmpdir

# Done: report the wall-clock run time on the terminal and in the log
echo " " |& tee -a "$LF"
set tSecEnd = `date '+%s'`;
@ tSecRun = $tSecEnd - $tSecStart;
# 60 seconds to the minute. The divisor here used to be 50, which
# overstated the reported minutes by 20 percent.
set tRunMin = `echo $tSecRun/60|bc -l`
set tRunMin = `printf %5.2f $tRunMin`
set tRunHours = `echo $tSecRun/3600|bc -l`
set tRunHours = `printf %5.2f $tRunHours`
echo "Started at $StartTime " |& tee -a "$LF"
echo "Ended   at `date`" |& tee -a "$LF"
echo "Dcmsplit-Run-Time-Sec $tSecRun" |& tee -a "$LF"
echo "Dcmsplit-Run-Time-Min $tRunMin" |& tee -a "$LF"
echo "Dcmsplit-Run-Time-Hours $tRunHours" |& tee -a "$LF"
echo " " |& tee -a "$LF"
# NOTE(review): dcmsplity looks like a typo for dcmsplit; the text is left
# as it was in case anything matches on this line of the log.
echo "dcmsplity Done" |& tee -a "$LF"
exit 0

###############################################

############--------------##################
# Generic failure exit: prints a bare ERROR: marker and ends the script
# with status 1. The link/copy loop jumps here after it has echoed the
# folder and the command that failed.
error_exit:
echo "ERROR:"

exit 1;
###############################################

############--------------##################
parse_args:
# Walk the command line, consuming each flag together with any values it
# takes, then jump back to parse_args_return. An unknown flag ends the
# script with status 1.
set cmdline = ($argv)
while($#argv != 0)

  set flag = $argv[1]
  shift

  switch($flag)

    # ---- input and output locations ----
    case "--dcm":
    case "--dcmdir":
      if($#argv == 0) goto arg1err
      set dcmdir = $argv[1]
      shift
      if(! -e $dcmdir) then
        echo "ERROR: cannot find $dcmdir"
        exit 1
      endif
      breaksw

    case "--o":
      if($#argv == 0) goto arg1err
      set outdir = $argv[1]
      shift
      breaksw

    # ---- which dicom tag defines a group ----
    case "--uid":
      set dcmtag = (20 d) # Study UID
      breaksw

    case "--name":
      set dcmtag = (10 10) # Patient name
      breaksw

    case "--seriesno":
      set dcmtag = (20 11) # Series Number
      breaksw

    case "--studyDes":
      set dcmtag = (0008 1030) # Study Description
      breaksw

    case "--t":
      if($#argv < 2) goto arg2err
      set dcmtag = ($argv[1] $argv[2])
      shift
      shift
      breaksw

    case "--series+":
      set UseSeriesNo = 1
      breaksw

    # ---- link or copy ----
    case "--link":
      set DoLink = 1
      breaksw

    case "--cp":
      set DoLink = 0
      breaksw

    # ---- logging ----
    case "--log":
      if($#argv == 0) goto arg1err
      set LF = $argv[1]
      shift
      breaksw

    case "--nolog":
    case "--no-log":
      set LF = /dev/null
      breaksw

    # ---- scratch space ----
    case "--tmp":
    case "--tmpdir":
      if($#argv == 0) goto arg1err
      set tmpdir = $argv[1]
      shift
      set cleanup = 0
      breaksw

    case "--cleanup":
      set cleanup = 1
      breaksw

    case "--nocleanup":
      set cleanup = 0
      breaksw

    # ---- shell tracing ----
    case "--debug":
      set verbose = 1
      set echo = 1
      breaksw

    default:
      echo ERROR: Flag $flag unrecognized.
      echo $cmdline
      exit 1
      breaksw
  endsw

end

goto parse_args_return
############--------------##################

############--------------##################
check_params:
# Validate the parsed options, then jump back to check_params_return.

# Both folders are mandatory
if($#outdir < 1) then
  echo "ERROR: must spec outdir"
  exit 1
endif
if($#dcmdir < 1) then
  echo "ERROR: must spec dcmdir"
  exit 1
endif

# Swap dcmdir for its absolute path. The output of pwd is split into
# words, so anything other than exactly one word is treated as a path
# with a space in it.
pushd $dcmdir > /dev/null
set dcmdir = `pwd`
popd > /dev/null
if($#dcmdir != 1) then
  echo "ERROR: it appears that the dicom path has a space in it:"
  echo "  $dcmdir"
  echo "This will cause this program to fail"
  exit 1
endif

goto check_params_return
############--------------##################

############--------------##################
# Reached from parse_args when a flag that takes one value is the last
# word on the command line. Exits with status 1.
arg1err:
  echo "ERROR: flag $flag requires one argument"
  exit 1
############--------------##################
# Reached from parse_args when --t is followed by fewer than two values.
# Exits with status 1.
arg2err:
  echo "ERROR: flag $flag requires two arguments"
  exit 1
############--------------##################

############--------------##################
usage_exit:
  # Short usage summary; always printed
  echo ""
  echo "dcmsplit "
  echo "  --dcm dcmdir"
  echo "  --o outdir"
  echo "  --cp : copy instead of link"
  echo "  --link : link instead of copy (default)"
  echo "  --name     : split by patient name instead of uid"
  echo "  --uid      : split by Study UID instead of name (the default)"
  echo "  --seriesno : split by series number"
  echo "  --series+ : split by series number and either name or uid"
  echo "  --t group element : split by given dicom tag"
  echo "  --studyDes : split by Study Description"
  echo ""

  # Without a help request stop here
  if($PrintHelp == 0) exit 1

  # With one, add the version and every line of this script that follows
  # the help marker line
  echo $VERSION
  cat $0 | awk '{if(prt) print $0} $1 == "BEGINHELP" {prt = 1}'
exit 1

#---- Everything below here is printed out as part of help -----#
BEGINHELP

The purpose of this program is to separate dicom files into separate
folders based on a unique identifier (UID). This is useful when
multiple subjects or studies are mixed in one folder in which case
dcmunpack will not work properly. Each UID will get its own folder in
the output directory. Once this program is run, then dcmunpack can be
run on the output as normal.

By default, the UID is the Study UID (dicom tag 20 d) but it can be
changed to Patient Name (tag 10 10) with --name, Series Number with
--seriesno or some other tag with --t group element.  

If --series+ is used, then the folder will be UID/SeriesNo. This can
be used to prevent name collisions.

By default, all the files for a given UID will be symlinked to
a file of the same name in the folder for that UID. The files
can be copied instead with --cp.

Any file that causes mri_probedicom to exit with error is ignored. 

If dcmdir has a space anywhere in its path (going back to root),
then the program will exit with error.



