#!/usr/bin/env tcsh

@global_parse `basename $0` "$*" ; if ($status) exit 0

#set version   = "0.0";  set rev_dat   = "Apr 03, 2024"
# + program for making cluster table of ROI overlap, a la Highlight
#   Don't Hide paper
#
#set version   = "1.0";  set rev_dat   = "Apr 05, 2024"
# + add help desc, rename some inputs
#
#set version   = "1.1";  set rev_dat   = "Sep 26, 2025"
# + fix errors with 'seq' when there were no overlaps or other cases
#
#set version   = "1.2";  set rev_dat   = "Dec 1, 2025"
# + add -input_dat and -dat_column_as_sign opts
#
set version   = "1.3";  set rev_dat   = "Dec 1, 2025"
# + fix a couple silly errors causing extraneous output text
#
# ----------------------------------------------------------------

set this_prog = "gen_cluster_table"
set prog_abbr = "gen_clust_tab"
#set tpname    = "${this_prog:gas///}"
set here      = $PWD

# ----------------------- set defaults --------------------------

set input_clust = ""               # dset, cluster map
set prefix      = ""

set input_dat     = ""             # dset to know mean value (and sign) of data
set nsign         = 1              # def table label-col width, for no sign data
set lsign         = " "            # def table col label, for no sign data
set vsign         = " "            # def table col value, for no sign data
set DO_SIGN_TEXT  = 0              # if using input_dat, report pos/neg text

set min_olap_perc = 10             # perc, min overlap for reporting
set tab_type      = olap           # in theory, could have other types
set DO_STRICT     = 0              # only, only report olap > min_olap_perc

set odir    = $here
set opref   = ""

set wdir    = ""

set DO_CLEAN  = 1                  # default: keep working dir

# ------------------- process options, a la rr ----------------------

if ( $#argv == 0 ) goto SHOW_HELP

set ac = 1
while ( $ac <= $#argv )
    # terminal options
    if ( ("$argv[$ac]" == "-h" ) || ("$argv[$ac]" == "-help" )) then
        goto SHOW_HELP
    endif
    if ( "$argv[$ac]" == "-ver" ) then
        goto SHOW_VERSION
    endif

    if ( "$argv[$ac]" == '-echo' ) then
        set echo

    # --------- required

    else if ( "$argv[$ac]" == "-input_clust" ) then
        if ( $ac >= $#argv ) goto FAIL_MISSING_ARG
        @ ac += 1
        set input_clust = "$argv[$ac]"

    else if ( "$argv[$ac]" == "-input_atlas" ) then
        if ( $ac >= $#argv ) goto FAIL_MISSING_ARG
        @ ac += 1
        set ref_atl = "$argv[$ac]"

    else if ( "$argv[$ac]" == "-prefix" ) then
        if ( $ac >= $#argv ) goto FAIL_MISSING_ARG
        @ ac += 1
        set prefix = "$argv[$ac]"
        set opref  = `basename "$argv[$ac]"`
        set odir   = `dirname  "$argv[$ac]"`

    # --------- opt

    else if ( "$argv[$ac]" == "-input_dat" ) then
        if ( $ac >= $#argv ) goto FAIL_MISSING_ARG
        @ ac += 1
        set input_dat = "$argv[$ac]"

    else if ( "$argv[$ac]" == "-dat_column_as_sign" ) then
        set DO_SIGN_TEXT = 1

    else if ( "$argv[$ac]" == "-min_olap_perc" ) then
        if ( $ac >= $#argv ) goto FAIL_MISSING_ARG
        @ ac += 1
        set min_olap_perc = "$argv[$ac]"

    else if ( "$argv[$ac]" == "-strict" ) then
        set DO_STRICT = 1

    else if ( "$argv[$ac]" == "-workdir" ) then
        if ( $ac >= $#argv ) goto FAIL_MISSING_ARG
        @ ac += 1
        set wdir = "$argv[$ac]"

        set tf = `python -c "print('/' in '${wdir}')"`
        if ( "${tf}" == "True" ) then
            echo "** ERROR: '-workdir ..' is a name only, no '/' allowed\n"
            goto BAD_EXIT
        endif

    else if ( "$argv[$ac]" == "-no_clean" ) then
        set DO_CLEAN = 0
        
    else
        echo "\n\n** ERROR: unexpected option #$ac = '$argv[$ac]'\n\n"
        goto BAD_EXIT
        
    endif
    @ ac += 1
end

# =======================================================================
# ======================== ** Verify + setup ** =========================
# =======================================================================

setenv AFNI_WHEREAMI_NO_WARN YES

echo "++ Start processing options for: gen_clust_table"

if ( "${prefix}" == "" ) then
    echo "** ERROR: need to provide output name with '-prefix ..'"
    goto BAD_EXIT
endif

if ( "${input_clust}" == "" ) then
    echo "** ERROR: need to provide input cluster map with '-input_clust ..'"
    goto BAD_EXIT
endif

if ( "${ref_atl}" == "" ) then
    echo "** ERROR: need to provide input atlas with '-input_atlas ..'"
    goto BAD_EXIT
endif

# make sure any dset_sign and input_clust dsets are on same grid; also set vars
if ( "${input_dat}" != "" ) then
    set is_same = `3dinfo -same_grid "${input_clust}" "${input_dat}"`
    if ( ! ${is_same[1]} ) then
        echo "** ERROR: input_clust and input_dat dsets must be on same grid"
        goto BAD_EXIT
    endif

    # ... and set vars for string display of mean value in table
    set nsign = 12        # number of spaces
    set lsign = "Mean"    # label for col

    # ... or report simple pos/neg text string, instead of mean
    if ( $DO_SIGN_TEXT ) then
        set nsign = 5         # number of spaces
        set lsign = "Sign"    # label for col
    endif
else
    if ( $DO_SIGN_TEXT ) then
        echo "** ERROR: to use '-dat_column_as_sign', you must also "
        echo "          provide an extra dataset via '-input_dat ..'"
        goto BAD_EXIT
    endif
endif

# make workdir name, if nec
if ( "${wdir}" == "" ) then
    set tmp_code = `3dnewid -fun11`  # should be essentially unique hash
    set wdir     = __workdir_${this_prog}_${tmp_code}
endif

# check output directory, use input one if nothing given
if ( ! -e "${odir}" ) then
    echo "++ Making new output directory: $odir"
    \mkdir -p "${odir}"
endif

# make the working directory
if ( ! -e "${odir}/${wdir}" ) then
    echo "++ Making working directory: ${odir}/${wdir}"
    \mkdir -p "${odir}/${wdir}"
else
    echo "+* WARNING:  Somehow found a premade working directory (?):"
    echo "      ${odir}/${wdir}"
endif

# =======================================================================
# =========================== ** Main work ** ===========================
# =======================================================================

echo "++ Make table for: ${tab_type}"

# ----- copy input map and atlas to wdir

set dset_clust = clust_map.nii.gz
set pref_atlas = ref_atlas
set dset_atlas = ${pref_atlas}.nii.gz
set dset_dat   = dset_dat.nii.gz

3dcalc -a "${input_clust}" -expr 'a' -prefix "${odir}/${wdir}/${dset_clust}"
3dcalc -a "${ref_atl}"     -expr 'a' -prefix "${odir}/${wdir}/${dset_atlas}"
if ( "${input_dat}" != "" ) then
    3dcalc -a "${input_dat}"  -expr 'a' -prefix "${odir}/${wdir}/${dset_dat}"
endif

# propagate table/header info
3drefit -copytables "${ref_atl}" "${odir}/${wdir}/${dset_atlas}"
3drefit -cmap INT_CMAP           "${odir}/${wdir}/${dset_atlas}"

# ----- move to wdir, check input properties

cd "${odir}/${wdir}"

set is_labeltable = `3dinfo -is_labeltable "${dset_atlas}"`
set is_atlas      = `3dinfo -is_atlas      "${dset_atlas}"`

# ----- check what info exists in header about atlas/lt

# to lt or atlas
if ( ! ${is_labeltable} && ! ${is_atlas} ) then
    echo "** ERROR: neither labeltable nor atlas_points in the atlas"
    goto BAD_EXIT
endif

# have atlas, no lt
if ( ! ${is_labeltable} ) then
    # create a labeltable from atlas
    adjunct_atlas_points_to_labeltable          \
        -input   "${dset_atlas}"                \
        -add_lt_to_input
endif

# atlasize ref atlas
@MakeLabelTable -atlasize_labeled_dset "${dset_atlas}"

# ----- get list of clust regions

# see if we have positive and/or negative cluster labels (= int indices)
set brickmm = `3dBrickStat -slow -min -max "${dset_clust}"`
set minval  = ${brickmm[1]}
set maxval  = ${brickmm[2]}

if ( `echo "${maxval} > 0" | bc` ) then
    set all_pos = `3dROIstats -quiet                                     \
                        -mask "3dcalc( -a ${dset_clust} -expr a*step(a)"  \
                        "${dset_clust}"`
else
    set all_pos = ( )
endif
set npos = ${#all_pos}

if ( `echo "${minval} < 0" | bc` ) then
    set all_neg = `3dROIstats -quiet                                     \
                        -mask "3dcalc( -a ${dset_clust} -expr a*step(-a)" \
                        "${dset_clust}"`
else
    set all_neg = ( )
endif
set nneg = ${#all_neg}

# build list of values: 1, 2, 3, ... N, -1, -2, -3, ..., M (can have gaps)
set all_clust = ( )
if ( ${npos} ) then
    foreach ii ( `seq 1 1 ${#all_pos}` ) 
        set vvv = ${all_pos[$ii]}
        # add each value to list as an int (should just be chopping off
        # .00000...)
        set all_clust = ( ${all_clust} `echo "scale=0; ${vvv}/1.0" | bc` )
    end
endif
if ( ${nneg} ) then
    foreach ii ( `seq ${#all_neg} -1 1` ) 
        set vvv = ${all_neg[$ii]}
        # add each value to list as an int (should just be chopping off
        # .00000...)
        set all_clust = ( ${all_clust} `echo "scale=0; ${vvv}/1.0" | bc` )
    end
endif
set nclust = ${#all_clust}

# ----- start the output file and formatting

cat <<EOF

++ start of table calculation
--------------------------------------------------------------------------
EOF

set ofile  = "${opref}"
printf "" > "${ofile}"
printf "%5s  %5s %${nsign}s  %7s   %s\n"                      \
        "Clust" "Nvox" "${lsign}" "Overlap"  "ROI location"    \
    |& tee -a "${ofile}"

set tfile  = __tmp_file_olap.txt

# store a list of unfound clusters
set all_unfound = ()

# go through the dset, int by int, to parse a bit
foreach ii ( `seq 1 1 ${nclust}` )
    set nn   = ${all_clust[$ii]}
    set nvox = `3dROIstats -nomeanout -quiet -nzvoxels \
                    -mask "clust_map.nii.gz<${nn}>"        \
                    "clust_map.nii.gz"` 

    # whereami_afni, use label 'ref_atlas' to refer to dset
    whereami_afni                                     \
        -omask "clust_map.nii.gz<${nn}>"              \
        -atlas ref_atlas                              \
        | grep --color=never '% overlap with'         \
        > "${tfile}"

    if ( $status ) then
        echo "** ERROR: whereami_afni exited with nonzero status"
        goto BAD_EXIT
    endif

    set nrow = `cat "${tfile}" | wc -l`
    
    # calculate+format the mean value of extra data within each ROI (-> vsign)
    if ( "${input_dat}" != "" ) then
        # it's easier to format here and then display as str below
        set vvv   = `3dROIstats -quiet                     \
                    -mask "clust_map.nii.gz<${nn}>"        \
                    "${dset_dat}[0]"`
        if ( $DO_SIGN_TEXT ) then
          # report string: pos, neg, zero
          if ( `echo "${vvv} > 0.0" | bc` ) then
            set vsign = "pos"
          else if ( `echo "${vvv} < 0.0" | bc` ) then
            set vsign = "neg"
          else 
            # should not happen...
            set vsign = "zero"
          endif
        else
            # report value, as scientific notation
            set vsign = `printf "%.3e" ${vvv}`
        endif
    endif

    # if nrow=0, that means this cluster had no overlap with any atlas
    # region, so we skip it in the table
    if ( ${nrow} ) then
        set NEED_ONE = 1
        foreach rr ( `seq 1 1 ${nrow}` )
            set line = `cat "${tfile}" | sed -n ${rr}p`
            set perc = `echo "${line}" | awk '{print $1}'`

            set roi = `echo "${line}"                           \
                        | awk -F'% overlap with' '{print $2}'   \
                        | awk -F, '{print $1}'`

            # is this perc above min_olap_perc?
            set is_ge_min = `echo "${perc} >= ${min_olap_perc}" | bc`

            if ( ${NEED_ONE} && ! ${is_ge_min} && $DO_STRICT ) then
                # overlap below min chosen val, AND we will be strict
                # about not showing it
                set fff = `printf "<%2.1f" "${min_olap_perc}"`
                printf "%5d  %5d %${nsign}s %8s%%   %s\n"               \
                    "${nn}" "${nvox}" "${vsign}" "${fff}"  "-"          \
                    |& tee -a "${ofile}"
                set NEED_ONE = 0
            else if ( ${NEED_ONE} ) then
                printf "%5d  %5d %${nsign}s %7.1f%%   %s\n"               \
                    "${nn}" "${nvox}" "${vsign}" "${perc}"  "${roi}"      \
                    |& tee -a "${ofile}"
                set NEED_ONE = 0
            else if ( ${is_ge_min} ) then
                printf "%5s  %5s %${nsign}s %7.1f%%   %s\n"               \
                    "" "" "" "${perc}" "${roi}"                           \
                    |& tee -a "${ofile}"
            endif
        end
    else 
        # keep a record of clusters with no olap
        set all_unfound = ( ${all_unfound} ${ii} )
    endif
end

cat <<EOF
--------------------------------------------------------------------------
EOF

# in case there were cases of no overlap, warn about those
if ( ${#all_unfound} ) then
    # grammarize
    if ( `echo "${#all_unfound} > 2" | bc` ) then
        # plural
        set sss = "There were ${#all_unfound} clusters"
    else
        # singular"
        set sss = "There was ${#all_unfound} cluster"
    endif
cat <<EOF

+* WARNING: ${sss} without any input_atlas overlap:
            ${all_unfound}

EOF
endif

# ----- finished with table, so copy the output to final spot
\cp "${opref}" ../.

# exit wdir, to odir
cd ..

# ---------------------------------------------------------------------

if ( $DO_CLEAN == 1 ) then
    # should be _in_ odir right now, so wdir is just here to be removed
    \rm -rf ${wdir}
else
    echo "++ NOT removing temporary axialization working dir: ${odir}/${wdir}"
endif

cat <<EOF

++ DONE. See the output:
   ${odir}/${opref}

EOF

goto GOOD_EXIT

# ========================================================================
# ========================================================================

SHOW_HELP:

cat << EOF
-------------------------------------------------------------------------
Overview ~1~

This is a program to take a cluster dataset and make a table of
overlaps with respect to a given atlas.

This would be useful for reporting more information about cluster
results than, say, peak voxel or middle-voxel tables.  For usage
example, see:

  Highlight Results, Don't Hide Them: Enhance interpretation, reduce
  biases and improve reproducibility. 
  Taylor PA, Reynolds RC, Calhoun V, Gonzalez-Castillo J, Handwerker
  DA, Bandettini PA, Mejia AF, Chen G (2023). Neuroimage 274:120138. 
  https://pubmed.ncbi.nlm.nih.gov/37116766/

This program basically wraps around the useful 'whereami_afni' program.

auth    : PA Taylor (SSCC, NIMH, NIH, USA)
ver     : ${version}
revdate : ${rev_dat}

-------------------------------------------------------------------------
Options ~1~

-input_clust IC    :(req) input dataset: the map of clusters, of which you 
                    want to list out overlaps. Should be a single 3D volume.

-input_atlas IA    :(req) input dataset: the reference atlas, to be used
                    to identify/list overlaps from the cluster input

-prefix      PPP   :(req) output name for the table, so likely should end 
                    with ".txt" or ".dat", for clarity

-min_olap_perc MOP :minimum overlap (as a percentage value) of cluster with
                    a given reference atlas region to be displayed in the 
                    table. That is, if MOP% or more of the cluster overlaps
                    with a given region, then list that region. 
                    (def: ${min_olap_perc})
                    **See Notes, below, for more about this**
                    
-strict            :by default, if no atlas region overlaps with the 
                    '-min_olap_perc ..' threshold value, then the atlas 
                    region with maximum overlap will be displayed still;
                    use this option, however, to strictly apply the threshold,
                    so no ROI would be shown.

-input_dat  ID     :by default, there is no info about the mean or sign of
                    a given cluster. With this option, users provide an extra
                    input dataset for that info. The ID dataset must be
                    on the same grid as the '-input_clust ..' dset.
                    Using this opt will add a column of mean values of the ID
                    data per cluster written using scientific notation
                    (see '-dat_column_as_sign' for alternative reporting).
                    NB: there is no check that the sign of values are
                    constant across each cluster, though that likely
                    should be the case.

-dat_column_as_sign :when adding an '-input_dat ..' dset, the added column 
                    has signed mean values of the extra dataset within each 
                    each cluster. This option changes the reporting to merely
                    be a text string reporting whether the cluster is positive 
                    ('pos'), negative ('neg') or zero ('zero', which shouldn't
                    actually happen, typically).

-workdir     WWW   :specify the name of the temporary working directory
                    (which is created as a new subdirectory of the output
                    file location---do not include path info here, just a
                    simple name)

-no_clean          :do not remove working directory (def: remove it)

-echo              :very verbose output when running (for troubleshooting)

-help, -h          :display this meager help info

-ver               :display this program version

-------------------------------------------------------------------------
Notes ~1~

Note that the '-min_olap_perc ..' value specifies the fraction of the
*cluster* for displaying in the table. If your cluster is inherently
much, much larger than your atlas regions, this can mean that you
won't see many overlaps reported in the table.  In such a case, you
might want to lower the '-min_olap_perc ..' significantly.

Future work might be to have a different thresholding criterion,
perhaps based on the fraction of the *atlas* region overlap with the
cluster, for reporting.

-------------------------------------------------------------------------
Examples ~1~

1) Basic usage to create a table:
   gen_cluster_table                                  \
        -input_clust  Clust_mask+tlrc.HEAD            \
        -input_atlas  MNI_Glasser_HCP_v1.0.nii.gz     \
        -prefix       table_cluster_olap_glasser.txt

2) Basic usage to create a table, using a lower overlap fraction cut-off:
   gen_cluster_table                                  \
        -input_clust   Clust_mask+tlrc.HEAD           \
        -input_atlas   MNI_Glasser_HCP_v1.0.nii.gz    \
        -min_olap_perc 5                              \
        -prefix        table_cluster_olap _glasser.txt

3) Add in a supplementary dataset of data values, and report the values of
   its voxels within each cluster
   gen_cluster_table                                   \
        -input_clust   Clust_mask+tlrc.HEAD            \
        -input_atlas   MNI_Glasser_HCP_v1.0.nii.gz     \
        -min_olap_perc 5                               \
        -prefix        table_cluster_olap _glasser.txt \
        -input_dat     EffectEstimateData.nii.gz

4) Same as #3, but report just the sign of the extra data's mean values within
   each cluster:
   gen_cluster_table                                   \
        -input_clust   Clust_mask+tlrc.HEAD            \
        -input_atlas   MNI_Glasser_HCP_v1.0.nii.gz     \
        -min_olap_perc 5                               \
        -prefix        table_cluster_olap _glasser.txt \
        -input_dat     EffectEstimateData.nii.gz       \
        -dat_column_as_sign

EOF

# ----------------------------------------------------------------------

    goto GOOD_EXIT

SHOW_VERSION:
    echo "version  $version (${rev_dat})"
    goto GOOD_EXIT

FAIL_MISSING_ARG:
    echo "** ERROR: Missing an argument after option flag: '$argv[$ac]'"
    goto BAD_EXIT

BAD_EXIT:
    exit 1

GOOD_EXIT:
    exit 0
