# Packaging revision counter; it forms the middle field of the Release tag.
%global pkgvers 0
# Snapshot date (looks like YYYYMMDD); leading field of the Release tag.
%global scdate0 20250221
# Upstream git tag that the prep stage clones and resets to.
%global sctags0 v3.8.0
# Upstream git repository that the prep stage clones from.
%global source0 https://github.com/NVIDIA/cutlass.git

# CUDA toolkit major/minor version; used in the Release tag, in the names of
# the CUDA dependency packages, and in the toolkit path under /usr/local.
%global vcu_maj 12
%global vcu_min 8

# 1 = header-only packaging: the main and static file lists are skipped and
# CMake is configured with CUTLASS_ENABLE_HEADERS_ONLY=ON; 0 = full build.
%define header_only 1

# Main package identity. Release encodes the snapshot date, the packaging
# revision and the CUDA toolkit version the package is built against.
Name:           cutlass
Version:        3.8.0
Release:        %{scdate0}.%{pkgvers}.cu%{vcu_maj}_%{vcu_min}%{?dist}
Summary:        Collection of CUDA C++ template abstractions
# SPDX identifier of the upstream license (see LICENSE.txt upstream).
License:        BSD-3-Clause

URL:            https://github.com/NVIDIA/cutlass

# Generic build toolchain; git is needed because prep clones the sources.
BuildRequires:  cmake gcc-c++ git
# Documentation generators and Python build helpers.
BuildRequires:  doxygen graphviz python3-setuptools python3-devel

# CUDA feature switches (1 = enabled). have_cuda is cleared further down when
# the package is built with --without cuda.
%define have_cuda 1
%define have_cuda_gcc 1
# Semicolon-separated GPU architecture list handed to CUTLASS_NVCC_ARCHS.
%define gpu_target_arch "52;61;75;86;89;90;120"

%bcond_without cuda
%if %{without cuda}
%define have_cuda 0
%endif

%if %{have_cuda}
# The dedicated CUDA host compiler is only requested on Fedora newer than 34
# or RHEL newer than 8.
%if %{have_cuda_gcc} && ((0%{?fedora} > 34) || (0%{?rhel} > 8))
BuildRequires:  cuda-gcc-c++ < 12
%endif
# CUDA toolkit components needed at build time.
BuildRequires:  cuda-nvcc-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvtx-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-cudart-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvml-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvrtc-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-driver-devel-%{vcu_maj}-%{vcu_min}
# CUDA toolkit components needed at install time.
Requires:       cuda-nvcc-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvtx-%{vcu_maj}-%{vcu_min}
Requires:       cuda-cudart-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvrtc-%{vcu_maj}-%{vcu_min}
%endif

# Build-policy overrides: LTO flags emptied, debuginfo subpackage disabled,
# hardening/annotation/dwz settings removed, and missing build-ids tolerated.
%global _lto_cflags %{nil}
%global debug_package %{nil}
%undefine _hardened_build
%undefine _annotated_build
%undefine _find_debuginfo_dwz_opts
%undefine _missing_build_ids_terminate_build
# Ask the distribution cmake macros not to manage their own out-of-tree build
# directory; the build stage creates and enters build/ itself.
%global __cmake_in_source_build 1
# Fuzz factor used when applying patches.
%global _default_patch_fuzz 100

%description
CUDA C++ template abstractions for implementing high-performance
matrix-multiplication (GEMM) and related computations at all
levels and scales within CUDA.

%package        devel
# Headers and CMake files. The dependency on the main package is only added
# when a main package is actually built (non header-only mode).
Summary:        Development files for %{name}
%if ! %{header_only}
Requires:       %{name} = %{version}-%{release}
%endif
%description    devel
This package contains development files for %{name}.

%if ! %{header_only}
%package        static
# Static archives; only produced when the library itself is built.
Summary:        Static libraries for %{name}
Requires:       %{name}-devel = %{version}-%{release}
%description    static
This package contains static libraries for %{name}.
%endif


%prep
# Sources come straight from upstream git rather than a tarball: create an
# empty build directory, then fetch only the release tag into it.
%setup -T -c -n %{name}
git clone --depth 1 --no-checkout --branch %{sctags0} %{source0} .
git reset --hard %{sctags0}
# Record the exact commit in the build log.
git log --format=fuller
# Delete every CMakeLists.txt line that mentions -rpath.
sed -i -e '/-rpath/d' CMakeLists.txt


%build
# Configure and compile inside a separate build/ directory.
mkdir -p build
pushd build
# Redefine optflags with -fPIC appended and the first "-g" occurrence removed.
%global optflags %(echo "%{optflags} -fPIC" | sed 's|-g||')
# Expose the CUDA toolkit library directory to the dynamic loader.
export LD_LIBRARY_PATH="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/%{_lib}/"
# The cmake invocation below is a single continued command: do not place
# comments between its lines. The header_only switch selects between the
# headers-only configuration and the full library build.
%cmake .. \
       -DCMAKE_SKIP_RPATH=ON \
       -DCMAKE_VERBOSE_MAKEFILE=OFF \
       -DCMAKE_BUILD_TYPE=Release \
       -DCMAKE_EXE_LINKER_FLAGS="%{_libdir}/libstdc++.so.6" \
       -DBUILD_TESTING=OFF \
%if %{header_only}
       -DCUTLASS_ENABLE_HEADERS_ONLY=ON \
       -DCUTLASS_ENABLE_LIBRARY=OFF \
%else
       -DCUTLASS_ENABLE_HEADERS_ONLY=OFF \
       -DCUTLASS_ENABLE_LIBRARY=ON \
%endif
       -DCUTLASS_ENABLE_PROFILER=OFF \
       -DCUTLASS_ENABLE_EXAMPLES=OFF \
%if 0%{?fedora}
%ifarch x86_64
       -DCUTLASS_ENABLE_F16C=ON \
%endif
%endif
       -DCUTLASS_ENABLE_TESTS=OFF \
       -DCUDA_PROPAGATE_HOST_FLAGS=OFF \
%if %{have_cuda_gcc}
%if (0%{?fedora} > 34) || (0%{?rhel} > 8)
       -DCMAKE_CUDA_HOST_COMPILER=%{_bindir}/cuda-c++ \
%endif
%endif
       -DCUTLASS_NVCC_EMBED_PTX=ON \
       -DCUTLASS_NVCC_EMBED_CUBIN=ON \
       -DCUTLASS_NVCC_ARCHS=%{gpu_target_arch} \
       -DCUDA_NVCC_FLAGS="-Xfatbin=-compress-all --compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler -D_SERIALIZE_H_INCLUDED" \
       -DCMAKE_CUDA_FLAGS="-Xfatbin=-compress-all --compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler -D_SERIALIZE_H_INCLUDED" \
       -DCMAKE_CUDA_COMPILER=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc
# ppc64le is capped at two parallel jobs; other architectures use the
# builder's default parallelism.
%ifarch ppc64le
make -j2
%else
make %{?_smp_mflags}
%endif
popd


%install
rm -rf %{buildroot}
pushd build
%cmake_install
popd
# clean spurious
rm -rf %{buildroot}/usr/test
rm -rf %{buildroot}/usr/share/info
# strip elf
# Every regular file in the buildroot is listed, the ones file(1) reports as
# unstripped ELF are kept, and each of those is stripped. Shell tracing is
# switched off around the loop and back on afterwards.
# The percent signs of the stat format are doubled so that rpm passes them
# through literally instead of trying to expand them as macros.
set +x
find %{buildroot} -type f -print | LC_ALL=C sort |
  file -N -f - | sed -n -e 's/^\(.*\):[ \t]*.*ELF.*, not stripped.*/\1/p' |
  xargs --no-run-if-empty stat -c '%%h %%D_%%i %%n' |
  while read -r nlinks inum f; do
      echo "Stripping: $f"
      # Quoted so a path is always passed to strip as a single argument.
      strip -s "$f"
  done
set -x


# Main package: only produced when the library itself is built.
%if ! %{header_only}
%files
%doc README.md docs
%license LICENSE.txt
%{_bindir}/*
%{_libdir}/*.so
%endif

%files devel
%if %{header_only}
# No main package is produced in header-only mode, so the license text has to
# ship with the headers.
%license LICENSE.txt
%endif
%{_includedir}/*
%{_libdir}/cmake/*

# Static archives: only produced when the library itself is built.
%if ! %{header_only}
%files static
%{_libdir}/*.a
%endif


%changelog
* Tue Nov 09 2021 Balint Cristian <cristian.balint@gmail.com>
- github release updates