# Snapshot packaging knobs. All of them except the branch and repository URL
# end up in the Release tag; the hash, branch and URL drive the git checkout
# done in the prep section.
#   pkgvers  counter embedded in Release (presumably the packaging revision)
#   scdate0  snapshot date, YYYYMMDD
#   schash0  full hash of the pinned upstream commit
#   branch0  upstream branch that is cloned
#   source0  upstream git repository URL
%global pkgvers 0
%global scdate0 20250306
%global schash0 08fcb941769f6afb6ad5792c29e1c0db891d07ab
%global branch0 main
%global source0 https://github.com/pytorch/kineto.git

# First 8 characters of the pinned commit hash, computed with embedded Lua.
%global sshort0 %{expand:%%{lua:print(('%{schash0}'):sub(1,8))}}

# CUDA toolkit major/minor version. Used in the Release tag, in the names of
# the versioned cuda build dependencies and in the toolkit path
# (/usr/local/cuda-MAJ.MIN) handed to the build.
%global vcu_maj 12
%global vcu_min 8

# Main package identity. Release is composed of the snapshot date, the
# packaging counter, the short commit hash and the CUDA version.
Name:           kineto
Version:        0.4.0
Release:        %{scdate0}.%{pkgvers}.git%{sshort0}.cu%{vcu_maj}_%{vcu_min}%{?dist}
Summary:        PyTorch Profiler
License:        BSD
URL:            https://github.com/pytorch/kineto

# Build dependencies needed regardless of CUDA support, one per line.
BuildRequires:  git
BuildRequires:  cmake
BuildRequires:  gcc-c++
BuildRequires:  python3-devel
BuildRequires:  glibc-devel
BuildRequires:  fmt-devel

# CUDA build configuration.
#   have_cuda       1 builds with CUDA enabled, 0 builds with it disabled
#   have_cuda_gcc   1 uses cuda-g++ as the CUDA host compiler
#   gpu_target_bin  value passed to cmake as CUDA_ARCH_BIN
#   gpu_target_ptx  value passed to cmake as CUDA_ARCH_PTX
# All knobs use the global macro form so they stay defined for the whole
# spec, consistent with every other macro in this file.
%global have_cuda 1
%global have_cuda_gcc 1
%global gpu_target_bin "5.2;6.1;7.5;8.6;9.0"
%global gpu_target_ptx "5.2"


# CUDA is enabled by default; building with "--without cuda" switches it off.
%bcond_without cuda
%if %{without cuda}
%global have_cuda 0
%endif

# CUDA-only build dependencies, pinned to the toolkit version selected above.
%if %{have_cuda}
%if %{have_cuda_gcc}
BuildRequires:  cuda-gcc-c++
%endif
BuildRequires:  cuda-nvcc-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvtx-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-cudart-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvml-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvrtc-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-driver-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-cupti-%{vcu_maj}-%{vcu_min}
%endif

# The build section creates its own "build" directory and invokes the cmake
# macro from inside it. NOTE(review): this flag presumably stops the distro
# cmake macros from redirecting to their own out-of-tree build directory --
# confirm against the cmake rpm macros of the target distribution.
%global __cmake_in_source_build 1

%description
Libkineto is an in-process profiling library integrated with the PyTorch Profiler.

%package        devel
Summary:        Development files
# Fully versioned and arch-qualified, so the devel package is always paired
# with the runtime library of the same build and the same architecture.
Requires:       %{name}%{?_isa} = %{version}-%{release}

%description    devel
This package contains the development files for %{name}.


%prep
# Create and enter an empty build directory; no source archive is unpacked.
%setup -c -T -n %{name}
# Shallow-clone the branch without populating the work tree, then fetch the
# pinned commit and move the work tree to exactly that commit.
git clone --no-checkout --depth=1 --branch=%{branch0} %{source0} .
git fetch --depth=1 origin %{schash0}
git reset --hard %{schash0}
# Only the dynolog submodule is initialised.
git submodule update --init --depth=1 libkineto/third_party/dynolog
# Print the checked-out commit into the build log.
git log --pretty=fuller


%build
# Patches the upstream sources in place with sed, then configures and
# compiles libkineto in libkineto/build. The buildroot is deliberately not
# touched here; it belongs to the install stage.
pushd libkineto
# Drop the bundled fmt and googletest trees: fmt is taken from the system
# (see the external fmt edits below) and the tests are switched off.
rm -rf third_party/fmt
rm -rf third_party/googletest
# Prepend a <thread> include to CuptiActivityApi.cpp.
sed -i '1 i #include <thread>' src/CuptiActivityApi.cpp
# Remove the hidden-visibility preset and add the kineto_api objects next to
# the kineto_base objects.
sed -i 's|CXX_VISIBILITY_PRESET hidden||' CMakeLists.txt
sed -i 's|$<TARGET_OBJECTS:kineto_base>)|$<TARGET_OBJECTS:kineto_base>\n$<TARGET_OBJECTS:kineto_api>)|' CMakeLists.txt
# Raise the language standard from C++14 to C++17.
sed -i 's|CXX_STANDARD 14|CXX_STANDARD 17|' CMakeLists.txt
# Give the library SOVERSION 1 and VERSION 1.0.
sed -i 's|ties(kineto PROPERTIES|ties(kineto PROPERTIES SOVERSION 1 VERSION 1.0|' CMakeLists.txt
# Use the external fmt: delete the bundled-fmt block and the header-only
# target references.
sed -i '/if(NOT TARGET fmt)/,/^endif/d' CMakeLists.txt
sed -i 's|$<BUILD_INTERFACE:fmt::fmt-header-only>|fmt|' CMakeLists.txt
sed -i '/add_dependencies(kineto fmt::fmt-header-only)/d' CMakeLists.txt
# gcc13: prepend a stdint.h include to SampleListener.h.
sed -i '1i #include "stdint.h"' src/SampleListener.h

# -p keeps a repeated run of this stage from failing on an existing directory.
mkdir -p build
pushd build
export CUDA_SOURCE_DIR="/usr/local/cuda-%{vcu_maj}.%{vcu_min}"
# Append -fPIC to the distribution compiler flags before the cmake macro
# picks them up.
%global optflags %(echo '%{optflags} -fPIC')
%cmake .. -Wno-dev \
       -DCMAKE_SKIP_RPATH=ON \
       -DCMAKE_VERBOSE_MAKEFILE=OFF \
       -DCMAKE_BUILD_TYPE=RelWithDebInfo \
       -DKINETO_LIBRARY_TYPE=shared \
       -DKINETO_BUILD_TESTS=OFF \
%if %{have_cuda}
       -DCUDA_ARCH_BIN=%{gpu_target_bin} \
       -DCUDA_ARCH_PTX=%{gpu_target_ptx} \
       -DCUDA_PROPAGATE_HOST_FLAGS=OFF \
%if %{have_cuda_gcc}
       -DCUDA_HOST_COMPILER="%{_bindir}/cuda-g++" \
       -DCMAKE_CUDA_HOST_COMPILER="%{_bindir}/cuda-g++" \
%endif
       -DENABLE_CUDA=ON \
       -DCMAKE_CUDA_FLAGS='--compiler-options -fpie' \
       -DCMAKE_CUDA_COMPILER='/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc' \
       -DCUPTI_INCLUDE_DIR='/usr/local/cuda-%{vcu_maj}.%{vcu_min}/include' \
       -DCUDA_cupti_LIBRARY='/usr/local/cuda-%{vcu_maj}.%{vcu_min}/lib64/libcupti.so'
%else
       -DENABLE_CUDA=OFF
%endif
make %{?_smp_mflags}
popd
popd


%install
# Installs the compiled tree into the buildroot. The buildroot is not wiped
# by hand: rpm prepares a clean one before this stage runs.
pushd libkineto

pushd build
make install DESTDIR=%{buildroot}
popd

# Relocate libraries from <prefix>/lib to the distribution libdir. Skipped
# when the two are the same directory or when nothing was installed under
# <prefix>/lib, either of which would make a plain mv fail.
if [ "%{_prefix}/lib" != "%{_libdir}" ] && [ -d "%{buildroot}%{_prefix}/lib" ]; then
    mv -f "%{buildroot}%{_prefix}/lib" "%{buildroot}%{_libdir}"
fi

# Ship two additional headers next to the installed ones, keeping their
# timestamps.
mkdir -p %{buildroot}%{_includedir}/%{name}
cp -p include/LoggingAPI.h %{buildroot}%{_includedir}/%{name}/
cp -p src/ApproximateClock.h %{buildroot}%{_includedir}/%{name}/
popd


%files
# Runtime package: license text, README and the versioned shared library.
# NOTE(review): assumes a LICENSE file at the top of the upstream checkout --
# confirm before building.
%license LICENSE
%doc README.md
%{_libdir}/*.so.*

%files devel
# Unversioned .so used at link time.
%{_libdir}/*.so
# Everything installed under the include directory.
%{_includedir}/*
# Everything installed under the shared cmake directory.
%{_datadir}/cmake/*


%changelog
* Fri Sep 25 2020 Cristian Balint <cristian.balint@gmail.com>
- initial build