# Packaging revision plus the upstream snapshot being built: snapshot date,
# full commit hash, branch name and git URL (consumed by the Release tag and
# by the prep stage).
%global pkgvers 0
%global scdate0 20250311
%global schash0 37fe645f945d56e0d4bfac1d9f3bcf355f950a1b
%global branch0 main
%global source0 https://github.com/apache/tvm.git

# First 8 characters of the commit hash, cut with embedded Lua; used in Release.
%global sshort0 %{expand:%%{lua:print(('%{schash0}'):sub(1,8))}}

# CUDA toolkit major/minor version; appears in Release and in the versioned
# CUDA package names and toolkit paths further down.
%global vcu_maj 12
%global vcu_min 8

# Semicolon-separated architecture list handed to CMAKE_CUDA_ARCHITECTURES.
%define gpu_target_arch "61;75;86;89;90;50"

# features (1 = enabled, 0 = disabled); some are overridden further down
%define have_onnx 1
%define have_mlir 0
%define have_dnnl 0
%define have_cuda 1
%define have_cuda_gcc 1
%define have_cutlass 0
%define have_cutlass_ft 0
%define have_mali 0
%define have_tflite 0
%define have_nnpack 1
%define have_arm_compute 1
%define have_verilator 1
# externals (1 = use the system package, 0 = use the bundled git submodule)
%define ext_dmlc 1
%define ext_dlpack 1
%define ext_picojson 1
# runtime only: off by default, building with "--with runtime" sets it to 1
%define only_runtime 0
%bcond_with runtime
%if %{with runtime}
%define only_runtime 1
%endif

Name:           tvm
Version:        0.20
# Release encodes snapshot date, packaging revision, short commit hash and
# the CUDA toolkit version the package was built against.
Release:        %{scdate0}.%{pkgvers}.git%{sshort0}.cu%{vcu_maj}_%{vcu_min}%{?dist}
Summary:        Compiler stack for cpu, gpu and specialized accelerators
# SPDX identifier of the upstream license
License:        Apache-2.0

URL:            https://tvm.apache.org

# Applied in the prep stage after the git checkout
Patch100:       tvm-gcc.patch

# Unconditional build dependencies
BuildRequires:  doxygen cmake git python3 python3-setuptools
BuildRequires:  python3-Cython python3-devel mesa-libGLU-devel
BuildRequires:  llvm-devel openblas-devel libglvnd-devel
BuildRequires:  atlas-devel gtest-devel spirv-headers-devel spirv-tools-devel
BuildRequires:  vulkan-loader-devel vulkan-headers
# cpuinfo and pthreadpool are not requested on riscv
%ifnarch riscv64 riscv32
BuildRequires:  cpuinfo-devel pthreadpool-devel
%endif


# Host toolchain selector: "gcc" pulls in gcc-c++, anything else pulls in clang.
%global toolchain gcc

%if "%{toolchain}" == "gcc"
BuildRequires:  gcc-c++
%else
BuildRequires:  clang
%endif


# MLIR support is forced off on RHEL.
%if 0%{?rhel}
%define have_mlir 0
%endif

# elppc is 1 only for RHEL builds on ppc64le.
%define elppc 0
%if 0%{?rhel}
%ifarch ppc64le
%define elppc 1
%endif
%endif

# GLFW and the OpenCL ICD loader headers are skipped on RHEL/ppc64le;
# the build stage turns OpenCL off for the same case.
%if ! %{elppc}
BuildRequires: glfw-devel ocl-icd-devel
%endif

# CUDA support is on by default; building with "--without cuda" disables it.
%bcond_without cuda
%if %{without cuda}
%define have_cuda 0
%endif

# Mali support is off by default; building with "--with mali" enables it.
%bcond_with mali
%if %{with mali}
%define have_mali 1
%endif

# On RHEL 9 and on Fedora newer than 38 the bundled picojson submodule is
# used instead of the system package.
%if (0%{?rhel} == 9) || (0%{?fedora} > 38)
%define ext_picojson 0
%endif

# System copies of the external components selected above
%if %{ext_picojson}
BuildRequires:  picojson-devel
%endif

%if %{ext_dlpack}
BuildRequires:  dlpack-devel
%endif

%if %{ext_dmlc}
BuildRequires:  dmlc-core-devel
%endif

# TFLite dependencies are never requested on ppc64le
%if %{have_tflite}
%ifnarch ppc64le
BuildRequires:  tensorflow-tflite
BuildRequires:  tensorflow-devel
BuildRequires:  flatbuffers-devel
%endif
%endif

%if %{have_dnnl}
BuildRequires:  onednn-devel
%endif

# ONNX development files are never requested on ppc64le
%if %{have_onnx}
%ifnarch ppc64le
BuildRequires:  onnx-devel
%endif
%endif

%if %{have_mlir}
BuildRequires:  mlir-devel
%endif

# Arm Compute Library: Fedora only, and only on x86_64 and aarch64
%if %{have_arm_compute}
%if 0%{?fedora}
%ifarch x86_64 aarch64
BuildRequires:  arm-compute-library-devel
%endif
%endif
%endif

# CUDA build and install dependencies, pinned to the toolkit version selected
# by vcu_maj / vcu_min.
%if %{have_cuda}
# cuda-gcc is requested only on Fedora newer than 34 or RHEL newer than 8
%if %{have_cuda_gcc}
%if (0%{?fedora} > 34) || (0%{?rhel} > 8)
BuildRequires:  cuda-gcc-c++
Requires:       cuda-gcc-c++
%endif
%endif
%if %{have_cutlass}
BuildRequires:  cutlass-devel
%endif
BuildRequires:  cuda-nvcc-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvtx-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-cudart-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvml-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvrtc-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-driver-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-profiler-api-%{vcu_maj}-%{vcu_min}
BuildRequires:  libcublas-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  libcurand-devel-%{vcu_maj}-%{vcu_min}
# cuDNN is selected by CUDA major version only
BuildRequires:  libcudnn9-devel-cuda-%{vcu_maj}
BuildRequires:  nvidia-driver-cuda-libs
# Install-time CUDA dependencies of the main package
Requires:       cuda-nvcc-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvtx-%{vcu_maj}-%{vcu_min}
Requires:       cuda-cudart-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvrtc-%{vcu_maj}-%{vcu_min}
Requires:       libcublas-%{vcu_maj}-%{vcu_min}
Requires:       libcurand-%{vcu_maj}-%{vcu_min}
Requires:       libcudnn9-cuda-%{vcu_maj}
%endif

# Mali library is needed both at build time and at install time
%if %{have_mali}
BuildRequires:  libmali
Requires:       libmali
%endif

%if %{have_nnpack}
BuildRequires:  nnpack-devel
%endif

# The main package pulls in the runtime and Python subpackages of the exact
# same build, matching the versioned dependencies used between the other
# subpackages, so mixed-version installs are rejected by the solver.
Requires:      ocl-icd xgboost-python3
Requires:      %{name}-runtime = %{version}-%{release}
Requires:      %{name}-python3 = %{version}-%{release}

# Build-policy overrides: empty the LTO flags and undefine several
# distribution build-policy macros for this package.
%global _lto_cflags %{nil}
%undefine _hardened_build
%undefine _annotated_build
%undefine _strict_symbol_defs_build
%undefine _missing_build_ids_terminate_build
# Request the in-source behaviour of the cmake macro; the build stage creates
# and enters its own build/ directory by hand.
%global __cmake_in_source_build 1

# Description of the main package
%description
Open deep learning compiler stack for cpu, gpu and specialized accelerators.

%package        runtime
# With CUDA enabled the runtime carries the same versioned CUDA install-time
# dependencies as the main package.
Summary:        Runtime library for tvm
%if %{have_cuda}
Requires:       cuda-nvcc-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvtx-%{vcu_maj}-%{vcu_min}
Requires:       cuda-cudart-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvrtc-%{vcu_maj}-%{vcu_min}
Requires:       libcublas-%{vcu_maj}-%{vcu_min}
Requires:       libcurand-%{vcu_maj}-%{vcu_min}
Requires:       libcudnn9-cuda-%{vcu_maj}
%endif

%description    runtime
This package contains runtime library for tvm.

%package        runtime-devel
# Tied to the runtime subpackage of the exact same build
Summary:        Development files for tvm
Requires:       %{name}-runtime = %{version}-%{release}

%description    runtime-devel
This package contains runtime development files for tvm.

%if ! %{only_runtime}
%package        devel
# devel and python3 are declared only for full (not runtime-only) builds
Summary:        Development files for tvm
Requires:       %{name} = %{version}-%{release}
Requires:       %{name}-runtime = %{version}-%{release}
Requires:       %{name}-runtime-devel = %{version}-%{release}

%description    devel
This package contains development files for tvm.

%package        python3
# Python bindings; tied to the runtime subpackage of the exact same build
Summary:        Python files for tvm
Requires:       python3-antlr4-runtime
Requires:       %{name}-runtime = %{version}-%{release}

%description    python3
This package contains python files for tvm.
%endif


%prep
# Create an empty source directory (no tarball is unpacked) and check out the
# pinned upstream commit into it.
%setup -T -c -n %{name}
git clone --depth 1 -n -b %{branch0} %{source0} .
git fetch --depth 1 origin %{schash0}
git reset --hard %{schash0}
# Submodules fetched for every build
git submodule update --init --depth 1 3rdparty/rang
git submodule update --init --depth 1 3rdparty/libcrc
# Submodules fetched only for CUDA builds
%if %{have_cuda}
%if %{have_cutlass_ft}
git submodule update --init --depth 1 3rdparty/cutlass_fpA_intB_gemm
git submodule update --init --depth 1 3rdparty/libflash_attn
%endif
git submodule update --init --depth 1 3rdparty/flashinfer
%endif
# libbacktrace is skipped on aarch64, where the build stage disables it
%ifnarch aarch64
git submodule update --init --depth 1 3rdparty/libbacktrace
%endif
# Bundled copies are fetched only when the system package is not used
%if ! %{ext_dlpack}
git submodule update --init --depth 1 3rdparty/dlpack
%endif
%if ! %{ext_dmlc}
git submodule update --init --depth 1 3rdparty/dmlc-core
%endif
%if ! %{ext_picojson}
git submodule update --init --depth 1 3rdparty/picojson
%endif
# Record the checked-out commit in the build log
git log --format=fuller
# Apply Patch100, keeping backups with the .gcc14~ suffix
%patch -P 100 -p1 -b .gcc14~

# clean: remove the bundled directories of components taken from system packages
%if %{ext_dlpack}
rm -rf 3rdparty/dlpack
%endif
%if %{ext_dmlc}
rm -rf 3rdparty/dmlc-core
%endif
%if %{ext_picojson}
rm -rf 3rdparty/picojson
%endif
# the bundled cutlass directory is removed unconditionally
rm -rf 3rdparty/cutlass

# py3.6 compat
%if 0%{?rhel} || (0%{?fedora} < 36)
# older py: delete the "import annotations" line from the profiler module
sed -i '/import annotations/d' python/tvm/meta_schedule/profiler.py
%endif
# fix nvcc path and usage: point the Python nvcc helper at the versioned
# toolkit binary. The "-ccbin cuda-gcc" host compiler switch is added only
# when cuda-gcc is enabled AND the distribution is one where cuda-gcc is
# required (Fedora newer than 34 or RHEL newer than 8). The distribution test
# is parenthesised so that it cannot enable the switch on its own when
# have_cuda_gcc is 0.
%if %{have_cuda_gcc} && ((0%{?fedora} > 34) || (0%{?rhel} > 8))
sed -i -e 's|\["nvcc"\]|\["/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc"\] + \["-ccbin"\, "cuda-gcc"\]|' python/tvm/contrib/nvcc.py
%else
sed -i -e 's|\["nvcc"\]|\["/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc"\]|' python/tvm/contrib/nvcc.py
%endif
# fix libinfo: seed the header search path list with /usr
sed -i -e 's|header_path = \[\]|header_path = \["/usr"\]|' python/tvm/_ffi/libinfo.py
# fix tflite: refer to the shared libtensorflowlite.so in the system libdir
# instead of the static archive (applied whether or not tflite is enabled)
sed -i 's|libtensorflow-lite.a|libtensorflowlite.so|' cmake/modules/contrib/TFLite.cmake
sed -i 's|${TFLITE_CONTRIB_LIB}|%{_libdir}/libtensorflowlite.so|' cmake/modules/contrib/TFLite.cmake
%if %{have_cuda}
# fix tvm_rpc linkage: append the CUDA runtime, NVRTC and cuBLAS libraries to
# the link lines of tvm_rpc and tvm_runtime
sed -i 's|LINK_FLAGS -lpthread|LINK_FLAGS -lpthread ${CUDA_CUDART_LIBRARY} ${CUDA_NVRTC_LIBRARY} ${CUDA_CUBLAS_LIBRARY} ${CUDA_CUBLASLT_LIBRARY}|' apps/cpp_rpc/CMakeLists.txt
sed -i 's|tvm_runtime PRIVATE ${TVM_RUNTIME_LINKER_LIBS}|tvm_runtime PRIVATE ${TVM_RUNTIME_LINKER_LIBS} ${CUDA_CUDART_LIBRARY} ${CUDA_NVRTC_LIBRARY} ${CUDA_CUBLAS_LIBRARY} ${CUDA_CUBLASLT_LIBRARY}|' CMakeLists.txt
%endif
# fix vta: replace the VTA cmake module with an empty file
echo '' > cmake/modules/VTA.cmake
# use openblas: include cblas.h from the openblas/ header subdirectory
sed -i 's|cblas.h|openblas/cblas.h|' src/runtime/contrib/cblas/cblas.cc
# ext dmlc: drop the relative path into the bundled dmlc-core headers
%if %{ext_dmlc}
sed -i 's|../../../3rdparty/dmlc-core/include/||' src/tir/transforms/common_subexpr_elim_tools.h
%endif
# cutlass_ft: when disabled, delete the CMakeLists.txt lines that mention the
# fpA_intB_gemm and flash_attn components
%if ! %{have_cutlass_ft}
sed -i '/fpA_intB_gemm/d' CMakeLists.txt
sed -i '/flash_attn/d' CMakeLists.txt
#sed -i '/fpA_intB_gemm/d' cmake/modules/contrib/CUTLASS.cmake
#sed -i '/flash_attn/d' cmake/modules/contrib/CUTLASS.cmake
%endif
# cython3: mark the dlpack deleter as noexcept (Fedora only)
%if 0%{?fedora}
sed -i 's|_c_dlpack_deleter(object pycaps):|_c_dlpack_deleter(object pycaps) noexcept:|' python/tvm/_ffi/_cython/ndarray.pxi
%endif
# libstdc++ older: make tvm_rpc link against stdc++ explicitly
sed -i 's|set(TVM_RPC_LINKER_LIBS "")|set(TVM_RPC_LINKER_LIBS "stdc++")|' apps/cpp_rpc/CMakeLists.txt


%build
# Configure and compile inside a dedicated build/ subdirectory
mkdir build
pushd build
# Strip -Wp,-D_GLIBCXX_ASSERTIONS from the distribution compiler flags
%global optflags $(echo "%{optflags}" | sed 's|-Wp,-D_GLIBCXX_ASSERTIONS||')
# Expose the versioned CUDA toolkit library directory to the dynamic loader
# for the rest of this stage
export LD_LIBRARY_PATH="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/%{_lib}/"
# Configure the build from the parent source directory. The whole option list
# is a single continued shell line, so only spec conditionals (which are
# resolved before the script is generated) may appear between the options,
# never comment lines. Each option is passed exactly once.
%cmake .. -Wno-dev \
       -DCMAKE_SKIP_RPATH=ON \
       -DCMAKE_VERBOSE_MAKEFILE=OFF \
       -DCMAKE_BUILD_TYPE=RelWithDebInfo \
%if 0%{?rhel}
       -DCMAKE_SHARED_LINKER_FLAGS="-static-libstdc++" \
%else
       -DCMAKE_SHARED_LINKER_FLAGS="%{_libdir}/libstdc++.so.6" \
%endif
       -DSUMMARIZE=ON \
       -DUSE_MICRO=ON \
       -DUSE_KHRONOS_SPIRV=ON \
       -DUSE_VULKAN=ON \
%if ! %{elppc}
       -DUSE_OPENCL=ON \
%else
       -DUSE_OPENCL=OFF \
%endif
       -DUSE_MICRO_STANDALONE_RUNTIME=ON \
%if %{have_cuda}
       -DUSE_CUDA=ON \
       -DUSE_CUDNN=ON \
       -DUSE_CUBLAS=ON \
       -DUSE_CURAND=ON \
       -DUSE_THRUST=ON \
%if %{have_cutlass}
       -DUSE_CUTLASS=ON \
%else
       -DUSE_CUTLASS=OFF \
%endif
%if 0%{?fedora} || (0%{?rhel} > 8)
       -DCMAKE_CUDA_PROPAGATE_HOST_FLAGS=OFF \
%if %{have_cuda_gcc}
       -DCMAKE_CUDA_HOST_COMPILER="%{_bindir}/cuda-gcc" \
%endif
%endif
       -DCMAKE_CUDA_ARCHITECTURES=%{gpu_target_arch} \
       -DCMAKE_CUDA_COMPILER="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc" \
       -DCMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets -allow-unsupported-compiler" \
%else
       -DUSE_CUDA=OFF \
%endif
       -DUSE_BLAS=openblas \
       -DUSE_OPENMP=ON \
       -DUSE_CPP_RPC=ON \
       -DUSE_RTTI=ON \
       -DUSE_RPC=ON \
       -DUSE_SORT=ON \
%if 0%{?fedora} > 41
       -DUSE_LLVM=llvm-config-20 \
%else
       -DUSE_LLVM=llvm-config-64 \
%endif
       -DUSE_RANDOM=ON \
%if %{have_nnpack}
       -DUSE_NNPACK=ON \
%else
       -DUSE_NNPACK=OFF \
%endif
       -DUSE_UMA=ON \
%if %{have_dnnl}
       -DUSE_MKL=OFF \
       -DUSE_DNNL=ON \
%endif
%if %{have_onnx}
       -DUSE_TARGET_ONNX=ON \
%endif
%if %{have_mlir}
       -DUSE_MLIR=ON \
%endif
%if %{have_tflite}
%ifnarch ppc64le
       -DUSE_TFLITE=ON \
       -DUSE_EDGETPU=OFF \
       -DUSE_TENSORFLOW_PATH=%{_libdir} \
       -DTFLITE_CONTRIB_LIB=%{_libdir} \
%endif
%endif
%if %{have_arm_compute}
%if 0%{?fedora}
%ifarch x86_64 aarch64
       -DUSE_ARM_COMPUTE_LIB=ON \
%endif
%endif
%endif
%if %{have_verilator}
       -DUSE_VERILATOR=ON \
%endif
%ifarch aarch64
       -DUSE_LIBBACKTRACE=OFF \
%else
       -DUSE_LIBBACKTRACE=COMPILE \
%endif
%if %{ext_dmlc}
       -DDMLC_PATH="%{_includedir}" \
%endif
%if %{ext_picojson}
       -DPICOJSON_PATH="%{_includedir}" \
%endif
%if %{ext_dlpack}
       -DDLPACK_PATH="%{_includedir}/dlpack" \
%endif
       -DNNPACK_CLOG_CONTRIB_LIB="%{_libdir}"
# Compile. Both branches leave the build/ directory again so that the
# pushd at the top of this stage is always balanced.
%if %{only_runtime}
# runtime only: build just the tvm_rpc target
make %{?_smp_mflags} tvm_rpc
popd
%else
# full build: all native targets
make %{?_smp_mflags}
popd
# python
pushd python
%py3_build
popd
%endif


%install
%if %{only_runtime}
# runtime-only layout: RPC server, runtime library and the runtime headers
mkdir -p %{buildroot}%{_bindir} %{buildroot}%{_libdir} %{buildroot}%{_includedir}/%{name}
install -m 755 build/tvm_rpc %{buildroot}%{_bindir}/
install -m 755 build/libtvm_runtime.so %{buildroot}%{_libdir}/
cp -Rp include/tvm/runtime %{buildroot}%{_includedir}/tvm/
%else
# native artifacts, through the install target generated in build/
make -C build install DESTDIR=%{buildroot}
# python modules, installed from the tree built in the build stage
pushd python
%{__python3} setup.py install -O1 --skip-build --root %{buildroot} --install-lib=%{python3_sitearch}
popd
# RPC server binary is copied by hand
install -m 755 build/tvm_rpc %{buildroot}%{_bindir}/
# swap the shared objects inside the python tree for links to the system ones
rm -rf %{buildroot}%{python3_sitearch}/%{name}/*.so
for solib in libtvm libtvm_runtime; do
    ln -sf %{_libdir}/${solib}.so %{buildroot}%{python3_sitearch}/%{name}/${solib}.so
done
# fold the python-side headers into the system include tree, then link back
rm -rf %{buildroot}%{python3_sitearch}/%{name}/include/tvm/runtime
mv -f %{buildroot}%{python3_sitearch}/%{name}/include/tvm/* %{buildroot}%{_includedir}/tvm/
rmdir %{buildroot}%{python3_sitearch}/%{name}/include/tvm
ln -sf %{_includedir}/tvm %{buildroot}%{python3_sitearch}/%{name}/include/tvm
# strip version constraints from the egg requirements
sed -i 's|[<=>].*||g' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt
# drop the stray /usr/tvm tree
rm -rf %{buildroot}/usr/tvm
%endif


%if ! %{only_runtime}
%files
# Main package: documentation, license, the tvmc executable and the libtvm
# shared library
%doc docs
%doc README.md CONTRIBUTORS.md
%license LICENSE
%{_bindir}/tvmc
%{_libdir}/libtvm.so*

%files devel
# All tvm headers except the runtime ones, which are listed under
# runtime-devel, plus the cmake and shared data files
%exclude %{_includedir}/tvm/runtime
%{_includedir}/tvm
%{_libdir}/cmake/*
%{_datadir}/*

%files python3
# Everything installed under the arch-specific python site directory
%{python3_sitearch}/*
%endif

%files runtime
# RPC server binary and the runtime shared library
%{_bindir}/tvm_rpc
%{_libdir}/libtvm_runtime.so*

%files runtime-devel
# Runtime headers only
%{_includedir}/tvm/runtime

%changelog
* Fri Mar 29 2019 Balint Cristian <cristian.balint@gmail.com>
- github update releases