# APACHE TIKA
#  https://github.com/apache/tika
#  https://tika.apache.org
#
# RPM spec that builds the tika-server-standard jar from an upstream git ref
# with Maven and packages it together with a systemd service/target and the
# custom server + log4j2 configuration files.

%global _tika_scm_host         https://github.com
%global _tika_scm_repo         apache/tika
%global _tika_scm_branch       3.2.2
# %%global _tika_scm_branch     branch_3x
# %%global _tika_scm_branch     main

%global _tika_jdk_version      21

%global _owner                 pgnd
%global _build_timestamp       %( date +%%Y%%m%%d_%%H%%M%%S --utc )
%global _dist                  .%{_build_timestamp}.%{_owner}.fc%{fedora}

# https://stackoverflow.com/questions/47838041/rpmbuild-how-to-disable-check-buildroot
# https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
# %%define __spec_install_pre /bin/true
# %%define __arch_install_post %%{nil}
# %%define __os_install_post %%{nil}
%global _disable_source_fetch 0
%global debug_package %{nil}
# %%undefine _auto_set_build_flags
%global _hardened_build 1
# %%global __brp_mangle_shebangs %%{nil}
# %%global __brp_strip %%{nil}
# %%define __requires_exclude ^.*/xxx/bin/python.*$
# %%define __requires_exclude_from ^.*/xxx/bin/python.*$
# %%define _build_id_links none

# %%bcond_with XXX : opt build with XXX; default, without
# %%bcond_without XXX : opt build without XXX; default, with

%global _tika_name             tika-server
%global _tika_pkgnm            tika-server
%global _tika_unitnm           tika-server
%global _tika_comment          Content detection and analysis framework
%global _tika_descrip %{expand:
%{_tika_comment}.}

# https://spdx.org/licenses/Apache-2.0.html
%global _tika_license          Apache-2.0

%global _tika_usr              tika
%global _tika_grp              tika
%global _tika_cache_dir        /var/cache/tika
%global _tika_conf_dir         /usr/local/etc/tika
%global _tika_conf_file        tika-server-config-custom.xml
%global _tika_install_dir      /usr/local/tika-pgnd
%global _tika_jar_file         tika-server.jar
%global _tika_log_dir          /var/log/tika
%global _tika_unit_dir         /etc/systemd/system
%global _tika_webapp_dir       /srv/webapps/tika

# %%global _tika_scm_url https://github.com/%%{_tika_scm_repo}.git
%global _tika_scm_url          %{_tika_scm_host}/%{_tika_scm_repo}
%global _tika_scm_repo_esc     %( echo %{_tika_scm_repo} | sed 's|_|-|g' | sed 's|/|%2F|g')
%global _tika_scm_repo_norm    %( echo %{_tika_scm_repo} | sed 's|_|-|g' | sed 's|/|-|g' )
%global _tika_scm_branch_norm  %( echo %{_tika_scm_branch} | sed 's|_|-|g' | sed 's|/|-|g' )
# Resolve the configured branch/tag to a commit id at spec-parse time
# (requires git and network access while the spec is being parsed).
%global _tika_commit           %( git ls-remote %{_tika_scm_url} | grep /%{_tika_scm_branch}$ | cut -f1 )
%global _tika_shortcommit      %( c=%{_tika_commit}; echo ${c} | head -c 7 )

# GITHUB API: https://docs.github.com/en/rest/repos
%global _tika_scm_host_api     https://api.github.com/repos
%global _tika_scm_tarball      %{_tika_scm_host_api}/%{_tika_scm_repo}/tarball/%{_tika_commit}
# NOTE: %%{shortcommit0} is only defined further down; it is left unexpanded
# here and resolved when this macro is finally used.
%global _tika_scm_extract_dir  %{_tika_scm_repo_norm}-%{shortcommit0}

# %%global forgeurl0 %%{_tika_scm_url}
# %%global branch0 %%{_tika_scm_branch}
# %%global commit0 %%( git ls-remote %%{_tika_scm_url} | grep /%%{_tika_scm_branch}$ | cut -f1 )
# %%global shortcommit0 %%(c=%%{commit0}; echo ${c:0:7})
# %%global archiveext0 tar.gz
# %%global archivename0 %%( echo %%{_tika_scm_repo} | sed 's|/|-|g')-%%{shortcommit0}
# %%global archiveurl0 https://api.github.com/repos/%%{_tika_scm_repo}/tarball/%%{_tika_scm_branch}
# %%global forgesource0 %%{archiveurl0}
# %%global forgesetupargs0 -n %%{archivename0}

%global forgeurl0              %{_tika_scm_url}
%global commit0                %{_tika_commit}
%global shortcommit0           %{_tika_shortcommit}
%global forgesource0           %{_tika_scm_tarball}
%global extractdir0            %{_tika_scm_extract_dir}
%global forgesetupargs0        -T -D -b 0 -n %{extractdir0}
%forgemeta -i -a
%global dist %{_dist}

# Vendor Pinning
Vendor:         %{_owner}

# NEVRA (n-e:v-r.a): https://blog.jasonantman.com/2014/07/how-yum-and-rpm-compare-versions/
Name:           %{_tika_pkgnm}
Epoch:          3
Version:        %{scm0}_%( echo %{_tika_scm_branch} | sed 's|-|_|g' )
Release:        0%{?dist}
%global _same_evr %{epoch}:%{version}-%{release}
Summary:        %{_tika_comment}
License:        %{_tika_license}
URL:            %{forgeurl0}

Source0:        %{forgesource0}
Source100:      %{_tika_unitnm}.service
Source101:      %{_tika_unitnm}.target
Source102:      %{_tika_conf_file}
Source103:      log4j2.xml

# https://docs.fedoraproject.org/en-US/packaging-guidelines/Scriptlets
BuildRequires:  maven-local-openjdk%{_tika_jdk_version}
BuildRequires:  xmvn-toolchain-openjdk%{_tika_jdk_version}
BuildRequires:  mvn(org.apache.felix:maven-bundle-plugin)

# If you prefer to run the pipes tests instead of excluding them,
# add these to BuildRequires (not just runtime Requires) so they are
# present during %%check, and then you can drop the excludes:
# BuildRequires: ImageMagick ImageMagick-c++ ImageMagick-djvu ImageMagick-libs ImageMagick-perl
# BuildRequires: tesseract tesseract-equ tesseract-langpack-deu tesseract-langpack-eng tesseract-langpack-fra
# BuildRequires: tesseract-langpack-ita tesseract-langpack-pol tesseract-langpack-por tesseract-langpack-spa
# BuildRequires: tesseract-osd tesseract-tessdata-doc tesseract-tools
# BuildRequires: pdfbox-tools

BuildRequires:  systemd
BuildRequires:  systemd-rpm-macros
%{?systemd_requires}

# The service account must exist before the files are unpacked.
Requires(pre):  user(%{_tika_usr})
Requires(pre):  group(%{_tika_grp})

Requires:       boost-numpy3
Requires:       ImageMagick
Requires:       ImageMagick-c++
Requires:       ImageMagick-djvu
Requires:       ImageMagick-libs
Requires:       ImageMagick-perl
Requires:       java-%{_tika_jdk_version}-openjdk-headless
Requires:       pdfbox-tools
Requires:       python3-numpy
Requires:       python3-scikit-image
Requires:       python3-scikit-learn
Requires:       tesseract
Requires:       tesseract-equ
Requires:       tesseract-langpack-deu
Requires:       tesseract-langpack-eng
Requires:       tesseract-langpack-fra
Requires:       tesseract-langpack-ita
Requires:       tesseract-langpack-pol
Requires:       tesseract-langpack-por
Requires:       tesseract-langpack-spa
Requires:       tesseract-osd
Requires:       tesseract-tessdata-doc
Requires:       tesseract-tools

Provides:       tika-server = %{_same_evr}
Obsoletes:      tika-server < %{_same_evr}

%description
%{_tika_descrip}


%prep
echo '##### STARTING PREP #####'
%forgesetup -a


%build
echo '##### STARTING BUILD #####'
cd %{_builddir}/%{extractdir0}

# Pin JDK version
# Ensure Maven runs with the JDK toolchain (not any JRE in the chroot).
# Uses the JDK version macro so there is no hard-coded version.
export JAVA_HOME=%{_jvmdir}/java-%{_tika_jdk_version}-openjdk
export PATH="$JAVA_HOME/bin:$PATH"
java -version
javac -version

# OutOfMemoryError
#  https://cwiki.apache.org/confluence/display/maven/outofmemoryerror
#  https://confluence.atlassian.com/confkb/how-to-fix-out-of-memory-errors-by-increasing-available-memory-154071.html
#  https://maven.apache.org/docs/3.3.1/release-notes.html#JVM_and_Command_Line_Options
# Optional extra: -Djava.util.logging.manager=org.jboss.logmanager.LogManager
MAVEN_OPTS='-Xmx2048m -Xms1024m -XX:MaxMetaspaceSize=512m -Djava.awt.headless=true'
export MAVEN_OPTS

mvn -v
# Build only the standard server module plus the modules it depends on;
# tests are run separately in the check section.
mvn \
  clean \
  install \
  --quiet \
  --also-make \
  --threads "${RPM_BUILD_NCPUS:-1}" \
  -DskipTests \
  --projects :tika-server-standard


%install
echo '##### STARTING INSTALL #####'
cd %{_builddir}/%{extractdir0}

mkdir -p "${RPM_BUILD_ROOT}/%{_tika_conf_dir}"
mkdir -p "${RPM_BUILD_ROOT}/%{_tika_install_dir}"
mkdir -p "${RPM_BUILD_ROOT}/%{_tika_unit_dir}"
mkdir -p "${RPM_BUILD_ROOT}/%{_tika_webapp_dir}"

# Locate the freshly built server jar (the "tests" jar is excluded).
_tika_buildjar_path=$( find "%{_builddir}/%{extractdir0}/tika-server/tika-server-standard/target" \
  -type f -iname 'tika-server-standard*.jar' -not -iname '*tests*' )
# Zero matches (empty string) or several matches (multi-line string) are both
# not a regular file, so this single test rejects either case.
if [ ! -f "${_tika_buildjar_path}" ]; then
  echo "ERROR: expected exactly one tika-server-standard jar, found: '${_tika_buildjar_path}'" >&2
  exit 1
fi
_tika_buildjar_file=$( basename "${_tika_buildjar_path}" )

install -D -p -m 0644 \
  "${_tika_buildjar_path}" \
  "${RPM_BUILD_ROOT}/%{_tika_install_dir}/"

install -D -p -m 0644 \
  "%{SOURCE100}" \
  "${RPM_BUILD_ROOT}/%{_tika_unit_dir}/%{_tika_unitnm}.service"

install -D -p -m 0644 \
  "%{SOURCE101}" \
  "${RPM_BUILD_ROOT}/%{_tika_unit_dir}/%{_tika_unitnm}.target"

install -D -p -m 0644 \
  "%{SOURCE102}" \
  "${RPM_BUILD_ROOT}/%{_tika_conf_dir}/%{_tika_conf_file}"

install -D -p -m 0644 \
  "%{SOURCE103}" \
  "${RPM_BUILD_ROOT}/%{_tika_conf_dir}/log4j2.xml"

# GENERATE .service FILES, scripts
# Fill in the placeholders on the installed copies only. Editing the files
# in the sources directory in place would destroy the placeholders, so a
# later build from the same tree would silently keep stale values
# (for example the versioned jar file name).
sed -i \
  -e 's|__TIKA_CACHEDIR__|%{_tika_cache_dir}|g' \
  -e 's|__TIKA_CONFDIR__|%{_tika_conf_dir}|g' \
  -e 's|__TIKA_CONFFILE__|%{_tika_conf_file}|g' \
  -e 's|__TIKA_GRP__|%{_tika_grp}|g' \
  -e 's|__TIKA_INSTALLDIR__|%{_tika_install_dir}|g' \
  -e "s|__TIKA_JARFILE_BUILD__|${_tika_buildjar_file}|g" \
  -e 's|__TIKA_JARFILE__|%{_tika_jar_file}|g' \
  -e 's|__TIKA_LOGDIR__|%{_tika_log_dir}|g' \
  -e 's|__TIKA_USR__|%{_tika_usr}|g' \
  -e 's|__TIKA_WEBAPPDIR__|%{_tika_webapp_dir}|g' \
  "${RPM_BUILD_ROOT}/%{_tika_unit_dir}/%{_tika_unitnm}.service" \
  "${RPM_BUILD_ROOT}/%{_tika_conf_dir}/%{_tika_conf_file}" \
  "${RPM_BUILD_ROOT}/%{_tika_conf_dir}/log4j2.xml"


%check
echo '##### STARTING CHECK (TESTS) #####'
cd %{_builddir}/%{extractdir0}

# One small JVM, no metaspace cap (lets JVM flex under tight RAM)
export JAVA_TOOL_OPTIONS='-Xmx640m -Xms256m -Djava.awt.headless=true'

# Use the same pinned JDK for tests
export JAVA_HOME=%{_jvmdir}/java-%{_tika_jdk_version}-openjdk
export PATH="$JAVA_HOME/bin:$PATH"

# Run ONLY the server module tests, in-process (no forks), and exclude
# fork/pipes tests. The pattern is single-quoted so the shell can never
# glob-expand "Pipes*Test" against files in the working directory.
mvn \
  test \
  --quiet \
  --threads 1 \
  -Dsurefire.forkCount=0 \
  -DskipITs \
  -DfailIfNoTests=false \
  -Dtest='!ForkParserTest,!TikaPipesTest,!Pipes*Test' \
  --projects :tika-server-standard


%pre
# before unpacking the new files


%post
# after the new files are in place
systemctl daemon-reload || :
# NOTE(review): my-servers.timer is not shipped by this package; presumably a
# site-local unit. Confirm this is the intended unit to enable.
systemctl --no-reload enable my-servers.timer || :
# Start/restart the service so the new version runs immediately.
systemctl restart --quiet %{_tika_unitnm}.service || :


%preun
# $1 == 0 -> erase; $1 == 1 -> upgrade
if [ "$1" -eq 0 ]; then
  # Package is being removed: stop the service.
  systemctl --no-reload stop --no-warn %{_tika_unitnm}.service || :
fi
# No action on upgrade; the post-uninstall scriptlet of the old package
# try-restarts the service once the new files are in place.


%postun
# $1 == 0 -> erase; $1 == 1 -> upgrade
if [ "$1" -eq 0 ]; then
  # Full erase: disable the unit, drop the deployed jar, reload systemd.
  # NOTE(review): no .timer unit is shipped by this package; the ".timer"
  # suffix is kept from the original scriptlet. Confirm the intended unit.
  systemctl --no-reload disable %{_tika_unitnm}.timer || :
  rm -f "%{_tika_webapp_dir}/%{_tika_jar_file}"
  systemctl daemon-reload || :
else
  # Upgrade: reload unit files and restart the service if it is running.
  systemctl daemon-reload || :
  systemctl try-restart %{_tika_unitnm}.service || :
fi


%files
# http://ftp.rpm.org/max-rpm/s1-rpm-inside-files-list-directives.html
%doc
%license
%dir %{_tika_install_dir}
%{_tika_install_dir}/*
%attr(0644,root,root) %{_tika_unit_dir}/%{_tika_unitnm}.service
%attr(0644,root,root) %{_tika_unit_dir}/%{_tika_unitnm}.target
%attr(0644,%{_tika_usr},%{_tika_grp}) %{_tika_conf_dir}/%{_tika_conf_file}
%attr(0644,%{_tika_usr},%{_tika_grp}) %{_tika_conf_dir}/log4j2.xml
%dir %{_tika_webapp_dir}
# track; delete on uninstall if exists; no error if !exists
%ghost %{_tika_webapp_dir}/%{_tika_jar_file}


%changelog
* Tue Apr 15 2025 pgnd _
- bump 1744764706