# SPEC file overview:
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages/#con_rpm-spec-file-overview
# Fedora packaging guidelines:
# https://docs.fedoraproject.org/en-US/packaging-guidelines/
%define vendor apache
%define spark_major 3
%define spark_version 3.2.0
%define hadoop_version 3.2
%define spark_package spark-%{spark_version}-bin-hadoop%{hadoop_version}
%define python_version 3.8
%define java_version 1.8.0
%define user_name apache-spark
%define group_name apache-spark
%define spark spark%{spark_major}
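# %%{spark} expands to "spark3" and names the config directory, sysconfig
# file, wrapper scripts, and systemd units, so packages for different Spark
# major versions can be installed side by side.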
Name: %{vendor}-spark%{spark_major}
Version: %{spark_version}
Release: 0%{?dist}
Summary: Apache Spark, a unified analytics engine for large-scale data processing
License: Apache-2.0
URL: https://spark.apache.org
Source0: https://archive.apache.org/dist/spark/spark-%{spark_version}/%{spark_package}.tgz
Source1: %{spark}-hive-metastore.sql
BuildArch: noarch
BuildRequires: systemd-rpm-macros python-rpm-macros
BuildRequires: python%{python_version}
BuildRequires: /usr/bin/pathfix.py
Requires(pre): shadow-utils
Requires: java-%{java_version}-openjdk-headless python%{python_version}
Requires: mysql-connector-java
%description
Apache Spark is a unified analytics engine for large-scale data processing.
%prep
%setup -q -n %{spark_package}
%build
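# Nothing to compile: this repackages the upstream binary distribution as-is.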
%install
mkdir -p %{buildroot}/opt/%{vendor}/%{spark_package}
mkdir -p %{buildroot}/%{_bindir}
mkdir -p %{buildroot}/%{_sbindir}
mkdir -p %{buildroot}/%{_unitdir}
mkdir -p %{buildroot}/%{_sharedstatedir}/%{spark}
mkdir -p %{buildroot}/%{_sysconfdir}/sysconfig
mkdir -p %{buildroot}/%{_sysconfdir}/%{spark}
mkdir -p %{buildroot}/%{_localstatedir}/log/%{spark}/
mkdir -p %{buildroot}/%{_localstatedir}/log/%{spark}/event_log/
mkdir -p %{buildroot}/%{_sharedstatedir}/%{spark}/warehouse/
mkdir -p %{buildroot}/%{_datadir}/%{name}/
cp %{SOURCE1} %{buildroot}/%{_datadir}/%{name}/hive-metastore.sql
cp -r * %{buildroot}/opt/%{vendor}/%{spark_package}
cp -r conf/* %{buildroot}/%{_sysconfdir}/%{spark}
ln -s ./%{spark_package} %{buildroot}/opt/%{vendor}/%{spark}
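# Thin wrapper scripts: each pins SPARK_CONF_DIR to the packaged config
# directory before handing off to the corresponding upstream launcher.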
cat << EOF > %{buildroot}/%{_bindir}/%{spark}-submit
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
exec /opt/%{vendor}/%{spark}/bin/spark-submit "\$@"
EOF
cat << EOF > %{buildroot}/%{_bindir}/%{spark}-beeline
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
exec /opt/%{vendor}/%{spark}/bin/beeline "\$@"
EOF
cat << EOF > %{buildroot}/%{_bindir}/%{spark}-pyspark
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
exec /opt/%{vendor}/%{spark}/bin/pyspark "\$@"
EOF
cat << EOF > %{buildroot}/%{_bindir}/%{spark}-sql
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
exec /opt/%{vendor}/%{spark}/bin/spark-sql "\$@"
EOF
cat << EOF > %{buildroot}/%{_sbindir}/%{spark}-thrift
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
export SPARK_NO_DAEMONIZE=true
exec /opt/%{vendor}/%{spark}/sbin/start-thriftserver.sh "\$@"
EOF
cat << EOF > %{buildroot}/%{_sbindir}/%{spark}-master
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
export SPARK_NO_DAEMONIZE=true
exec /opt/%{vendor}/%{spark}/sbin/start-master.sh "\$@"
EOF
cat << EOF > %{buildroot}/%{_sbindir}/%{spark}-slave
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
export SPARK_NO_DAEMONIZE=true
exec /opt/%{vendor}/%{spark}/sbin/start-slave.sh "\$@"
EOF
cat << EOF > %{buildroot}/%{_sbindir}/%{spark}-historyserver
#!/bin/bash
export SPARK_CONF_DIR=%{_sysconfdir}/%{spark}
export SPARK_NO_DAEMONIZE=true
exec /opt/%{vendor}/%{spark}/sbin/start-history-server.sh "\$@"
EOF
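# systemd units. The sbin wrappers above export SPARK_NO_DAEMONIZE so each
# daemon stays in the foreground, matching Type=simple.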
cat << EOF > %{buildroot}/%{_unitdir}/%{spark}-thrift.service
[Unit]
Description=Spark%{spark_major} SQL Thrift Server
Wants=network-online.target
After=network-online.target
[Service]
EnvironmentFile=%{_sysconfdir}/sysconfig/%{spark}
ExecStart=%{_sbindir}/%{spark}-thrift
WorkingDirectory=%{_sharedstatedir}/%{spark}
User=%{user_name}
Type=simple
KillSignal=SIGTERM
Restart=on-failure
SendSIGKILL=no
[Install]
WantedBy=default.target
EOF
cat << EOF > %{buildroot}/%{_unitdir}/%{spark}-master.service
[Unit]
Description=Spark%{spark_major} Master Server
Wants=network-online.target
After=network-online.target
[Service]
EnvironmentFile=%{_sysconfdir}/sysconfig/%{spark}
ExecStart=%{_sbindir}/%{spark}-master
WorkingDirectory=%{_sharedstatedir}/%{spark}
User=%{user_name}
Type=simple
KillSignal=SIGTERM
Restart=on-failure
SendSIGKILL=no
[Install]
WantedBy=default.target
EOF
cat << EOF > %{buildroot}/%{_unitdir}/%{spark}-slave.service
[Unit]
Description=Spark%{spark_major} Slave Server
Wants=network-online.target
After=network-online.target
[Service]
EnvironmentFile=%{_sysconfdir}/sysconfig/%{spark}
ExecStart=%{_sbindir}/%{spark}-slave
WorkingDirectory=%{_sharedstatedir}/%{spark}
User=%{user_name}
Type=simple
KillSignal=SIGTERM
Restart=on-failure
SendSIGKILL=no
[Install]
WantedBy=default.target
EOF
cat << EOF > %{buildroot}/%{_unitdir}/%{spark}-historyserver.service
[Unit]
Description=Spark%{spark_major} History Server
Wants=network-online.target
After=network-online.target
[Service]
EnvironmentFile=%{_sysconfdir}/sysconfig/%{spark}
ExecStart=%{_sbindir}/%{spark}-historyserver
WorkingDirectory=%{_sharedstatedir}/%{spark}
User=%{user_name}
Type=simple
KillSignal=SIGTERM
Restart=on-failure
SendSIGKILL=no
[Install]
WantedBy=default.target
EOF
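# Shared environment file consumed by the units above via EnvironmentFile=.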
cat << EOF > %{buildroot}/%{_sysconfdir}/sysconfig/%{spark}
SPARK_LOG_DIR=%{_localstatedir}/log/%{spark}/
SPARK_CONF_DIR=%{_sysconfdir}/%{spark}/
EOF
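# Cluster defaults: event logging feeds the history server, and the SQL
# warehouse lives under %%{_sharedstatedir}.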
cat << EOF > %{buildroot}/%{_sysconfdir}/%{spark}/spark-defaults.conf
# spark.master local[*]
spark.eventLog.enabled true
spark.eventLog.dir %{_localstatedir}/log/%{spark}/event_log/
spark.history.fs.logDirectory %{_localstatedir}/log/%{spark}/event_log/
spark.sql.warehouse.dir %{_sharedstatedir}/%{spark}/warehouse/
spark.jars /usr/share/java/mysql-connector-java.jar
EOF
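# umask 002 keeps files created in the group-writable (setgid) log and
# warehouse directories accessible to the %%{group_name} group.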
cat << EOF > %{buildroot}/%{_sysconfdir}/%{spark}/spark-env.sh
umask 002
export JAVA_HOME=/usr/lib/jvm/jre-%{java_version}/
export PYSPARK_PYTHON=%{_bindir}/python%{python_version}
EOF
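# Hive metastore connection; README.rst below describes creating the
# backing MySQL/MariaDB database.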
cat << EOF > %{buildroot}/%{_sysconfdir}/%{spark}/hive-site.xml
<?xml version="1.0"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost/%{spark}_hive_metastore</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
  </property>
</configuration>
EOF
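# Default beeline connection: the local Thrift server on port 10000.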
cat << EOF > %{buildroot}/%{_sysconfdir}/%{spark}/beeline-site.xml
<?xml version="1.0"?>
<configuration>
  <property>
    <name>beeline.hs2.jdbc.url.default</name>
    <value>local</value>
  </property>
  <property>
    <name>beeline.hs2.jdbc.url.local</name>
    <value>jdbc:hive2://localhost:10000</value>
  </property>
</configuration>
EOF
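# Rewrite ambiguous "python" shebangs to the packaged Python 3 interpreter.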
%py3_shebang_fix %{buildroot}/opt/%{vendor}/%{spark_package}/bin/
%py3_shebang_fix %{buildroot}/opt/%{vendor}/%{spark_package}/python/pyspark/find_spark_home.py
%py3_shebang_fix %{buildroot}/opt/%{vendor}/%{spark_package}/python/run-tests.py
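# Post-install notes shipped under %%{_datadir}/%%{name}.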
cat << EOF > %{buildroot}/%{_datadir}/%{name}/README.rst
You will need to create a metastore database in MySQL/MariaDB::

    create database %{spark}_hive_metastore;
    grant all privileges on %{spark}_hive_metastore.* to hive@'%' identified by 'hive';

then initialize it using::

    mysql %{spark}_hive_metastore < %{_datadir}/%{name}/hive-metastore.sql
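
You can then start the SQL Thrift server with::

    systemctl enable --now %{spark}-thrift.service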
EOF
%files
%attr(0755, root, root) %{_bindir}/*
%attr(0755, root, root) %{_sbindir}/*
%config(noreplace) %{_sysconfdir}/%{spark}/spark-defaults.conf
%config(noreplace) %{_sysconfdir}/%{spark}/spark-env.sh
%config(noreplace) %{_sysconfdir}/%{spark}/hive-site.xml
%config(noreplace) %{_sysconfdir}/%{spark}/beeline-site.xml
%{_datadir}/%{name}/hive-metastore.sql
%{_datadir}/%{name}/README.rst
%{_sysconfdir}/%{spark}/*.template
%{_unitdir}/%{spark}-*.service
%config(noreplace) %{_sysconfdir}/sysconfig/%{spark}
/opt/%{vendor}/%{spark_package}
/opt/%{vendor}/%{spark}
%dir %attr(2775, %{user_name}, %{group_name}) %{_sharedstatedir}/%{spark}
%dir %attr(2775, %{user_name}, %{group_name}) %{_sharedstatedir}/%{spark}/warehouse
%dir %attr(2775, %{user_name}, %{group_name}) %{_localstatedir}/log/%{spark}
%dir %attr(2777, %{user_name}, %{group_name}) %{_localstatedir}/log/%{spark}/event_log/
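# Create the unprivileged service account, per the Fedora users-and-groups
# guidelines (hence the Requires(pre) on shadow-utils).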
%pre
getent group %{group_name} >/dev/null || groupadd -r %{group_name}
getent passwd %{user_name} >/dev/null || \
useradd -r -g %{group_name} -d %{_sharedstatedir}/%{spark} -s /sbin/nologin \
-c "Useful comment about the purpose of this account" %{user_name}
exit 0
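# Standard systemd scriptlets: apply presets on install, stop and disable
# on erase, restart running services on upgrade.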
%post
%systemd_post %{spark}-thrift.service
%systemd_post %{spark}-master.service
%systemd_post %{spark}-slave.service
%systemd_post %{spark}-historyserver.service
%preun
%systemd_preun %{spark}-thrift.service
%systemd_preun %{spark}-master.service
%systemd_preun %{spark}-slave.service
%systemd_preun %{spark}-historyserver.service
%postun
%systemd_postun_with_restart %{spark}-thrift.service
%systemd_postun_with_restart %{spark}-master.service
%systemd_postun_with_restart %{spark}-slave.service
%systemd_postun_with_restart %{spark}-historyserver.service
%changelog