[DPE-5150] Adding Spark-4.0.0-preview1 #103

Open · wants to merge 9 commits into base: 4.0-preview1
8 changes: 4 additions & 4 deletions .github/workflows/build.yaml
@@ -44,8 +44,8 @@ jobs:
- name: Build image (Jupyter)
run: sudo make build FLAVOUR=jupyter

- name: Build image (Kyuubi)
run: sudo make build FLAVOUR=kyuubi
# - name: Build image (Kyuubi)
# run: sudo make build FLAVOUR=kyuubi

- name: Get Artifact Name
id: artifact
@@ -54,8 +54,8 @@ jobs:
echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ')
echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ')
echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
# KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ')
# echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT

- name: Change artifact permissions
run: sudo chmod a+r ${{ steps.artifact.outputs.base_artifact_name }} ${{ steps.artifact.outputs.jupyter_artifact_name }} ${{ steps.artifact.outputs.kyuubi_artifact_name }}
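The artifact-name extraction in this step leans on Bash prefix stripping. A minimal sketch of the idiom, with a hypothetical artifact name since the real one comes from `make help`. (Note that the later chmod step still interpolates `kyuubi_artifact_name`, which now expands to an empty string; the unquoted empty argument is dropped by the shell, so the step should still pass.)

```bash
# `make help` is assumed to print a line like "Artifact: <name>.rock";
# ${VAR#pattern} drops the shortest matching prefix, leaving only the file name.
BASE_ARTIFACT="Artifact: charmed-spark_4.0.0-preview1_amd64.rock"  # hypothetical value
echo "${BASE_ARTIFACT#'Artifact: '}"  # prints: charmed-spark_4.0.0-preview1_amd64.rock
```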
6 changes: 3 additions & 3 deletions .github/workflows/integration-gpu.yaml
@@ -1,8 +1,8 @@
name: GPU integration CI pipeline

on:
pull_request:
workflow_call:
# on:
# pull_request:
# workflow_call:

jobs:
build-rock:
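Commenting out the entire `on:` block leaves this workflow with no triggers, so the GPU pipeline can no longer start from pull requests or via `workflow_call`. A quick local sanity check (a sketch, not part of the diff):

```bash
# Prints nothing once the trigger block is commented out: the workflow is effectively disabled.
grep -n '^on:' .github/workflows/integration-gpu.yaml
```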
38 changes: 19 additions & 19 deletions .github/workflows/integration.yaml
@@ -34,10 +34,10 @@ jobs:
echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ')
echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ')
echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
GPU_ARTIFACT=$(make help FLAVOUR=spark-gpu | grep 'Artifact: ')
echo "gpu_artifact_name=${GPU_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
# KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ')
# echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT
# GPU_ARTIFACT=$(make help FLAVOUR=spark-gpu | grep 'Artifact: ')
# echo "gpu_artifact_name=${GPU_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT

- name: Install and configure microk8s
run: |
@@ -75,20 +75,20 @@ jobs:

sg microk8s -c "make tests FLAVOUR=jupyter"

- name: Run tests (Kyuubi)
env:
AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }}
AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
run: |
# Unpack Artifact
mv charmed-spark/${{ steps.artifact.outputs.kyuubi_artifact_name }} .
rmdir charmed-spark
# - name: Run tests (Kyuubi)
# env:
# AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }}
# AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
# run: |
# # Unpack Artifact
# mv charmed-spark/${{ steps.artifact.outputs.kyuubi_artifact_name }} .
# rmdir charmed-spark

# Import artifact into docker with new tag
sudo make microk8s-import \
FLAVOUR=kyuubi \
TAG=$(yq .version images/charmed-spark/rockcraft.yaml) \
REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
-o ${{ steps.artifact.outputs.kyuubi_artifact_name }}
# # Import artifact into docker with new tag
# sudo make microk8s-import \
# FLAVOUR=kyuubi \
# TAG=$(yq .version images/charmed-spark/rockcraft.yaml) \
# REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
# -o ${{ steps.artifact.outputs.kyuubi_artifact_name }}

sg microk8s -c "make tests FLAVOUR=kyuubi"
# sg microk8s -c "make tests FLAVOUR=kyuubi"
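The (now commented-out) Kyuubi import step derives its docker tag from the rock metadata. A sketch of that derivation, assuming yq v4 on the path:

```bash
# Read the rock version used as the image tag by `make microk8s-import`.
TAG=$(yq '.version' images/charmed-spark/rockcraft.yaml)
echo "$TAG"  # with this PR applied: 4.0.0-preview1
```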
6 changes: 3 additions & 3 deletions .github/workflows/publish.yaml
@@ -47,9 +47,9 @@ jobs:
uses: ./.github/workflows/integration.yaml
secrets: inherit

tests-gpu:
uses: ./.github/workflows/integration-gpu.yaml
secrets: inherit
# tests-gpu:
# uses: ./.github/workflows/integration-gpu.yaml
# secrets: inherit

publish:
needs: [tests, release_checks, tests-gpu]
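Worth flagging: `publish` still lists `tests-gpu` under `needs:` even though that job is now commented out, and GitHub Actions rejects a workflow whose `needs:` references an undefined job, so the dependency list presumably needs the same treatment. A quick check (sketch):

```bash
# Any surviving reference to the commented-out job shows up here.
grep -n 'tests-gpu' .github/workflows/publish.yaml
```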
12 changes: 4 additions & 8 deletions images/charmed-spark/rockcraft.yaml
@@ -10,7 +10,7 @@ description: |

license: Apache-2.0

version: "3.5.1"
version: "4.0.0-preview1"

base: [email protected]

@@ -51,8 +51,8 @@ services:
parts:
spark:
plugin: dump
source: https://github.com/canonical/central-uploader/releases/download/spark-3.5.1-ubuntu1/spark-3.5.1-ubuntu1-20240801193221-bin-k8s.tgz
source-checksum: sha512/4c9ec4dc3f67602ec9f83fa08486985647b8ab7eaf3748c3042a382ed367fc776b7623a067ceef408337cf5c2171a010da0ff063dcbf76c4b84674b661d2330b
source: https://github.com/canonical/central-uploader/releases/download/spark-4.0.0-preview1-ubuntu0/spark-4.0.0-preview1-ubuntu0-20240813100410-bin-k8s.tgz
source-checksum: sha512/9d506d28d356c33608bebaf53dd6b60705826f60764bb6ed60d1b5cf3f496d99bf45b6aaee5df6f7cce0e37dad04ab003f45e8219e15dc918a2cef78d8425c09
overlay-script: |
sed -i 's/http:\/\/deb.\(.*\)/https:\/\/deb.\1/g' /etc/apt/sources.list
apt-get update
@@ -96,16 +96,12 @@ parts:
mkdir -p $CRAFT_PART_INSTALL/opt/spark/jars
cd $CRAFT_PART_INSTALL/opt/spark/jars

ICEBERG_SPARK_RUNTIME_VERSION='3.4_2.12'
ICEBERG_VERSION='1.4.3'
SPARK_METRICS_VERSION='3.4-1.0.1'
SERVLET_FILTERS_VERSION='0.0.1'
SHA1SUM_ICEBERG_JAR='48d553e4e5496f731b9e0e6adb5bc0fd040cb0df'
SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR='493cf77133cbf03e96fb848121ce10ac16e6f907f595df637649b98b42118e57d6b6e1bdab71bfee3394eb369637c5b4f6b05dd8fa30a1ff6899e74069c972ce'
SHA512SUM_SPARK_SERVLET_FILTER_JAR='ffeb809d58ef0151d513b09d4c2bfd5cc064b0b888ca45899687aed2f42bcb1ce9834be9709290dd70bd9df84049f02cbbff6c2d5ec3c136c278c93f167c8096'

JARS=(
"https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/LIB_VERSION/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-LIB_VERSION.jar $ICEBERG_VERSION sha1sum $SHA1SUM_ICEBERG_JAR"
"https://github.com/canonical/central-uploader/releases/download/spark-metrics-assembly-LIB_VERSION/spark-metrics-assembly-LIB_VERSION.jar $SPARK_METRICS_VERSION sha512sum $SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR"
"https://github.com/canonical/central-uploader/releases/download/servlet-filters-LIB_VERSION/servlet-filters-LIB_VERSION.jar $SERVLET_FILTERS_VERSION sha512sum $SHA512SUM_SPARK_SERVLET_FILTER_JAR"
)
@@ -139,7 +135,7 @@ parts:
- python3-pip
overlay-script: |
mkdir -p $CRAFT_PART_INSTALL/opt/spark8t/python/dist
pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.7/spark8t-0.0.7-py3-none-any.whl
pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.10/spark8t-0.0.10-py3-none-any.whl
rm usr/bin/pip*
stage:
- opt/spark8t/python/dist
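The new `source`/`source-checksum` pair can be verified outside rockcraft too. A sketch using the exact URL and digest from this diff:

```bash
# Fetch the Spark 4.0.0-preview1 tarball and check it against the pinned sha512.
curl -LO https://github.com/canonical/central-uploader/releases/download/spark-4.0.0-preview1-ubuntu0/spark-4.0.0-preview1-ubuntu0-20240813100410-bin-k8s.tgz
echo "9d506d28d356c33608bebaf53dd6b60705826f60764bb6ed60d1b5cf3f496d99bf45b6aaee5df6f7cce0e37dad04ab003f45e8219e15dc918a2cef78d8425c09  spark-4.0.0-preview1-ubuntu0-20240813100410-bin-k8s.tgz" | sha512sum --check
```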
39 changes: 20 additions & 19 deletions tests/integration/integration-tests.sh
@@ -116,12 +116,12 @@ cleanup_user_failure() {
teardown_test_pod() {
kubectl logs testpod-admin -n $NAMESPACE
kubectl logs testpod -n $NAMESPACE
kubectl logs -l spark-version=3.5.1 -n $NAMESPACE
kubectl logs -l spark-version=4.0.0-preview1 -n $NAMESPACE
kubectl -n $NAMESPACE delete pod $ADMIN_POD_NAME
}

run_example_job_in_pod() {
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar"

PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1)
NAMESPACE=$1
@@ -318,7 +318,7 @@ test_iceberg_example_in_pod_using_abfss(){


run_example_job_in_pod_with_pod_templates() {
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar"

PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1)

@@ -364,7 +364,7 @@ run_example_job_in_pod_with_pod_templates() {


run_example_job_in_pod_with_metrics() {
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar"
LOG_FILE="/tmp/server.log"
SERVER_PORT=9091
PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1)
@@ -413,7 +413,7 @@ run_example_job_in_pod_with_metrics() {


run_example_job_with_error_in_pod() {
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar"

PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1)
NAMESPACE=$1
@@ -489,8 +489,9 @@ run_spark_shell_in_pod() {
# "Pi is roughly 3.13956232343"

echo -e "$(kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$SPARK_SHELL_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.spark-shell --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > spark-shell.out

pi=$(cat spark-shell.out | grep "^Pi is roughly" | rev | cut -d' ' -f1 | rev | cut -c 1-3)
cat spark-shell.out
echo "-------------------------------------------------"
pi=$(cat spark-shell.out | grep "Pi is roughly 3" | rev | cut -d' ' -f1 | rev | cut -c 1-3)
echo -e "Spark-shell Pi Job Output: \n ${pi}"
rm spark-shell.out
validate_pi_value $pi
@@ -641,29 +642,29 @@ echo -e "##################################"

(setup_user_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "########################################"
echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS"
echo -e "########################################"
# echo -e "########################################"
# echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS"
# echo -e "########################################"

(setup_user_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod
# (setup_user_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "########################################"
echo -e "RUN EXAMPLE JOB WITH ERRORS"
echo -e "########################################"

(setup_user_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "##################################"
echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES"
echo -e "##################################"
# echo -e "##################################"
# echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES"
# echo -e "##################################"

(setup_user_context && test_iceberg_example_in_pod_using_s3 && cleanup_user_success) || cleanup_user_failure_in_pod
# (setup_user_context && test_iceberg_example_in_pod_using_s3 && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "##################################"
echo -e "RUN EXAMPLE THAT USES AZURE STORAGE"
echo -e "##################################"
# echo -e "##################################"
# echo -e "RUN EXAMPLE THAT USES AZURE STORAGE"
# echo -e "##################################"

(setup_user_context && test_iceberg_example_in_pod_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod
# (setup_user_context && test_iceberg_example_in_pod_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "##################################"
echo -e "TEARDOWN TEST POD"
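Two threads run through this file: the examples jar moves from `spark-examples_2.12` to `spark-examples_2.13` (Spark 4 is built against Scala 2.13 only), and the spark-shell assertion now greps for `Pi is roughly 3` instead of anchoring on the start of the line. A sketch of the revised extraction pipeline on a sample output line:

```bash
# Keep the last whitespace-separated field of the matching line, then its first three characters.
echo "Pi is roughly 3.141592653589793" |
  grep "Pi is roughly 3" | rev | cut -d' ' -f1 | rev | cut -c 1-3  # prints: 3.1
```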
2 changes: 1 addition & 1 deletion tests/integration/resources/testpod.yaml
@@ -4,7 +4,7 @@ metadata:
name: testpod
spec:
containers:
- image: ghcr.io/canonical/test-charmed-spark:3.5.1
- image: ghcr.io/canonical/test-charmed-spark:4.0.0-preview1
name: spark
ports:
- containerPort: 18080
4 changes: 3 additions & 1 deletion tests/integration/utils/k8s-utils.sh
@@ -24,7 +24,8 @@ wait_for_pod() {
namespace=$2

echo "Waiting for pod '$pod_name' to become ready..."
kubectl wait --for condition=Ready pod/$pod_name -n $namespace --timeout 60s
kubectl wait --for condition=Ready pod/$pod_name -n $namespace --timeout 300s
kubectl -n $namespace get pods
}


@@ -90,4 +91,5 @@ setup_admin_pod(){
kubectl -n $namespace exec $pod_name -- env KCONFIG="$user_kubeconfig" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config'

echo "Admin pod with name '$pod_name' created and configured successfully."
kubectl -n $namespace get pods
}
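The readiness timeout grows from 60s to 300s, and both helpers now dump the pod list, which helps debug CI runs where the preview image is slow to pull or start. Since `kubectl wait` exits non-zero on timeout, a caller can branch on it; a sketch with hypothetical pod and namespace names:

```bash
# Fail fast but leave the pod listing in the CI log for debugging.
if ! kubectl wait --for condition=Ready pod/testpod -n spark-tests --timeout 300s; then
  kubectl -n spark-tests get pods
  exit 1
fi
```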