From addcfee8740675739611065f1ea2d5057961cc1f Mon Sep 17 00:00:00 2001 From: Paolo Sottovia Date: Wed, 25 Sep 2024 16:22:10 +0000 Subject: [PATCH 1/2] Add support for Spark-4.0-preview1 --- .github/workflows/build.yaml | 8 +- .github/workflows/integration-gpu.yaml | 1 - .github/workflows/integration.yaml | 18 ----- .github/workflows/on_push.yaml | 2 +- .github/workflows/publish.yaml | 94 +----------------------- .github/workflows/trivy.yml | 3 +- images/charmed-spark/rockcraft.yaml | 19 ++--- tests/integration/resources/testpod.yaml | 2 +- tests/integration/utils/k8s-utils.sh | 2 +- 9 files changed, 14 insertions(+), 135 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 5202d6cf..b3a7f36f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,9 +44,6 @@ jobs: - name: Build image (Jupyter) run: sudo make build FLAVOUR=jupyter - - name: Build image (Kyuubi) - run: sudo make build FLAVOUR=kyuubi - - name: Get Artifact Name id: artifact run: | @@ -54,11 +51,9 @@ jobs: echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ') echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ') - echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Change artifact permissions - run: sudo chmod a+r ${{ steps.artifact.outputs.base_artifact_name }} ${{ steps.artifact.outputs.jupyter_artifact_name }} ${{ steps.artifact.outputs.kyuubi_artifact_name }} + run: sudo chmod a+r ${{ steps.artifact.outputs.base_artifact_name }} ${{ steps.artifact.outputs.jupyter_artifact_name }} - name: Upload locally built artifact uses: actions/upload-artifact@v4 @@ -67,6 +62,5 @@ jobs: path: | ${{ steps.artifact.outputs.base_artifact_name }} ${{ steps.artifact.outputs.jupyter_artifact_name }} - ${{ steps.artifact.outputs.kyuubi_artifact_name }} diff --git a/.github/workflows/integration-gpu.yaml b/.github/workflows/integration-gpu.yaml index 9095dfa6..875e554e 100644 --- a/.github/workflows/integration-gpu.yaml +++ b/.github/workflows/integration-gpu.yaml @@ -1,7 +1,6 @@ name: GPU integration CI pipeline on: - pull_request: workflow_call: jobs: diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 288614f4..2bbbb27e 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -74,21 +74,3 @@ jobs: -o ${{ steps.artifact.outputs.jupyter_artifact_name }} sg snap_microk8s -c "make tests FLAVOUR=jupyter" - - - name: Run tests (Kyuubi) - env: - AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }} - AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }} - run: | - # Unpack Artifact - mv charmed-spark/${{ steps.artifact.outputs.kyuubi_artifact_name }} . 
- rmdir charmed-spark - - # Import artifact into docker with new tag - sudo make microk8s-import \ - FLAVOUR=kyuubi \ - TAG=$(yq .version images/charmed-spark/rockcraft.yaml) \ - REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ - -o ${{ steps.artifact.outputs.kyuubi_artifact_name }} - - sg snap_microk8s -c "make tests FLAVOUR=kyuubi" diff --git a/.github/workflows/on_push.yaml b/.github/workflows/on_push.yaml index 01bed96b..05bcbd0c 100644 --- a/.github/workflows/on_push.yaml +++ b/.github/workflows/on_push.yaml @@ -1,7 +1,7 @@ on: push: branches: - - '3.4-22.04/*' + - '4.0-preview1-22.04/*' jobs: publish: diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 3d7bc8ad..bbeaef39 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -47,12 +47,8 @@ jobs: uses: ./.github/workflows/integration.yaml secrets: inherit - tests-gpu: - uses: ./.github/workflows/integration-gpu.yaml - secrets: inherit - publish: - needs: [tests, release_checks, tests-gpu] + needs: [tests, release_checks] runs-on: ubuntu-latest steps: - name: Checkout repository @@ -76,10 +72,6 @@ jobs: echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ') echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ') - echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - GPU_ARTIFACT=$(make help FLAVOUR=spark-gpu | grep 'Artifact: ') - echo "gpu_artifact_name=${GPU_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Download artifact uses: actions/download-artifact@v4 @@ -156,87 +148,3 @@ jobs: echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" docker push ${IMAGE_NAME}:${VERSION_TAG} fi - - - - name: Publish Kyuubi Image to Channel - run: | - # Unpack artifact - mv charmed-spark/${{ steps.artifact.outputs.kyuubi_artifact_name }} . - rmdir charmed-spark - - REPOSITORY="ghcr.io/canonical/" - RISK=${{ needs.release_checks.outputs.risk }} - TRACK=${{ needs.release_checks.outputs.track }} - if [ ! 
-z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi - - # Import artifact into docker with new tag - sudo make docker-import \ - FLAVOUR=kyuubi \ - REPOSITORY=${REPOSITORY} \ - TAG=${TAG} \ - -o ${{ steps.artifact.outputs.kyuubi_artifact_name }} - - IMAGE_NAME=$(make help FLAVOUR=kyuubi REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) - - DESCRIPTION=$(yq .flavours.kyuubi.image_description images/metadata.yaml | xargs) - - echo "FROM ${IMAGE_NAME}:${TAG}" | docker build --label org.opencontainers.image.description="${DESCRIPTION}" --label org.opencontainers.image.revision="${COMMIT_ID}" --label org.opencontainers.image.source="${{ github.repositoryUrl }}" -t "${IMAGE_NAME}:${TAG}" - - - echo "Publishing ${IMAGE_NAME}:${TAG}" - docker push ${IMAGE_NAME}:${TAG} - - if [[ "$RISK" == "edge" ]]; then - VERSION_LONG=$(make help FLAVOUR=kyuubi | grep "Tag\:" | cut -d ":" -f2 | xargs) - VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge" - - docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} - - echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" - docker push ${IMAGE_NAME}:${VERSION_TAG} - fi - - - - name: Download gpu artifact - uses: actions/download-artifact@v4 - with: - name: charmed-spark-gpu - path: charmed-spark-gpu - - - name: Publish Charmed Spark GPU Image to Channel - run: | - # Unpack artifact - mv charmed-spark-gpu/${{ steps.artifact.outputs.gpu_artifact_name }} . - rmdir charmed-spark-gpu - - REPOSITORY="ghcr.io/canonical/" - RISK=${{ needs.release_checks.outputs.risk }} - TRACK=${{ needs.release_checks.outputs.track }} - if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi - - IMAGE_NAME=$(make help REPOSITORY=${REPOSITORY} TAG=${TAG} FLAVOUR=spark-gpu help | grep "Image\:" | cut -d ":" -f2 | xargs) - - # Import artifact into docker with new tag - sudo make docker-import \ - FLAVOUR=spark-gpu \ - REPOSITORY=${REPOSITORY} \ - TAG=${TAG} \ - -o ${{ steps.artifact.outputs.gpu_artifact_name }} - - # Add relevant labels - COMMIT_ID=$(git log -1 --format=%H) - DESCRIPTION=$(yq .description images/charmed-spark-gpu/rockcraft.yaml | xargs) - - echo "FROM ${IMAGE_NAME}:${TAG}" | docker build --label org.opencontainers.image.description="${DESCRIPTION}" --label org.opencontainers.image.revision="${COMMIT_ID}" --label org.opencontainers.image.source="${{ github.repositoryUrl }}" -t "${IMAGE_NAME}:${TAG}" - - - echo "Publishing ${IMAGE_NAME}:${TAG}" - docker push ${IMAGE_NAME}:${TAG} - - if [[ "$RISK" == "edge" ]]; then - VERSION_LONG=$(make help FLAVOUR=spark-gpu | grep "Tag\:" | cut -d ":" -f2 | xargs) - VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge" - - docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} - - echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" - docker push ${IMAGE_NAME}:${VERSION_TAG} - fi \ No newline at end of file diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index e27571ce..6afaf2b1 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -2,8 +2,7 @@ name: trivy on: push: branches: - - 3.4-22.04/edge - pull_request: + - '4.0-preview1-22.04/*' jobs: build: uses: ./.github/workflows/build.yaml diff --git a/images/charmed-spark/rockcraft.yaml b/images/charmed-spark/rockcraft.yaml index 57880654..61909084 100644 --- a/images/charmed-spark/rockcraft.yaml +++ b/images/charmed-spark/rockcraft.yaml @@ -10,7 +10,7 @@ description: | license: Apache-2.0 
-version: "3.4.2" +version: "4.0.0-preview1" base: ubuntu@22.04 @@ -51,8 +51,8 @@ services: parts: spark: plugin: dump - source: https://github.com/canonical/central-uploader/releases/download/spark-3.4.2-ubuntu6/spark-3.4.2-ubuntu6-20240904084915-bin-k8s.tgz - source-checksum: sha512/57976cc02187d0b43130ec47ae9f5adb354d199a1e638cbade622ce438324ff689674b1ac959a8e25a705f73fe23bb875e5910b9342b68deb39d612338d35500 + source: https://github.com/canonical/central-uploader/releases/download/spark-4.0.0-preview1-ubuntu0/spark-4.0.0-preview1-ubuntu0-20240813100410-bin-k8s.tgz + source-checksum: sha512/9d506d28d356c33608bebaf53dd6b60705826f60764bb6ed60d1b5cf3f496d99bf45b6aaee5df6f7cce0e37dad04ab003f45e8219e15dc918a2cef78d8425c09 overlay-script: | sed -i 's/http:\/\/deb.\(.*\)/https:\/\/deb.\1/g' /etc/apt/sources.list apt-get update @@ -96,16 +96,13 @@ parts: mkdir -p $CRAFT_PART_INSTALL/opt/spark/jars cd $CRAFT_PART_INSTALL/opt/spark/jars - ICEBERG_SPARK_RUNTIME_VERSION='3.4_2.12' - ICEBERG_VERSION='1.4.3' - SPARK_METRICS_VERSION='3.4-1.0.2' - SERVLET_FILTERS_VERSION='0.0.1' - SHA1SUM_ICEBERG_JAR='48d553e4e5496f731b9e0e6adb5bc0fd040cb0df' - SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR='9be728c3bda6a8e9db77452f416bc23245271a5db2da64557429352917c0772801ead19f3b1a33f955ec2eced3cb952c6c3a7c617cdeb4389cd17284f3c711f7' - SHA512SUM_SPARK_SERVLET_FILTER_JAR='ffeb809d58ef0151d513b09d4c2bfd5cc064b0b888ca45899687aed2f42bcb1ce9834be9709290dd70bd9df84049f02cbbff6c2d5ec3c136c278c93f167c8096' + + SPARK_METRICS_VERSION='4.0-1.0.1' + SERVLET_FILTERS_VERSION='4.0.1' + SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR='0c5af6d7e2a22f3f12a8c3bcb8baccad07934d4c882234b4705b481766e176bf0931cecdaffebfba58361958d30aa62b02f08314d07fd66ea7d4ea026afac989' + SHA512SUM_SPARK_SERVLET_FILTER_JAR='a18e8ffe0d80d6cd42e1e817765e62c9e24ee3998b82bd4d848494a5f96c40f548d7148471d3b8ca35d4e5aa71c1ceefffad6f69d21e94794818291bdbe6931f' JARS=( - "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/LIB_VERSION/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-LIB_VERSION.jar $ICEBERG_VERSION sha1sum $SHA1SUM_ICEBERG_JAR" "https://github.com/canonical/central-uploader/releases/download/spark-metrics-assembly-LIB_VERSION/spark-metrics-assembly-LIB_VERSION.jar $SPARK_METRICS_VERSION sha512sum $SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR" "https://github.com/canonical/central-uploader/releases/download/servlet-filters-LIB_VERSION/servlet-filters-LIB_VERSION.jar $SERVLET_FILTERS_VERSION sha512sum $SHA512SUM_SPARK_SERVLET_FILTER_JAR" ) diff --git a/tests/integration/resources/testpod.yaml b/tests/integration/resources/testpod.yaml index f190da3e..67847229 100644 --- a/tests/integration/resources/testpod.yaml +++ b/tests/integration/resources/testpod.yaml @@ -4,7 +4,7 @@ metadata: name: testpod spec: containers: - - image: ghcr.io/canonical/test-charmed-spark:3.4.2 + - image: ghcr.io/canonical/test-charmed-spark:4.0.0-preview1 name: spark ports: - containerPort: 18080 diff --git a/tests/integration/utils/k8s-utils.sh b/tests/integration/utils/k8s-utils.sh index 95af2008..2f787742 100644 --- a/tests/integration/utils/k8s-utils.sh +++ b/tests/integration/utils/k8s-utils.sh @@ -24,7 +24,7 @@ wait_for_pod() { namespace=$2 echo "Waiting for pod '$pod_name' to become ready..." 
- kubectl wait --for condition=Ready pod/$pod_name -n $namespace --timeout 60s + kubectl wait --for condition=Ready pod/$pod_name -n $namespace --timeout 300s } From 8c37c2e47592f216e989d186e660a24c3632701e Mon Sep 17 00:00:00 2001 From: Paolo Sottovia Date: Thu, 26 Sep 2024 07:53:23 +0000 Subject: [PATCH 2/2] Fix integration tests for Spark 4.0 --- tests/integration/integration-tests.sh | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index 0ef55517..920f49a9 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -116,12 +116,12 @@ cleanup_user_failure() { teardown_test_pod() { kubectl logs testpod-admin -n $NAMESPACE kubectl logs testpod -n $NAMESPACE - kubectl logs -l spark-version=3.4.2 -n $NAMESPACE + kubectl logs -l spark-version=4.0.0-preview1 -n $NAMESPACE kubectl -n $NAMESPACE delete pod $ADMIN_POD_NAME } run_example_job_in_pod() { - SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" + SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar" PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 @@ -328,7 +328,7 @@ test_iceberg_example_in_pod_using_abfss(){ run_example_job_in_pod_with_pod_templates() { - SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" + SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar" PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1) @@ -374,7 +374,7 @@ run_example_job_in_pod_with_pod_templates() { run_example_job_in_pod_with_metrics() { - SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" + SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar" LOG_FILE="/tmp/server.log" SERVER_PORT=9091 PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1) @@ -423,7 +423,7 @@ run_example_job_in_pod_with_metrics() { run_example_job_with_error_in_pod() { - SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" + SPARK_EXAMPLES_JAR_NAME="spark-examples_2.13-$(get_spark_version).jar" PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 @@ -500,7 +500,7 @@ run_spark_shell_in_pod() { echo -e "$(kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$SPARK_SHELL_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.spark-shell --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > spark-shell.out - pi=$(cat spark-shell.out | grep "^Pi is roughly" | rev | cut -d' ' -f1 | rev | cut -c 1-3) + pi=$(cat spark-shell.out | grep "Pi is roughly" | rev | cut -d' ' -f1 | rev | cut -c 1-3) echo -e "Spark-shell Pi Job Output: \n ${pi}" rm spark-shell.out validate_pi_value $pi @@ -663,18 +663,6 @@ echo -e "########################################" (setup_user_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod -echo -e "##################################" -echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" -echo -e "##################################" - -(setup_user_context && test_iceberg_example_in_pod_using_s3 && cleanup_user_success) || cleanup_user_failure_in_pod - -echo 
-e "##################################" -echo -e "RUN EXAMPLE THAT USES AZURE STORAGE" -echo -e "##################################" - -(setup_user_context && test_iceberg_example_in_pod_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod - echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################"