From 91a8926984b43546a04bc82e3e10c26f3c099891 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 11 Apr 2024 21:49:01 +0200 Subject: [PATCH 01/14] Fix Hive build --- hive/Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index ef4cd7ddb..ae7b3ba62 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -52,8 +52,7 @@ RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ RUN rm /stackable/apache-hive-${PRODUCT}/lib/postgresql-9.4.1208.jre7.jar && \ curl --fail -L https://repo.stackable.tech/repository/packages/pgjdbc/postgresql-42.7.2.jar -o /stackable/hive/lib/postgresql-42.7.2.jar - -COPY --link --from=hadoop-builder /stackable/hadoop /stackable/hadoop +COPY --chown=stackable:stackable --from=hadoop-builder /stackable/hadoop /stackable/hadoop # The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards # This way the build will fail should one of the files not be available anymore in a later Hadoop version! @@ -122,15 +121,15 @@ RUN microdnf update && \ USER stackable WORKDIR /stackable -COPY --link --from=builder /stackable/apache-hive-${PRODUCT} /stackable/apache-hive-${PRODUCT} +COPY --chown=stackable:stackable --from=builder /stackable/apache-hive-${PRODUCT} /stackable/apache-hive-${PRODUCT} RUN ln -s /stackable/apache-hive-${PRODUCT}/ /stackable/hive # It is useful to see which version of Hadoop is used at a glance # Therefore the use of the full name here -COPY --link --from=builder /stackable/hadoop /stackable/hadoop-${HADOOP} +COPY --chown=stackable:stackable --from=builder /stackable/hadoop /stackable/hadoop-${HADOOP} RUN ln -s /stackable/hadoop-${HADOOP}/ /stackable/hadoop -COPY --link --from=builder /stackable/jmx /stackable/jmx +COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx COPY hive/licenses /licenses # Mitigation for CVE-2021-44228 (Log4Shell) From fd735df7e60434eb9a89f45211b361af54a57c4e Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 11 Apr 2024 21:50:08 +0200 Subject: [PATCH 02/14] WIP: Add workflow_dispatch --- .github/workflows/dev.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dev.yaml b/.github/workflows/dev.yaml index 46511c6cd..1d91f1aae 100644 --- a/.github/workflows/dev.yaml +++ b/.github/workflows/dev.yaml @@ -4,6 +4,7 @@ on: push: branches: - main + workflow_dispatch: jobs: build_and_push: From 5ab3021b8f4beee2f0e967539a2b29cb3bd05bd4 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 09:38:46 +0200 Subject: [PATCH 03/14] Only build HMS --- conf.py | 4 +++ hive/Dockerfile | 49 ++++++++++++++---------------- hive/stackable/bin/start-metastore | 12 ++++---- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/conf.py b/conf.py index 7d47cd16d..dc9ac7fc6 100644 --- a/conf.py +++ b/conf.py @@ -143,6 +143,10 @@ "java-base": "1.8.0", "hadoop": "3.3.4", "jackson_dataformat_xml": "2.12.3", + # Normally Hive 3.1.3 ships with "postgresql-9.4.1208.jre7.jar", but as this so old it does only support + # MD5 based authentication. Because of this, it does not work against more recent PostgresQL versions. + # See https://github.com/stackabletech/hive-operator/issues/170 for details. + "postgres_driver": "42.7.2", "aws_java_sdk_bundle": "1.12.262", "azure_storage": "7.0.1", "azure_keyvault_core": "1.0.0", diff --git a/hive/Dockerfile b/hive/Dockerfile index ae7b3ba62..dac698c48 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -11,6 +11,7 @@ ARG PRODUCT ARG HADOOP ARG JMX_EXPORTER ARG JACKSON_DATAFORMAT_XML +ARG POSTGRES_DRIVER ARG AWS_JAVA_SDK_BUNDLE ARG AZURE_STORAGE ARG AZURE_KEYVAULT_CORE @@ -35,36 +36,30 @@ RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache- RUN chmod +x patches/apply_patches.sh RUN patches/apply_patches.sh ${PRODUCT} RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ - mvn clean package -DskipTests -Pdist -RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ - tar -xzf packaging/target/apache-hive-${PRODUCT}-bin.tar.gz -C /stackable && \ - mv /stackable/apache-hive-${PRODUCT}-bin /stackable/apache-hive-${PRODUCT} && \ - ln -s /stackable/apache-hive-${PRODUCT}/ /stackable/hive && \ - cp /stackable/bin/start-metastore /stackable/hive/bin - + mvn clean package -DskipTests --projects standalone-metastore -# TODO: Remove hardcoded _new_ version -# Replace the old (postgresql-9.4.1208.jre7.jar) postgresql JDBC driver with a newer one, as the old one does only support MD5 based authentication. -# Because of this, the contained driver version does not work against more recent PostgresQL versions. -# See https://github.com/stackabletech/hive-operator/issues/170 for details. -# Note: We hardcode the versions here to make sure this replacement will be removed once Hive ships with a more recent driver -# version as the "rm" statement will fail. -RUN rm /stackable/apache-hive-${PRODUCT}/lib/postgresql-9.4.1208.jre7.jar && \ - curl --fail -L https://repo.stackable.tech/repository/packages/pgjdbc/postgresql-42.7.2.jar -o /stackable/hive/lib/postgresql-42.7.2.jar +RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ + mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable && \ + ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore && \ + cp /stackable/hive-metastore/bin/start-metastore /stackable/hive-metastore/bin/start-metastore.bak && \ + cp /stackable/bin/start-metastore /stackable/hive-metastore/bin COPY --chown=stackable:stackable --from=hadoop-builder /stackable/hadoop /stackable/hadoop +# Add a PostgreSQL driver, as this is the primary used persistence +RUN curl --fail -L https://repo.stackable.tech/repository/packages/pgjdbc/postgresql-${POSTGRES_DRIVER}.jar -o /stackable/hive-metastore/lib/postgresql-${POSTGRES_DRIVER}.jar + # The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards # This way the build will fail should one of the files not be available anymore in a later Hadoop version! # Add S3 Support for Hive (support for s3a://) -RUN cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/hive/lib/ -RUN cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/hive/lib/ +RUN cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/hive-metastore/lib/ +RUN cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/hive-metastore/lib/ # Add Azure ABFS support (support for abfs://) -RUN cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/hive/lib/ -RUN cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/hive/lib/ -RUN cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/hive/lib/ +RUN cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/hive-metastore/lib/ +RUN cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/hive-metastore/lib/ +RUN cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/hive-metastore/lib/ # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -77,8 +72,8 @@ RUN curl --fail -L "https://repo.stackable.tech/repository/packages/jmx-exporter ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar # Logging -RUN rm /stackable/hive/lib/log4j-slf4j-impl* && \ - curl --fail -L https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar -o /stackable/hive/lib/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar +RUN rm /stackable/hive-metastore/lib/log4j-slf4j-impl* && \ + curl --fail -L https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar -o /stackable/hive-metastore/lib/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar # === # For earlier versions this script removes the .class file that contains the @@ -121,8 +116,8 @@ RUN microdnf update && \ USER stackable WORKDIR /stackable -COPY --chown=stackable:stackable --from=builder /stackable/apache-hive-${PRODUCT} /stackable/apache-hive-${PRODUCT} -RUN ln -s /stackable/apache-hive-${PRODUCT}/ /stackable/hive +COPY --chown=stackable:stackable --from=builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin +RUN ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore # It is useful to see which version of Hadoop is used at a glance # Therefore the use of the full name here @@ -138,8 +133,8 @@ COPY hive/licenses /licenses ENV LOG4J_FORMAT_MSG_NO_LOOKUPS=true ENV HADOOP_HOME=/stackable/hadoop -ENV HIVE_HOME=/stackable/hive -ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive/bin +ENV HIVE_HOME=/stackable/hive-metastore +ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive-metastore/bin -WORKDIR /stackable/hive +WORKDIR /stackable/hive-metastore CMD ["./bin/start-metastore", "--config", "conf", "--hive-bin-dir", "bin", "--db-type", "derby"] diff --git a/hive/stackable/bin/start-metastore b/hive/stackable/bin/start-metastore index 30aab9e4e..0c727fa2c 100755 --- a/hive/stackable/bin/start-metastore +++ b/hive/stackable/bin/start-metastore @@ -5,10 +5,10 @@ # Usage: start-metastore # Options: # --help -# --config +# --config # --db-type # --hive-bin-dir -# +# # Checks if the metastore database schema is initialized. If so it starts the metastore, # otherwise it tries to initialize the schma first. # @@ -22,7 +22,7 @@ HIVE_BIN_DIR="" function parse_args { while true; do echo "processing arg $1" - case $1 in + case $1 in --db-type) shift DB_TYPE=$1 @@ -71,14 +71,14 @@ function parse_args { } function init_schema { - if ! $HIVE_BIN_DIR/hive --config $CONF_DIR --service schemaTool -dbType $DB_TYPE -validate ; then + if ! $HIVE_BIN_DIR/base --config $CONF_DIR --service schemaTool -dbType $DB_TYPE -validate ; then echo "No valid schema found, initializing schema ..." - $HIVE_BIN_DIR/hive --config $CONF_DIR --service schemaTool -dbType $DB_TYPE -initSchema || exit 1 + $HIVE_BIN_DIR/base --config $CONF_DIR --service schemaTool -dbType $DB_TYPE -initSchema || exit 1 fi } function start_metastore { - $HIVE_BIN_DIR/hive --config $CONF_DIR --service metastore + $HIVE_BIN_DIR/base --config $CONF_DIR --service metastore } function main { From cc83c52f8d8e8ba224dc818902af19f8ddb8a2b4 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 09:45:46 +0200 Subject: [PATCH 04/14] Pull in Hopefully reduce intermediate image size by Lars --- hadoop/Dockerfile | 27 +++++++++++++------ hive/Dockerfile | 14 +++++----- ...uildi.patch => 002-HIVE-21939-3.1.3.patch} | 4 +-- hive/stackable/patches/apply_patches.sh | 0 4 files changed, 27 insertions(+), 18 deletions(-) rename hive/stackable/patches/3.1.3/{002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch => 002-HIVE-21939-3.1.3.patch} (99%) mode change 100644 => 100755 hive/stackable/patches/apply_patches.sh diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index a569875c9..59a4695e8 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -17,7 +17,16 @@ ARG TARGETOS RUN microdnf update && \ microdnf install \ # Required for Hadoop build - cmake cyrus-sasl-devel fuse-devel gcc gcc-c++ maven openssl-devel tar xz git \ + cmake \ + cyrus-sasl-devel \ + fuse-devel \ + gcc \ + gcc-c++ \ + git \ + maven \ + openssl-devel \ + tar \ + xz \ # Required for log4shell.sh unzip zip && \ microdnf clean all @@ -26,8 +35,6 @@ WORKDIR /stackable COPY hadoop/stackable /stackable -# Build from source to enable FUSE module, and to apply custom patches. -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -52,20 +59,24 @@ RUN curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobu tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner && \ ./configure --prefix=/opt/protobuf && \ make "-j$(nproc)" && \ - make install + make install && \ + rm -rf /opt/protobuf-src ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" WORKDIR /stackable -RUN patches/apply_patches.sh ${PRODUCT} -WORKDIR /stackable/hadoop-${PRODUCT}-src # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module -RUN mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ +# Build from source to enable FUSE module, and to apply custom patches. +RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \ + patches/apply_patches.sh ${PRODUCT} && \ + cd hadoop-${PRODUCT}-src && \ + mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \ # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves - cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin + cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin && \ + rm -rf /stackable/hadoop-${PRODUCT}-src # === # Mitigation for CVE-2021-44228 (Log4Shell) diff --git a/hive/Dockerfile b/hive/Dockerfile index dac698c48..090da8903 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -32,17 +32,15 @@ COPY --chown=stackable:stackable hive/stackable /stackable USER stackable WORKDIR /stackable -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-src.tar.gz" | tar -xzC . -RUN chmod +x patches/apply_patches.sh -RUN patches/apply_patches.sh ${PRODUCT} -RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ - mvn clean package -DskipTests --projects standalone-metastore - -RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ +RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-src.tar.gz" | tar -xzC . && \ + patches/apply_patches.sh ${PRODUCT} && \ + cd /stackable/apache-hive-${PRODUCT}-src/ && \ + mvn clean package -DskipTests --projects standalone-metastore && \ mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable && \ ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore && \ cp /stackable/hive-metastore/bin/start-metastore /stackable/hive-metastore/bin/start-metastore.bak && \ - cp /stackable/bin/start-metastore /stackable/hive-metastore/bin + cp /stackable/bin/start-metastore /stackable/hive-metastore/bin && \ + rm -rf /stackable/apache-hive-${PRODUCT}-src COPY --chown=stackable:stackable --from=hadoop-builder /stackable/hadoop /stackable/hadoop diff --git a/hive/stackable/patches/3.1.3/002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch b/hive/stackable/patches/3.1.3/002-HIVE-21939-3.1.3.patch similarity index 99% rename from hive/stackable/patches/3.1.3/002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch rename to hive/stackable/patches/3.1.3/002-HIVE-21939-3.1.3.patch index 1dfaed81e..c53ee91da 100644 --- a/hive/stackable/patches/3.1.3/002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch +++ b/hive/stackable/patches/3.1.3/002-HIVE-21939-3.1.3.patch @@ -26,7 +26,7 @@ index e36f1e64f0..6007b7961b 100644 + 2.6.1 1.3.0 2.7.0 - + @@ -443,6 +446,20 @@ @@ -57,6 +57,6 @@ index e36f1e64f0..6007b7961b 100644 none ${basedir}/src/main/protobuf/org/apache/hadoop/hive/metastore --- +-- 2.43.0 diff --git a/hive/stackable/patches/apply_patches.sh b/hive/stackable/patches/apply_patches.sh old mode 100644 new mode 100755 From 12c4249ebd92203d264b2767b58b3005f7e6958a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 09:51:05 +0200 Subject: [PATCH 05/14] WIP, revert: Only build, dont push --- .github/workflows/dev.yaml | 203 +++++++++++++++++++------------------ 1 file changed, 103 insertions(+), 100 deletions(-) diff --git a/.github/workflows/dev.yaml b/.github/workflows/dev.yaml index 1d91f1aae..dfb78f40a 100644 --- a/.github/workflows/dev.yaml +++ b/.github/workflows/dev.yaml @@ -72,6 +72,9 @@ jobs: registry: oci.stackable.tech username: robot$sdp+github-action-build password: ${{ secrets.HARBOR_ROBOT_SDP_GITHUB_ACTION_BUILD_SECRET }} + - name: For Maxi + run: | + docker --version - name: Publish dev images id: publish_images run: | @@ -86,110 +89,110 @@ jobs: --shard-index "${{matrix.shard_index}}" \ --export-tags-file bake-target-tags - # Push images to image repository - if [ -f bake-target-tags ]; then - echo "bake-target-tags: "$(< bake-target-tags) - IMAGE_NAME=$(cat bake-target-tags | cut -d ":" -f 1) - TAG_NAME=$(cat bake-target-tags | cut -d ":" -f 2) - echo "image: $IMAGE_NAME" - echo "tag: $TAG_NAME" - # Store the output of `docker image push` into a variable, so we can parse it for the digest - PUSH_OUTPUT=$(docker image push "$(< bake-target-tags)" 2>&1) - echo "$PUSH_OUTPUT" - # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future - DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") - # Refer to image via its digest (docker.stackable.tech/stackable/airflow@sha256:0a1b2c...) - # This generates a signature and publishes it to the registry, next to the image - # Uses the keyless signing flow with Github Actions as identity provider - cosign sign -y "$IMAGE_NAME@$DIGEST" + # # Push images to image repository + # if [ -f bake-target-tags ]; then + # echo "bake-target-tags: "$(< bake-target-tags) + # IMAGE_NAME=$(cat bake-target-tags | cut -d ":" -f 1) + # TAG_NAME=$(cat bake-target-tags | cut -d ":" -f 2) + # echo "image: $IMAGE_NAME" + # echo "tag: $TAG_NAME" + # # Store the output of `docker image push` into a variable, so we can parse it for the digest + # PUSH_OUTPUT=$(docker image push "$(< bake-target-tags)" 2>&1) + # echo "$PUSH_OUTPUT" + # # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future + # DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") + # # Refer to image via its digest (docker.stackable.tech/stackable/airflow@sha256:0a1b2c...) + # # This generates a signature and publishes it to the registry, next to the image + # # Uses the keyless signing flow with Github Actions as identity provider + # cosign sign -y "$IMAGE_NAME@$DIGEST" - # Generate SBOM for the image - syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; - # Determine the PURL for the image - PURL="pkg:docker/stackable/${{ matrix.product }}@$DIGEST?repository_url=docker.stackable.tech"; - # Get metadata from the image - IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); - IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); - # Merge the SBOM with the metadata for the image - jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; - # Attest the SBOM to the image - cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" + # # Generate SBOM for the image + # syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; + # # Determine the PURL for the image + # PURL="pkg:docker/stackable/${{ matrix.product }}@$DIGEST?repository_url=docker.stackable.tech"; + # # Get metadata from the image + # IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); + # IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); + # # Merge the SBOM with the metadata for the image + # jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; + # # Attest the SBOM to the image + # cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" - # Push to oci.stackable.tech as well - IMAGE_NAME=oci.stackable.tech/sdp/${{ matrix.product }} - echo "image: $IMAGE_NAME" - docker tag "$(< bake-target-tags)" "$IMAGE_NAME:$TAG_NAME" - # Store the output of `docker image push` into a variable, so we can parse it for the digest - PUSH_OUTPUT=$(docker image push "$IMAGE_NAME:$TAG_NAME" 2>&1) - echo "$PUSH_OUTPUT" - # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future - DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") - # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) - # This generates a signature and publishes it to the registry, next to the image - # Uses the keyless signing flow with Github Actions as identity provider - cosign sign -y "$IMAGE_NAME@$DIGEST" + # # Push to oci.stackable.tech as well + # IMAGE_NAME=oci.stackable.tech/sdp/${{ matrix.product }} + # echo "image: $IMAGE_NAME" + # docker tag "$(< bake-target-tags)" "$IMAGE_NAME:$TAG_NAME" + # # Store the output of `docker image push` into a variable, so we can parse it for the digest + # PUSH_OUTPUT=$(docker image push "$IMAGE_NAME:$TAG_NAME" 2>&1) + # echo "$PUSH_OUTPUT" + # # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future + # DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") + # # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) + # # This generates a signature and publishes it to the registry, next to the image + # # Uses the keyless signing flow with Github Actions as identity provider + # cosign sign -y "$IMAGE_NAME@$DIGEST" - # Generate SBOM for the image - syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; - # Determine the PURL for the image - PURL="pkg:docker/sdp/${{ matrix.product }}@$DIGEST?repository_url=oci.stackable.tech"; - # Get metadata from the image - IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); - IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); - # Merge the SBOM with the metadata for the image - jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; - # Attest the SBOM to the image - cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" - fi - create_manifests: - permissions: - id-token: write - runs-on: ubuntu-latest - needs: ["build_and_push"] - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 - - uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # tag=v3.1.0 - with: - registry: docker.stackable.tech - username: github - password: ${{ secrets.NEXUS_PASSWORD }} - - name: Login to Stackable Harbor - uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # tag=v3 - with: - registry: oci.stackable.tech - username: robot$sdp+github-action-build - password: ${{ secrets.HARBOR_ROBOT_SDP_GITHUB_ACTION_BUILD_SECRET }} - - name: Set up Cosign - uses: sigstore/cosign-installer@9614fae9e5c5eddabb09f90a270fcb487c9f7149 # tag=v3.3.0 - - name: Build Manifest List - shell: bash - env: - DOCKER_USER: github - DOCKER_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} - run: | - for product_and_version in $(python3 enumerate-product-versions.py); do - PRODUCT="$(echo "$product_and_version" | cut -d '#' -f 1)" - VERSION="$(echo "$product_and_version" | cut -d '#' -f 2)" + # # Generate SBOM for the image + # syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; + # # Determine the PURL for the image + # PURL="pkg:docker/sdp/${{ matrix.product }}@$DIGEST?repository_url=oci.stackable.tech"; + # # Get metadata from the image + # IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); + # IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); + # # Merge the SBOM with the metadata for the image + # jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; + # # Attest the SBOM to the image + # cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" + # fi + # create_manifests: + # permissions: + # id-token: write + # runs-on: ubuntu-latest + # needs: ["build_and_push"] + # steps: + # - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 + # - uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # tag=v3.1.0 + # with: + # registry: docker.stackable.tech + # username: github + # password: ${{ secrets.NEXUS_PASSWORD }} + # - name: Login to Stackable Harbor + # uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # tag=v3 + # with: + # registry: oci.stackable.tech + # username: robot$sdp+github-action-build + # password: ${{ secrets.HARBOR_ROBOT_SDP_GITHUB_ACTION_BUILD_SECRET }} + # - name: Set up Cosign + # uses: sigstore/cosign-installer@9614fae9e5c5eddabb09f90a270fcb487c9f7149 # tag=v3.3.0 + # - name: Build Manifest List + # shell: bash + # env: + # DOCKER_USER: github + # DOCKER_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} + # run: | + # for product_and_version in $(python3 enumerate-product-versions.py); do + # PRODUCT="$(echo "$product_and_version" | cut -d '#' -f 1)" + # VERSION="$(echo "$product_and_version" | cut -d '#' -f 2)" - echo "Generating manifest list for $PRODUCT in version $VERSION" + # echo "Generating manifest list for $PRODUCT in version $VERSION" - MANIFEST_NAME="docker.stackable.tech/stackable/${PRODUCT}:${VERSION}-stackable0.0.0-dev" - # Create and push to Stackable Nexus - # `docker manifest push` directly returns the digest of the manifest list - # As it is an experimental feature, this might change in the future - # Further reading: https://docs.docker.com/reference/cli/docker/manifest/push/ - # --amend because the manifest list would be updated since we use the same tag: 0.0.0-dev - docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" - DIGEST=$(docker manifest push $MANIFEST_NAME) + # MANIFEST_NAME="docker.stackable.tech/stackable/${PRODUCT}:${VERSION}-stackable0.0.0-dev" + # # Create and push to Stackable Nexus + # # `docker manifest push` directly returns the digest of the manifest list + # # As it is an experimental feature, this might change in the future + # # Further reading: https://docs.docker.com/reference/cli/docker/manifest/push/ + # # --amend because the manifest list would be updated since we use the same tag: 0.0.0-dev + # docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" + # DIGEST=$(docker manifest push $MANIFEST_NAME) - # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) - # This generates a signature and publishes it to the registry, next to the image - # Uses the keyless signing flow with Github Actions as identity provider - cosign sign -y "$MANIFEST_NAME@$DIGEST" + # # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) + # # This generates a signature and publishes it to the registry, next to the image + # # Uses the keyless signing flow with Github Actions as identity provider + # cosign sign -y "$MANIFEST_NAME@$DIGEST" - # Push to oci.stackable.tech as well - MANIFEST_NAME="oci.stackable.tech/sdp/${PRODUCT}:${VERSION}-stackable0.0.0-dev" - docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" - DIGEST=$(docker manifest push $MANIFEST_NAME) - cosign sign -y "$MANIFEST_NAME@$DIGEST" - done + # # Push to oci.stackable.tech as well + # MANIFEST_NAME="oci.stackable.tech/sdp/${PRODUCT}:${VERSION}-stackable0.0.0-dev" + # docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" + # DIGEST=$(docker manifest push $MANIFEST_NAME) + # cosign sign -y "$MANIFEST_NAME@$DIGEST" + # done From 18b3f4f3f557b01b79def218916bf54637091835 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 10:24:14 +0200 Subject: [PATCH 06/14] Fix log4shell scanner --- hive/Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index 090da8903..b9b7f8fd2 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -79,7 +79,7 @@ RUN rm /stackable/hive-metastore/lib/log4j-slf4j-impl* && \ # TODO: This can be restricted to target only versions which do not honor the environment # varible that has been set above but this has not currently been implemented COPY shared/log4shell.sh /bin -RUN /bin/log4shell.sh /stackable/apache-hive-${PRODUCT} +RUN /bin/log4shell.sh /stackable/apache-hive-metastore-${PRODUCT}-bin/ # Ensure no vulnerable files are left over # This will currently report vulnerable files being present, as it also alerts on @@ -88,7 +88,8 @@ RUN /bin/log4shell.sh /stackable/apache-hive-${PRODUCT} COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64 COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64 COPY shared/log4shell_scanner /bin/log4shell_scanner -RUN /bin/log4shell_scanner s /stackable/apache-hive-${PRODUCT} +# log4shell_scanner does not work on symlinks! +RUN /bin/log4shell_scanner s /stackable/apache-hive-metastore-${PRODUCT}-bin/ # === # syntax=docker/dockerfile:1@sha256:ac85f380a63b13dfcefa89046420e1781752bab202122f8f50032edf31be0021 @@ -98,12 +99,12 @@ ARG PRODUCT ARG HADOOP ARG RELEASE -LABEL name="Apache Hive" \ +LABEL name="Apache Hive metastore" \ maintainer="info@stackable.tech" \ vendor="Stackable GmbH" \ version="${PRODUCT}" \ release="${RELEASE}" \ - summary="The Stackable image for Apache Hive." \ + summary="The Stackable image for Apache Hive metastore." \ description="This image is deployed by the Stackable Operator for Apache Hive." RUN microdnf update && \ From 00b4e876182883ad4f7451bc459e6e54c921931f Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 10:34:36 +0200 Subject: [PATCH 07/14] changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b55ee77bc..3391ee079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ All notable changes to this project will be documented in this file. - Build all `0.0.0-dev` product images as multi-arch and push them to Nexus and Harbor. Also SBOMs are generated and everything is signed ([#614]). +### Changed + +- hive: Only build and ship Hive metastore. This reduces the image size from `2.63GB` to `1.9GB` and should also reduce the number of old dependencies ([#619]). + ### Fixed - superset: Let Superset 3.1.0 build on ARM by adding `make` and `diffutils` ([#611]). @@ -16,12 +20,15 @@ All notable changes to this project will be documented in this file. - python:3.11 manifest list fixed. Added proper hash ([#613]). - trino-cli: Include the trino-cli in the CI build process ([#614]). - hive: Fix compilation on ARM by back-porting [HIVE-21939](https://issues.apache.org/jira/browse/HIVE-21939) from [this](https://github.com/apache/hive/commit/2baf21bb55fcf33d8522444c78a8d8cab60e7415) commit ([#617]). +- hive: Fix compilation on ARM in CI as well ([#619]). +- hive: Fix compilation of x86 in CI due to lower disk usage to prevent disk running full ([#619]). [#611]: https://github.com/stackabletech/docker-images/pull/611 [#612]: https://github.com/stackabletech/docker-images/pull/612 [#613]: https://github.com/stackabletech/docker-images/pull/613 [#614]: https://github.com/stackabletech/docker-images/pull/614 [#617]: https://github.com/stackabletech/docker-images/pull/617 +[#619]: https://github.com/stackabletech/docker-images/pull/619 ## [24.3.0] - 2024-03-20 From 758ecec5554d5ad7454a9e914c9752bd5f4a356f Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 12:35:32 +0200 Subject: [PATCH 08/14] change CMD to doc comment --- hive/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index b9b7f8fd2..e5c6995e0 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -136,4 +136,4 @@ ENV HIVE_HOME=/stackable/hive-metastore ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive-metastore/bin WORKDIR /stackable/hive-metastore -CMD ["./bin/start-metastore", "--config", "conf", "--hive-bin-dir", "bin", "--db-type", "derby"] +# Start command is set by oeprator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin" From 20c21144fb95696b432a96ea88b539853b91d004 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 12:49:44 +0200 Subject: [PATCH 09/14] Revert "WIP, revert: Only build, dont push" This reverts commit 12c4249ebd92203d264b2767b58b3005f7e6958a. --- .github/workflows/dev.yaml | 203 ++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 103 deletions(-) diff --git a/.github/workflows/dev.yaml b/.github/workflows/dev.yaml index dfb78f40a..1d91f1aae 100644 --- a/.github/workflows/dev.yaml +++ b/.github/workflows/dev.yaml @@ -72,9 +72,6 @@ jobs: registry: oci.stackable.tech username: robot$sdp+github-action-build password: ${{ secrets.HARBOR_ROBOT_SDP_GITHUB_ACTION_BUILD_SECRET }} - - name: For Maxi - run: | - docker --version - name: Publish dev images id: publish_images run: | @@ -89,110 +86,110 @@ jobs: --shard-index "${{matrix.shard_index}}" \ --export-tags-file bake-target-tags - # # Push images to image repository - # if [ -f bake-target-tags ]; then - # echo "bake-target-tags: "$(< bake-target-tags) - # IMAGE_NAME=$(cat bake-target-tags | cut -d ":" -f 1) - # TAG_NAME=$(cat bake-target-tags | cut -d ":" -f 2) - # echo "image: $IMAGE_NAME" - # echo "tag: $TAG_NAME" - # # Store the output of `docker image push` into a variable, so we can parse it for the digest - # PUSH_OUTPUT=$(docker image push "$(< bake-target-tags)" 2>&1) - # echo "$PUSH_OUTPUT" - # # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future - # DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") - # # Refer to image via its digest (docker.stackable.tech/stackable/airflow@sha256:0a1b2c...) - # # This generates a signature and publishes it to the registry, next to the image - # # Uses the keyless signing flow with Github Actions as identity provider - # cosign sign -y "$IMAGE_NAME@$DIGEST" + # Push images to image repository + if [ -f bake-target-tags ]; then + echo "bake-target-tags: "$(< bake-target-tags) + IMAGE_NAME=$(cat bake-target-tags | cut -d ":" -f 1) + TAG_NAME=$(cat bake-target-tags | cut -d ":" -f 2) + echo "image: $IMAGE_NAME" + echo "tag: $TAG_NAME" + # Store the output of `docker image push` into a variable, so we can parse it for the digest + PUSH_OUTPUT=$(docker image push "$(< bake-target-tags)" 2>&1) + echo "$PUSH_OUTPUT" + # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future + DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") + # Refer to image via its digest (docker.stackable.tech/stackable/airflow@sha256:0a1b2c...) + # This generates a signature and publishes it to the registry, next to the image + # Uses the keyless signing flow with Github Actions as identity provider + cosign sign -y "$IMAGE_NAME@$DIGEST" - # # Generate SBOM for the image - # syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; - # # Determine the PURL for the image - # PURL="pkg:docker/stackable/${{ matrix.product }}@$DIGEST?repository_url=docker.stackable.tech"; - # # Get metadata from the image - # IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); - # IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); - # # Merge the SBOM with the metadata for the image - # jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; - # # Attest the SBOM to the image - # cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" + # Generate SBOM for the image + syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; + # Determine the PURL for the image + PURL="pkg:docker/stackable/${{ matrix.product }}@$DIGEST?repository_url=docker.stackable.tech"; + # Get metadata from the image + IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); + IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); + # Merge the SBOM with the metadata for the image + jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; + # Attest the SBOM to the image + cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" - # # Push to oci.stackable.tech as well - # IMAGE_NAME=oci.stackable.tech/sdp/${{ matrix.product }} - # echo "image: $IMAGE_NAME" - # docker tag "$(< bake-target-tags)" "$IMAGE_NAME:$TAG_NAME" - # # Store the output of `docker image push` into a variable, so we can parse it for the digest - # PUSH_OUTPUT=$(docker image push "$IMAGE_NAME:$TAG_NAME" 2>&1) - # echo "$PUSH_OUTPUT" - # # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future - # DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") - # # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) - # # This generates a signature and publishes it to the registry, next to the image - # # Uses the keyless signing flow with Github Actions as identity provider - # cosign sign -y "$IMAGE_NAME@$DIGEST" + # Push to oci.stackable.tech as well + IMAGE_NAME=oci.stackable.tech/sdp/${{ matrix.product }} + echo "image: $IMAGE_NAME" + docker tag "$(< bake-target-tags)" "$IMAGE_NAME:$TAG_NAME" + # Store the output of `docker image push` into a variable, so we can parse it for the digest + PUSH_OUTPUT=$(docker image push "$IMAGE_NAME:$TAG_NAME" 2>&1) + echo "$PUSH_OUTPUT" + # Obtain the digest of the pushed image from the output of `docker image push`, because signing by tag is deprecated and will be removed from cosign in the future + DIGEST=$(echo "$PUSH_OUTPUT" | awk "/: digest: sha256:[a-f0-9]{64} size: [0-9]+$/ { print \$3 }") + # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) + # This generates a signature and publishes it to the registry, next to the image + # Uses the keyless signing flow with Github Actions as identity provider + cosign sign -y "$IMAGE_NAME@$DIGEST" - # # Generate SBOM for the image - # syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; - # # Determine the PURL for the image - # PURL="pkg:docker/sdp/${{ matrix.product }}@$DIGEST?repository_url=oci.stackable.tech"; - # # Get metadata from the image - # IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); - # IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); - # # Merge the SBOM with the metadata for the image - # jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; - # # Attest the SBOM to the image - # cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" - # fi - # create_manifests: - # permissions: - # id-token: write - # runs-on: ubuntu-latest - # needs: ["build_and_push"] - # steps: - # - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 - # - uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # tag=v3.1.0 - # with: - # registry: docker.stackable.tech - # username: github - # password: ${{ secrets.NEXUS_PASSWORD }} - # - name: Login to Stackable Harbor - # uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # tag=v3 - # with: - # registry: oci.stackable.tech - # username: robot$sdp+github-action-build - # password: ${{ secrets.HARBOR_ROBOT_SDP_GITHUB_ACTION_BUILD_SECRET }} - # - name: Set up Cosign - # uses: sigstore/cosign-installer@9614fae9e5c5eddabb09f90a270fcb487c9f7149 # tag=v3.3.0 - # - name: Build Manifest List - # shell: bash - # env: - # DOCKER_USER: github - # DOCKER_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} - # run: | - # for product_and_version in $(python3 enumerate-product-versions.py); do - # PRODUCT="$(echo "$product_and_version" | cut -d '#' -f 1)" - # VERSION="$(echo "$product_and_version" | cut -d '#' -f 2)" + # Generate SBOM for the image + syft scan --output cyclonedx-json=sbom.json --select-catalogers "-cargo-auditable-binary-cataloger" --scope all-layers --source-name "${{ matrix.product }}" --source-version "$TAG_NAME" "$IMAGE_NAME@$DIGEST"; + # Determine the PURL for the image + PURL="pkg:docker/sdp/${{ matrix.product }}@$DIGEST?repository_url=oci.stackable.tech"; + # Get metadata from the image + IMAGE_METADATA_DESCRIPTION=$(docker inspect --format='{{.Config.Labels.description}}' "$IMAGE_NAME@$DIGEST"); + IMAGE_METADATA_NAME=$(docker inspect --format='{{.Config.Labels.name}}' "$IMAGE_NAME@$DIGEST"); + # Merge the SBOM with the metadata for the image + jq -s '{"metadata":{"component":{"description":"'"$IMAGE_METADATA_NAME. $IMAGE_METADATA_DESCRIPTION"'","supplier":{"name":"Stackable GmbH","url":["https://stackable.tech/"]},"author":"Stackable GmbH","purl":"'"$PURL"'","publisher":"Stackable GmbH"}}} * .[0]' sbom.json > sbom.merged.json; + # Attest the SBOM to the image + cosign attest -y --predicate sbom.merged.json --type cyclonedx "$IMAGE_NAME@$DIGEST" + fi + create_manifests: + permissions: + id-token: write + runs-on: ubuntu-latest + needs: ["build_and_push"] + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 + - uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # tag=v3.1.0 + with: + registry: docker.stackable.tech + username: github + password: ${{ secrets.NEXUS_PASSWORD }} + - name: Login to Stackable Harbor + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # tag=v3 + with: + registry: oci.stackable.tech + username: robot$sdp+github-action-build + password: ${{ secrets.HARBOR_ROBOT_SDP_GITHUB_ACTION_BUILD_SECRET }} + - name: Set up Cosign + uses: sigstore/cosign-installer@9614fae9e5c5eddabb09f90a270fcb487c9f7149 # tag=v3.3.0 + - name: Build Manifest List + shell: bash + env: + DOCKER_USER: github + DOCKER_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} + run: | + for product_and_version in $(python3 enumerate-product-versions.py); do + PRODUCT="$(echo "$product_and_version" | cut -d '#' -f 1)" + VERSION="$(echo "$product_and_version" | cut -d '#' -f 2)" - # echo "Generating manifest list for $PRODUCT in version $VERSION" + echo "Generating manifest list for $PRODUCT in version $VERSION" - # MANIFEST_NAME="docker.stackable.tech/stackable/${PRODUCT}:${VERSION}-stackable0.0.0-dev" - # # Create and push to Stackable Nexus - # # `docker manifest push` directly returns the digest of the manifest list - # # As it is an experimental feature, this might change in the future - # # Further reading: https://docs.docker.com/reference/cli/docker/manifest/push/ - # # --amend because the manifest list would be updated since we use the same tag: 0.0.0-dev - # docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" - # DIGEST=$(docker manifest push $MANIFEST_NAME) + MANIFEST_NAME="docker.stackable.tech/stackable/${PRODUCT}:${VERSION}-stackable0.0.0-dev" + # Create and push to Stackable Nexus + # `docker manifest push` directly returns the digest of the manifest list + # As it is an experimental feature, this might change in the future + # Further reading: https://docs.docker.com/reference/cli/docker/manifest/push/ + # --amend because the manifest list would be updated since we use the same tag: 0.0.0-dev + docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" + DIGEST=$(docker manifest push $MANIFEST_NAME) - # # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) - # # This generates a signature and publishes it to the registry, next to the image - # # Uses the keyless signing flow with Github Actions as identity provider - # cosign sign -y "$MANIFEST_NAME@$DIGEST" + # Refer to image via its digest (oci.stackable.tech/sdp/airflow@sha256:0a1b2c...) + # This generates a signature and publishes it to the registry, next to the image + # Uses the keyless signing flow with Github Actions as identity provider + cosign sign -y "$MANIFEST_NAME@$DIGEST" - # # Push to oci.stackable.tech as well - # MANIFEST_NAME="oci.stackable.tech/sdp/${PRODUCT}:${VERSION}-stackable0.0.0-dev" - # docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" - # DIGEST=$(docker manifest push $MANIFEST_NAME) - # cosign sign -y "$MANIFEST_NAME@$DIGEST" - # done + # Push to oci.stackable.tech as well + MANIFEST_NAME="oci.stackable.tech/sdp/${PRODUCT}:${VERSION}-stackable0.0.0-dev" + docker manifest create "$MANIFEST_NAME" --amend "${MANIFEST_NAME}-amd64" --amend "${MANIFEST_NAME}-arm64" + DIGEST=$(docker manifest push $MANIFEST_NAME) + cosign sign -y "$MANIFEST_NAME@$DIGEST" + done From 070974909d5b1d317ca4d0d92872d03346863aaf Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 12:50:27 +0200 Subject: [PATCH 10/14] remove tmp --- .github/workflows/dev.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/dev.yaml b/.github/workflows/dev.yaml index 1d91f1aae..46511c6cd 100644 --- a/.github/workflows/dev.yaml +++ b/.github/workflows/dev.yaml @@ -4,7 +4,6 @@ on: push: branches: - main - workflow_dispatch: jobs: build_and_push: From 46e9db2281afc572c2370c95fea992f8812f035a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 13:02:42 +0200 Subject: [PATCH 11/14] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3391ee079..9f3f8e1d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ All notable changes to this project will be documented in this file. ### Changed -- hive: Only build and ship Hive metastore. This reduces the image size from `2.63GB` to `1.9GB` and should also reduce the number of old dependencies ([#619]). +- hive: Only build and ship Hive metastore. This reduces the image size from `2.63GB` to `1.9GB` and should also reduce the number of dependencies ([#619]). ### Fixed From 67b492d4e2088c506a6ed6e37b0682b1d80ca33b Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 14:12:55 +0200 Subject: [PATCH 12/14] Update conf.py Co-authored-by: Maximilian Wittich <56642549+Maleware@users.noreply.github.com> --- conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.py b/conf.py index dc9ac7fc6..c3912e42b 100644 --- a/conf.py +++ b/conf.py @@ -143,7 +143,7 @@ "java-base": "1.8.0", "hadoop": "3.3.4", "jackson_dataformat_xml": "2.12.3", - # Normally Hive 3.1.3 ships with "postgresql-9.4.1208.jre7.jar", but as this so old it does only support + # Normally Hive 3.1.3 ships with "postgresql-9.4.1208.jre7.jar", but as this is old enough it does only support # MD5 based authentication. Because of this, it does not work against more recent PostgresQL versions. # See https://github.com/stackabletech/hive-operator/issues/170 for details. "postgres_driver": "42.7.2", From 3792adddd94679b538ecdfaded5e64345e433fde Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 14:13:05 +0200 Subject: [PATCH 13/14] Update hive/Dockerfile Co-authored-by: Maximilian Wittich <56642549+Maleware@users.noreply.github.com> --- hive/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index e5c6995e0..5ee9b5ad5 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -136,4 +136,4 @@ ENV HIVE_HOME=/stackable/hive-metastore ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive-metastore/bin WORKDIR /stackable/hive-metastore -# Start command is set by oeprator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin" +# Start command is set by operator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin" From e9a66bf87941c9a4407d7d132f33b707b08b3173 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 12 Apr 2024 14:24:00 +0200 Subject: [PATCH 14/14] Add TODO on --link --- hive/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/hive/Dockerfile b/hive/Dockerfile index 5ee9b5ad5..b353360d8 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -115,6 +115,7 @@ RUN microdnf update && \ USER stackable WORKDIR /stackable +# TODO: Try to use --link here, as it should be faster COPY --chown=stackable:stackable --from=builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin RUN ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore