From 7cfdc010a56d5fc8a7f5712c386919eade6bc998 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 7 Apr 2026 23:24:09 +0530 Subject: [PATCH 1/4] HIVE-29552: Add docker scripts for Ozone --- packaging/src/docker/README.md | 31 +++++ packaging/src/docker/start-hive.sh | 15 ++- packaging/src/docker/stop-hive.sh | 6 + .../docker/storage/ozone/docker-compose.yml | 119 ++++++++++++++++++ 4 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 packaging/src/docker/storage/ozone/docker-compose.yml diff --git a/packaging/src/docker/README.md b/packaging/src/docker/README.md index c82fe7e5ffc2..0e6c9e75aad9 100644 --- a/packaging/src/docker/README.md +++ b/packaging/src/docker/README.md @@ -358,3 +358,34 @@ HIVE_WAREHOUSE_PATH="/data/warehouse/tablespace/managed/hive" \ S3_ENDPOINT_URL="s3.us-west-2.amazonaws.com" \ docker-compose up ``` + +#### Hive with Ozone-backed warehouse storage + +The cluster can also be started with Apache Ozone as the underlying storage layer, utilizing Ozone's S3 Gateway (S3G) to act as an S3-compatible backend for Hive. + +Use the provided startup script with the `--ozone` flag. This automatically injects S3A configurations into Hive and merges the main compose file with the Ozone-specific configuration located at `storage/ozone/docker-compose.yml`. + +```shell +docker compose down --rmi local # cleanup previous containers and images + +export POSTGRES_LOCAL_PATH=... # set the path to the postgres driver jar +./build.sh -hive 4.2.0 -hadoop 3.4.1 -tez 0.10.5 +./start-hive.sh --ozone +``` + +By default, this spins up the Ozone components (Ozone Manager, Storage Container Manager, DataNode, +Recon, S3G) alongside Hive on the same local network. + +To view Ozone's web UIs, you can navigate to: + +Ozone Manager: http://localhost:9874 + +Recon: http://localhost:9888 + +To stop and remove the Ozone stack: + +```shell +./stop-hive.sh --ozone # to stop containers +#OR +./stop-hive.sh --ozone --cleanup # to remove volumes and database state +``` \ No newline at end of file diff --git a/packaging/src/docker/start-hive.sh b/packaging/src/docker/start-hive.sh index ddffcd20dd3c..e8c9fcf77c35 100755 --- a/packaging/src/docker/start-hive.sh +++ b/packaging/src/docker/start-hive.sh @@ -22,6 +22,7 @@ cd "$SCRIPT_DIR" MODE="container" PROFILE="" SCALE="" +COMPOSE_FILES="docker-compose.yml" for arg in "$@"; do case "$arg" in @@ -32,6 +33,17 @@ for arg in "$@"; do export HIVE_ZOOKEEPER_QUORUM=zookeeper:2181 export HIVE_LLAP_DAEMON_SERVICE_HOSTS=@llap0 ;; +--ozone) + COMPOSE_FILES="docker-compose.yml:storage/ozone/docker-compose.yml" + # DEFAULT_FS defines the bucket authority + export DEFAULT_FS="s3a://hive" + + export HIVE_WAREHOUSE_PATH="/warehouse" + + export S3_ENDPOINT_URL="http://s3.ozone:9878" + export AWS_ACCESS_KEY_ID="ozone" + export AWS_SECRET_ACCESS_KEY="secret" + ;; *) echo "Unknown option: $arg" exit 1 @@ -40,7 +52,8 @@ for arg in "$@"; do done export HIVE_EXECUTION_MODE="$MODE" +export COMPOSE_FILE="$COMPOSE_FILES" -echo "Starting Hive cluster (mode=$HIVE_EXECUTION_MODE)" +echo "Starting Hive cluster (mode=$HIVE_EXECUTION_MODE, compose_files=$COMPOSE_FILE)" docker compose $PROFILE up -d $SCALE diff --git a/packaging/src/docker/stop-hive.sh b/packaging/src/docker/stop-hive.sh index 87f30ee3a278..712a8dd4320b 100755 --- a/packaging/src/docker/stop-hive.sh +++ b/packaging/src/docker/stop-hive.sh @@ -21,12 +21,16 @@ cd "$SCRIPT_DIR" PROFILE="--profile llap" # delete all containers regardless of profile CLEANUP_FLAG="" +COMPOSE_FILES="docker-compose.yml" for arg in "$@"; do case "$arg" in --cleanup) CLEANUP_FLAG="--volumes" ;; + --ozone) + COMPOSE_FILES="docker-compose.yml:storage/ozone/docker-compose.yml" + ;; *) echo "Unknown option: $arg" exit 1 @@ -34,6 +38,8 @@ for arg in "$@"; do esac done +export COMPOSE_FILE="$COMPOSE_FILES" + if [[ -n "$CLEANUP_FLAG" ]]; then echo "Stopping Hive cluster and removing compose volumes" else diff --git a/packaging/src/docker/storage/ozone/docker-compose.yml b/packaging/src/docker/storage/ozone/docker-compose.yml new file mode 100644 index 000000000000..3dc76e62b0ae --- /dev/null +++ b/packaging/src/docker/storage/ozone/docker-compose.yml @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3" + +x-image: + &image + image: ${OZONE_IMAGE:-apache/ozone}:${OZONE_IMAGE_VERSION:-2.1.0}${OZONE_IMAGE_FLAVOR:-} + +x-common-config: + &common-config + OZONE-SITE.XML_hdds.datanode.dir: "/data/hdds" + OZONE-SITE.XML_ozone.metadata.dirs: "/data/metadata" + OZONE-SITE.XML_ozone.om.address: "om" + OZONE-SITE.XML_ozone.om.http-address: "om:9874" + OZONE-SITE.XML_ozone.recon.address: "recon:9891" + OZONE-SITE.XML_ozone.recon.db.dir: "/data/metadata/recon" + OZONE-SITE.XML_ozone.replication: "1" + OZONE-SITE.XML_ozone.scm.block.client.address: "scm" + OZONE-SITE.XML_ozone.scm.client.address: "scm" + OZONE-SITE.XML_ozone.scm.datanode.id.dir: "/data/metadata" + OZONE-SITE.XML_ozone.scm.names: "scm" + no_proxy: "om,recon,scm,s3g,localhost,127.0.0.1" + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "1" + OZONE-SITE.XML_hdds.scm.safemode.healthy.pipeline.pct: "0" + OZONE-SITE.XML_ozone.s3g.domain.name: "s3.ozone" + +services: + datanode: + <<: *image + ports: + - 9864:9864 + command: ["ozone","datanode"] + environment: + <<: *common-config + networks: + - hive + om: + <<: *image + ports: + - 9874:9874 + environment: + <<: *common-config + CORE-SITE.XML_hadoop.proxyuser.hadoop.hosts: "*" + CORE-SITE.XML_hadoop.proxyuser.hadoop.groups: "*" + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + WAITFOR: scm:9876 + command: ["ozone","om"] + networks: + - hive + scm: + <<: *image + ports: + - 9876:9876 + environment: + <<: *common-config + ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION + command: ["ozone","scm"] + networks: + - hive + recon: + <<: *image + ports: + - 9888:9888 + environment: + <<: *common-config + command: ["ozone","recon"] + networks: + - hive + s3g: + <<: *image + ports: + - 9878:9878 + environment: + <<: *common-config + WAITFOR: om:9874 + command: + - sh + - -c + - | + set -e + ozone s3g & + s3g_pid=$$! + until ozone sh volume list >/dev/null 2>&1; do echo '...waiting...' && sleep 1; done; + + # Force create the s3v volume in case S3G hasn't auto-generated it yet + ozone sh volume create /s3v || true + + # Create the 'hive' bucket + ozone sh bucket delete /s3v/hive || true + ozone sh bucket create /s3v/hive || true + wait "$$s3g_pid" + healthcheck: + test: [ "CMD", "ozone", "sh", "bucket", "info", "/s3v/hive" ] + interval: 5s + timeout: 3s + retries: 10 + start_period: 30s + networks: + hive: + aliases: + - s3.ozone + - hive.s3.ozone + +networks: + hive: + name: hive From 8d99dea8270168732dc84c8d10523c55f60ea98a Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Wed, 8 Apr 2026 23:45:24 +0530 Subject: [PATCH 2/4] LLAP --- packaging/src/docker/README.md | 4 ++-- packaging/src/docker/docker-compose.yml | 6 ++++++ packaging/src/docker/entrypoint.sh | 2 +- packaging/src/docker/stop-hive.sh | 5 +---- packaging/src/docker/storage/ozone/docker-compose.yml | 2 -- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packaging/src/docker/README.md b/packaging/src/docker/README.md index 0e6c9e75aad9..4038dac79ba4 100644 --- a/packaging/src/docker/README.md +++ b/packaging/src/docker/README.md @@ -385,7 +385,7 @@ Recon: http://localhost:9888 To stop and remove the Ozone stack: ```shell -./stop-hive.sh --ozone # to stop containers +./stop-hive.sh # to stop containers #OR -./stop-hive.sh --ozone --cleanup # to remove volumes and database state +./stop-hive.sh --cleanup # to remove volumes and database state ``` \ No newline at end of file diff --git a/packaging/src/docker/docker-compose.yml b/packaging/src/docker/docker-compose.yml index 3827cbf94f04..dcac38a38c31 100644 --- a/packaging/src/docker/docker-compose.yml +++ b/packaging/src/docker/docker-compose.yml @@ -155,6 +155,12 @@ services: LLAP_WEB_PORT: '15001' LLAP_MANAGEMENT_RPC_PORT: '15004' LLAP_SHUFFLE_PORT: '15551' + + DEFAULT_FS: "${DEFAULT_FS}" + HADOOP_CLASSPATH: /opt/hadoop/share/hadoop/tools/lib/* + S3_ENDPOINT_URL: "${S3_ENDPOINT_URL}" + AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}" + AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}" volumes: - warehouse:/opt/hive/data/warehouse - scratch:/opt/hive/scratch diff --git a/packaging/src/docker/entrypoint.sh b/packaging/src/docker/entrypoint.sh index b6e71c2e7ee5..4b20b765c13d 100644 --- a/packaging/src/docker/entrypoint.sh +++ b/packaging/src/docker/entrypoint.sh @@ -85,7 +85,7 @@ function run_llap { # In this image, LLAP jars are under ${HIVE_HOME}/lib. export LLAP_DAEMON_HOME="${LLAP_DAEMON_HOME:-$HIVE_HOME}" export LLAP_DAEMON_CONF_DIR="${LLAP_DAEMON_CONF_DIR:-$HIVE_CONF_DIR}" - export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*}" +export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/tools/lib/*}" JAVA_ADD_OPENS=( "--add-opens=java.base/java.lang=ALL-UNNAMED" diff --git a/packaging/src/docker/stop-hive.sh b/packaging/src/docker/stop-hive.sh index 712a8dd4320b..d07f6fcb469a 100755 --- a/packaging/src/docker/stop-hive.sh +++ b/packaging/src/docker/stop-hive.sh @@ -21,16 +21,13 @@ cd "$SCRIPT_DIR" PROFILE="--profile llap" # delete all containers regardless of profile CLEANUP_FLAG="" -COMPOSE_FILES="docker-compose.yml" +COMPOSE_FILES="docker-compose.yml:storage/ozone/docker-compose.yml" for arg in "$@"; do case "$arg" in --cleanup) CLEANUP_FLAG="--volumes" ;; - --ozone) - COMPOSE_FILES="docker-compose.yml:storage/ozone/docker-compose.yml" - ;; *) echo "Unknown option: $arg" exit 1 diff --git a/packaging/src/docker/storage/ozone/docker-compose.yml b/packaging/src/docker/storage/ozone/docker-compose.yml index 3dc76e62b0ae..f5cf554b42c4 100644 --- a/packaging/src/docker/storage/ozone/docker-compose.yml +++ b/packaging/src/docker/storage/ozone/docker-compose.yml @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -version: "3" - x-image: &image image: ${OZONE_IMAGE:-apache/ozone}:${OZONE_IMAGE_VERSION:-2.1.0}${OZONE_IMAGE_FLAVOR:-} From 2209466d20d6cd3ae4fe6394b4d169d819b4b2f5 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 9 Apr 2026 00:35:30 +0530 Subject: [PATCH 3/4] fix formatting --- packaging/src/docker/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/src/docker/entrypoint.sh b/packaging/src/docker/entrypoint.sh index 4b20b765c13d..5a5979b9ac9f 100644 --- a/packaging/src/docker/entrypoint.sh +++ b/packaging/src/docker/entrypoint.sh @@ -85,7 +85,7 @@ function run_llap { # In this image, LLAP jars are under ${HIVE_HOME}/lib. export LLAP_DAEMON_HOME="${LLAP_DAEMON_HOME:-$HIVE_HOME}" export LLAP_DAEMON_CONF_DIR="${LLAP_DAEMON_CONF_DIR:-$HIVE_CONF_DIR}" -export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/tools/lib/*}" + export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/tools/lib/*}" JAVA_ADD_OPENS=( "--add-opens=java.base/java.lang=ALL-UNNAMED" From 7e4dae9db78ea7242434411493ed64d3734a97e2 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 9 Apr 2026 16:34:18 +0530 Subject: [PATCH 4/4] Remove redundant HADOOP_CLASSPATH --- packaging/src/docker/docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/packaging/src/docker/docker-compose.yml b/packaging/src/docker/docker-compose.yml index dcac38a38c31..c01dbbeb2876 100644 --- a/packaging/src/docker/docker-compose.yml +++ b/packaging/src/docker/docker-compose.yml @@ -157,7 +157,6 @@ services: LLAP_SHUFFLE_PORT: '15551' DEFAULT_FS: "${DEFAULT_FS}" - HADOOP_CLASSPATH: /opt/hadoop/share/hadoop/tools/lib/* S3_ENDPOINT_URL: "${S3_ENDPOINT_URL}" AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}" AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"