Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions packaging/src/docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,34 @@ HIVE_WAREHOUSE_PATH="/data/warehouse/tablespace/managed/hive" \
S3_ENDPOINT_URL="s3.us-west-2.amazonaws.com" \
docker-compose up
```

#### Hive with Ozone-backed warehouse storage

The cluster can also be started with Apache Ozone as the underlying storage layer, utilizing Ozone's S3 Gateway (S3G) to act as an S3-compatible backend for Hive.

Use the provided startup script with the `--ozone` flag. This automatically injects S3A configurations into Hive and merges the main compose file with the Ozone-specific configuration located at `storage/ozone/docker-compose.yml`.

```shell
docker compose down --rmi local # cleanup previous containers and images

export POSTGRES_LOCAL_PATH=... # set the path to the postgres driver jar
./build.sh -hive 4.2.0 -hadoop 3.4.1 -tez 0.10.5
./start-hive.sh --ozone
```

By default, this spins up the Ozone components (Ozone Manager, Storage Container Manager, DataNode,
Recon, S3G) alongside Hive on the same local network.

To view Ozone's web UIs, navigate to:

- Ozone Manager: http://localhost:9874
- Recon: http://localhost:9888

To stop the cluster, including the Ozone stack:

```shell
./stop-hive.sh # to stop containers
#OR
./stop-hive.sh --cleanup # to remove volumes and database state
```
5 changes: 5 additions & 0 deletions packaging/src/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ services:
LLAP_WEB_PORT: '15001'
LLAP_MANAGEMENT_RPC_PORT: '15004'
LLAP_SHUFFLE_PORT: '15551'

DEFAULT_FS: "${DEFAULT_FS}"
S3_ENDPOINT_URL: "${S3_ENDPOINT_URL}"
AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
volumes:
- warehouse:/opt/hive/data/warehouse
- scratch:/opt/hive/scratch
Expand Down
2 changes: 1 addition & 1 deletion packaging/src/docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ function run_llap {
# In this image, LLAP jars are under ${HIVE_HOME}/lib.
export LLAP_DAEMON_HOME="${LLAP_DAEMON_HOME:-$HIVE_HOME}"
export LLAP_DAEMON_CONF_DIR="${LLAP_DAEMON_CONF_DIR:-$HIVE_CONF_DIR}"
export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*}"
export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/tools/lib/*}"

JAVA_ADD_OPENS=(
"--add-opens=java.base/java.lang=ALL-UNNAMED"
Expand Down
15 changes: 14 additions & 1 deletion packaging/src/docker/start-hive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ cd "$SCRIPT_DIR"
MODE="container"
PROFILE=""
SCALE=""
COMPOSE_FILES="docker-compose.yml"

for arg in "$@"; do
case "$arg" in
Expand All @@ -32,6 +33,17 @@ for arg in "$@"; do
export HIVE_ZOOKEEPER_QUORUM=zookeeper:2181
export HIVE_LLAP_DAEMON_SERVICE_HOSTS=@llap0
;;
--ozone)
COMPOSE_FILES="docker-compose.yml:storage/ozone/docker-compose.yml"
# DEFAULT_FS defines the bucket authority
export DEFAULT_FS="s3a://hive"

export HIVE_WAREHOUSE_PATH="/warehouse"

export S3_ENDPOINT_URL="http://s3.ozone:9878"
export AWS_ACCESS_KEY_ID="ozone"
export AWS_SECRET_ACCESS_KEY="secret"
;;
*)
echo "Unknown option: $arg"
exit 1
Expand All @@ -40,7 +52,8 @@ for arg in "$@"; do
done

export HIVE_EXECUTION_MODE="$MODE"
export COMPOSE_FILE="$COMPOSE_FILES"

echo "Starting Hive cluster (mode=$HIVE_EXECUTION_MODE)"
echo "Starting Hive cluster (mode=$HIVE_EXECUTION_MODE, compose_files=$COMPOSE_FILE)"

docker compose $PROFILE up -d $SCALE
3 changes: 3 additions & 0 deletions packaging/src/docker/stop-hive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ cd "$SCRIPT_DIR"

PROFILE="--profile llap" # delete all containers regardless of profile
CLEANUP_FLAG=""
COMPOSE_FILES="docker-compose.yml:storage/ozone/docker-compose.yml"

for arg in "$@"; do
case "$arg" in
Expand All @@ -34,6 +35,8 @@ for arg in "$@"; do
esac
done

export COMPOSE_FILE="$COMPOSE_FILES"

if [[ -n "$CLEANUP_FLAG" ]]; then
echo "Stopping Hive cluster and removing compose volumes"
else
Expand Down
117 changes: 117 additions & 0 deletions packaging/src/docker/storage/ozone/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Apache Ozone storage backend for the Hive docker-compose cluster.
# Merged with the main compose file via the colon-separated COMPOSE_FILE
# environment variable set by start-hive.sh / stop-hive.sh.

# Reusable anchor: every Ozone service runs the same image. Image name,
# version and flavor suffix are overridable through environment variables.
x-image:
  &image
  image: ${OZONE_IMAGE:-apache/ozone}:${OZONE_IMAGE_VERSION:-2.1.0}${OZONE_IMAGE_FLAVOR:-}

# Reusable anchor: environment shared by all Ozone services.
# NOTE(review): keys of the form OZONE-SITE.XML_<property> are presumably
# translated into ozone-site.xml entries by the image's entrypoint —
# confirm against the apache/ozone image documentation.
x-common-config:
  &common-config
  OZONE-SITE.XML_hdds.datanode.dir: "/data/hdds"
  OZONE-SITE.XML_ozone.metadata.dirs: "/data/metadata"
  OZONE-SITE.XML_ozone.om.address: "om"
  OZONE-SITE.XML_ozone.om.http-address: "om:9874"
  OZONE-SITE.XML_ozone.recon.address: "recon:9891"
  OZONE-SITE.XML_ozone.recon.db.dir: "/data/metadata/recon"
  # Single-node dev cluster: one replica and relaxed safemode thresholds so
  # the cluster becomes writable with a single datanode.
  OZONE-SITE.XML_ozone.replication: "1"
  OZONE-SITE.XML_ozone.scm.block.client.address: "scm"
  OZONE-SITE.XML_ozone.scm.client.address: "scm"
  OZONE-SITE.XML_ozone.scm.datanode.id.dir: "/data/metadata"
  OZONE-SITE.XML_ozone.scm.names: "scm"
  no_proxy: "om,recon,scm,s3g,localhost,127.0.0.1"
  OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "1"
  OZONE-SITE.XML_hdds.scm.safemode.healthy.pipeline.pct: "0"
  # Domain name must match the "s3.ozone" network alias of the s3g service
  # below, and the S3_ENDPOINT_URL exported by start-hive.sh --ozone.
  OZONE-SITE.XML_ozone.s3g.domain.name: "s3.ozone"

services:
  # Datanode: stores the actual block data.
  datanode:
    <<: *image
    ports:
      - 9864:9864
    command: ["ozone","datanode"]
    environment:
      <<: *common-config
    networks:
      - hive
  # Ozone Manager: namespace (volume/bucket/key) metadata service.
  # Web UI exposed on http://localhost:9874.
  om:
    <<: *image
    ports:
      - 9874:9874
    environment:
      <<: *common-config
      CORE-SITE.XML_hadoop.proxyuser.hadoop.hosts: "*"
      CORE-SITE.XML_hadoop.proxyuser.hadoop.groups: "*"
      # Skip re-initialization when OM metadata already exists.
      ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION
      # Block startup until SCM is reachable.
      WAITFOR: scm:9876
    command: ["ozone","om"]
    networks:
      - hive
  # Storage Container Manager: block/container management service.
  scm:
    <<: *image
    ports:
      - 9876:9876
    environment:
      <<: *common-config
      ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
    command: ["ozone","scm"]
    networks:
      - hive
  # Recon: monitoring/analytics web UI on http://localhost:9888.
  recon:
    <<: *image
    ports:
      - 9888:9888
    environment:
      <<: *common-config
    command: ["ozone","recon"]
    networks:
      - hive
  # S3 Gateway: S3-compatible endpoint used by Hive's S3A filesystem.
  # The inline script starts the gateway, waits for the cluster to answer,
  # then bootstraps the /s3v/hive bucket before foregrounding the gateway.
  # NOTE(review): the delete+create pair resets the 'hive' bucket on every
  # container start; the delete fails for a non-empty bucket and is swallowed
  # by '|| true' — confirm this reset-when-empty behavior is intended.
  # ($$ is compose escaping for a literal $ in the shell script.)
  s3g:
    <<: *image
    ports:
      - 9878:9878
    environment:
      <<: *common-config
      WAITFOR: om:9874
    command:
      - sh
      - -c
      - |
        set -e
        ozone s3g &
        s3g_pid=$$!
        until ozone sh volume list >/dev/null 2>&1; do echo '...waiting...' && sleep 1; done;

        # Force create the s3v volume in case S3G hasn't auto-generated it yet
        ozone sh volume create /s3v || true

        # Create the 'hive' bucket
        ozone sh bucket delete /s3v/hive || true
        ozone sh bucket create /s3v/hive || true
        wait "$$s3g_pid"
    # Healthy once the bootstrap bucket is queryable through the Ozone shell.
    healthcheck:
      test: [ "CMD", "ozone", "sh", "bucket", "info", "/s3v/hive" ]
      interval: 5s
      timeout: 3s
      retries: 10
      start_period: 30s
    networks:
      hive:
        aliases:
          # Alias matching ozone.s3g.domain.name so virtual-host-style S3
          # requests (hive.s3.ozone) resolve inside the compose network.
          - s3.ozone
          - hive.s3.ozone

# Join the pre-existing 'hive' network created by the main compose file so
# Ozone services are reachable from the Hive containers.
networks:
  hive:
    name: hive
Loading