From f0976c89311a8bfa969298e17896283a9719f603 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 11:31:09 +0200 Subject: [PATCH 01/18] docs: detail how to mount and use external drivers --- .../usage-guide/database-driver-example.adoc | 180 ++++++++++++++++++ docs/modules/hive/partials/nav.adoc | 1 + 2 files changed, 181 insertions(+) create mode 100644 docs/modules/hive/pages/usage-guide/database-driver-example.adoc diff --git a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc new file mode 100644 index 00000000..956f319d --- /dev/null +++ b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc @@ -0,0 +1,180 @@ + += Database drivers + +The Stackable Apache Hive product images come with built-in support for using Postgresql as the metastore database. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySql. + +== Install the MySql helm chart + +[source,bash] +---- +helm install mysql oci://registry-1.docker.io/bitnamicharts/mysql \ +--set auth.database=hive \ +--set auth.username=hive \ +--set auth.password=hive +---- + +== Download the driver to a `PersistentVolumeClaim` + +.Create a PersistentVolumeClaim +[source,yaml] +---- +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: pvc-hive-drivers +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +---- + +Download the driver from e.g. 
https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.31/[maven] to a volume backed by the PVC: + +.Download the driver +[source,yaml] +---- +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: pvc-hive-job +spec: + template: + spec: + restartPolicy: Never + volumes: + - name: external-drivers + persistentVolumeClaim: + claimName: pvc-hive-drivers + initContainers: + - name: dest-dir + image: docker.stackable.tech/stackable/tools:0.2.0-stackable24.3.0 + env: + - name: DEST_DIR + value: "/stackable/externals" + command: + [ + "bash", + "-x", + "-c", + "mkdir -p ${DEST_DIR} && chown stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX,g=rwX ${DEST_DIR}", + ] + securityContext: + runAsUser: 0 + volumeMounts: + - name: external-drivers + mountPath: /stackable/externals + containers: + - name: hive-driver + image: docker.stackable.tech/stackable/tools:0.2.0-stackable24.3.0 + env: + - name: DEST_DIR + value: "/stackable/externals" + command: + [ + "bash", + "-x", + "-c", + "curl -L https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.31/mysql-connector-j-8.0.31.jar \ + -o ${DEST_DIR}/mysql-connector-j-8.0.31.jar", + ] + volumeMounts: + - name: external-drivers + mountPath: /stackable/externals +---- + +This will make the driver available at `/stackable/external-drivers/mysql-connector-j-8.0.31.jar` when the volume `external-drivers` is mounted at `/stackable/external-drivers`. 
+ +Once the above has completed successfully, we can confirm that the driver is in the expected location by running another job: + +[source,yaml] +---- +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: list-drivers-job +spec: + template: + spec: + restartPolicy: Never + volumes: + - name: external-drivers + persistentVolumeClaim: + claimName: pvc-hive-drivers + containers: + - name: hive-driver + image: docker.stackable.tech/stackable/tools:0.2.0-stackable24.3.0 + env: + - name: DEST_DIR + value: "/stackable/externals" + command: + [ + "bash", + "-x", + "-o", + "pipefail", + "-c", + "stat ${DEST_DIR}/mysql-connector-j-8.0.31.jar", + ] + volumeMounts: + - name: external-drivers + mountPath: /stackable/externals +---- + +== Create a Hive Cluster + +The MySql connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path to the mounted driver: + +[source,yaml] +---- +--- +apiVersion: hive.stackable.tech/v1alpha1 +kind: HiveCluster +metadata: + name: hive-with-drivers +spec: + image: + productVersion: 3.1.3 + clusterConfig: + database: + connString: jdbc:mysql://mysql:3306/hive # <1> + user: hive # <2> + password: hive + dbType: mysql + s3: + reference: minio # <3> + metastore: + roleGroups: + default: + envOverrides: + METASTORE_AUX_JARS_PATH: "/stackable/external-drivers/mysql-connector-j-8.0.31.jar" # <4> + podOverrides: # <5> + spec: + containers: + - name: hive + volumeMounts: + - name: external-drivers + mountPath: /stackable/external-drivers + volumes: + - name: external-drivers + persistentVolumeClaim: + claimName: pvc-hive-drivers + replicas: 1 +---- + +<1> The database connection details matching those given when deploying the MySql helm chart +<2> Plain-text Hive credentials will be replaced in an upcoming release! 
+<3> A reference to the file store using S3 (this has been omitted from this article for the sake of brevity, but is described in e.g. the xref:getting_started/first_steps.adoc[] guide) +<4> Use `envOverrides` to set the driver path +<5> Use `podOverrides` to mount the driver + +[sidebar] +This has been tested on Azure AKS and Amazon EKS, both running Kubernetes 1.29. The example shows a PVC mounted with the access mode `ReadWriteOnce` as we have a single metastore instance that is deployed only once the jobs have completed, and, so long as these all run after each other, they can be deployed to different nodes. Different scenarios may require a different access mode, the availability of which is dependent on the type of cluster in use. + + + + diff --git a/docs/modules/hive/partials/nav.adoc b/docs/modules/hive/partials/nav.adoc index b0ccf807..b8065b8b 100644 --- a/docs/modules/hive/partials/nav.adoc +++ b/docs/modules/hive/partials/nav.adoc @@ -6,6 +6,7 @@ ** xref:hive:usage-guide/listenerclass.adoc[] ** xref:hive:usage-guide/data-storage.adoc[] ** xref:hive:usage-guide/derby-example.adoc[] +** xref:hive:usage-guide/database-driver-example.adoc[] ** xref:hive:usage-guide/logging.adoc[] ** xref:hive:usage-guide/monitoring.adoc[] ** xref:hive:usage-guide/resources.adoc[] From 2d95d3e94a658bf782b430f753cac22635566d8a Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 11:57:17 +0200 Subject: [PATCH 02/18] added note to requirements page, corrected image tag --- .../hive/pages/required-external-components.adoc | 2 ++ .../pages/usage-guide/database-driver-example.adoc | 10 +++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/modules/hive/pages/required-external-components.adoc b/docs/modules/hive/pages/required-external-components.adoc index ce9f557c..0fc83e8e 100644 --- a/docs/modules/hive/pages/required-external-components.adoc +++ b/docs/modules/hive/pages/required-external-components.adoc @@ -8,3 +8,5 @@ The Hive 
Metastore requires a backend SQL database. Supported databases and vers * MS SQL Server 2008 R2 and above Reference: https://cwiki.apache.org/confluence/display/Hive/AdminManual+Metastore+Administration#AdminManualMetastoreAdministration-SupportedBackendDatabasesforMetastore[Hive Metastore documentation] + +The Stackable product images for Apache Hive come with built-in support for Postgresql. See xref:usage-guide/database-driver-example.adoc[] for details on how to make drivers for other databases (supported by Hive) available. \ No newline at end of file diff --git a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc index 956f319d..aa6d43e4 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc @@ -51,7 +51,7 @@ spec: claimName: pvc-hive-drivers initContainers: - name: dest-dir - image: docker.stackable.tech/stackable/tools:0.2.0-stackable24.3.0 + image: docker.stackable.tech/stackable/tools:1.0.0-stackable24.3.0 env: - name: DEST_DIR value: "/stackable/externals" @@ -69,7 +69,7 @@ spec: mountPath: /stackable/externals containers: - name: hive-driver - image: docker.stackable.tech/stackable/tools:0.2.0-stackable24.3.0 + image: docker.stackable.tech/stackable/tools:1.0.0-stackable24.3.0 env: - name: DEST_DIR value: "/stackable/externals" @@ -107,7 +107,7 @@ spec: claimName: pvc-hive-drivers containers: - name: hive-driver - image: docker.stackable.tech/stackable/tools:0.2.0-stackable24.3.0 + image: docker.stackable.tech/stackable/tools:1.0.0-stackable24.3.0 env: - name: DEST_DIR value: "/stackable/externals" @@ -174,7 +174,3 @@ spec: [sidebar] This has been tested on Azure AKS and Amazon EKS, both running Kubernetes 1.29. 
The example shows a PVC mounted with the access mode `ReadWriteOnce` as we have a single metastore instance that is deployed only once the jobs have completed, and, so long as these all run after each other, they can be deployed to different nodes. Different scenarios may require a different access mode, the availability of which is dependent on the type of cluster in use. - - - - From d82d8149eb6f173d61b6b9ae89f595440310b897 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 12:03:49 +0200 Subject: [PATCH 03/18] changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5fade52..7de71faa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- Added documentation/tutorial on using external database drivers ([#449]). + ### Changed - BREAKING: Switch to new image that only contains HMS. @@ -12,6 +16,7 @@ All notable changes to this project will be documented in this file. `metastore-log4j2.properties` ([#447]). 
[#447]: https://github.com/stackabletech/hive-operator/pull/447 +[#449]: https://github.com/stackabletech/hive-operator/pull/449 ## [24.3.0] - 2024-03-20 From 311d15cdb15accf7d84e92141d327808fbc818de Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 12:07:27 +0200 Subject: [PATCH 04/18] typo --- .../modules/hive/pages/usage-guide/database-driver-example.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc index aa6d43e4..7934083a 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc @@ -127,7 +127,7 @@ spec: == Create a Hive Cluster -The MySql connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path to the mounted driver: +The MySql connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. 
This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path of the mounted driver: [source,yaml] ---- From 0aca705264820e18514a5ec75592685e0c65fec4 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 14:21:21 +0200 Subject: [PATCH 05/18] Update docs/modules/hive/pages/usage-guide/database-driver-example.adoc Co-authored-by: Malte Sander --- .../modules/hive/pages/usage-guide/database-driver-example.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc index 7934083a..2b484865 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc @@ -1,7 +1,7 @@ = Database drivers -The Stackable Apache Hive product images come with built-in support for using Postgresql as the metastore database. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySql. +The Stackable Apache Hive product images come with built-in support for using Postgresql as the metastore database. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySql. 
== Install the MySql helm chart From a9a6251d69c6fef47b572d8861c2ac0d15820aa1 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 14:28:44 +0200 Subject: [PATCH 06/18] initialize SQL in products --- .../hive/pages/usage-guide/database-driver-example.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc index 7934083a..1549f867 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver-example.adoc @@ -1,9 +1,9 @@ = Database drivers -The Stackable Apache Hive product images come with built-in support for using Postgresql as the metastore database. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySql. +The Stackable Apache Hive product images come with built-in support for using PostgreSQL as the metastore database. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySql. -== Install the MySql helm chart +== Install the MySQL helm chart [source,bash] ---- @@ -127,7 +127,7 @@ spec: == Create a Hive Cluster -The MySql connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path of the mounted driver: +The MySQL connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. 
This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path of the mounted driver: [source,yaml] ---- @@ -166,7 +166,7 @@ spec: replicas: 1 ---- -<1> The database connection details matching those given when deploying the MySql helm chart +<1> The database connection details matching those given when deploying the MySQL helm chart <2> Plain-text Hive credentials will be replaced in an upcoming release! <3> A reference to the file store using S3 (this has been omitted from this article for the sake of brevity, but is described in e.g. the xref:getting_started/first_steps.adoc[] guide) <4> Use `envOverrides` to set the driver path From 439b6d5cb8ac701811404aca4afceff8a9294314 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 14:39:19 +0200 Subject: [PATCH 07/18] review feedback --- ...tabase-driver-example.adoc => database-driver.adoc} | 10 ++++++++-- docs/modules/hive/partials/nav.adoc | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) rename docs/modules/hive/pages/usage-guide/{database-driver-example.adoc => database-driver.adoc} (92%) diff --git a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc similarity index 92% rename from docs/modules/hive/pages/usage-guide/database-driver-example.adoc rename to docs/modules/hive/pages/usage-guide/database-driver.adoc index 12e878fd..1ff75c1f 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver-example.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -1,7 +1,9 @@ = Database drivers -The Stackable Apache Hive product images come with built-in support for using PostgreSQL as the metastore database. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySQL. 
+The Stackable Apache Hive product images come with built-in support for using PostgreSQL as the metastore database. + +To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySQL. == Install the MySQL helm chart @@ -127,7 +129,11 @@ spec: == Create a Hive Cluster -The MySQL connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path of the mounted driver: +The MySQL connection details can then be used in the definition of the Hive Metastore resource. + +Note that it is also necessary to "tell" Hive where to find the driver. + +This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path of the mounted driver: [source,yaml] ---- diff --git a/docs/modules/hive/partials/nav.adoc b/docs/modules/hive/partials/nav.adoc index b8065b8b..000e6099 100644 --- a/docs/modules/hive/partials/nav.adoc +++ b/docs/modules/hive/partials/nav.adoc @@ -6,7 +6,7 @@ ** xref:hive:usage-guide/listenerclass.adoc[] ** xref:hive:usage-guide/data-storage.adoc[] ** xref:hive:usage-guide/derby-example.adoc[] -** xref:hive:usage-guide/database-driver-example.adoc[] +** xref:hive:usage-guide/database-driver.adoc[] ** xref:hive:usage-guide/logging.adoc[] ** xref:hive:usage-guide/monitoring.adoc[] ** xref:hive:usage-guide/resources.adoc[] From c9446f101716bcb893568184013443fba629f09e Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:53:49 +0200 Subject: [PATCH 08/18] Update docs/modules/hive/pages/required-external-components.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/required-external-components.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/required-external-components.adoc 
b/docs/modules/hive/pages/required-external-components.adoc index 0fc83e8e..281c917c 100644 --- a/docs/modules/hive/pages/required-external-components.adoc +++ b/docs/modules/hive/pages/required-external-components.adoc @@ -9,4 +9,5 @@ The Hive Metastore requires a backend SQL database. Supported databases and vers Reference: https://cwiki.apache.org/confluence/display/Hive/AdminManual+Metastore+Administration#AdminManualMetastoreAdministration-SupportedBackendDatabasesforMetastore[Hive Metastore documentation] -The Stackable product images for Apache Hive come with built-in support for Postgresql. See xref:usage-guide/database-driver-example.adoc[] for details on how to make drivers for other databases (supported by Hive) available. \ No newline at end of file +The Stackable product images for Apache Hive come with built-in support for PostgreSQL. +See xref:usage-guide/database-driver.adoc[] for details on how to make drivers for other databases (supported by Hive) available. \ No newline at end of file From 2341e9cf71c118463e71691e4642c5608745015f Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:54:31 +0200 Subject: [PATCH 09/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 1ff75c1f..443571b7 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -1,8 +1,7 @@ = Database drivers -The Stackable Apache Hive product images come with built-in support for using PostgreSQL as the metastore database. - +The Stackable product images for Apache Hive come with built-in support for using PostgreSQL as the metastore database. 
To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySQL. == Install the MySQL helm chart From 2258575ed28e069ebabf997d5da5dfb83f9e4d87 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:54:43 +0200 Subject: [PATCH 10/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 443571b7..0be7dc20 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -14,7 +14,7 @@ helm install mysql oci://registry-1.docker.io/bitnamicharts/mysql \ --set auth.password=hive ---- -== Download the driver to a `PersistentVolumeClaim` +== Download the driver to a PersistentVolumeClaim .Create a PersistentVolumeClaim [source,yaml] From 5d8411b5a6a43530b8fa9d26ee6c209a1c6ec269 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:55:02 +0200 Subject: [PATCH 11/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 0be7dc20..662cce89 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -129,9 +129,7 @@ spec: == Create a Hive Cluster The MySQL connection details can then be used in the definition of the Hive Metastore resource. - Note that it is also necessary to "tell" Hive where to find the driver. 
- This is done by setting the value of the environment variable `METASTORE_AUX_JARS_PATH` to the path of the mounted driver: [source,yaml] From 94e80dc8ce5219b60a1e415bff7c57131fed80ac Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:55:13 +0200 Subject: [PATCH 12/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 662cce89..fe71c045 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -169,7 +169,7 @@ spec: replicas: 1 ---- -<1> The database connection details matching those given when deploying the MySQL helm chart +<1> The database connection details matching those given when deploying the MySQL Helm chart <2> Plain-text Hive credentials will be replaced in an upcoming release! <3> A reference to the file store using S3 (this has been omitted from this article for the sake of brevity, but is described in e.g. 
the xref:getting_started/first_steps.adoc[] guide) <4> Use `envOverrides` to set the driver path From 1c06ea9f4798cc370266c5979badc7b9cd5a95b5 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:55:39 +0200 Subject: [PATCH 13/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index fe71c045..d6251ad2 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -175,5 +175,6 @@ spec: <4> Use `envOverrides` to set the driver path <5> Use `podOverrides` to mount the driver -[sidebar] -This has been tested on Azure AKS and Amazon EKS, both running Kubernetes 1.29. The example shows a PVC mounted with the access mode `ReadWriteOnce` as we have a single metastore instance that is deployed only once the jobs have completed, and, so long as these all run after each other, they can be deployed to different nodes. Different scenarios may require a different access mode, the availability of which is dependent on the type of cluster in use. +NOTE: This has been tested on Azure AKS and Amazon EKS, both running Kubernetes 1.29. +The example shows a PVC mounted with the access mode `ReadWriteOnce` as we have a single metastore instance that is deployed only once the jobs have completed, and, so long as these all run after each other, they can be deployed to different nodes. +Different scenarios may require a different access mode, the availability of which is dependent on the type of cluster in use. 
From 3b98ada0845d3323e15172eba7deb81335222d08 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:55:49 +0200 Subject: [PATCH 14/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index d6251ad2..06b6c5eb 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -126,7 +126,7 @@ spec: mountPath: /stackable/externals ---- -== Create a Hive Cluster +== Create a Hive cluster The MySQL connection details can then be used in the definition of the Hive Metastore resource. Note that it is also necessary to "tell" Hive where to find the driver. From 1eef2877890b50de269eb24e8d211737e86a51f5 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 23 Apr 2024 16:56:04 +0200 Subject: [PATCH 15/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Felix Hennig --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 06b6c5eb..7257af48 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -1,4 +1,3 @@ - = Database drivers The Stackable product images for Apache Hive come with built-in support for using PostgreSQL as the metastore database. 
From 560acb5973d99728d3c412d07614907c942bae20 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 24 Apr 2024 09:06:30 +0200 Subject: [PATCH 16/18] Update docs/modules/hive/pages/usage-guide/database-driver.adoc Co-authored-by: Sebastian Bernauer --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 7257af48..b856aae5 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -1,6 +1,7 @@ = Database drivers -The Stackable product images for Apache Hive come with built-in support for using PostgreSQL as the metastore database. +The Stackable product images for Apache Hive come with built-in support for using PostgreSQL as the metastore database. +The MySQL driver is not shipped in our images due to licensing problems. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySQL. == Install the MySQL helm chart From 64754ea2f3ac5b049246e346b9a7fff48642bc11 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 24 Apr 2024 14:46:30 +0200 Subject: [PATCH 17/18] added dockerfile alternative --- .../pages/usage-guide/database-driver.adoc | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index b856aae5..1d6b307b 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -1,7 +1,7 @@ = Database drivers The Stackable product images for Apache Hive come with built-in support for using PostgreSQL as the metastore database. 
-The MySQL driver is not shipped in our images due to licensing problems. +The MySQL driver is not shipped in our images due to licensing issues. To use another supported database it is necessary to make the relevant drivers available to Hive: this tutorial shows how this is done for MySQL. == Install the MySQL helm chart @@ -89,7 +89,7 @@ spec: This will make the driver available at `/stackable/external-drivers/mysql-connector-j-8.0.31.jar` when the volume `external-drivers` is mounted at `/stackable/external-drivers`. -Once the above has completed successfully, we can confirm that the driver is in the expected location by running another job: +Once the above has completed successfully, you can confirm that the driver is in the expected location by running another job: [source,yaml] ---- @@ -178,3 +178,52 @@ spec: NOTE: This has been tested on Azure AKS and Amazon EKS, both running Kubernetes 1.29. The example shows a PVC mounted with the access mode `ReadWriteOnce` as we have a single metastore instance that is deployed only once the jobs have completed, and, so long as these all run after each other, they can be deployed to different nodes. Different scenarios may require a different access mode, the availability of which is dependent on the type of cluster in use. + +== Alternative: using a custom image + +If you have access to a registry to store custom images, another approach is to build a custom image on top of a Stackable product image and "bake" the driver into it directly: + +.Copy the driver +[source] +---- +FROM docker.stackable.tech/stackable/hive:3.1.3-stackable0.0.0-dev + +RUN curl --fail -L https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.31/mysql-connector-j-8.0.31.jar -o /tmp/mysql-connector-j-8.0.31.jar +---- + +.Build and tag the image +[source] +---- +docker build -f ./Dockerfile -t docker.stackable.tech/stackable/hive:3.1.3-stackable0.0.0-dev-mysql . 
+---- + +.Reference the new path to the driver without the need for using a volume mounted from a PVC +[source,yaml] +---- +--- +apiVersion: hive.stackable.tech/v1alpha1 +kind: HiveCluster +metadata: + name: hive +spec: + image: + custom: docker.stackable.tech/stackable/hive:3.1.3-stackable0.0.0-dev-mysql # <1> + productVersion: 3.1.3 + clusterConfig: + database: + ... + s3: + ... + metastore: + config: + logging: + enableVectorAgent: false + roleGroups: + default: + envOverrides: + METASTORE_AUX_JARS_PATH: "/tmp/mysql-connector-j-8.0.31.jar" # <2> + replicas: 1 +---- + +<1> Name of the custom image containing the driver +<2> Path to the driver \ No newline at end of file From f23041d7f696ac27ed6efe0a1503319482ddad9e Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 25 Apr 2024 08:56:55 +0200 Subject: [PATCH 18/18] change to path --- docs/modules/hive/pages/usage-guide/database-driver.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/database-driver.adoc b/docs/modules/hive/pages/usage-guide/database-driver.adoc index 1d6b307b..cc1f0d6e 100644 --- a/docs/modules/hive/pages/usage-guide/database-driver.adoc +++ b/docs/modules/hive/pages/usage-guide/database-driver.adoc @@ -188,7 +188,7 @@ If you have access to a registry to store custom images, another approach is to ---- FROM docker.stackable.tech/stackable/hive:3.1.3-stackable0.0.0-dev -RUN curl --fail -L https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.31/mysql-connector-j-8.0.31.jar -o /tmp/mysql-connector-j-8.0.31.jar +RUN curl --fail -L https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.31/mysql-connector-j-8.0.31.jar -o /stackable/mysql-connector-j-8.0.31.jar ---- .Build and tag the image @@ -221,7 +221,7 @@ spec: roleGroups: default: envOverrides: - METASTORE_AUX_JARS_PATH: "/tmp/mysql-connector-j-8.0.31.jar" # <2> + METASTORE_AUX_JARS_PATH: "/stackable/mysql-connector-j-8.0.31.jar" # <2> replicas: 1 ----