
Commit 9f38d47

Address PR feedback
1 parent f6532b0 commit 9f38d47

File tree: 3 files changed, +128 -129 lines


tests/integration/integration-tests-kyuubi.sh

Lines changed: 42 additions & 35 deletions
@@ -69,20 +69,30 @@ setup_kyuubi_pod_with_s3() {
    s3_secret_key=$(get_s3_secret_key)

    # Write Spark configs inside the Kyuubi container
-    kubectl -n $NAMESPACE exec kyuubi-test -- env IMG="$image" /bin/bash -c 'echo spark.kubernetes.container.image=$IMG > /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env NN="$NAMESPACE" /bin/bash -c 'echo spark.kubernetes.namespace=$NN >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env UU="$SERVICE_ACCOUNT" /bin/bash -c 'echo spark.kubernetes.authenticate.driver.serviceAccountName=$UU >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env ENDPOINT="$s3_endpoint" /bin/bash -c 'echo spark.hadoop.fs.s3a.endpoint=$ENDPOINT >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env ACCESS_KEY="$s3_access_key" /bin/bash -c 'echo spark.hadoop.fs.s3a.access.key=$ACCESS_KEY >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env SECRET_KEY="$s3_secret_key" /bin/bash -c 'echo spark.hadoop.fs.s3a.secret.key=$SECRET_KEY >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- /bin/bash -c 'echo spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- /bin/bash -c 'echo spark.hadoop.fs.s3a.connection.ssl.enabled=false >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- /bin/bash -c 'echo spark.hadoop.fs.s3a.path.style.access=true >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env BUCKET="$S3_BUCKET" /bin/bash -c 'echo spark.sql.warehouse.dir=s3a://$BUCKET/warehouse >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- env BUCKET="$S3_BUCKET" /bin/bash -c 'echo spark.kubernetes.file.upload.path=s3a://$BUCKET >> /etc/spark8t/conf/spark-defaults.conf'
-
-    # Wait some time for the server to be up and running
-    sleep 10
+    # Add relevant Spark configurations in the service account and write the config
+    # to spark-defaults.conf file inside the container
+    kubectl -n $NAMESPACE exec kyuubi-test -- \
+      env IMG="$image" \
+          UU="$SERVICE_ACCOUNT" \
+          NN="$NAMESPACE" \
+          ENDPOINT="$s3_endpoint" \
+          ACCESS_KEY="$s3_access_key" \
+          SECRET_KEY="$s3_secret_key" \
+          BUCKET="$S3_BUCKET" \
+      /bin/bash -c '\
+        spark-client.service-account-registry add-config --username $UU --namespace $NN \
+          --conf spark.kubernetes.container.image=$IMG \
+          --conf spark.hadoop.fs.s3a.endpoint=$ENDPOINT \
+          --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
+          --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
+          --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
+          --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \
+          --conf spark.hadoop.fs.s3a.path.style.access=true \
+          --conf spark.sql.warehouse.dir=s3a://$BUCKET/warehouse \
+          --conf spark.kubernetes.file.upload.path=s3a://$BUCKET \
+        && \
+        spark-client.service-account-registry get-config --username $UU --namespace $NN > /etc/spark8t/conf/spark-defaults.conf'
+
}

@@ -111,33 +121,30 @@ setup_kyuubi_pod_with_azure_abfss() {
    # Create Azure storage container
    create_azure_container $AZURE_CONTAINER

-    storage_account_name=$(get_storage_account)
-    storage_account_key=$(get_azure_secret_key)
+    storage_account_name=$(get_azure_storage_account_name)
+    storage_account_key=$(get_azure_storage_secret_key)
    warehouse_path=$(construct_resource_uri $AZURE_CONTAINER warehouse abfss)
    file_upload_path=$(construct_resource_uri $AZURE_CONTAINER "" abfss)

-    # Write Spark configs inside the Kyuubi container
+    # Add relevant Spark configurations in the service account and write the config
+    # to spark-defaults.conf file inside the container
    kubectl -n $NAMESPACE exec kyuubi-test -- \
      env IMG="$image" \
-        /bin/bash -c 'echo spark.kubernetes.container.image=$IMG > /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- \
-      env NN="$NAMESPACE" \
-        /bin/bash -c 'echo spark.kubernetes.namespace=$NN >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- \
-      env UU="$SERVICE_ACCOUNT" \
-        /bin/bash -c 'echo spark.kubernetes.authenticate.driver.serviceAccountName=$UU >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- \
-      env ACCOUNT_NAME="$storage_account_name" SECRET_KEY="$storage_account_key"\
-        /bin/bash -c 'echo spark.hadoop.fs.azure.account.key.$ACCOUNT_NAME.dfs.core.windows.net=$SECRET_KEY >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- \
-      env WAREHOUSE="$warehouse_path" \
-        /bin/bash -c 'echo spark.sql.warehouse.dir=$WAREHOUSE >> /etc/spark8t/conf/spark-defaults.conf'
-    kubectl -n $NAMESPACE exec kyuubi-test -- \
-      env UPLOAD_PATH="$file_upload_path" \
-        /bin/bash -c 'echo spark.kubernetes.file.upload.path=$UPLOAD_PATH >> /etc/spark8t/conf/spark-defaults.conf'
+          UU="$SERVICE_ACCOUNT" \
+          NN="$NAMESPACE" \
+          ACCOUNT_NAME="$storage_account_name" \
+          SECRET_KEY="$storage_account_key" \
+          WAREHOUSE="$warehouse_path" \
+          UPLOAD_PATH="$file_upload_path" \
+      /bin/bash -c '\
+        spark-client.service-account-registry add-config --username $UU --namespace $NN \
+          --conf spark.kubernetes.container.image=$IMG \
+          --conf spark.hadoop.fs.azure.account.key.$ACCOUNT_NAME.dfs.core.windows.net=$SECRET_KEY \
+          --conf spark.sql.warehouse.dir=$WAREHOUSE \
+          --conf spark.kubernetes.file.upload.path=$UPLOAD_PATH \
+        && \
+        spark-client.service-account-registry get-config --username $UU --namespace $NN > /etc/spark8t/conf/spark-defaults.conf'

-    # Wait some time for the server to be up and running
-    sleep 10
}

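For readers unfamiliar with the spark8t tooling, the consolidated pattern used in both hunks above boils down to two calls: store the settings against the service account with add-config, then render the account's full configuration into spark-defaults.conf with get-config. A minimal sketch, as it might be run from inside the Kyuubi container; the account name, namespace and bucket below are illustrative placeholders, not values taken from the test environment:

# Sketch only: register Spark settings against a spark8t service account ...
spark-client.service-account-registry add-config \
    --username spark --namespace tests \
    --conf spark.hadoop.fs.s3a.path.style.access=true \
    --conf spark.sql.warehouse.dir=s3a://example-bucket/warehouse

# ... then materialise everything stored for that account as a properties file.
spark-client.service-account-registry get-config \
    --username spark --namespace tests > /etc/spark8t/conf/spark-defaults.conf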

tests/integration/integration-tests.sh

Lines changed: 83 additions & 91 deletions
@@ -21,6 +21,7 @@ source ./tests/integration/utils/azure-utils.sh

# Global Variables
NAMESPACE=tests
+SERVICE_ACCOUNT=spark
ADMIN_POD_NAME=testpod-admin
S3_BUCKET=spark-$(uuidgen)
AZURE_CONTAINER=$S3_BUCKET
@@ -154,153 +155,95 @@ run_example_job_in_pod() {
    validate_pi_value $pi
}

+setup_s3_properties_in_pod(){
+    # Setup S3 related Spark properties in the service account inside the pod

-test_iceberg_example_in_pod(){
-    # Test Iceberg integration in Charmed Spark Rock
-
-    # First create S3 bucket named 'spark'
-    create_s3_bucket $S3_BUCKET
-
-    # Copy 'test-iceberg.py' script to 'spark' bucket
-    copy_file_to_s3_bucket $S3_BUCKET ./tests/integration/resources/test-iceberg.py
-
-    NAMESPACE="tests"
-    USERNAME="spark"
-
-    # Number of rows that are to be inserted during the test.
-    NUM_ROWS_TO_INSERT="4"
-
-    # Number of driver pods that exist in the namespace already.
-    PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver | wc -l)
-
-    # Submit the job from inside 'testpod'
    kubectl -n $NAMESPACE exec testpod -- \
      env \
-        UU="$USERNAME" \
+        UU="$SERVICE_ACCOUNT" \
        NN="$NAMESPACE" \
-        IM="$(spark_image)" \
-        NUM_ROWS="$NUM_ROWS_TO_INSERT" \
        ACCESS_KEY="$(get_s3_access_key)" \
        SECRET_KEY="$(get_s3_secret_key)" \
        S3_ENDPOINT="$(get_s3_endpoint)" \
        BUCKET="$S3_BUCKET" \
      /bin/bash -c '\
-        spark-client.spark-submit \
+        spark-client.service-account-registry add-config \
        --username $UU --namespace $NN \
-        --conf spark.kubernetes.driver.request.cores=100m \
-        --conf spark.kubernetes.executor.request.cores=100m \
-        --conf spark.kubernetes.container.image=$IM \
        --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
        --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \
        --conf spark.hadoop.fs.s3a.path.style.access=true \
        --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \
        --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
        --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
-        --conf spark.jars.ivy=/tmp \
-        --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
-        --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
-        --conf spark.sql.catalog.spark_catalog.type=hive \
-        --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \
-        --conf spark.sql.catalog.local.type=hadoop \
-        --conf spark.sql.catalog.local.warehouse=s3a://$BUCKET/warehouse \
-        --conf spark.sql.defaultCatalog=local \
-        s3a://$BUCKET/test-iceberg.py -n $NUM_ROWS'
-
-    # Delete 'spark' bucket
-    delete_s3_bucket $S3_BUCKET
-
-    # Number of driver pods after the job is completed.
-    DRIVER_PODS_COUNT=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver | wc -l)
-
-    # If the number of driver pods is same as before, job has not been run at all!
-    if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]]
-    then
-        echo "ERROR: Sample job has not run!"
-        exit 1
-    fi
-
-    # Find the ID of the driver pod that ran the job.
-    # tail -n 1 => Filter out the last line
-    # cut -d' ' -f1 => Split by spaces and pick the first part
-    DRIVER_POD_ID=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep test-iceberg-.*-driver | tail -n 1 | cut -d' ' -f1)
-
-    # Filter out the output log line
-    OUTPUT_LOG_LINE=$(kubectl logs ${DRIVER_POD_ID} -n ${NAMESPACE} | grep 'Number of rows inserted:' )
+        --conf spark.sql.catalog.local.warehouse=s3a://$BUCKET/warehouse'
+}

-    # Fetch out the number of rows inserted
-    # rev => Reverse the string
-    # cut -d' ' -f1 => Split by spaces and pick the first part
-    # rev => Reverse the string back
-    NUM_ROWS_INSERTED=$(echo $OUTPUT_LOG_LINE | rev | cut -d' ' -f1 | rev)
+setup_azure_storage_properties_in_pod(){
+    # Setup Azure Storage related Spark properties in the service account inside the pod

-    if [ "${NUM_ROWS_INSERTED}" != "${NUM_ROWS_TO_INSERT}" ]; then
-        echo "ERROR: ${NUM_ROWS_TO_INSERT} were supposed to be inserted. Found ${NUM_ROWS_INSERTED} rows. Aborting with exit code 1."
-        exit 1
-    fi
+    warehouse_path=$(construct_resource_uri $AZURE_CONTAINER warehouse abfss)

+    kubectl -n $NAMESPACE exec testpod -- \
+      env \
+        UU="$SERVICE_ACCOUNT" \
+        NN="$NAMESPACE" \
+        ACCOUNT_NAME="$(get_azure_storage_account_name)" \
+        SECRET_KEY="$(get_azure_storage_secret_key)" \
+        WAREHOUSE="$warehouse_path" \
+      /bin/bash -c '\
+        spark-client.service-account-registry add-config \
+        --username $UU --namespace $NN \
+        --conf spark.hadoop.fs.azure.account.key.$ACCOUNT_NAME.dfs.core.windows.net=$SECRET_KEY \
+        --conf spark.sql.catalog.local.warehouse=$WAREHOUSE'
}


-test_iceberg_example_in_pod_with_azure_using_abfss(){
-    # Test Iceberg integration in Charmed Spark Rock with Azure Storage
-
-    # First create S3 bucket named 'spark'
-    create_azure_container $AZURE_CONTAINER
-
-    # Copy 'test-iceberg.py' script to 'spark' bucket
-    copy_file_to_azure_container $AZURE_CONTAINER ./tests/integration/resources/test-iceberg.py
+test_iceberg_example_in_pod(){
+    # Test Iceberg integration in Charmed Spark Rock
+    #
+    # Arguments:
+    # $1: The path of the script in the cloud
+    echo $0 $1

-    STORAGE_ACCOUNT_NAME=$(get_storage_account)
-    STORAGE_ACCOUNT_KEY=$(get_azure_secret_key)
-    USERNAME="spark"

    # Number of rows that are to be inserted during the test.
    NUM_ROWS_TO_INSERT="4"
+    script_path=$1

    # Number of driver pods that exist in the namespace already.
    PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver | wc -l)

-    iceberg_script=$(construct_resource_uri $AZURE_CONTAINER test-iceberg.py abfss)
-    warehouse_path=$(construct_resource_uri $AZURE_CONTAINER warehouse abfss)
    # Submit the job from inside 'testpod'
    kubectl -n $NAMESPACE exec testpod -- \
      env \
        UU="$USERNAME" \
        NN="$NAMESPACE" \
        IM="$(spark_image)" \
        NUM_ROWS="$NUM_ROWS_TO_INSERT" \
-        ACCOUNT_NAME="$STORAGE_ACCOUNT_NAME" \
-        SECRET_KEY="$STORAGE_ACCOUNT_KEY" \
-        SCRIPT="$iceberg_script" \
-        WAREHOUSE="$warehouse_path" \
+        SCRIPT="$script_path" \
      /bin/bash -c '\
        spark-client.spark-submit \
        --username $UU --namespace $NN \
        --conf spark.kubernetes.driver.request.cores=100m \
        --conf spark.kubernetes.executor.request.cores=100m \
        --conf spark.kubernetes.container.image=$IM \
-        --conf spark.hadoop.fs.azure.account.key.$ACCOUNT_NAME.dfs.core.windows.net=$SECRET_KEY \
        --conf spark.jars.ivy=/tmp \
        --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
        --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
        --conf spark.sql.catalog.spark_catalog.type=hive \
        --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \
        --conf spark.sql.catalog.local.type=hadoop \
-        --conf spark.sql.catalog.local.warehouse=$WAREHOUSE \
        --conf spark.sql.defaultCatalog=local \
        $SCRIPT -n $NUM_ROWS'

-    # Delete 'spark' bucket
-    delete_azure_container $AZURE_CONTAINER
-
    # Number of driver pods after the job is completed.
    DRIVER_PODS_COUNT=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver | wc -l)

    # If the number of driver pods is same as before, job has not been run at all!
    if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]]
    then
        echo "ERROR: Sample job has not run!"
-        exit 1
+        return 1
    fi

    # Find the ID of the driver pod that ran the job.
@@ -319,9 +262,58 @@ test_iceberg_example_in_pod_with_azure_using_abfss(){

    if [ "${NUM_ROWS_INSERTED}" != "${NUM_ROWS_TO_INSERT}" ]; then
        echo "ERROR: ${NUM_ROWS_TO_INSERT} were supposed to be inserted. Found ${NUM_ROWS_INSERTED} rows. Aborting with exit code 1."
-        exit 1
+        return 1
    fi

+    return 0
+}
+
+
+test_iceberg_example_in_pod_using_s3(){
+    # Test Iceberg integration in Charmed Spark Rock using S3
+
+    # First create S3 bucket named 'spark'
+    create_s3_bucket $S3_BUCKET
+
+    # Now, setup S3 properties in service account inside the pod
+    setup_s3_properties_in_pod
+
+    # Copy 'test-iceberg.py' script to 'spark' bucket
+    copy_file_to_s3_bucket $S3_BUCKET ./tests/integration/resources/test-iceberg.py
+    script_path="s3a://$S3_BUCKET/test-iceberg.py"
+
+    test_iceberg_example_in_pod $script_path
+    return_value=$?
+
+    delete_s3_bucket $S3_BUCKET
+
+    if [ $return_value -eq 1 ]; then
+        exit 1
+    fi
+}
+
+
+test_iceberg_example_in_pod_using_abfss(){
+    # Test Iceberg integration in Charmed Spark Rock with Azure Storage
+
+    # First create S3 bucket named 'spark'
+    create_azure_container $AZURE_CONTAINER
+
+    # Now, setup S3 properties in service account inside the pod
+    setup_azure_storage_properties_in_pod
+
+    # Copy 'test-iceberg.py' script to 'spark' bucket
+    copy_file_to_azure_container $AZURE_CONTAINER ./tests/integration/resources/test-iceberg.py
+    script_path=$(construct_resource_uri $AZURE_CONTAINER test-iceberg.py abfss)
+
+    test_iceberg_example_in_pod $script_path
+    return_value=$?
+
+    delete_azure_container $AZURE_CONTAINER
+
+    if [ $return_value -eq 1 ]; then
+        exit 1
+    fi
}


@@ -642,13 +634,13 @@ echo -e "##################################"
echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES"
echo -e "##################################"

-(setup_user_context && test_iceberg_example_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod
+(setup_user_context && test_iceberg_example_in_pod_using_s3 && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "##################################"
echo -e "RUN EXAMPLE THAT USES AZURE STORAGE"
echo -e "##################################"

-(setup_user_context && test_iceberg_example_in_pod_with_azure_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod
+(setup_user_context && test_iceberg_example_in_pod_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "##################################"
echo -e "TEARDOWN TEST POD"
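The switch from exit 1 to return 1 inside the shared test function matters for the wrappers invoked above: each wrapper captures the status with $?, always deletes its bucket or container, and only then decides whether to fail the suite. A minimal sketch of that pattern, with hypothetical function names standing in for the real helpers:

# Sketch only: 'run_test' and 'cleanup_storage' are hypothetical stand-ins.
run_test(){
    # ... submit the job and validate the output ...
    return 1    # signal failure to the caller instead of exiting the whole script
}

run_test
return_value=$?

cleanup_storage    # cleanup always runs, even when the test failed

if [ $return_value -eq 1 ]; then
    exit 1
fi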

tests/integration/utils/azure-utils.sh

Lines changed: 3 additions & 3 deletions
@@ -18,13 +18,13 @@ if ! azcli storage container list > /dev/null 2>&1; then
fi


-get_storage_account(){
+get_azure_storage_account_name(){
    # Print the name of the azure container (from the environment variable).
    echo $AZURE_STORAGE_ACCOUNT
}


-get_azure_secret_key(){
+get_azure_storage_secret_key(){
    # Print the secret key for the Azure storage account used for test.
    echo $AZURE_STORAGE_KEY
}
@@ -83,7 +83,7 @@ construct_resource_uri(){
    container=$1
    path=$2
    protocol=$3
-    account_name=$(get_storage_account)
+    account_name=$(get_azure_storage_account_name)

    case "$protocol" in
        "abfs")

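As a rough usage sketch of the renamed helpers together with construct_resource_uri (the container name and the exact shape of the resulting URI shown here are assumptions, not output captured from the tests):

# Sketch only: read the storage account details, then build an abfss:// URI.
account_name=$(get_azure_storage_account_name)   # echoes $AZURE_STORAGE_ACCOUNT
account_key=$(get_azure_storage_secret_key)      # echoes $AZURE_STORAGE_KEY

# Assumed result shape: abfss://<container>@<account>.dfs.core.windows.net/warehouse
warehouse_uri=$(construct_resource_uri my-container warehouse abfss)
echo "$warehouse_uri"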