
Commit 7b419e3

PR feedbacks
1 parent 9f38d47 commit 7b419e3

File tree

1 file changed (+56, -33 lines)


tests/integration/integration-tests.sh

Lines changed: 56 additions & 33 deletions
@@ -175,6 +175,7 @@ setup_s3_properties_in_pod(){
         --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \
         --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
         --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
+        --conf spark.sql.warehouse.dir=s3a://$BUCKET/warehouse \
         --conf spark.sql.catalog.local.warehouse=s3a://$BUCKET/warehouse'
 }

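For reference, a sketch of the full S3 configuration call after this change, assembled from this hunk plus the add-config invocation visible in the Azure hunk below (the flag names are exactly those shown in the diff; the surrounding lines are assumed):

spark-client.service-account-registry add-config \
    --username $UU --namespace $NN \
    --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \
    --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
    --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
    --conf spark.sql.warehouse.dir=s3a://$BUCKET/warehouse \
    --conf spark.sql.catalog.local.warehouse=s3a://$BUCKET/warehouse

Pointing spark.sql.warehouse.dir and spark.sql.catalog.local.warehouse at the same s3a:// path keeps managed tables in the default catalog and in the 'local' catalog (used by the Iceberg test) under one warehouse location.
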
@@ -194,6 +195,7 @@ setup_azure_storage_properties_in_pod(){
     spark-client.service-account-registry add-config \
         --username $UU --namespace $NN \
         --conf spark.hadoop.fs.azure.account.key.$ACCOUNT_NAME.dfs.core.windows.net=$SECRET_KEY \
+        --conf spark.sql.warehouse.dir=$WAREHOUSE \
         --conf spark.sql.catalog.local.warehouse=$WAREHOUSE'
 }

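$WAREHOUSE is defined elsewhere in the script and is not part of this diff. Judging from construct_resource_uri used in the abfss test further down, a hypothetical construction would be:

# Hypothetical illustration only; the actual WAREHOUSE assignment is not shown in this commit.
WAREHOUSE=$(construct_resource_uri $AZURE_CONTAINER warehouse abfss)
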
@@ -203,8 +205,6 @@ test_iceberg_example_in_pod(){
     #
     # Arguments:
     # $1: The path of the script in the cloud
-    echo $0 $1
-

     # Number of rows that are to be inserted during the test.
     NUM_ROWS_TO_INSERT="4"

@@ -296,13 +296,13 @@ test_iceberg_example_in_pod_using_s3(){
 test_iceberg_example_in_pod_using_abfss(){
     # Test Iceberg integration in Charmed Spark Rock with Azure Storage

-    # First create S3 bucket named 'spark'
+    # First create an Azure Storage container
     create_azure_container $AZURE_CONTAINER

-    # Now, setup S3 properties in service account inside the pod
+    # Now, setup Azure Storage properties in service account inside the pod
     setup_azure_storage_properties_in_pod

-    # Copy 'test-iceberg.py' script to 'spark' bucket
+    # Copy 'test-iceberg.py' script to the Azure Storage container
     copy_file_to_azure_container $AZURE_CONTAINER ./tests/integration/resources/test-iceberg.py
     script_path=$(construct_resource_uri $AZURE_CONTAINER test-iceberg.py abfss)

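For context, abfss resource URIs for Azure Data Lake Storage Gen2 follow the container@account pattern below; that construct_resource_uri emits exactly this shape is an assumption, not something this diff shows:

# Assumed output shape of construct_resource_uri for the abfss protocol:
#   abfss://<container>@<storage-account>.dfs.core.windows.net/<path>
script_path="abfss://$AZURE_CONTAINER@$ACCOUNT_NAME.dfs.core.windows.net/test-iceberg.py"
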
@@ -500,18 +500,18 @@ test_spark_shell_in_pod() {
     run_spark_shell_in_pod $NAMESPACE spark
 }

-run_spark_sql_in_pod() {
-    echo "run_spark_sql_in_pod ${1} ${2}"
-
-    NAMESPACE=$1
-    USERNAME=$2
+run_spark_sql_in_pod(){
+    # Test Spark SQL inside a pod that runs charmed spark rock.
+    #
+    # Arguments:
+    # $1: The path to the file that contains the lines to be passed to Spark SQL

-    SPARK_SQL_COMMANDS=$(cat ./tests/integration/resources/test-spark-sql.sql)
-    create_s3_bucket $S3_BUCKET
+    sql_script_file=$1

+    SPARK_SQL_COMMANDS=$(cat $sql_script_file)
     echo -e "$(kubectl -n $NAMESPACE exec testpod -- \
         env \
-        UU="$USERNAME" \
+        UU="$SERVICE_ACCOUNT" \
         NN="$NAMESPACE" \
         CMDS="$SPARK_SQL_COMMANDS" \
         IM=$(spark_image) \
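After this refactor the helper takes only the SQL script path; the service account and namespace now come from the caller's environment ($SERVICE_ACCOUNT and $NAMESPACE). A minimal sketch of a call, with a hypothetical SQL file that would satisfy the "Inserted Rows:" check performed below (the real test-spark-sql.sql is not shown in this commit; the values spark/tests mirror the old call run_spark_sql_in_pod tests spark):

# Hypothetical stand-in for tests/integration/resources/test-spark-sql.sql;
# any script printing a line matching "^Inserted Rows: 3" passes the check.
cat > /tmp/test-spark-sql.sql <<'EOF'
CREATE DATABASE IF NOT EXISTS sparkdb;
CREATE TABLE IF NOT EXISTS sparkdb.nums (n INT);
INSERT INTO sparkdb.nums VALUES (1), (2), (3);
SELECT CONCAT('Inserted Rows: ', COUNT(*)) FROM sparkdb.nums;
EOF

SERVICE_ACCOUNT=spark NAMESPACE=tests run_spark_sql_in_pod /tmp/test-spark-sql.sql
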
@@ -520,37 +520,54 @@ run_spark_sql_in_pod() {
         S3_ENDPOINT=$(get_s3_endpoint) \
         BUCKET="$S3_BUCKET" \
         /bin/bash -c 'echo "$CMDS" | spark-client.spark-sql \
-        --username $UU \
-        --namespace $NN \
+        --username $UU --namespace $NN \
         --conf spark.kubernetes.container.image=$IM \
-        --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
-        --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \
-        --conf spark.hadoop.fs.s3a.path.style.access=true \
-        --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \
-        --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
-        --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
         --conf spark.driver.extraJavaOptions='-Dderby.system.home=/tmp/derby' \
-        --conf spark.sql.warehouse.dir=s3a://$BUCKET/warehouse')" > spark-sql.out
-
-    # derby.system.home=/tmp/derby is needed because
-    # kubectl exec runs commands with `/` as working directory
-    # and by default derby.system.home has value `.`, the current working directory
-    # (for which _daemon_ user has no permission on)
+        ')" > spark-sql.out

     num_rows_inserted=$(cat spark-sql.out | grep "^Inserted Rows:" | rev | cut -d' ' -f1 | rev )
     echo -e "${num_rows_inserted} rows were inserted."
     rm spark-sql.out
-    delete_s3_bucket $S3_BUCKET
     if [ "${num_rows_inserted}" != "3" ]; then
         echo "ERROR: Testing spark-sql failed. ${num_rows_inserted} out of 3 rows were inserted. Aborting with exit code 1."
-        exit 1
+        return 1
     fi
+
+    return 0
 }

-test_spark_sql_in_pod() {
-    run_spark_sql_in_pod tests spark
+test_spark_sql_in_pod_using_s3() {
+    # Test Spark SQL with S3 as object storage
+
+    create_s3_bucket $S3_BUCKET
+    setup_s3_properties_in_pod
+
+    run_spark_sql_in_pod ./tests/integration/resources/test-spark-sql.sql
+    return_value=$?
+
+    delete_s3_bucket $S3_BUCKET
+
+    if [ $return_value -eq 1 ]; then
+        exit 1
+    fi
+}
+
+test_spark_sql_in_pod_using_abfss() {
+    # Test Spark SQL with Azure Blob as object storage (using abfss protocol)
+    create_azure_container $AZURE_CONTAINER
+    setup_azure_storage_properties_in_pod
+
+    run_spark_sql_in_pod ./tests/integration/resources/test-spark-sql.sql
+    return_value=$?
+
+    delete_azure_container $AZURE_CONTAINER
+
+    if [ $return_value -eq 1 ]; then
+        exit 1
+    fi
 }

+
 run_pyspark_in_pod() {
     echo "run_pyspark_in_pod ${1} ${2}"

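Note the ordering in the two new wrappers: the helper's return code is captured into return_value before the bucket or container is deleted, so storage cleanup runs even when the SQL test fails, and only then does the wrapper abort. The pattern in isolation (function names here are placeholders, not from the script):

# Capture-then-cleanup: remember the result first, tear down, then decide.
run_some_check              # placeholder for run_spark_sql_in_pod ...
return_value=$?
cleanup_storage             # placeholder for delete_s3_bucket / delete_azure_container
if [ $return_value -ne 0 ]; then
    exit 1
fi
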
@@ -607,10 +624,16 @@ echo -e "##################################"
 (setup_user_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod

 echo -e "##################################"
-echo -e "RUN SPARK SQL IN POD"
+echo -e "RUN SPARK SQL IN POD (Using S3 Object Storage)"
+echo -e "##################################"
+
+(setup_user_context && test_spark_sql_in_pod_using_s3 && cleanup_user_success) || cleanup_user_failure_in_pod
+
+echo -e "##################################"
+echo -e "RUN SPARK SQL IN POD (Using Azure Storage ABFSS)"
 echo -e "##################################"

-(setup_user_context && test_spark_sql_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod
+(setup_user_context && test_spark_sql_in_pod_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod

 echo -e "##################################"
 echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE"