@@ -175,6 +175,7 @@ setup_s3_properties_in_pod(){
     --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \
     --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
     --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
+    --conf spark.sql.warehouse.dir=s3a://$BUCKET/warehouse \
     --conf spark.sql.catalog.local.warehouse=s3a://$BUCKET/warehouse'
 }
 
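With spark.sql.warehouse.dir now registered on the service account, the Spark SQL warehouse and the Iceberg catalog warehouse both point at the warehouse/ prefix of the test bucket, so individual spark-sql invocations no longer need to repeat the setting (the later hunk below removes it from the invocation). A quick way to confirm that table data really lands there after a run is to list that prefix; this is a sketch that assumes the AWS CLI is installed and pointed at the same endpoint and credentials the test uses:

    # List everything written under the warehouse prefix of the test bucket.
    aws --endpoint-url "$S3_ENDPOINT" s3 ls "s3://$BUCKET/warehouse/" --recursive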
@@ -194,6 +195,7 @@ setup_azure_storage_properties_in_pod(){
   spark-client.service-account-registry add-config \
     --username $UU --namespace $NN \
     --conf spark.hadoop.fs.azure.account.key.$ACCOUNT_NAME.dfs.core.windows.net=$SECRET_KEY \
+    --conf spark.sql.warehouse.dir=$WAREHOUSE \
     --conf spark.sql.catalog.local.warehouse=$WAREHOUSE'
 }
 
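The Azure variant reuses a $WAREHOUSE value for both properties instead of spelling the URI out inline. How $WAREHOUSE is built is not shown in this diff; given the abfss scheme and the storage-account key registered above, a plausible construction (variable names taken from the surrounding script, path suffix assumed) would be:

    # Assumed shape of the warehouse URI for ADLS Gen2 / abfss access.
    WAREHOUSE="abfss://${AZURE_CONTAINER}@${ACCOUNT_NAME}.dfs.core.windows.net/warehouse"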
@@ -203,8 +205,6 @@ test_iceberg_example_in_pod(){
   #
   # Arguments:
   #   $1: The path of the script in the cloud
-  echo $0 $1
-
 
   # Number of rows that are to be inserted during the test.
   NUM_ROWS_TO_INSERT="4"
@@ -296,13 +296,13 @@ test_iceberg_example_in_pod_using_s3(){
 test_iceberg_example_in_pod_using_abfss(){
   # Test Iceberg integration in Charmed Spark Rock with Azure Storage
 
-  # First create S3 bucket named 'spark'
+  # First create an Azure Storage container
   create_azure_container $AZURE_CONTAINER
 
-  # Now, setup S3 properties in service account inside the pod
+  # Now, setup Azure Storage properties in service account inside the pod
   setup_azure_storage_properties_in_pod
 
-  # Copy 'test-iceberg.py' script to 'spark' bucket
+  # Copy 'test-iceberg.py' script to the Azure Storage container
   copy_file_to_azure_container $AZURE_CONTAINER ./tests/integration/resources/test-iceberg.py
   script_path=$(construct_resource_uri $AZURE_CONTAINER test-iceberg.py abfss)
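copy_file_to_azure_container and construct_resource_uri are existing helpers whose bodies sit outside this diff. For orientation only, the copy step could be reproduced with the Azure CLI roughly as follows, assuming key-based authentication with the same $ACCOUNT_NAME and $SECRET_KEY used in the property setup above (this is a sketch, not the repo's actual helper):

    # Upload the test script so the Spark driver can later fetch it via an abfss:// URI.
    az storage blob upload \
      --account-name "$ACCOUNT_NAME" \
      --account-key "$SECRET_KEY" \
      --container-name "$AZURE_CONTAINER" \
      --file ./tests/integration/resources/test-iceberg.py \
      --name test-iceberg.py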
@@ -500,18 +500,18 @@ test_spark_shell_in_pod() {
   run_spark_shell_in_pod $NAMESPACE spark
 }
 
-run_spark_sql_in_pod() {
-  echo "run_spark_sql_in_pod ${1} ${2}"
-
-  NAMESPACE=$1
-  USERNAME=$2
+run_spark_sql_in_pod(){
+  # Test Spark SQL inside a pod that runs the Charmed Spark rock.
+  #
+  # Arguments:
+  #   $1: The path to the file that contains the lines to be passed to Spark SQL
 
-  SPARK_SQL_COMMANDS=$(cat ./tests/integration/resources/test-spark-sql.sql)
-  create_s3_bucket $S3_BUCKET
+  sql_script_file=$1
 
+  SPARK_SQL_COMMANDS=$(cat $sql_script_file)
   echo -e "$(kubectl -n $NAMESPACE exec testpod -- \
       env \
-        UU="$USERNAME" \
+        UU="$SERVICE_ACCOUNT" \
         NN="$NAMESPACE" \
         CMDS="$SPARK_SQL_COMMANDS" \
         IM=$(spark_image) \
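The refactored run_spark_sql_in_pod keeps the script's usual pattern of handing values to the pod through env, so that the single-quoted command string is expanded by the shell inside the container rather than by the local shell. A minimal, self-contained illustration of that pattern (the pod name and variable are placeholders):

    # GREETING is resolved inside the pod; the single quotes prevent local expansion.
    kubectl -n "$NAMESPACE" exec testpod -- \
      env GREETING="hello from the test harness" \
      /bin/bash -c 'echo "$GREETING"'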
@@ -520,37 +520,54 @@ run_spark_sql_in_pod() {
         S3_ENDPOINT=$(get_s3_endpoint) \
         BUCKET="$S3_BUCKET" \
         /bin/bash -c 'echo "$CMDS" | spark-client.spark-sql \
-        --username $UU \
-        --namespace $NN \
+        --username $UU --namespace $NN \
         --conf spark.kubernetes.container.image=$IM \
-        --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
-        --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \
-        --conf spark.hadoop.fs.s3a.path.style.access=true \
-        --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \
-        --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \
-        --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \
         --conf spark.driver.extraJavaOptions='-Dderby.system.home=/tmp/derby' \
-        --conf spark.sql.warehouse.dir=s3a://$BUCKET/warehouse')" > spark-sql.out
-
-  # derby.system.home=/tmp/derby is needed because
-  # kubectl exec runs commands with `/` as working directory
-  # and by default derby.system.home has value `.`, the current working directory
-  # (for which _daemon_ user has no permission on)
+        ')" > spark-sql.out
 
   num_rows_inserted=$(cat spark-sql.out | grep "^Inserted Rows:" | rev | cut -d' ' -f1 | rev)
   echo -e "${num_rows_inserted} rows were inserted."
   rm spark-sql.out
-  delete_s3_bucket $S3_BUCKET
   if [ "${num_rows_inserted}" != "3" ]; then
     echo "ERROR: Testing spark-sql failed. ${num_rows_inserted} out of 3 rows were inserted. Aborting with exit code 1."
-    exit 1
+    return 1
   fi
+
+  return 0
 }
 
-test_spark_sql_in_pod() {
-  run_spark_sql_in_pod tests spark
+test_spark_sql_in_pod_using_s3() {
+  # Test Spark SQL with S3 as object storage
+
+  create_s3_bucket $S3_BUCKET
+  setup_s3_properties_in_pod
+
+  run_spark_sql_in_pod ./tests/integration/resources/test-spark-sql.sql
+  return_value=$?
+
+  delete_s3_bucket $S3_BUCKET
+
+  if [ $return_value -eq 1 ]; then
+    exit 1
+  fi
+}
+
+test_spark_sql_in_pod_using_abfss() {
+  # Test Spark SQL with Azure Blob as object storage (using abfss protocol)
+  create_azure_container $AZURE_CONTAINER
+  setup_azure_storage_properties_in_pod
+
+  run_spark_sql_in_pod ./tests/integration/resources/test-spark-sql.sql
+  return_value=$?
+
+  delete_azure_container $AZURE_CONTAINER
+
+  if [ $return_value -eq 1 ]; then
+    exit 1
+  fi
 }
 
+
 run_pyspark_in_pod() {
   echo "run_pyspark_in_pod ${1} ${2}"
 
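Both new tests feed the same tests/integration/resources/test-spark-sql.sql file to run_spark_sql_in_pod, which greps the spark-sql output for a line starting with "Inserted Rows:" and expects the trailing value to be 3. The SQL file itself is not part of this diff; a hypothetical script consistent with that check, generated here with printf purely for illustration, could be:

    # Hypothetical contents only; the real resource file may differ.
    printf '%s\n' \
      "CREATE DATABASE IF NOT EXISTS demo;" \
      "USE demo;" \
      "CREATE TABLE IF NOT EXISTS t (id INT, name STRING);" \
      "INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c');" \
      "SELECT CONCAT('Inserted Rows: ', COUNT(*)) FROM t;" \
      > /tmp/test-spark-sql.sql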
@@ -607,10 +624,16 @@ echo -e "##################################"
 (setup_user_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod
 
 echo -e "##################################"
-echo -e "RUN SPARK SQL IN POD"
+echo -e "RUN SPARK SQL IN POD (Using S3 Object Storage)"
+echo -e "##################################"
+
+(setup_user_context && test_spark_sql_in_pod_using_s3 && cleanup_user_success) || cleanup_user_failure_in_pod
+
+echo -e "##################################"
+echo -e "RUN SPARK SQL IN POD (Using Azure Storage ABFSS)"
 echo -e "##################################"
 
-(setup_user_context && test_spark_sql_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod
+(setup_user_context && test_spark_sql_in_pod_using_abfss && cleanup_user_success) || cleanup_user_failure_in_pod
 
 echo -e "##################################"
 echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE"