Skip to content

Commit 037cf38

Browse files
authored
Update the Getting Started Workflow with each Cloud Provider's Blob Storage (#1435)
* AWS First Draft * Debug * revert typo * Add JQ to docker runtime * Debug, pt2 * debug * debug * Allow Instance Profile Roles * change random suffix * change instance profile to regular IAM roles * AWS Final Draft * Azure First Draft * debug * Azure First Draft * debug * typo * GCP First Try * GCP Complete * GCP Final * add all jars to Spark * refactor
1 parent 6a21722 commit 037cf38

File tree

8 files changed

+104
-22
lines changed

8 files changed

+104
-22
lines changed

getting-started/assets/cloud_providers/deploy-aws.sh

+12-3
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,22 @@ EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id)
2121

2222
DESCRIBE_INSTANCE=$(aws ec2 describe-instances \
2323
--instance-ids $EC2_INSTANCE_ID \
24-
--query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \
24+
--query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone,RoleArn:IamInstanceProfile.Arn}' \
2525
--output json)
2626

2727
CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC")
28-
2928
CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//')
29+
RAW_ROLE_ARN=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."RoleArn")
30+
export AWS_ROLE_ARN="${RAW_ROLE_ARN/instance-profile/role}"
31+
3032

3133
ALL_SUBNETS=$(aws ec2 describe-subnets \
3234
--region $CURRENT_REGION \
3335
--query 'Subnets[*].{SubnetId:SubnetId}' \
3436
--output json \
3537
| jq -r '[.[]["SubnetId"]] | join(" ")')
3638

37-
RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'A-Za-z0-9' | head -c 8)
39+
RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8)
3840
SUBNET_GROUP_NAME="polaris-db-subnet-group-$RANDOM_SUFFIX"
3941
INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX"
4042

@@ -69,6 +71,13 @@ POSTGRES_ADDR=$(echo $DESCRIBE_DB | jq -r '.["DBInstances"][0]["Endpoint"]' | jq
6971
FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed 's/[&/\]/\\&/g')
7072
sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml"
7173

74+
S3_BUCKET_NAME="polaris-quickstart-s3-$RANDOM_SUFFIX"
75+
echo "S3 Bucket Name: $S3_BUCKET_NAME"
76+
77+
aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --create-bucket-configuration LocationConstraint=$CURRENT_REGION
78+
79+
export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/"
80+
7281
./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
7382
-Dquarkus.container-image.tag=postgres-latest \
7483
-Dquarkus.container-image.build=true \

getting-started/assets/cloud_providers/deploy-azure.sh

+36-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
# under the License.
1818
#
1919

20-
CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location')
21-
CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName')
20+
DESCRIBE_INSTANCE=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01")
21+
CURRENT_RESOURCE_GROUP=$(echo $DESCRIBE_INSTANCE | jq -r '.compute.resourceGroupName')
22+
CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r '.compute.location')
23+
CURRENT_VM_NAME=$(echo $DESCRIBE_INSTANCE | jq -r '.compute.name')
2224
RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8)
2325
INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX"
2426

@@ -31,6 +33,38 @@ POSTGRES_ADDR=$(echo $CREATE_DB_RESPONSE | jq -r '.host')
3133
FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g')
3234
sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml"
3335

36+
STORAGE_ACCOUNT_NAME="polaristest$RANDOM_SUFFIX"
37+
STORAGE_CONTAINER_NAME="polaris-test-container-$RANDOM_SUFFIX"
38+
39+
az storage account create \
40+
--name "$STORAGE_ACCOUNT_NAME" \
41+
--resource-group "$CURRENT_RESOURCE_GROUP" \
42+
--location "$CURRENT_REGION" \
43+
--sku Standard_LRS \
44+
--kind StorageV2 \
45+
--enable-hierarchical-namespace false
46+
47+
az storage container create \
48+
--account-name "$STORAGE_ACCOUNT_NAME" \
49+
--name "$STORAGE_CONTAINER_NAME" \
50+
--auth-mode login
51+
52+
ASSIGNEE_PRINCIPAL_ID=$(az vm show --name $CURRENT_VM_NAME --resource-group $CURRENT_RESOURCE_GROUP --query identity.principalId -o tsv)
53+
SCOPE=$(az storage account show --name $STORAGE_ACCOUNT_NAME --resource-group $CURRENT_RESOURCE_GROUP --query id -o tsv)
54+
ROLE="Storage Blob Data Contributor"
55+
az role assignment create \
56+
--assignee $ASSIGNEE_PRINCIPAL_ID \
57+
--role "$ROLE" \
58+
--scope "$SCOPE"
59+
60+
export AZURE_TENANT_ID=$(az account show --query tenantId -o tsv)
61+
export STORAGE_LOCATION="abfss://$STORAGE_CONTAINER_NAME@$STORAGE_ACCOUNT_NAME.dfs.core.windows.net/quickstart_catalog"
62+
63+
cat >> getting-started/eclipselink/trino-config/catalog/iceberg.properties << EOF
64+
fs.native-azure.enabled=true
65+
azure.auth-type=DEFAULT
66+
EOF
67+
3468
./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
3569
-Dquarkus.container-image.tag=postgres-latest \
3670
-Dquarkus.container-image.build=true \

getting-started/assets/cloud_providers/deploy-gcp.sh

+6
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ POSTGRES_ADDR=$(gcloud sql instances describe $DB_INSTANCE_NAME --format="get(ip
4141
FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g')
4242
sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml"
4343

44+
GCS_BUCKET_NAME="polaris-test-gcs-$RANDOM_SUFFIX"
45+
echo "GCS Bucket Name: $GCS_BUCKET_NAME"
46+
47+
gcloud storage buckets create "gs://$GCS_BUCKET_NAME" --location=$CURRENT_REGION
48+
export STORAGE_LOCATION="gs://$GCS_BUCKET_NAME/quickstart_catalog/"
49+
4450
./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
4551
-Dquarkus.container-image.tag=postgres-latest \
4652
-Dquarkus.container-image.build=true \

getting-started/assets/polaris/create-catalog.sh

+41-16
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
set -e
2121

22+
apk add --no-cache jq
23+
2224
token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
2325
--user root:s3cr3t \
2426
-d grant_type=client_credentials \
@@ -32,29 +34,52 @@ fi
3234
echo
3335
echo "Obtained access token: ${token}"
3436

37+
STORAGE_TYPE="FILE"
38+
if [ -z "${STORAGE_LOCATION}" ]; then
39+
echo "STORAGE_LOCATION is not set, using FILE storage type"
40+
STORAGE_LOCATION="file:///var/tmp/quickstart_catalog/"
41+
else
42+
echo "STORAGE_LOCATION is set to '$STORAGE_LOCATION'"
43+
if [[ "$STORAGE_LOCATION" == s3* ]]; then
44+
STORAGE_TYPE="S3"
45+
elif [[ "$STORAGE_LOCATION" == gs* ]]; then
46+
STORAGE_TYPE="GCS"
47+
else
48+
STORAGE_TYPE="AZURE"
49+
fi
50+
echo "Using StorageType: $STORAGE_TYPE"
51+
fi
52+
53+
STORAGE_CONFIG_INFO="{\"storageType\": \"$STORAGE_TYPE\", \"allowedLocations\": [\"$STORAGE_LOCATION\"]}"
54+
55+
if [[ "$STORAGE_TYPE" == "S3" ]]; then
56+
STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $roleArn}')
57+
elif [[ "$STORAGE_TYPE" == "AZURE" ]]; then
58+
STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: $tenantId}')
59+
fi
60+
3561
echo
3662
echo Creating a catalog named quickstart_catalog...
3763

64+
PAYLOAD='{
65+
"catalog": {
66+
"name": "quickstart_catalog",
67+
"type": "INTERNAL",
68+
"readOnly": false,
69+
"properties": {
70+
"default-base-location": "'$STORAGE_LOCATION'"
71+
},
72+
"storageConfigInfo": '$STORAGE_CONFIG_INFO'
73+
}
74+
}'
75+
76+
echo $PAYLOAD
77+
3878
curl -s -H "Authorization: Bearer ${token}" \
3979
-H 'Accept: application/json' \
4080
-H 'Content-Type: application/json' \
4181
http://polaris:8181/api/management/v1/catalogs \
42-
-d '{
43-
"catalog": {
44-
"name": "quickstart_catalog",
45-
"type": "INTERNAL",
46-
"readOnly": false,
47-
"properties": {
48-
"default-base-location": "file:///var/tmp/quickstart_catalog/"
49-
},
50-
"storageConfigInfo": {
51-
"storageType": "FILE",
52-
"allowedLocations": [
53-
"file:///var/tmp"
54-
]
55-
}
56-
}
57-
}'
82+
-d "$PAYLOAD" -v
5883

5984
echo
6085
echo Done.

getting-started/eclipselink/docker-compose.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ services:
5050
depends_on:
5151
polaris:
5252
condition: service_healthy
53+
environment:
54+
- STORAGE_LOCATION=${STORAGE_LOCATION}
55+
- AWS_ROLE_ARN=${AWS_ROLE_ARN}
56+
- AZURE_TENANT_ID=${AZURE_TENANT_ID}
5357
volumes:
5458
- ../assets/polaris/:/polaris
5559
entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"'
@@ -69,7 +73,7 @@ services:
6973
retries: 15
7074
command: [
7175
/opt/spark/bin/spark-sql,
72-
--packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17",
76+
--packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17,org.apache.iceberg:iceberg-gcp-bundle:1.7.0,org.apache.iceberg:iceberg-azure-bundle:1.7.0",
7377
--conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
7478
--conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog",
7579
--conf, "spark.sql.catalog.polaris.type=rest",

site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md

+2
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta
2727

2828
The requirements to run the script below are:
2929
* There must be at least two subnets created in the VPC and region in which your EC2 instance resides. Together, these subnets MUST span at least 2 availability zones (AZs) within the same region.
30+
* Your EC2 instance must have [IMDSv1 enabled, or IMDSv2 enabled with a hop limit of at least 2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-IMDS-new-instances.html#configure-IMDS-new-instances-instance-settings).
3031
* The AWS identity that you will use to run this script must have the following AWS permissions:
3132
* "ec2:DescribeInstances"
3233
* "rds:CreateDBInstance"
3334
* "rds:DescribeDBInstances"
3435
* "rds:CreateDBSubnetGroup"
36+
* "sts:AssumeRole" on the IAM role attached to the Instance Profile of the EC2 instance on which you are running this script. Additionally, ensure that this role's trust policy allows the role to assume itself.
3537

3638
```shell
3739
chmod +x getting-started/assets/cloud_providers/deploy-aws.sh

site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta
2828
The requirements to run the script below are:
2929
* Install the AZ CLI, if it is not already installed on the Azure VM. Instructions to download the AZ CLI can be found [here](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli).
3030
* You must be logged into the AZ CLI. Please run `az account show` to ensure that you are logged in prior to running this script.
31+
* Assign a System-Assigned Managed Identity to the Azure VM.
3132

3233
```shell
3334
chmod +x getting-started/assets/cloud_providers/deploy-azure.sh

site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta
2828
The requirements to run the script below are:
2929
* Install the `gcloud` CLI, if it is not already installed on the GCP VM. Instructions to download the `gcloud` CLI can be found [here](https://cloud.google.com/sdk/docs/install).
3030
* Ensure the `Cloud SQL Admin API` has been enabled in your project and that your VM's Principal has access to the correct role: `roles/cloudsql.admin`.
31+
* Ensure the VM's Principal has at least read-only access scope on Compute Engine: `compute.readonly`.
3132

3233
```shell
3334
chmod +x getting-started/assets/cloud_providers/deploy-gcp.sh

0 commit comments

Comments
 (0)