diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..2bcfbb9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+capideploy
+sample.json
+*.log
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..80b5c93
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,22 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Generic capideploy",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "cwd": "${workspaceFolder}",
+            "program": "${workspaceFolder}/pkg/cmd/capideploy/capideploy.go",
+            "envFile": "${env:HOME}/capideploy_aws.rc",
+            "args": [
+                "delete_networking",
+                "-p=sample.jsonnet",
+                "-v"
+            ]
+        },
+    ]
+}
\ No newline at end of file
diff --git a/1_deploy.sh b/1_deploy.sh
new file mode 100755
index 0000000..2a72ec4
--- /dev/null
+++ b/1_deploy.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Watch the progress:
+# cat ./deploy.log | grep elapsed
+# or
+# less -R ./deploy.log
+
+set -e # Exit on failure
+set -x # Print commands
+
+go build ./pkg/cmd/capideploy/capideploy.go
+
+./capideploy list_deployment_resources -p sample.jsonnet -v > deploy.log
+
+set +x
+SECONDS=0
+export BILLED_RESOURCES=$(cat deploy.log | grep ",billed")
+if [ "$BILLED_RESOURCES" != "" ]; then
+  echo "This deployment has resources that may be still/already active, please check the log"
+fi
+
+set -x # Print commands
+
+./capideploy create_floating_ips -p sample.jsonnet -v >> deploy.log
+
+set +x
+
+# Save reserved BASTION_IP so we can run capitoolbelt on bastion
+export BASTION_IP=$(cat deploy.log | grep "export BASTION_IP=" | cut -d "=" -f2)
+if [ "$BASTION_IP" = "" ]; then
+  echo "Cannot retrieve BASTION_IP"
+  exit 1
+fi
+
+# Configure SSH jumphost so we can run nodetool on Cassandra hosts (requires write access to ~/.ssh/config)
+if ! grep -q "$BASTION_IP" ~/.ssh/config; then
+  echo "Adding a new jumphost to ~/.ssh/config..."
+ echo "" | tee -a ~/.ssh/config + echo "Host $BASTION_IP" | tee -a ~/.ssh/config + echo " User $CAPIDEPLOY_SSH_USER" | tee -a ~/.ssh/config + echo " StrictHostKeyChecking=no" | tee -a ~/.ssh/config + echo " UserKnownHostsFile=/dev/null" | tee -a ~/.ssh/config + echo " IdentityFile $CAPIDEPLOY_SSH_PRIVATE_KEY_PATH" | tee -a ~/.ssh/config +fi + +set -x + +./capideploy create_networking -p sample.jsonnet -v >> deploy.log +./capideploy create_security_groups -p sample.jsonnet -v >> deploy.log +./capideploy create_volumes "*" -p sample.jsonnet -v >> deploy.log +./capideploy create_instances "*" -p sample.jsonnet -v >> deploy.log +#./capideploy create_instances "bastion" -p sample.jsonnet -v >> deploy.log +./capideploy ping_instances '*' -p sample.jsonnet -n 20 >> deploy.log +#./capideploy ping_instances "bastion" -p sample.jsonnet -n 20 >> deploy.log +./capideploy attach_volumes "bastion" -p sample.jsonnet -v >> deploy.log + +# install_services swaps sshd services, so do not use bastion as jumphost while it's in transition +./capideploy install_services "bastion" -p sample.jsonnet -v >> deploy.log +./capideploy install_services "rabbitmq,prometheus,daemon*,cass*" -p sample.jsonnet -v >> deploy.log + +# Cassandra requires special treatment: stop and config/start +./capideploy stop_services "cass*" -p sample.jsonnet -v >> deploy.log +./capideploy config_services "cass*" -p sample.jsonnet -v >> deploy.log + +./capideploy config_services "bastion,rabbitmq,prometheus,daemon*" -p sample.jsonnet -v >> deploy.log +#./capideploy config_services "bastion" -p sample.jsonnet -v >> deploy.log + +ssh -o StrictHostKeyChecking=no -i $CAPIDEPLOY_SSH_PRIVATE_KEY_PATH -J $BASTION_IP $CAPIDEPLOY_SSH_USER@10.5.0.11 'nodetool describecluster;nodetool status' + +duration=$SECONDS +echo "$(($duration / 60))m $(($duration % 60))s elapsed." + +set +x +echo To run commands against this deployment, you will probably need this: +echo export BASTION_IP=$BASTION_IP \ No newline at end of file diff --git a/2_create_images.sh b/2_create_images.sh new file mode 100755 index 0000000..6f72243 --- /dev/null +++ b/2_create_images.sh @@ -0,0 +1,21 @@ +##!/bin/bash + +set +e # Continue on failure +set -x # Print commands + +SECONDS=0 +./capideploy stop_services "*" -p sample.jsonnet -v + +set -e # Exit on failure +./capideploy detach_volumes "bastion" -p sample.jsonnet -v + +# We want to be 100% sure that cassandra has stopped +#sleep 10 + +./capideploy create_snapshot_images "*" -p sample.jsonnet -v +#./capideploy create_snapshot_images "bastion" -p sample.jsonnet -v +./capideploy delete_instances "*" -p sample.jsonnet -v +#./capideploy delete_instances "bastion" -p sample.jsonnet -v +./capideploy list_deployment_resources -p sample.jsonnet -v +duration=$SECONDS +echo "$(($duration / 60))m $(($duration % 60))s elapsed." 
diff --git a/3_restore_instances.sh b/3_restore_instances.sh
new file mode 100755
index 0000000..7b2c829
--- /dev/null
+++ b/3_restore_instances.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e # Exit on failure
+set -x # Print commands
+
+SECONDS=0
+
+./capideploy create_instances_from_snapshot_images "*" -p sample.jsonnet -v
+#./capideploy create_instances_from_snapshot_images "bastion" -p sample.jsonnet -v
+
+./capideploy ping_instances '*' -p sample.jsonnet -n 50
+#./capideploy ping_instances 'bastion' -p sample.jsonnet -n 50
+
+./capideploy attach_volumes "bastion" -p sample.jsonnet -v
+./capideploy start_services "*" -p sample.jsonnet -v
+#./capideploy start_services "bastion" -p sample.jsonnet -v
+
+# Cassandra requires one more cycle to embrace the fact that data/log directories /data0,/data1 are gone
+./capideploy stop_services "cass*" -p sample.jsonnet -v
+./capideploy start_services "cass*" -p sample.jsonnet -v
+
+duration=$SECONDS
+echo "$(($duration / 60))m $(($duration % 60))s elapsed."
+
+./capideploy list_deployment_resources -p sample.jsonnet -v
diff --git a/4_delete_images.sh b/4_delete_images.sh
new file mode 100755
index 0000000..af6d97d
--- /dev/null
+++ b/4_delete_images.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -e # Exit on failure
+set -x # Print commands
+
+./capideploy delete_snapshot_images "*" -p sample.jsonnet -v
+./capideploy list_deployment_resources -p sample.jsonnet
diff --git a/5_undeploy.sh b/5_undeploy.sh
new file mode 100755
index 0000000..02b1b1b
--- /dev/null
+++ b/5_undeploy.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set +e # Continue on failure
+set -x # Print commands
+
+./capideploy delete_snapshot_images "*" -p sample.jsonnet -v > undeploy.log
+
+./capideploy stop_services "*" -p sample.jsonnet -v >> undeploy.log
+
+set -e # Exit on failure
+./capideploy detach_volumes "bastion" -p sample.jsonnet -v >> undeploy.log
+./capideploy delete_instances "*" -p sample.jsonnet -v >> undeploy.log
+./capideploy delete_volumes "*" -p sample.jsonnet -v >> undeploy.log
+
+./capideploy delete_security_groups -p sample.jsonnet -v >> undeploy.log
+./capideploy delete_networking -p sample.jsonnet -v >> undeploy.log
+./capideploy delete_floating_ips -p sample.jsonnet -v >> undeploy.log
+
+./capideploy list_deployment_resources -p sample.jsonnet
diff --git a/README.md b/README.md
index 277203c..c0d982f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,229 @@
-# capillaries-deploy
-AWS/Azure deployment for Capillaries
+# IAM settings
+
+You can run capideploy under your AWS root account, but this is generally discouraged. Instead, let's pretend that capideploy is executed by a third party, and you want to grant that party only the specific permissions needed to create a Capillaries deployment in AWS. Giving a third party access to your AWS resources is a standard practice, and the recommended way to do that is to use IAM roles. This section discusses the AWS IAM preparation steps that create the necessary role structure. Basic familiarity with the AWS console is required.
+
+## Users and groups
+
+Let's assume all capideploy activities are performed on behalf of an IAM user named `UserCapideployOperator`. As a first step, create this user in the `IAM->Users` section of the AWS console. In `IAM->User groups`, create a group `GroupCapideployOperators` and add `UserCapideployOperator` to it.
+
+## Policies and roles
+
+### PolicyAccessCapillariesTestbucket and RoleAccessCapillariesTestbucket
+
+Your AWS deployment will need to read and write files from/to an S3 bucket.
+As per [Capillaries S3 instructions](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md), we assume that you already have an S3 bucket for your future Capillaries deployment. Let's assume the name of the bucket is `capillaries-testbucket` and that it has the `Block all public access` setting on. And here is the key difference:
+- the Capillaries test S3 bucket access described in that doc uses a user-based access model (the bucket policy explicitly gives the user `arn:aws:iam:::user/UserAccessCapillariesTestbucket` access to the bucket);
+- the capideploy S3 bucket access model uses a separate policy and a separate role with this policy attached, and Capillaries instances can assume that role.
+
+In `IAM->Policies`, let's create a policy `PolicyAccessCapillariesTestbucket` that allows access to the bucket we will be using:
+
+```json
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Action": "s3:ListBucket",
+            "Resource": "arn:aws:s3:::capillaries-testbucket"
+        },
+        {
+            "Effect": "Allow",
+            "Action": [
+                "s3:DeleteObject",
+                "s3:GetObject",
+                "s3:PutObject"
+            ],
+            "Resource": "arn:aws:s3:::capillaries-testbucket/*"
+        }
+    ]
+}
+```
+
+In `IAM->Roles`, create a role `RoleAccessCapillariesTestbucket` with `Trusted entity type` set to `AWS Service` and:
+- attach the newly created `PolicyAccessCapillariesTestbucket` to it (`Permissions` tab);
+- under `Trust relationships`, make sure that the ec2 service can assume this role:
+
+```json
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Principal": {
+                "Service": "ec2.amazonaws.com"
+            },
+            "Action": "sts:AssumeRole"
+        }
+    ]
+}
+```
+
+Please note that, since we created the role with `Trusted entity type` set to `AWS Service`, `RoleAccessCapillariesTestbucket` has two ARNs, one as a role and one as an instance profile:
+
+| Name type | Name |
+| - | - |
+| ARN | arn:aws:iam:::role/RoleAccessCapillariesTestbucket |
+| Instance profile ARN | arn:aws:iam:::instance-profile/RoleAccessCapillariesTestbucket |
+
+Run the following command as AWS root or as `UserCapideployOperator` (if you have already assigned the `iam:GetInstanceProfile` permission to it, see below):
+
+```
+$ aws iam get-instance-profile --instance-profile-name RoleAccessCapillariesTestbucket
+```
+
+The result shows that the role `RoleAccessCapillariesTestbucket` is "wrapped" by the instance profile `RoleAccessCapillariesTestbucket`.
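+If you prefer scripting this setup over clicking through the console, the same structure can be created with the AWS CLI. This is only a sketch, assuming the two JSON documents above are saved locally as `bucket_policy.json` and `trust_policy.json`, and `<account-id>` stands for your AWS account id (the console does the instance-profile "wrapping" automatically when it creates an `AWS Service` role; the CLI requires doing it explicitly):
+
+```shell
+aws iam create-policy --policy-name PolicyAccessCapillariesTestbucket \
+  --policy-document file://bucket_policy.json
+aws iam create-role --role-name RoleAccessCapillariesTestbucket \
+  --assume-role-policy-document file://trust_policy.json
+aws iam attach-role-policy --role-name RoleAccessCapillariesTestbucket \
+  --policy-arn arn:aws:iam::<account-id>:policy/PolicyAccessCapillariesTestbucket
+# Wrap the role in an instance profile of the same name, as the console would
+aws iam create-instance-profile --instance-profile-name RoleAccessCapillariesTestbucket
+aws iam add-role-to-instance-profile --instance-profile-name RoleAccessCapillariesTestbucket \
+  --role-name RoleAccessCapillariesTestbucket
+```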
+### PolicyCapideployOperator
+
+As we agreed above, `UserCapideployOperator` (who can potentially be a third party) needs only a very restricted set of permissions. This user will need permissions to do two major things:
+- create/delete AWS resources (networks, subnets, instances, etc.) that will provide the infrastructure to run Capillaries binaries and the Cassandra cluster;
+- give the created instances permission to read/write config/data files from/to the S3 bucket.
+
+In `IAM->Policies`, create a customer-managed policy `PolicyCapideployOperator`:
+
+```json
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Sid": "PolicyCapideployOperatorCreateInfra",
+            "Effect": "Allow",
+            "Action": [
+                "ec2:AllocateAddress",
+                "ec2:AssociateAddress",
+                "ec2:AssociateIamInstanceProfile",
+                "ec2:AssociateRouteTable",
+                "ec2:AttachInternetGateway",
+                "ec2:AttachVolume",
+                "ec2:AuthorizeSecurityGroupIngress",
+                "ec2:CreateImage",
+                "ec2:CreateInternetGateway",
+                "ec2:CreateNatGateway",
+                "ec2:CreateRoute",
+                "ec2:CreateRouteTable",
+                "ec2:CreateSecurityGroup",
+                "ec2:CreateSubnet",
+                "ec2:CreateTags",
+                "ec2:CreateVolume",
+                "ec2:CreateVpc",
+                "ec2:DeleteInternetGateway",
+                "ec2:DeleteNatGateway",
+                "ec2:DeleteRouteTable",
+                "ec2:DeleteSecurityGroup",
+                "ec2:DeleteSnapshot",
+                "ec2:DeleteSubnet",
+                "ec2:DeleteVolume",
+                "ec2:DeleteVpc",
+                "ec2:DeregisterImage",
+                "ec2:DescribeAddresses",
+                "ec2:DescribeImages",
+                "ec2:DescribeInstances",
+                "ec2:DescribeInstanceTypes",
+                "ec2:DescribeInternetGateways",
+                "ec2:DescribeKeyPairs",
+                "ec2:DescribeNatGateways",
+                "ec2:DescribeRouteTables",
+                "ec2:DescribeSecurityGroups",
+                "ec2:DescribeSnapshots",
+                "ec2:DescribeSubnets",
+                "ec2:DescribeTags",
+                "ec2:DescribeVolumes",
+                "ec2:DescribeVpcs",
+                "ec2:DetachInternetGateway",
+                "ec2:DetachVolume",
+                "ec2:ReleaseAddress",
+                "ec2:RunInstances",
+                "ec2:TerminateInstances",
+                "iam:GetInstanceProfile",
+                "tag:GetResources"
+            ],
+            "Resource": "*"
+        },
+        {
+            "Sid": "PolicyCapideployOperatorPassRoleAccessBucket",
+            "Effect": "Allow",
+            "Action": "iam:PassRole",
+            "Resource": "arn:aws:iam:::role/RoleAccessCapillariesTestbucket"
+        }
+    ]
+}
+```
+
+The first part is obvious: it lists all AWS API calls performed by capideploy. As for the second part, it adds the `iam:PassRole` permission for the `RoleAccessCapillariesTestbucket` role created above. Without this permission, the `AssociateIamInstanceProfile` call (that tells AWS to allow instances to access the bucket) will fail.
+
+Just in case, to list all AWS API calls used by capideploy, run:
+```shell
+grep -r -e "ec2Client\.[A-Za-z]*" --include "*.go"
+grep -r -e "tClient\.[A-Za-z]*" --include "*.go"
+```
+
+## Attach PolicyCapideployOperator to GroupCapideployOperators
+
+In `IAM->User groups->GroupCapideployOperators->Permissions`, attach `PolicyCapideployOperator`.
+
+# Environment variables used by Capideploy
+
+A sample .rc file to run before capideploy contains the variables used in the .jsonnet file:
+```
+# SSH access to EC2 instances
+export CAPIDEPLOY_SSH_USER=ubuntu
+# Name of the keypair stored at AWS
+export CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME=sampledeployment005-root-key
+# Exported PEM file with private SSH key from the AWS keypair
+export CAPIDEPLOY_SSH_PRIVATE_KEY_PATH=/home/johndoe/.ssh/sampledeployment005_rsa
+
+# NGINX IP address filter: your IP address(es) or cidr(s), for example: "135.23.0.0/16,136.104.0.21"
+export CAPIDEPLOY_BASTION_ALLOWED_IPS="..."
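+# (Example: to allow only the machine you are working on right now, you could use
+# export CAPIDEPLOY_BASTION_ALLOWED_IPS="$(curl -s https://checkip.amazonaws.com)/32"
+# where checkip.amazonaws.com is a public AWS endpoint that echoes your IP.)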
+export CAPIDEPLOY_EXTERNAL_WEBAPI_PORT=6544
+
+# This is where capideploy takes Capillaries binaries from,
+# see https://github.com/capillariesio/capillaries/blob/main/binaries_upload.sh
+export CAPIDEPLOY_CAPILLARIES_RELEASE_URL=https://capillaries-release.s3.us-east-1.amazonaws.com/latest
+
+# RabbitMQ admin access (RabbitMQ Mgmt UI)
+export CAPIDEPLOY_RABBITMQ_ADMIN_NAME=...
+export CAPIDEPLOY_RABBITMQ_ADMIN_PASS=...
+
+# RabbitMQ user access (used by Capillaries components to talk to RabbitMQ)
+export CAPIDEPLOY_RABBITMQ_USER_NAME=...
+export CAPIDEPLOY_RABBITMQ_USER_PASS=...
+
+# ~/.aws/config: default/region (without it, AWS API will not locate S3 buckets)
+export CAPIDEPLOY_S3_AWS_DEFAULT_REGION=us-east-1
+
+# Capideploy will use this instance profile when creating instances that need access to the S3 bucket
+export CAPIDEPLOY_INSTANCE_PROFILE_WITH_S3_ACCESS=RoleAccessCapillariesTestbucket
+```
+
+# Create deployment
+
+Run `1_deploy.sh`. If everything goes well, it will create a Capillaries deployment accessible at the BASTION_IP address returned by `1_deploy.sh` (capideploy does not use DNS, so you will have to access your deployment by IP address).
+
+# Processing data using created deployment
+
+[Capillaries repository](https://github.com/capillariesio/capillaries) has a few tests that are ready to run in the cloud deployment:
+- [lookup quicktest S3](https://github.com/capillariesio/capillaries/tree/main/test/code/lookup/quicktest_s3): run `1_create_data_s3.sh` and `2_one_run_cloud.sh`
+- [Fannie Mae quicktest S3](https://github.com/capillariesio/capillaries/tree/main/test/code/fannie_mae/quicktest_s3): run `1_copy_data_s3.sh` and `2_one_run_cloud.sh`
+- [Fannie Mae bigtest](https://github.com/capillariesio/capillaries/tree/main/test/code/fannie_mae/bigtest): run `1_copy_data.sh` and `2_one_run_cloud.sh`
+- [Portfolio bigtest](https://github.com/capillariesio/capillaries/tree/main/test/code/portfolio/bigtest): run `1_create_data.sh` and `2_one_run_cloud.sh`
+
+You will probably have to run these tests as the `UserAccessCapillariesTestbucket` IAM user as per [Capillaries S3 instructions](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md): that user should have access to the S3 bucket to upload/download config/data files.
+
+Please note that, in order to run these tests or your own scripts in your newly created deployment, you only need access to the S3 bucket and HTTP access to the bastion host (which should allow HTTP access from all machines matching the CAPIDEPLOY_BASTION_ALLOWED_IPS addresses or CIDRs). The `UserCapideployOperator` user is not involved at this point.
+
+In general, you can start a Capillaries run in your deployment via the REST API as follows:
+
+```shell
+CAPILLARIES_AWS_TESTBUCKET=capillaries-testbucket
+keyspace="lookup_quicktest_s3"
+cfgS3=s3://$CAPILLARIES_AWS_TESTBUCKET/capi_cfg/lookup_quicktest
+outS3=s3://$CAPILLARIES_AWS_TESTBUCKET/capi_out/lookup_quicktest
+scriptFile=$cfgS3/script.json
+paramsFile=$cfgS3/script_params_one_run_s3.json
+webapiUrl=http://$BASTION_IP:6544
+startNodes=read_orders,read_order_items
+curl -s -w "\n" -d '{"script_uri":"'$scriptFile'", "script_params_uri":"'$paramsFile'", "start_nodes":"'$startNodes'"}' -H "Content-Type: application/json" -X POST $webapiUrl"/ks/$keyspace/run"
+```
+
+# Delete deployment
+
+To delete all AWS resources that your deployment uses, run `5_undeploy.sh`.
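+
+For reference, a typical end-to-end session with the numbered scripts in this repository could look like the sketch below (the .rc file name is only an example; the point is that the CAPIDEPLOY_* variables above must be exported before any script runs):
+
+```shell
+source ~/capideploy_aws.rc   # export CAPIDEPLOY_* variables first
+./1_deploy.sh                # build capideploy and create all AWS resources
+# ...run Capillaries tests against http://$BASTION_IP:6544...
+./2_create_images.sh         # snapshot instances into images and delete the instances
+./3_restore_instances.sh     # later: recreate instances from those images
+./4_delete_images.sh         # drop the snapshot images when no longer needed
+./5_undeploy.sh              # tear down all AWS resources
+```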
\ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ed89a3a --- /dev/null +++ b/go.mod @@ -0,0 +1,35 @@ +module github.com/capillariesio/capillaries-deploy + +go 1.22 + +require ( + github.com/aws/aws-sdk-go-v2/config v1.27.11 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.157.0 + golang.org/x/crypto v0.21.0 +) + +require ( + github.com/aws/aws-sdk-go-v2 v1.26.1 + github.com/jmespath/go-jmespath v0.4.0 // indirect +) + +require ( + github.com/aws/aws-sdk-go-v2/credentials v1.17.11 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect + github.com/aws/aws-sdk-go-v2/service/cloudcontrol v1.18.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.7 // indirect + github.com/aws/aws-sdk-go-v2/service/resourcegroups v1.22.1 // indirect + github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.21.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.20.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.28.6 // indirect + github.com/aws/smithy-go v1.20.2 // indirect + github.com/google/go-jsonnet v0.20.0 // indirect + golang.org/x/sys v0.18.0 // indirect + gopkg.in/yaml.v2 v2.2.8 // indirect + sigs.k8s.io/yaml v1.1.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..800f903 --- /dev/null +++ b/go.sum @@ -0,0 +1,56 @@ +github.com/aws/aws-sdk-go-v2 v1.26.1 h1:5554eUqIYVWpU0YmeeYZ0wU64H2VLBs8TlhRB2L+EkA= +github.com/aws/aws-sdk-go-v2 v1.26.1/go.mod h1:ffIFB97e2yNsv4aTSGkqtHnppsIJzw7G7BReUZ3jCXM= +github.com/aws/aws-sdk-go-v2/config v1.27.11 h1:f47rANd2LQEYHda2ddSCKYId18/8BhSRM4BULGmfgNA= +github.com/aws/aws-sdk-go-v2/config v1.27.11/go.mod h1:SMsV78RIOYdve1vf36z8LmnszlRWkwMQtomCAI0/mIE= +github.com/aws/aws-sdk-go-v2/credentials v1.17.11 h1:YuIB1dJNf1Re822rriUOTxopaHHvIq0l/pX3fwO+Tzs= +github.com/aws/aws-sdk-go-v2/credentials v1.17.11/go.mod h1:AQtFPsDH9bI2O+71anW6EKL+NcD7LG3dpKGMV4SShgo= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 h1:FVJ0r5XTHSmIHJV6KuDmdYhEpvlHpiSd38RQWhut5J4= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1/go.mod h1:zusuAeqezXzAB24LGuzuekqMAEgWkVYukBec3kr3jUg= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5 h1:aw39xVGeRWlWx9EzGVnhOR4yOjQDHPQ6o6NmBlscyQg= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5/go.mod h1:FSaRudD0dXiMPK2UjknVwwTYyZMRsHv3TtkabsZih5I= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5 h1:PG1F3OD1szkuQPzDw3CIQsRIrtTlUC3lP84taWzHlq0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5/go.mod h1:jU1li6RFryMz+so64PpKtudI+QzbKoIEivqdf6LNpOc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= +github.com/aws/aws-sdk-go-v2/service/cloudcontrol v1.18.4 h1:y9xLchBUDKriRuDsA6OwwzgP9binHw67dR0uicHmOQQ= +github.com/aws/aws-sdk-go-v2/service/cloudcontrol v1.18.4/go.mod h1:oOvzqGwjzl5fyWi0C7YfOalzMDS8R4yapREwUVV5gBY= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.157.0 h1:BCNvChkZM4xqssztw+rFllaDnoS4Hm6bZ20XBj8RsI0= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.157.0/go.mod 
h1:xejKuuRDjz6z5OqyeLsz01MlOqqW7CqpAB4PabNvpu8= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 h1:Ji0DY1xUsUr3I8cHps0G+XM3WWU16lP6yG8qu1GAZAs= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2/go.mod h1:5CsjAbs3NlGQyZNFACh+zztPDI7fU6eW9QsxjfnuBKg= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.7 h1:ogRAwT1/gxJBcSWDMZlgyFUM962F51A5CRhDLbxLdmo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.7/go.mod h1:YCsIZhXfRPLFFCl5xxY+1T9RKzOKjCut+28JSX2DnAk= +github.com/aws/aws-sdk-go-v2/service/resourcegroups v1.22.1 h1:NqzW0QkKFraEclvcwJn/GZfY7n70opE+Lvw5E8fyu9g= +github.com/aws/aws-sdk-go-v2/service/resourcegroups v1.22.1/go.mod h1:+Kmpl4w+kCRyagQIIUWpnj0RWYHeBuZELNGu4G1COtY= +github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.21.4 h1:c1jtPWZSmgMmPkCgwv67GE0ugdEgnLVo/BHR1wl3Dm0= +github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.21.4/go.mod h1:FWw+Jnx+SlpsrU/NQ/f7f+1RdixTApZiU2o9FOubiDQ= +github.com/aws/aws-sdk-go-v2/service/sso v1.20.5 h1:vN8hEbpRnL7+Hopy9dzmRle1xmDc7o8tmY0klsr175w= +github.com/aws/aws-sdk-go-v2/service/sso v1.20.5/go.mod h1:qGzynb/msuZIE8I75DVRCUXw3o3ZyBmUvMwQ2t/BrGM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4 h1:Jux+gDDyi1Lruk+KHF91tK2KCuY61kzoCpvtvJJBtOE= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4/go.mod h1:mUYPBhaF2lGiukDEjJX2BLRRKTmoUSitGDUgM4tRxak= +github.com/aws/aws-sdk-go-v2/service/sts v1.28.6 h1:cwIxeBttqPN3qkaAjcEcsh8NYr8n2HZPkcKgPAi1phU= +github.com/aws/aws-sdk-go-v2/service/sts v1.28.6/go.mod h1:FZf1/nKNEkHdGGJP/cI2MoIMquumuRK6ol3QQJNDxmw= +github.com/aws/smithy-go v1.20.2 h1:tbp628ireGtzcHDDmLT/6ADHidqnwgF57XOXZe6tp4Q= +github.com/aws/smithy-go v1.20.2/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-jsonnet v0.20.0 h1:WG4TTSARuV7bSm4PMB4ohjxe33IHT5WVTrJSU33uT4g= +github.com/google/go-jsonnet v0.20.0/go.mod h1:VbgWF9JX7ztlv770x/TolZNGGFfiHEVx9G6ca2eUmeA= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 
+sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= +sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= diff --git a/pkg/cld/cldaws/README.md b/pkg/cld/cldaws/README.md new file mode 100644 index 0000000..85419ff --- /dev/null +++ b/pkg/cld/cldaws/README.md @@ -0,0 +1 @@ +No project-related code here please. This code is intended to run as part of the alternative (commercial?) deployment mechanism, not capideploy tool. \ No newline at end of file diff --git a/pkg/cld/cldaws/floating_ips.go b/pkg/cld/cldaws/floating_ips.go new file mode 100644 index 0000000..3d46374 --- /dev/null +++ b/pkg/cld/cldaws/floating_ips.go @@ -0,0 +1,55 @@ +package cldaws + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func GetPublicIpAddressAllocationAssociatedInstanceByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, ipName string) (string, string, string, error) { + out, err := ec2Client.DescribeAddresses(goCtx, &ec2.DescribeAddressesInput{Filters: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{ipName}}}}) + lb.AddObject(fmt.Sprintf("DescribeAddresses(tag:Name=%s)", ipName), out) + if err != nil { + return "", "", "", fmt.Errorf("cannot get public ip named %s: %s", ipName, err.Error()) + } + if len(out.Addresses) == 0 { + return "", "", "", nil + } + + var allocationId string + if out.Addresses[0].AllocationId != nil { + allocationId = *out.Addresses[0].AllocationId + } + + var instanceId string + if out.Addresses[0].InstanceId != nil { + instanceId = *out.Addresses[0].InstanceId + } + + return *out.Addresses[0].PublicIp, allocationId, instanceId, nil +} + +func AllocateFloatingIpByName(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, ipName string) (string, error) { + out, err := ec2Client.AllocateAddress(goCtx, &ec2.AllocateAddressInput{TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeElasticIp, + Tags: mapToTags(ipName, tags)}}}) + lb.AddObject(fmt.Sprintf("AllocateAddress(tag:Name=%s)", ipName), out) + if err != nil { + return "", fmt.Errorf("cannot allocate %s IP address:%s", ipName, err.Error()) + } + + return *out.PublicIp, nil +} + +func ReleaseFloatingIpByAllocationId(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, allocationId string) error { + out, err := ec2Client.ReleaseAddress(goCtx, &ec2.ReleaseAddressInput{AllocationId: aws.String(allocationId)}) + lb.AddObject(fmt.Sprintf("ReleaseAddress(allocationId=%s)", allocationId), out) + if err != nil { + return fmt.Errorf("cannot release IP address allocation id %s: %s", allocationId, err.Error()) + } + return nil +} diff --git a/pkg/cld/cldaws/instances.go b/pkg/cld/cldaws/instances.go new file mode 100644 index 0000000..6bd77d5 --- /dev/null +++ b/pkg/cld/cldaws/instances.go @@ -0,0 +1,357 @@ +package cldaws + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func stringToInstanceType(instanceTypeString string) (types.InstanceType, error) { + for _, instanceType := range types.InstanceTypeT2Nano.Values() { + if string(instanceType) == instanceTypeString { + return instanceType, nil + } + } + return 
types.InstanceTypeT2Nano, fmt.Errorf("unknown instance type %s", instanceTypeString)
+}
+
+func GetInstanceType(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, flavorName string) (string, error) {
+    out, err := ec2Client.DescribeInstanceTypes(goCtx, &ec2.DescribeInstanceTypesInput{
+        InstanceTypes: []types.InstanceType{types.InstanceType(flavorName)}})
+    lb.AddObject(fmt.Sprintf("DescribeInstanceTypes(InstanceType=%s)", flavorName), out)
+    if err != nil {
+        return "", fmt.Errorf("cannot find flavor %s:%s", flavorName, err.Error())
+    }
+    if len(out.InstanceTypes) == 0 {
+        return "", fmt.Errorf("found zero results for flavor %s", flavorName)
+    }
+    return string(out.InstanceTypes[0].InstanceType), nil // "t2.2xlarge"
+}
+
+func GetImageInfoById(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, imageId string) (types.ImageState, []types.BlockDeviceMapping, error) {
+    out, err := ec2Client.DescribeImages(goCtx, &ec2.DescribeImagesInput{Filters: []types.Filter{{
+        Name: aws.String("image-id"), Values: []string{imageId}}}})
+    lb.AddObject(fmt.Sprintf("DescribeImages(image-id=%s)", imageId), out)
+    if err != nil {
+        return "", nil, fmt.Errorf("cannot find image %s:%s", imageId, err.Error())
+    }
+    if len(out.Images) == 0 {
+        return "", nil, fmt.Errorf("found zero results for image %s", imageId)
+    }
+    return out.Images[0].State, out.Images[0].BlockDeviceMappings, nil
+}
+
+func GetImageInfoByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, imageName string) (string, types.ImageState, []types.BlockDeviceMapping, error) {
+    out, err := ec2Client.DescribeImages(goCtx, &ec2.DescribeImagesInput{Filters: []types.Filter{{
+        Name: aws.String("tag:Name"), Values: []string{imageName}}}})
+    lb.AddObject(fmt.Sprintf("DescribeImages(tag:Name=%s)", imageName), out)
+    if err != nil {
+        return "", "", nil, fmt.Errorf("cannot find image %s:%s", imageName, err.Error())
+    }
+    if len(out.Images) == 0 {
+        return "", "", nil, nil
+    }
+    return *out.Images[0].ImageId, out.Images[0].State, out.Images[0].BlockDeviceMappings, nil
+}
+
+func VerifyKeypair(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, keypairName string) error {
+    out, err := ec2Client.DescribeKeyPairs(goCtx, &ec2.DescribeKeyPairsInput{Filters: []types.Filter{{
+        Name: aws.String("key-name"), Values: []string{keypairName}}}})
+    lb.AddObject(fmt.Sprintf("DescribeKeyPairs(key-name=%s)", keypairName), out)
+    if err != nil {
+        return fmt.Errorf("cannot find keypair %s:%s", keypairName, err.Error())
+    }
+    if len(out.KeyPairs) == 0 {
+        return fmt.Errorf("found zero keypairs %s", keypairName)
+    }
+    return nil
+}
+
+func GetInstanceIdAndStateByHostName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, instName string) (string, types.InstanceStateName, error) {
+    out, err := ec2Client.DescribeInstances(goCtx, &ec2.DescribeInstancesInput{Filters: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{instName}}}})
+    lb.AddObject(fmt.Sprintf("DescribeInstances(tag:Name=%s)", instName), out)
+    if err != nil {
+        return "", types.InstanceStateNameTerminated, fmt.Errorf("cannot find instance by name %s:%s", instName, err.Error())
+    }
+    if len(out.Reservations) == 0 {
+        return "", types.InstanceStateNameTerminated, nil
+    }
+    if len(out.Reservations[0].Instances) == 0 {
+        return "", types.InstanceStateNameTerminated, fmt.Errorf("found zero instances in reservations[0] for instance name %s", instName)
+    }
+
+    // If there is more than one instance, we want to return the one that is Running, or at least Pending
+    var instanceId string
+    var instanceStateName string
+    for resIdx := 0; resIdx < len(out.Reservations); resIdx++ {
+        for instIdx := 0; instIdx < len(out.Reservations[resIdx].Instances); instIdx++ {
+            inst := out.Reservations[resIdx].Instances[instIdx]
+            if inst.State.Name == types.InstanceStateNameRunning {
+                return *inst.InstanceId, inst.State.Name, nil
+            }
+            if inst.State.Name == types.InstanceStateNamePending {
+                instanceId = *inst.InstanceId
+                instanceStateName = string(inst.State.Name)
+            } else if instanceStateName != string(types.InstanceStateNamePending) {
+                instanceId = *inst.InstanceId
+                instanceStateName = string(inst.State.Name)
+            }
+        }
+    }
+    return instanceId, types.InstanceStateName(instanceStateName), nil
+}
+
+func getInstanceStateName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, instanceId string) (types.InstanceStateName, error) {
+    out, err := ec2Client.DescribeInstances(goCtx, &ec2.DescribeInstancesInput{InstanceIds: []string{instanceId}})
+    lb.AddObject(fmt.Sprintf("DescribeInstances(instanceId=%s)", instanceId), out)
+    if err != nil {
+        if strings.Contains(err.Error(), "does not exist") {
+            return "", nil
+        }
+        return "", fmt.Errorf("cannot find instance by id %s:%s", instanceId, err.Error())
+    }
+    if len(out.Reservations) == 0 {
+        return "", nil
+    }
+    if len(out.Reservations[0].Instances) == 0 {
+        return "", fmt.Errorf("found zero instances in reservations[0] for instanceId %s", instanceId)
+    }
+
+    for resIdx := 0; resIdx < len(out.Reservations); resIdx++ {
+        for instIdx := 0; instIdx < len(out.Reservations[resIdx].Instances); instIdx++ {
+            inst := out.Reservations[resIdx].Instances[instIdx]
+            if *inst.InstanceId == instanceId {
+                return out.Reservations[resIdx].Instances[instIdx].State.Name, nil
+            }
+        }
+    }
+    return "", nil
+}
+
+func CreateInstance(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder,
+    instanceTypeString string,
+    imageId string,
+    instName string,
+    privateIpAddress string,
+    securityGroupId string,
+    rootKeyName string,
+    subnetId string,
+    blockDeviceMappings []types.BlockDeviceMapping,
+    timeoutSeconds int) (string, error) {
+
+    instanceType, err := stringToInstanceType(instanceTypeString)
+    if err != nil {
+        return "", err
+    }
+
+    if imageId == "" || instName == "" || privateIpAddress == "" || securityGroupId == "" || rootKeyName == "" || subnetId == "" {
+        return "", fmt.Errorf("empty parameter not allowed: imageId (%s), instName (%s), privateIpAddress (%s), securityGroupId (%s), rootKeyName (%s), subnetId (%s)",
+            imageId, instName, privateIpAddress, securityGroupId, rootKeyName, subnetId)
+    }
+
+    // NOTE: AWS doesn't allow specifying a hostname on creation; it assigns names like "ip-10-5-0-11"
+    runOut, err := ec2Client.RunInstances(goCtx, &ec2.RunInstancesInput{
+        InstanceType:        instanceType,
+        ImageId:             aws.String(imageId),
+        MinCount:            aws.Int32(1),
+        MaxCount:            aws.Int32(1),
+        KeyName:             aws.String(rootKeyName),
+        SecurityGroupIds:    []string{securityGroupId},
+        SubnetId:            aws.String(subnetId),
+        PrivateIpAddress:    aws.String(privateIpAddress),
+        BlockDeviceMappings: blockDeviceMappings,
+        TagSpecifications: []types.TagSpecification{{
+            ResourceType: types.ResourceTypeInstance,
+            Tags:         mapToTags(instName, tags)}}})
+    lb.AddObject(fmt.Sprintf("RunInstances(InstanceType=%s,ImageId=%s,tag:Name=%s)", instanceType, imageId, instName), runOut)
+    if err != nil {
+        return "", fmt.Errorf("cannot create instance %s: %s", instName, err.Error())
+    }
+    if len(runOut.Instances) == 0 {
+        return "", fmt.Errorf("got zero instances when creating %s", instName)
+    }
+
+    newId := *runOut.Instances[0].InstanceId
+
+    if newId == "" {
+        return "", fmt.Errorf("aws returned empty instance id for %s", instName)
+    }
+
+    startWaitTs := time.Now()
+    for {
+        stateName, err := getInstanceStateName(ec2Client, goCtx, lb, newId)
+        if err != nil {
+            return "", err
+        }
+        // If no state name returned - the instance creation has just begun, give it some time
+        if stateName != "" {
+            if stateName == types.InstanceStateNameRunning {
+                break
+            }
+            if stateName != types.InstanceStateNamePending {
+                return "", fmt.Errorf("%s(%s) was built, but the status is unknown: %s", instName, newId, stateName)
+            }
+        }
+        if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) {
+            return "", fmt.Errorf("giving up after waiting for %s(%s) to be created", instName, newId)
+        }
+        time.Sleep(1 * time.Second)
+    }
+    return newId, nil
+}
+
+func AssignAwsFloatingIp(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, instanceId string, ipAddress string) (string, error) {
+    out, err := ec2Client.AssociateAddress(goCtx, &ec2.AssociateAddressInput{
+        InstanceId: aws.String(instanceId),
+        PublicIp:   aws.String(ipAddress)})
+    lb.AddObject(fmt.Sprintf("AssociateAddress(instanceId=%s,ipAddress=%s)", instanceId, ipAddress), out)
+    if err != nil {
+        return "", fmt.Errorf("cannot assign public IP %s to %s: %s", ipAddress, instanceId, err.Error())
+    }
+    if *out.AssociationId == "" {
+        return "", fmt.Errorf("assigning public IP %s to %s returned empty association id", ipAddress, instanceId)
+    }
+    return *out.AssociationId, nil
+}
+
+func DeleteInstance(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, instanceId string, timeoutSeconds int) error {
+    out, err := ec2Client.TerminateInstances(goCtx, &ec2.TerminateInstancesInput{InstanceIds: []string{instanceId}})
+    lb.AddObject(fmt.Sprintf("TerminateInstances(instanceId=%s)", instanceId), out)
+    if err != nil {
+        return fmt.Errorf("cannot delete instance %s: %s", instanceId, err.Error())
+    }
+    if len(out.TerminatingInstances) == 0 {
+        return fmt.Errorf("got zero terminating instances when deleting %s", instanceId)
+    }
+
+    startWaitTs := time.Now()
+    for {
+        stateName, err := getInstanceStateName(ec2Client, goCtx, lb, instanceId)
+        if err != nil {
+            return err
+        }
+
+        // If no state name returned - the instance is gone already (a bit too fast, but possible in theory)
+        if stateName == "" {
+            break
+        }
+        if stateName == types.InstanceStateNameTerminated {
+            break
+        }
+        if stateName != types.InstanceStateNameShuttingDown && stateName != types.InstanceStateNameRunning {
+            return fmt.Errorf("%s was deleted, but the state is unknown: %s", instanceId, stateName)
+        }
+        if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) {
+            return fmt.Errorf("giving up after waiting for %s to be deleted", instanceId)
+        }
+        time.Sleep(1 * time.Second)
+    }
+    return nil
+}
+
+func StopInstance(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, instanceId string, timeoutSeconds int) error {
+    out, err := ec2Client.StopInstances(goCtx, &ec2.StopInstancesInput{InstanceIds: []string{instanceId}})
+    lb.AddObject(fmt.Sprintf("StopInstances(instanceId=%s)", instanceId), out)
+    if err != nil {
+        return fmt.Errorf("cannot stop instance %s: %s", instanceId, err.Error())
+    }
+
+    startWaitTs := time.Now()
+    for {
+        stateName, err := getInstanceStateName(ec2Client, goCtx, lb, instanceId)
+        if err != nil {
+            return err
+        }
+
+        if stateName == types.InstanceStateNameStopped {
+            break
+        }
+        if stateName != types.InstanceStateNameStopping {
+            return fmt.Errorf("cannot stop instance %s, unknown state: %s", instanceId, stateName)
+        }
+        if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) {
+            return fmt.Errorf("giving up after waiting for instance %s to be stopped", instanceId)
+        }
+        time.Sleep(1 * time.Second)
+    }
+    return nil
+}
+
+// aws ec2 create-image --region "us-east-1" --instance-id i-03c10fd5566a08476 --name ami-i-03c10fd5566a08476 --no-reboot
+func CreateImageFromInstance(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, imageName string, instanceId string, timeoutSeconds int) (string, error) {
+    out, err := ec2Client.CreateImage(goCtx, &ec2.CreateImageInput{
+        InstanceId: aws.String(instanceId),
+        Name:       aws.String(imageName),
+        TagSpecifications: []types.TagSpecification{{
+            ResourceType: types.ResourceTypeImage,
+            Tags:         mapToTags(imageName, tags)}}})
+    lb.AddObject(fmt.Sprintf("CreateImage(imageName=%s,instanceId=%s)", imageName, instanceId), out)
+    if err != nil {
+        return "", fmt.Errorf("cannot create snapshot image %s from instance %s: %s", imageName, instanceId, err.Error())
+    }
+
+    imageId := *out.ImageId
+
+    startWaitTs := time.Now()
+    for {
+        state, _, err := GetImageInfoById(ec2Client, goCtx, lb, imageId)
+        if err != nil {
+            return "", err
+        }
+        if state == types.ImageStateAvailable {
+            break
+        }
+        if state != types.ImageStatePending {
+            return "", fmt.Errorf("image %s(%s) was built, but the status is unknown: %s", imageName, imageId, state)
+        }
+        if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) {
+            return "", fmt.Errorf("giving up after waiting for image %s(%s) to be created for %ds", imageName, imageId, timeoutSeconds)
+        }
+        time.Sleep(1 * time.Second)
+    }
+    return imageId, nil
+}
+
+func DeregisterImage(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, imageId string) error {
+    out, err := ec2Client.DeregisterImage(goCtx, &ec2.DeregisterImageInput{ImageId: aws.String(imageId)})
+    lb.AddObject(fmt.Sprintf("DeregisterImage(imageId=%s)", imageId), out)
+    if err != nil {
+        return fmt.Errorf("cannot delete image %s:%s", imageId, err.Error())
+    }
+    return nil
+}
+
+func DeleteSnapshot(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volSnapshotId string) error {
+    out, err := ec2Client.DeleteSnapshot(goCtx, &ec2.DeleteSnapshotInput{SnapshotId: aws.String(volSnapshotId)})
+    lb.AddObject(fmt.Sprintf("DeleteSnapshot(volSnapshotId=%s)", volSnapshotId), out)
+    if err != nil {
+        return fmt.Errorf("cannot delete snapshot %s:%s", volSnapshotId, err.Error())
+    }
+    return nil
+}
+
+func AssociateInstanceProfile(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, instanceId string, instanceProfileName string) error {
+    iamInstanceProfileSpec := types.IamInstanceProfileSpecification{}
+    if strings.HasPrefix(instanceProfileName, "arn:aws:iam") {
+        iamInstanceProfileSpec.Arn = aws.String(instanceProfileName)
+    } else {
+        iamInstanceProfileSpec.Name = aws.String(instanceProfileName)
+    }
+    out, err := ec2Client.AssociateIamInstanceProfile(goCtx, &ec2.AssociateIamInstanceProfileInput{
+        InstanceId:         aws.String(instanceId),
+        IamInstanceProfile: &iamInstanceProfileSpec})
+    lb.AddObject(fmt.Sprintf("AssociateInstanceProfile(instanceId=%s,instanceProfileName=%s)", instanceId, instanceProfileName), out)
+    if err != nil {
+        return fmt.Errorf("cannot associate instance profile %s with %s: %s", instanceProfileName, instanceId, err.Error())
+    }
+ if *out.IamInstanceProfileAssociation.InstanceId == "" { + return fmt.Errorf("associating instance profile %s with %s returned empty instance id", instanceProfileName, instanceId) + } + return nil +} diff --git a/pkg/cld/cldaws/networking.go b/pkg/cld/cldaws/networking.go new file mode 100644 index 0000000..04cc2aa --- /dev/null +++ b/pkg/cld/cldaws/networking.go @@ -0,0 +1,454 @@ +package cldaws + +import ( + "context" + "fmt" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func GetSubnetIdByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, subnetName string) (string, error) { + out, err := ec2Client.DescribeSubnets(goCtx, &ec2.DescribeSubnetsInput{Filters: []types.Filter{{ + Name: aws.String("tag:Name"), Values: []string{subnetName}}}}) + lb.AddObject(fmt.Sprintf("DescribeSubnets(tag:Name=%s)", subnetName), out) + if err != nil { + return "", fmt.Errorf("cannot describe subnet %s: %s", subnetName, err.Error()) + } + if len(out.Subnets) == 0 { + return "", nil + } + return *out.Subnets[0].SubnetId, nil +} + +func CreateSubnet(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, vpcId string, subnetName string, cidr string, availabilityZone string) (string, error) { + if vpcId == "" || subnetName == "" || cidr == "" || availabilityZone == "" { + return "", fmt.Errorf("empty parameter not allowed: vpcId (%s), subnetName (%s), cidr (%s), availabilityZone (%s)", vpcId, subnetName, cidr, availabilityZone) + } + outCreate, err := ec2Client.CreateSubnet(goCtx, &ec2.CreateSubnetInput{ + VpcId: aws.String(vpcId), + CidrBlock: aws.String(cidr), + AvailabilityZone: aws.String(availabilityZone), + TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeSubnet, + Tags: mapToTags(subnetName, tags)}}}) + lb.AddObject(fmt.Sprintf("CreateSubnet(vpcId=%s,subnetName=%s,cidr=%s,availabilityZone=%s)", vpcId, subnetName, cidr, availabilityZone), outCreate) + if err != nil { + return "", fmt.Errorf("cannot create subnet %s: %s", subnetName, err.Error()) + } + + // TODO: dhcp options and allocation pools? 
+ + return *outCreate.Subnet.SubnetId, nil +} + +func DeleteSubnet(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, subnetId string) error { + out, err := ec2Client.DeleteSubnet(goCtx, &ec2.DeleteSubnetInput{SubnetId: aws.String(subnetId)}) + lb.AddObject(fmt.Sprintf("DeleteSubnet(subnetId=%s)", subnetId), out) + if err != nil { + return fmt.Errorf("cannot delete subnet %s: %s", subnetId, err.Error()) + } + return nil +} + +func GetVpcIdByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, vpcName string) (string, error) { + if vpcName == "" { + return "", fmt.Errorf("empty parameter not allowed: vpcName (%s)", vpcName) + } + out, err := ec2Client.DescribeVpcs(goCtx, &ec2.DescribeVpcsInput{Filters: []types.Filter{{ + Name: aws.String("tag:Name"), Values: []string{vpcName}}}}) + lb.AddObject(fmt.Sprintf("DescribeVpcs(tag:Name=%s)", vpcName), out) + if err != nil { + return "", fmt.Errorf("cannot describe vpc (network) %s: %s", vpcName, err.Error()) + } + + if len(out.Vpcs) > 0 { + return *out.Vpcs[0].VpcId, nil + } + + return "", nil +} + +func CreateVpc(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, vpcName string, cidrBlock string, timeoutSeconds int) (string, error) { + if vpcName == "" || cidrBlock == "" { + return "", fmt.Errorf("empty parameter not allowed: vpcName (%s), cidrBlock (%s)", vpcName, cidrBlock) + } + outCreate, err := ec2Client.CreateVpc(goCtx, &ec2.CreateVpcInput{ + CidrBlock: aws.String(cidrBlock), + TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeVpc, + Tags: mapToTags(vpcName, tags)}}}) + + lb.AddObject(fmt.Sprintf("CreateVpc(vpcName=%s,cidr=%s)", vpcName, cidrBlock), outCreate) + if err != nil { + return "", fmt.Errorf("cannot create vpc (network) %s: %s", vpcName, err.Error()) + } + if outCreate.Vpc == nil { + return "", fmt.Errorf("cannot create vpc (network) %s: returned empty vpc", vpcName) + } + + newVpcId := *outCreate.Vpc.VpcId + + startWaitTs := time.Now() + for { + out, err := ec2Client.DescribeVpcs(goCtx, &ec2.DescribeVpcsInput{Filters: []types.Filter{{ + Name: aws.String("vpc-id"), Values: []string{newVpcId}}}}) + lb.AddObject(fmt.Sprintf("DescribeVpcs(vpc-id=%s)", newVpcId), out) + if err != nil { + return "", fmt.Errorf("cannot query for newly created vpc (network) by id %s: %s", newVpcId, err.Error()) + } + if len(out.Vpcs) == 0 { + return "", fmt.Errorf("cannot query for newly created vpc (network) by id %s: returned zero vpcs", newVpcId) + } + + status := out.Vpcs[0].State + + if status == types.VpcStateAvailable { + break + } + if status != types.VpcStatePending { + return "", fmt.Errorf("vpc (network) %s was created, but has unexpected status %s", newVpcId, status) + } + if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) { + return "", fmt.Errorf("giving up after waiting for vpc (network) %s to be created after %ds", newVpcId, timeoutSeconds) + } + time.Sleep(1 * time.Second) + } + + return newVpcId, nil +} + +func DeleteVpc(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, vpcId string) error { + out, err := ec2Client.DeleteVpc(goCtx, &ec2.DeleteVpcInput{VpcId: aws.String(vpcId)}) + lb.AddObject(fmt.Sprintf("DeleteVpc(vpcId=%s)", vpcId), out) + if err != nil { + return fmt.Errorf("cannot delete vpc (network) %s: %s", vpcId, err.Error()) + } + return nil +} + +func CreateInternetGatewayRoute(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableId string, destinationCidrBlock string, internetGatewayId 
string) error { + if routeTableId == "" || destinationCidrBlock == "" || internetGatewayId == "" { + return fmt.Errorf("empty parameter not allowed: routeTableId (%s), destinationCidrBlock (%s), internetGatewayId (%s)", routeTableId, destinationCidrBlock, internetGatewayId) + } + out, err := ec2Client.CreateRoute(goCtx, &ec2.CreateRouteInput{ + RouteTableId: aws.String(routeTableId), + DestinationCidrBlock: aws.String(destinationCidrBlock), + GatewayId: aws.String(internetGatewayId)}) + lb.AddObject(fmt.Sprintf("CreateRoute(routeTableId=%s,destinationCidrBlock=%s,internetGatewayId=%s)", routeTableId, destinationCidrBlock, internetGatewayId), out) + if err != nil { + return fmt.Errorf("cannot create route for internet gateway (router) %s, route table %s: %s", internetGatewayId, routeTableId, err.Error()) + } + + if !*out.Return { + return fmt.Errorf("cannot create route for internet gateway (router) %s, route table %s: result false", internetGatewayId, routeTableId) + } + + return nil +} + +func CreateNatGatewayRoute(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableId string, destinationCidrBlock string, natGatewayId string) error { + if routeTableId == "" || destinationCidrBlock == "" || natGatewayId == "" { + return fmt.Errorf("empty parameter not allowed: routeTableId (%s), destinationCidrBlock (%s), natGatewayId (%s)", routeTableId, destinationCidrBlock, natGatewayId) + } + out, err := ec2Client.CreateRoute(goCtx, &ec2.CreateRouteInput{ + RouteTableId: aws.String(routeTableId), + DestinationCidrBlock: aws.String(destinationCidrBlock), + NatGatewayId: aws.String(natGatewayId)}) + lb.AddObject(fmt.Sprintf("CreateRoute(routeTableId=%s,destinationCidrBlock=%s,natGatewayId=%s)", routeTableId, destinationCidrBlock, natGatewayId), out) + if err != nil { + return fmt.Errorf("cannot create route for nat gateway %s, route table %s: %s", natGatewayId, routeTableId, err.Error()) + } + + if !*out.Return { + return fmt.Errorf("cannot create route for nat gateway %s, route table %s: result false", natGatewayId, routeTableId) + } + + return nil +} + +func GetNatGatewayIdAndStateByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, natGatewayName string) (string, types.NatGatewayState, error) { + out, err := ec2Client.DescribeNatGateways(goCtx, &ec2.DescribeNatGatewaysInput{Filter: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{natGatewayName}}}}) + lb.AddObject(fmt.Sprintf("DescribeNatGateways(tag:Name=%s)", natGatewayName), out) + if err != nil { + return "", types.NatGatewayStateDeleted, fmt.Errorf("cannot describe natgw %s: %s", natGatewayName, err.Error()) + } + if len(out.NatGateways) == 0 { + return "", types.NatGatewayStateDeleted, nil + } + + var natGatewayId string + stateName := types.NatGatewayStateFailed + for resIdx := 0; resIdx < len(out.NatGateways); resIdx++ { + if out.NatGateways[resIdx].State == types.NatGatewayStateAvailable { + return *out.NatGateways[resIdx].NatGatewayId, out.NatGateways[resIdx].State, nil + } + + if out.NatGateways[resIdx].State == types.NatGatewayStatePending { + natGatewayId = *out.NatGateways[resIdx].NatGatewayId + stateName = out.NatGateways[resIdx].State + } else if stateName != types.NatGatewayStatePending { + natGatewayId = *out.NatGateways[resIdx].NatGatewayId + stateName = out.NatGateways[resIdx].State + } + } + + return natGatewayId, stateName, nil +} + +func CreateNatGateway(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, natGatewayName string, subnetId 
string, publicIpAllocationId string, timeoutSeconds int) (string, error) {
+    if natGatewayName == "" || subnetId == "" || publicIpAllocationId == "" {
+        return "", fmt.Errorf("empty parameter not allowed: natGatewayName (%s), subnetId (%s), publicIpAllocationId (%s)", natGatewayName, subnetId, publicIpAllocationId)
+    }
+    outCreateNatgw, err := ec2Client.CreateNatGateway(goCtx, &ec2.CreateNatGatewayInput{
+        SubnetId:     aws.String(subnetId),
+        AllocationId: aws.String(publicIpAllocationId),
+        TagSpecifications: []types.TagSpecification{{
+            ResourceType: types.ResourceTypeNatgateway,
+            Tags:         mapToTags(natGatewayName, tags)}}})
+    lb.AddObject(fmt.Sprintf("CreateNatGateway(natGatewayName=%s,subnetId=%s,publicIpAllocationId=%s)", natGatewayName, subnetId, publicIpAllocationId), outCreateNatgw)
+    if err != nil {
+        return "", fmt.Errorf("cannot create nat gateway %s: %s", natGatewayName, err.Error())
+    }
+
+    natGatewayId := *outCreateNatgw.NatGateway.NatGatewayId
+
+    if natGatewayId == "" {
+        return "", fmt.Errorf("cannot create nat gateway %s: got empty nat gateway id", natGatewayName)
+    }
+
+    startWaitTs := time.Now()
+    for {
+        outDescribeNatgw, err := ec2Client.DescribeNatGateways(goCtx, &ec2.DescribeNatGatewaysInput{Filter: []types.Filter{{
+            Name: aws.String("nat-gateway-id"), Values: []string{natGatewayId}}}})
+        lb.AddObject(fmt.Sprintf("DescribeNatGateways(nat-gateway-id=%s)", natGatewayId), outDescribeNatgw)
+        if err != nil {
+            return "", fmt.Errorf("cannot query for newly created nat gateway %s(%s): %s", natGatewayName, natGatewayId, err.Error())
+        }
+
+        if len(outDescribeNatgw.NatGateways) == 0 {
+            return "", fmt.Errorf("cannot query for newly created nat gateway %s(%s): no nat gateways returned", natGatewayName, natGatewayId)
+        }
+
+        status := outDescribeNatgw.NatGateways[0].State
+
+        if status == types.NatGatewayStateAvailable {
+            break
+        }
+        if status != types.NatGatewayStatePending {
+            return "", fmt.Errorf("nat gateway %s was created, but has unexpected status %s", natGatewayId, status)
+        }
+        if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) {
+            return "", fmt.Errorf("giving up after waiting for nat gateway %s to be created after %ds", natGatewayId, timeoutSeconds)
+        }
+        time.Sleep(3 * time.Second)
+    }
+    return natGatewayId, nil
+}
+
+func DeleteNatGateway(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, natGatewayId string, timeoutSeconds int) error {
+    outDeleteNatgw, err := ec2Client.DeleteNatGateway(goCtx, &ec2.DeleteNatGatewayInput{
+        NatGatewayId: aws.String(natGatewayId)})
+    lb.AddObject(fmt.Sprintf("DeleteNatGateway(natGatewayId=%s)", natGatewayId), outDeleteNatgw)
+    if err != nil {
+        return fmt.Errorf("cannot delete nat gateway %s: %s", natGatewayId, err.Error())
+    }
+
+    // Wait until the natgw is truly gone, otherwise internet gateway (router) deletion may choke with
+    // Network vpc-... has some mapped public address(es). Please unmap those public address(es) before detaching the gateway.
+ startWaitTs := time.Now() + for { + outDescribeNatgw, err := ec2Client.DescribeNatGateways(goCtx, &ec2.DescribeNatGatewaysInput{Filter: []types.Filter{{ + Name: aws.String("nat-gateway-id"), Values: []string{natGatewayId}}}}) + lb.AddObject(fmt.Sprintf("DescribeNatGateways(nat-gateway-id=%s)", natGatewayId), outDescribeNatgw) + if err != nil { + return fmt.Errorf("cannot query for deleted nat gateway %s: %s", natGatewayId, err.Error()) + } + + if len(outDescribeNatgw.NatGateways) == 0 { + return fmt.Errorf("cannot query for deleted nat gateway %s: no nat gateways returned", natGatewayId) + } + + status := outDescribeNatgw.NatGateways[0].State + + if status == types.NatGatewayStateDeleted { + break + } + if status != types.NatGatewayStateDeleting { + return fmt.Errorf("nat gateway %s was deleted, but has unexpected status %s", natGatewayId, status) + } + if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) { + return fmt.Errorf("giving up after waiting for nat gateway %s to be deleted after %ds", natGatewayId, timeoutSeconds) + } + time.Sleep(3 * time.Second) + } + return nil +} + +func CreateRouteTableForVpc(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, routeTableName string, vpcId string) (string, error) { + if routeTableName == "" || vpcId == "" { + return "", fmt.Errorf("empty parameter not allowed: routeTableName (%s), vpcId (%s)", routeTableName, vpcId) + } + out, err := ec2Client.CreateRouteTable(goCtx, &ec2.CreateRouteTableInput{ + VpcId: aws.String(vpcId), + TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeRouteTable, + Tags: mapToTags(routeTableName, tags)}}}) + lb.AddObject(fmt.Sprintf("CreateRouteTable(routeTableName=%s,vpcId=%s)", routeTableName, vpcId), out) + if err != nil { + return "", fmt.Errorf("cannot create route table %s: %s", routeTableName, err.Error()) + } + return *out.RouteTable.RouteTableId, nil +} + +func GetRouteTableByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableName string) (string, string, error) { + out, err := ec2Client.DescribeRouteTables(goCtx, &ec2.DescribeRouteTablesInput{ + Filters: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{routeTableName}}}}) + lb.AddObject(fmt.Sprintf("DescribeRouteTable(tag:Name=%s)", routeTableName), out) + if err != nil { + return "", "", fmt.Errorf("cannot find route table %s: %s", routeTableName, err.Error()) + } + if len(out.RouteTables) == 0 { + return "", "", nil + } + return *out.RouteTables[0].RouteTableId, *out.RouteTables[0].VpcId, nil +} + +func DeleteRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableId string) error { + out, err := ec2Client.DeleteRouteTable(goCtx, &ec2.DeleteRouteTableInput{RouteTableId: aws.String(routeTableId)}) + lb.AddObject(fmt.Sprintf("DeleteRouteTable(RouteTableId=%s)", routeTableId), out) + if err != nil { + return fmt.Errorf("cannot delete route table %s: %s", routeTableId, err.Error()) + } + return nil +} + +func AssociateRouteTableWithSubnet(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableId string, subnetId string) (string, error) { + if routeTableId == "" || subnetId == "" { + return "", fmt.Errorf("empty parameter not allowed: routeTableId (%s), subnetId (%s)", routeTableId, subnetId) + } + out, err := ec2Client.AssociateRouteTable(goCtx, &ec2.AssociateRouteTableInput{ + RouteTableId: aws.String(routeTableId), + SubnetId: aws.String(subnetId)}) + 
lb.AddObject(fmt.Sprintf("AssociateRouteTable(routeTableId=%s,subnetId=%s)", routeTableId, subnetId), out) + if err != nil { + return "", fmt.Errorf("cannot associate route table %s with subnet %s: %s", routeTableId, subnetId, err.Error()) + } + if *out.AssociationId == "" { + return "", fmt.Errorf("cannot associate route table %s with subnet %s: got empty association id", routeTableId, subnetId) + } + return *out.AssociationId, nil +} + +func GetInternetGatewayIdByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, internetGatewayName string) (string, error) { + out, err := ec2Client.DescribeInternetGateways(goCtx, &ec2.DescribeInternetGatewaysInput{Filters: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{internetGatewayName}}}}) + lb.AddObject(fmt.Sprintf("DescribeInternetGateways(tag:Name=%s)", internetGatewayName), out) + if err != nil { + return "", fmt.Errorf("cannot describe internet gateway (router) %s: %s", internetGatewayName, err.Error()) + } + if len(out.InternetGateways) > 0 { + return *out.InternetGateways[0].InternetGatewayId, nil + } + return "", nil +} + +func CreateInternetGateway(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, internetGatewayName string) (string, error) { + if internetGatewayName == "" { + return "", fmt.Errorf("empty parameter not allowed: internetGatewayName (%s)", internetGatewayName) + } + outCreateRouter, err := ec2Client.CreateInternetGateway(goCtx, &ec2.CreateInternetGatewayInput{ + TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeInternetGateway, + Tags: mapToTags(internetGatewayName, tags)}}}) + lb.AddObject(fmt.Sprintf("CreateInternetGateway(tag:Name=%s)", internetGatewayName), outCreateRouter) + if err != nil { + return "", fmt.Errorf("cannot create internet gateway (router) %s: %s", internetGatewayName, err.Error()) + } + + if *outCreateRouter.InternetGateway.InternetGatewayId == "" { + return "", fmt.Errorf("cannot create internet gateway (router) %s: empty id returned", internetGatewayName) + } + + // No need to wait/verify for creations: a router is created synchronously + + return *outCreateRouter.InternetGateway.InternetGatewayId, nil +} + +func DeleteInternetGateway(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, internetGatewayId string) error { + out, err := ec2Client.DeleteInternetGateway(goCtx, &ec2.DeleteInternetGatewayInput{ + InternetGatewayId: aws.String(internetGatewayId)}) + lb.AddObject(fmt.Sprintf("DeleteInternetGateway(internetGatewayId=%s)", internetGatewayId), out) + if err != nil { + return fmt.Errorf("cannot delete internet gateway (router) %s: %s", internetGatewayId, err.Error()) + } + return nil +} + +func GetInternetGatewayVpcAttachmentById(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, internetGatewayId string) (string, types.AttachmentStatus, error) { + if internetGatewayId == "" { + return "", types.AttachmentStatusDetached, fmt.Errorf("empty parameter not allowed: internetGatewayId (%s)", internetGatewayId) + } + out, err := ec2Client.DescribeInternetGateways(goCtx, &ec2.DescribeInternetGatewaysInput{ + Filters: []types.Filter{{Name: aws.String("internet-gateway-id"), Values: []string{internetGatewayId}}}}) + lb.AddObject(fmt.Sprintf("DescribeInternetGateways(internet-gateway-id=%s)", internetGatewayId), out) + if err != nil { + return "", types.AttachmentStatusDetached, fmt.Errorf("cannot verify internet gateway (router) %s: %s", internetGatewayId, err.Error()) + } + if 
len(out.InternetGateways) == 0 { + return "", types.AttachmentStatusDetached, fmt.Errorf("cannot verify internet gateway (router) %s: zero internet gateways returned", internetGatewayId) + } + if len(out.InternetGateways[0].Attachments) == 0 { + return "", types.AttachmentStatusDetached, nil + } + return *out.InternetGateways[0].Attachments[0].VpcId, out.InternetGateways[0].Attachments[0].State, nil +} + +func AttachInternetGatewayToVpc(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, internetGatewayId string, vpcId string) error { + if internetGatewayId == "" || vpcId == "" { + return fmt.Errorf("empty parameter not allowed: internetGatewayId (%s), vpcId (%s)", internetGatewayId, vpcId) + } + out, err := ec2Client.AttachInternetGateway(goCtx, &ec2.AttachInternetGatewayInput{ + InternetGatewayId: aws.String(internetGatewayId), + VpcId: aws.String(vpcId)}) + lb.AddObject(fmt.Sprintf("AttachInternetGateway(internetGatewayId=%s,vpcId=%s)", internetGatewayId, vpcId), out) + if err != nil { + return fmt.Errorf("cannot attach internet gateway (router) %s to vpc %s: %s", internetGatewayId, vpcId, err.Error()) + } + return nil +} + +func DetachInternetGatewayFromVpc(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, internetGatewayId string, vpcId string) error { + if internetGatewayId == "" || vpcId == "" { + return fmt.Errorf("empty parameter not allowed: internetGatewayId (%s), vpcId (%s)", internetGatewayId, vpcId) + } + out, err := ec2Client.DetachInternetGateway(goCtx, &ec2.DetachInternetGatewayInput{ + InternetGatewayId: aws.String(internetGatewayId), + VpcId: aws.String(vpcId)}) + lb.AddObject(fmt.Sprintf("DetachInternetGateway(internetGatewayId=%s,vpcId=%s)", internetGatewayId, vpcId), out) + if err != nil { + return fmt.Errorf("cannot detach internet gateway (router) %s from vpc %s: %s", internetGatewayId, vpcId, err.Error()) + } + return nil +} + +func GetVpcDefaultRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, vpcId string) (string, error) { + if vpcId == "" { + return "", fmt.Errorf("empty parameter not allowed: vpcId (%s)", vpcId) + } + out, err := ec2Client.DescribeRouteTables(goCtx, &ec2.DescribeRouteTablesInput{ + Filters: []types.Filter{ + {Name: aws.String("association.main"), Values: []string{"true"}}, + {Name: aws.String("vpc-id"), Values: []string{vpcId}}}}) + lb.AddObject(fmt.Sprintf("DescribeRouteTables(association.main=true,vpc-id=%s)", vpcId), out) + if err != nil { + return "", fmt.Errorf("cannot obtain default (main) route table for vpc %s: %s", vpcId, err.Error()) + } + if len(out.RouteTables) == 0 { + return "", fmt.Errorf("cannot obtain default (main) route table for vpc %s: no route tables returned", vpcId) + } + + return *out.RouteTables[0].RouteTableId, nil +} diff --git a/pkg/cld/cldaws/resources.go b/pkg/cld/cldaws/resources.go new file mode 100644 index 0000000..a2d663d --- /dev/null +++ b/pkg/cld/cldaws/resources.go @@ -0,0 +1,279 @@ +package cldaws + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + tagging "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" + taggingTypes "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +type BilledState string + +const ( + BilledStateUnknown BilledState = "unknown" + BilledStateBilled BilledState = "billed" + BilledStateUnbilled BilledState = 
"unbilled" +) + +type Resource struct { + Svc string + Type string + Id string + Name string + State string + Billed BilledState +} + +func (r *Resource) String() string { + return fmt.Sprintf("%s,%s,%s,%s,%s,%s", r.Svc, r.Type, r.Name, r.Id, r.State, r.Billed) +} + +func arnToResource(arn string) Resource { + r := Resource{ + Svc: "unknown", + Type: "unknown", + Id: "unknown", + State: "unknown", + Billed: BilledStateUnknown, + } + s := strings.Split(arn, "/") + if len(s) >= 2 { + r.Id = s[1] + } + s = strings.Split(s[0], ":") + if len(s) >= 3 { + r.Svc = s[2] + } + if len(s) >= 6 { + r.Type = s[5] + } + return r +} + +func getInstanceBilledState(state types.InstanceStateName) BilledState { + if state == types.InstanceStateNamePending || state == types.InstanceStateNameRunning { + return BilledStateBilled + } else { + return BilledStateUnbilled + } +} + +func getVolumeBilledState(state types.VolumeState) BilledState { + if state == types.VolumeStateAvailable || state == types.VolumeStateCreating || state == types.VolumeStateInUse { + return BilledStateBilled + } else { + return BilledStateUnbilled + } +} + +func getNatGatewayBilledState(state types.NatGatewayState) BilledState { + if state == types.NatGatewayStatePending || state == types.NatGatewayStateAvailable { + return BilledStateBilled + } else { + return BilledStateUnbilled + } +} + +func getVpcBilledState(state types.VpcState) BilledState { + if state == types.VpcStatePending || state == types.VpcStateAvailable { + return BilledStateBilled + } else { + return BilledStateUnbilled + } +} + +func getImageBilledState(state types.ImageState) BilledState { + if state == types.ImageStateAvailable || state == types.ImageStateDisabled || state == types.ImageStateError || state == types.ImageStatePending || state == types.ImageStateTransient { + return BilledStateBilled + } else { + return BilledStateUnbilled + } +} +func getSnapshotBilledState(_ types.SnapshotState) BilledState { + return BilledStateBilled +} + +func getResourceState(ec2Client *ec2.Client, goCtx context.Context, r *Resource) (string, BilledState, error) { + switch r.Svc { + case "ec2": + switch r.Type { + case "elastic-ip": + out, err := ec2Client.DescribeAddresses(goCtx, &ec2.DescribeAddressesInput{AllocationIds: []string{r.Id}}) + if err != nil { + return "", "", err + } + return *out.Addresses[0].PublicIp, BilledStateBilled, nil + case "vpc": + out, err := ec2Client.DescribeVpcs(goCtx, &ec2.DescribeVpcsInput{VpcIds: []string{r.Id}}) + if err != nil { + return "", "", err + } + return string(out.Vpcs[0].State), getVpcBilledState(out.Vpcs[0].State), nil + case "subnet": + out, err := ec2Client.DescribeSubnets(goCtx, &ec2.DescribeSubnetsInput{SubnetIds: []string{r.Id}}) + if err != nil { + return "", "", err + } + return string(out.Subnets[0].State), BilledStateBilled, nil + case "security-group": + _, err := ec2Client.DescribeSecurityGroups(goCtx, &ec2.DescribeSecurityGroupsInput{GroupIds: []string{r.Id}}) + if err != nil { + return "", "", err + } + return "present", BilledStateBilled, nil + case "route-table": + out, err := ec2Client.DescribeRouteTables(goCtx, &ec2.DescribeRouteTablesInput{RouteTableIds: []string{r.Id}}) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "doesnotexist", BilledStateUnbilled, nil + } + return "", "", err + } + return fmt.Sprintf("%droutes", len(out.RouteTables[0].Routes)), BilledStateBilled, nil + case "instance": + out, err := ec2Client.DescribeInstances(goCtx, &ec2.DescribeInstancesInput{InstanceIds: 
[]string{r.Id}}) + if err != nil { + return "", "", err + } + if len(out.Reservations) == 0 || len(out.Reservations[0].Instances) == 0 { + return "notfound", BilledStateUnbilled, nil + } + return string(out.Reservations[0].Instances[0].State.Name), getInstanceBilledState(out.Reservations[0].Instances[0].State.Name), nil + case "volume": + out, err := ec2Client.DescribeVolumes(goCtx, &ec2.DescribeVolumesInput{VolumeIds: []string{r.Id}}) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "doesnotexist", BilledStateUnbilled, nil + } + return "", "", err + } + return string(out.Volumes[0].State), getVolumeBilledState(out.Volumes[0].State), nil + case "natgateway": + out, err := ec2Client.DescribeNatGateways(goCtx, &ec2.DescribeNatGatewaysInput{NatGatewayIds: []string{r.Id}}) + if err != nil { + if strings.Contains(err.Error(), "was not found") { + return "notfound", BilledStateUnbilled, nil + } + return "", "", err + } + return string(out.NatGateways[0].State), getNatGatewayBilledState(out.NatGateways[0].State), nil + case "internet-gateway": + out, err := ec2Client.DescribeInternetGateways(goCtx, &ec2.DescribeInternetGatewaysInput{InternetGatewayIds: []string{r.Id}}) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "doesnotexist", BilledStateUnbilled, nil + } + return "", "", err + } + return fmt.Sprintf("%dattachments", len(out.InternetGateways[0].Attachments)), BilledStateBilled, nil + case "image": + out, err := ec2Client.DescribeImages(goCtx, &ec2.DescribeImagesInput{ImageIds: []string{r.Id}}) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "doesnotexist", BilledStateUnbilled, nil + } + return "", "", err + } + return string(out.Images[0].State), getImageBilledState(out.Images[0].State), nil + + case "snapshot": + out, err := ec2Client.DescribeSnapshots(goCtx, &ec2.DescribeSnapshotsInput{SnapshotIds: []string{r.Id}}) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "doesnotexist", BilledStateUnbilled, nil + } + return "", "", err + } + return string(out.Snapshots[0].State), getSnapshotBilledState(out.Snapshots[0].State), nil + default: + return "", "", fmt.Errorf("unsupported ec2 type %s", r.Type) + } + default: + return "", "", fmt.Errorf("unsupported svc %s", r.Svc) + } +} + +func getResourceNameTag(ec2Client *ec2.Client, goCtx context.Context, resourceId string) (string, error) { + out, err := ec2Client.DescribeTags(goCtx, &ec2.DescribeTagsInput{Filters: []types.Filter{{ + Name: aws.String("resource-id"), Values: []string{resourceId}}}}) + if err != nil { + return "", err + } + for _, tagDesc := range out.Tags { + if *tagDesc.Key == "Name" { + return *tagDesc.Value, nil + } + } + return "", nil +} + +func GetResourcesByTag(tClient *tagging.Client, ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, region string, tagName string, tagVal string) ([]string, error) { + resources := make([]*Resource, 0) + paginationToken := "" + for { + out, err := tClient.GetResources(goCtx, &tagging.GetResourcesInput{ + ResourcesPerPage: aws.Int32(100), + PaginationToken: &paginationToken, + TagFilters: []taggingTypes.TagFilter{{Key: aws.String(tagName), Values: []string{tagVal}}}}) + if err != nil { + return []string{}, err + } + + for _, rtMapping := range out.ResourceTagMappingList { + res := arnToResource(*rtMapping.ResourceARN) + state, billedState, err := getResourceState(ec2Client, goCtx, &res) + if err != nil { + lb.Add(err.Error()) + } else { + res.State = 
state + r.Billed = billedState + } + name, err := getResourceNameTag(ec2Client, goCtx, res.Id) + if err != nil { + lb.Add(err.Error()) + } else { + res.Name = name + } + resources = append(resources, &res) + } + if out.PaginationToken == nil || *out.PaginationToken == "" { + break + } + paginationToken = *out.PaginationToken + } + + sort.Slice(resources, func(i, j int) bool { + if resources[i].Svc < resources[j].Svc { + return true + } else if resources[i].Svc > resources[j].Svc { + return false + } else if resources[i].Type < resources[j].Type { + return true + } else if resources[i].Type > resources[j].Type { + return false + } else if resources[i].Name < resources[j].Name { + return true + } else if resources[i].Name > resources[j].Name { + return false + } else if resources[i].Id < resources[j].Id { + return true + } else if resources[i].Id > resources[j].Id { + return false + } else { + return false + } + }) + + result := make([]string, len(resources)) + for i, r := range resources { + result[i] = r.String() + } + return result, nil +} diff --git a/pkg/cld/cldaws/security_group.go b/pkg/cld/cldaws/security_group.go new file mode 100644 index 0000000..7162dca --- /dev/null +++ b/pkg/cld/cldaws/security_group.go @@ -0,0 +1,71 @@ +package cldaws + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func GetSecurityGroupIdByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, securityGroupName string) (string, error) { + out, err := ec2Client.DescribeSecurityGroups(goCtx, &ec2.DescribeSecurityGroupsInput{Filters: []types.Filter{{ + Name: aws.String("tag:Name"), Values: []string{securityGroupName}}}}) + lb.AddObject(fmt.Sprintf("DescribeSecurityGroups(tag:Name=%s)", securityGroupName), out) + if err != nil { + return "", fmt.Errorf("cannot describe security group %s: %s", securityGroupName, err.Error()) + } + if len(out.SecurityGroups) > 0 { + return *out.SecurityGroups[0].GroupId, nil + } + return "", nil +} + +func CreateSecurityGroup(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, securityGroupName string, vpcId string) (string, error) { + if securityGroupName == "" || vpcId == "" { + return "", fmt.Errorf("empty parameter not allowed: securityGroupName (%s), vpcId (%s)", securityGroupName, vpcId) + } + out, err := ec2Client.CreateSecurityGroup(goCtx, &ec2.CreateSecurityGroupInput{ + VpcId: aws.String(vpcId), + GroupName: aws.String(securityGroupName), + Description: aws.String(securityGroupName), + TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeSecurityGroup, + Tags: mapToTags(securityGroupName, tags)}}}) + lb.AddObject(fmt.Sprintf("CreateSecurityGroup(securityGroupName=%s,vpcId=%s)", securityGroupName, vpcId), out) + if err != nil { + return "", fmt.Errorf("cannot create security group %s: %s", securityGroupName, err.Error()) + } + return *out.GroupId, nil +} + +func AuthorizeSecurityGroupIngress(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, securityGroupId string, ipProtocol string, port int32, cidr string) error { + if securityGroupId == "" || ipProtocol == "" || port == 0 || cidr == "" { + return fmt.Errorf("empty parameter not allowed: securityGroupId (%s), ipProtocol (%s), port (%d), cidr (%s)", securityGroupId, ipProtocol, port, cidr) + } + out, err := ec2Client.AuthorizeSecurityGroupIngress(goCtx, 
&ec2.AuthorizeSecurityGroupIngressInput{ + GroupId: aws.String(securityGroupId), + IpProtocol: aws.String(ipProtocol), + FromPort: aws.Int32(port), + ToPort: aws.Int32(port), + CidrIp: aws.String(cidr)}) + lb.AddObject(fmt.Sprintf("AuthorizeSecurityGroupIngress(securityGroupId=%s,ipProtocol=%s,port=%d,cidr=%s)", securityGroupId, ipProtocol, port, cidr), out) + if err != nil { + return fmt.Errorf("cannot authorize security group %s ingress: %s", securityGroupId, err.Error()) + } + if !*out.Return { + return fmt.Errorf("cannot authorize security group %s ingress: aws returned false", securityGroupId) + } + return nil +} + +func DeleteSecurityGroup(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, securityGroupId string) error { + out, err := ec2Client.DeleteSecurityGroup(goCtx, &ec2.DeleteSecurityGroupInput{GroupId: aws.String(securityGroupId)}) + lb.AddObject(fmt.Sprintf("DeleteSecurityGroup(GroupId=%s)", securityGroupId), out) + if err != nil { + return fmt.Errorf("cannot delete security group %s: %s", securityGroupId, err.Error()) + } + return nil +} diff --git a/pkg/cld/cldaws/util.go b/pkg/cld/cldaws/util.go new file mode 100644 index 0000000..13cec45 --- /dev/null +++ b/pkg/cld/cldaws/util.go @@ -0,0 +1,37 @@ +package cldaws + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func TagResource(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, resourceId string, tagName string, tagMap map[string]string) error { + out, err := ec2Client.CreateTags(goCtx, &ec2.CreateTagsInput{ + Resources: []string{resourceId}, + Tags: mapToTags(tagName, tagMap)}) + lb.AddObject(fmt.Sprintf("CreateTags(resources=%s,tag:Name=%s)", resourceId, tagName), out) + if err != nil { + return fmt.Errorf("cannot tag resource %s: %s", resourceId, err.Error()) + } + return nil +} + +func mapToTags(tagName string, tagMap map[string]string) []types.Tag { + result := make([]types.Tag, len(tagMap)) + if tagMap != nil { + tagIdx := 0 + for tagKey, tagVal := range tagMap { + result[tagIdx] = types.Tag{Key: aws.String(tagKey), Value: aws.String(tagVal)} + tagIdx++ + } + } + if tagName != "" { + result = append(result, types.Tag{Key: aws.String("Name"), Value: aws.String(tagName)}) + } + return result +} diff --git a/pkg/cld/cldaws/volumes.go b/pkg/cld/cldaws/volumes.go new file mode 100644 index 0000000..9f4614b --- /dev/null +++ b/pkg/cld/cldaws/volumes.go @@ -0,0 +1,219 @@ +package cldaws + +import ( + "context" + "fmt" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +const InitVolumeAttachmentFunc string = ` +init_volume_attachment() +{ + local deviceName=$1 + local volumeMountPath=$2 + local permissions=$3 + local owner=$4 + + # Check if file system is already there + local deviceBlockId=$(blkid -s UUID -o value $deviceName) + if [ "$deviceBlockId" = "" ]; then + # Make file system (it outputs to stderr, so ignore it) + sudo mkfs.ext4 $deviceName 2>/dev/null + local rc=$? + if [ "$rc" -ne "0" ]; then + echo Error $rc, cannot make file system + return $rc + fi + fi + + deviceBlockId=$(sudo blkid -s UUID -o value $deviceName) + + # Create mount point + if [ ! -d "$volumeMountPath" ]; then + sudo mkdir -p $volumeMountPath + rc=$? + if [ "$rc" 
-ne "0" ]; then + echo Error $rc, cannot create mount dir $volumeMountPath + return $rc + fi + fi + + # Mount point should exist by this time + sudo mount -o discard $deviceName $volumeMountPath + sudo systemctl daemon-reload + + # Set permissions + sudo chmod $permissions $volumeMountPath + rc=$? + if [ "$rc" -ne "0" ]; then + echo Error $rc, cannot change $volumeMountPath permissions to $permissions + return $rc + fi + + if [ -n "$owner" ]; then + sudo chown $owner $volumeMountPath + rc=$? + if [ "$rc" -ne "0" ]; then + echo Error $rc, cannot change $volumeMountPath owner to $owner + return $rc + fi + fi + + local alreadyMounted=$(cat /etc/fstab | grep $volumeMountPath) + if [ "$alreadyMounted" = "" ]; then + # Adds a line to /etc/fstab + echo "UUID=$deviceBlockId $volumeMountPath ext4 defaults 0 2 " | sudo tee -a /etc/fstab + fi + + # Report UUID + echo $deviceBlockId + return 0 +} +` + +func GetVolumeIdByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volName string) (string, error) { + if volName == "" { + return "", fmt.Errorf("empty parameter not allowed: volName (%s)", volName) + } + out, err := ec2Client.DescribeVolumes(goCtx, &ec2.DescribeVolumesInput{ + Filters: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{volName}}}}) + lb.AddObject(fmt.Sprintf("DescribeVolumes(tag:Name=%s)", volName), out) + if err != nil { + return "", fmt.Errorf("cannot describe volume %s: %s", volName, err.Error()) + } + if len(out.Volumes) == 0 { + return "", nil + } + return *out.Volumes[0].VolumeId, nil +} + +func GetVolumeAttachedDeviceById(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volId string) (string, types.VolumeAttachmentState, error) { + if volId == "" { + return "", types.VolumeAttachmentStateDetached, fmt.Errorf("empty parameter not allowed: volId (%s)", volId) + } + out, err := ec2Client.DescribeVolumes(goCtx, &ec2.DescribeVolumesInput{VolumeIds: []string{volId}}) + lb.AddObject(fmt.Sprintf("DescribeVolumes(VolumeIds=%s)", volId), out) + if err != nil { + return "", types.VolumeAttachmentStateDetached, fmt.Errorf("cannot describe volume by id %s: %s", volId, err.Error()) + } + if len(out.Volumes) == 0 { + return "", types.VolumeAttachmentStateDetached, nil + } + if len(out.Volumes[0].Attachments) == 0 { + return "", types.VolumeAttachmentStateDetached, nil + } + return *out.Volumes[0].Attachments[0].Device, out.Volumes[0].Attachments[0].State, nil +} + +func stringToVolType(volTypeString string) (types.VolumeType, error) { + for _, volType := range types.VolumeTypeGp2.Values() { + if string(volType) == volTypeString { + return volType, nil + } + } + return types.VolumeTypeStandard, fmt.Errorf("unknown volume type %s", volTypeString) +} + +func CreateVolume(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, volName string, availabilityZone string, size int32, volTypeString string) (string, error) { + volType, err := stringToVolType(volTypeString) + if err != nil { + return "", err + } + if volName == "" || availabilityZone == "" || size == 0 { + return "", fmt.Errorf("empty parameter not allowed: volName (%s), availabilityZone (%s), size (%d)", volName, availabilityZone, size) + } + out, err := ec2Client.CreateVolume(goCtx, &ec2.CreateVolumeInput{ + AvailabilityZone: aws.String(availabilityZone), + Size: aws.Int32(size), + VolumeType: volType, + TagSpecifications: []types.TagSpecification{{ + ResourceType: types.ResourceTypeVolume, + Tags: mapToTags(volName, tags)}}}) + 
lb.AddObject(fmt.Sprintf("CreateVolume(volName=%s,availabilityZone=%s,size=%d)", volName, availabilityZone, size), out) + if err != nil { + return "", fmt.Errorf("cannot create volume %s: %s", volName, err.Error()) + } + return *out.VolumeId, nil +} + +func AttachVolume(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volId string, instanceId string, suggestedDevice string, timeoutSeconds int) (string, error) { + if volId == "" || instanceId == "" || suggestedDevice == "" { + return "", fmt.Errorf("empty parameter not allowed: volId (%s), instanceId (%s), suggestedDevice (%s)", volId, instanceId, suggestedDevice) + } + out, err := ec2Client.AttachVolume(goCtx, &ec2.AttachVolumeInput{ + VolumeId: aws.String(volId), + InstanceId: aws.String(instanceId), + Device: &suggestedDevice}) + lb.AddObject(fmt.Sprintf("AttachVolume(volId=%s,instanceId=%s,suggestedDevice=%s)", volId, instanceId, suggestedDevice), out) + if err != nil { + return "", fmt.Errorf("cannot attach volume %s to instance %s as device %s : %s", volId, instanceId, suggestedDevice, err.Error()) + } + + newDevice := *out.Device + + startWaitTs := time.Now() + for { + foundDevice, state, err := GetVolumeAttachedDeviceById(ec2Client, goCtx, lb, volId) + if err != nil { + return "", err + } + if foundDevice != newDevice { + return "", fmt.Errorf("cannot attach volume %s to instance %s as device %s : creation returned device %s, but while waiting discovered another device %s for this volume", volId, instanceId, suggestedDevice, newDevice, foundDevice) + } + if state == types.VolumeAttachmentStateAttached { + break + } + if state != types.VolumeAttachmentStateAttaching { + return "", fmt.Errorf("cannot attach volume %s to instance %s as device %s : unknown state %s", volId, instanceId, suggestedDevice, state) + } + if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) { + return "", fmt.Errorf("giving up after waiting for volume %s to attach to instance %s as device %s", volId, instanceId, suggestedDevice) + } + time.Sleep(1 * time.Second) + } + + return newDevice, nil +} + +func DetachVolume(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volId string, instanceId string, attachedDevice string, timeoutSeconds int) error { + if volId == "" || instanceId == "" || attachedDevice == "" { + return fmt.Errorf("empty parameter not allowed: volId (%s), instanceId (%s), attachedDevice (%s)", volId, instanceId, attachedDevice) + } + out, err := ec2Client.DetachVolume(goCtx, &ec2.DetachVolumeInput{ + VolumeId: aws.String(volId), + InstanceId: aws.String(instanceId), + Device: &attachedDevice}) + lb.AddObject(fmt.Sprintf("DetachVolume(volId=%s,instanceId=%s,attachedDevice=%s)", volId, instanceId, attachedDevice), out) + if err != nil { + return fmt.Errorf("cannot attach volume %s to instance %s: %s", volId, instanceId, err.Error()) + } + + startWaitTs := time.Now() + for { + _, state, err := GetVolumeAttachedDeviceById(ec2Client, goCtx, lb, volId) + if err != nil { + return err + } + if state == types.VolumeAttachmentStateDetached { + break + } + if state != types.VolumeAttachmentStateDetaching { + return fmt.Errorf("cannot detach volume %s to instance %s: unknown state %s", volId, instanceId, state) + } + if time.Since(startWaitTs).Seconds() > float64(timeoutSeconds) { + return fmt.Errorf("giving up after waiting for volume %s to detach from instance %s", volId, instanceId) + } + time.Sleep(1 * time.Second) + } + return nil +} + +func DeleteVolume(ec2Client *ec2.Client, goCtx context.Context, lb 
*l.LogBuilder, volId string) error { + out, err := ec2Client.DeleteVolume(goCtx, &ec2.DeleteVolumeInput{VolumeId: aws.String(volId)}) + lb.AddObject(fmt.Sprintf("DeleteVolume(VolumeId=%s)", volId), out) + if err != nil { + return fmt.Errorf("cannot delete volume %s: %s", volId, err.Error()) + } + return nil +} diff --git a/pkg/cmd/capideploy/capideploy.go b/pkg/cmd/capideploy/capideploy.go new file mode 100644 index 0000000..0ebf3cb --- /dev/null +++ b/pkg/cmd/capideploy/capideploy.go @@ -0,0 +1,584 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "log" + "os" + "reflect" + "regexp" + "strings" + "time" + + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" + "github.com/capillariesio/capillaries-deploy/pkg/provider" + "github.com/capillariesio/capillaries-deploy/pkg/rexec" +) + +const ( + CmdListDeploymentResources string = "list_deployment_resources" + CmdCreateFloatingIps string = "create_floating_ips" + CmdDeleteFloatingIps string = "delete_floating_ips" + CmdCreateSecurityGroups string = "create_security_groups" + CmdDeleteSecurityGroups string = "delete_security_groups" + CmdCreateNetworking string = "create_networking" + CmdDeleteNetworking string = "delete_networking" + CmdCreateVolumes string = "create_volumes" + CmdDeleteVolumes string = "delete_volumes" + CmdCreateInstances string = "create_instances" + CmdDeleteInstances string = "delete_instances" + CmdAttachVolumes string = "attach_volumes" + CmdDetachVolumes string = "detach_volumes" + CmdUploadFiles string = "upload_files" + CmdDownloadFiles string = "download_files" + CmdInstallServices string = "install_services" + CmdConfigServices string = "config_services" + CmdStartServices string = "start_services" + CmdStopServices string = "stop_services" + CmdPingInstances string = "ping_instances" + CmdCreateSnapshotImages string = "create_snapshot_images" + CmdCreateInstancesFromSnapshotImages string = "create_instances_from_snapshot_images" + CmdDeleteSnapshotImages string = "delete_snapshot_images" +) + +type SingleThreadCmdHandler func() (l.LogMsg, error) + +func DumpLogChan(logChan chan l.LogMsg) { + for len(logChan) > 0 { + msg := <-logChan + fmt.Println(string(msg)) + } +} + +func getNicknamesArg(entityName string) (string, error) { + if len(os.Args) < 3 { + return "", fmt.Errorf("not enough args, expected comma-separated list of %s or '*'", entityName) + } + if len(os.Args[2]) == 0 { + return "", fmt.Errorf("bad arg, expected comma-separated list of %s or '*'", entityName) + } + return os.Args[2], nil +} + +func filterByNickname[GenericDef prj.InstanceDef](nicknames string, sourceMap map[string]*GenericDef, entityName string) (map[string]*GenericDef, error) { + var defMap map[string]*GenericDef + rawNicknames := strings.Split(nicknames, ",") + defMap = map[string]*GenericDef{} + for _, rawNickname := range rawNicknames { + if strings.Contains(rawNickname, "*") { + matchFound := false + reNickname := regexp.MustCompile("^" + strings.ReplaceAll(rawNickname, "*", "[a-zA-Z0-9]*") + "$") + for fgNickname, fgDef := range sourceMap { + if reNickname.MatchString(fgNickname) { + matchFound = true + defMap[fgNickname] = fgDef + } + } + if !matchFound { + return nil, fmt.Errorf("no match found for %s '%s', available definitions: %s", entityName, rawNickname, reflect.ValueOf(sourceMap).MapKeys()) + } + } else { + fgDef, ok := sourceMap[rawNickname] + if !ok { + return nil, fmt.Errorf("definition for %s '%s' not found, available definitions: %s", 
entityName, rawNickname, reflect.ValueOf(sourceMap).MapKeys()) + } + defMap[rawNickname] = fgDef + } + } + return defMap, nil +} + +func waitForWorkers(errorsExpected int, errChan chan error, logChan chan l.LogMsg) int { + finalCmdErr := 0 + for errorsExpected > 0 { + select { + case cmdErr := <-errChan: + if cmdErr != nil { + finalCmdErr = 1 + fmt.Fprintf(os.Stderr, "%s\n", cmdErr.Error()) + } + errorsExpected-- + case msg := <-logChan: + fmt.Println(msg) + } + } + + DumpLogChan(logChan) + + return finalCmdErr +} + +func usage(flagset *flag.FlagSet) { + fmt.Printf(` +Capillaries deploy +Usage: capideploy [command parameters] [optional parameters] + +Commands: + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p -n + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p +`, + CmdListDeploymentResources, + + CmdCreateFloatingIps, + CmdDeleteFloatingIps, + CmdCreateSecurityGroups, + CmdDeleteSecurityGroups, + CmdCreateNetworking, + CmdDeleteNetworking, + + CmdCreateVolumes, + CmdAttachVolumes, + CmdDetachVolumes, + CmdDeleteVolumes, + + CmdCreateInstances, + CmdDeleteInstances, + CmdPingInstances, + + CmdInstallServices, + CmdConfigServices, + CmdStartServices, + CmdStopServices, + + CmdCreateSnapshotImages, + CmdCreateInstancesFromSnapshotImages, + CmdDeleteSnapshotImages, + ) + if flagset != nil { + fmt.Printf("\nParameters:\n") + flagset.PrintDefaults() + } +} + +// func createProject(templatePath string) error { +// vm := jsonnet.MakeVM() +// json, err := vm.EvaluateFile(templatePath) +// if err != nil { +// return err +// } +// fmt.Println(json) +// return nil +// } + +func ping(sshConfig *rexec.SshConfigDef, ipAddress string, verbosity bool, numberOfRepetitions int) (l.LogMsg, error) { + var err error + var logMsg l.LogMsg + + repetitions := 1 + if numberOfRepetitions > 1 { + repetitions = numberOfRepetitions + } + + lb := l.NewLogBuilder(l.CurFuncName()+" "+ipAddress, verbosity) + + for { + logMsg, err = rexec.ExecCommandOnInstance(sshConfig, ipAddress, "id", verbosity) + lb.Add(string(logMsg)) + repetitions-- + if err == nil || repetitions == 0 { + break + } + lb.Add(err.Error()) + time.Sleep(5 * time.Second) + } + + return lb.Complete(err) +} + +func main() { + if len(os.Args) <= 1 { + usage(nil) + os.Exit(1) + } + + // if os.Args[1] == CmdCreateProject { + // createPrjArgs := flag.NewFlagSet("create prj args", flag.ExitOnError) + // argTemplateFile := createPrjArgs.String("t", "capideploy.jsonnet", "Capideploy project template jsonnet file path") + + // if len(os.Args) <= 2 { + // usage(createPrjArgs) + // os.Exit(1) + // } + // parseErr := createPrjArgs.Parse(os.Args[2:]) + // if parseErr != nil { + // log.Fatalf(parseErr.Error()) + // } + // createPrjErr := createProject(*argTemplateFile) + // if createPrjErr != nil { + // log.Fatalf(createPrjErr.Error()) + // } + // os.Exit(0) + // } + + commonArgs := flag.NewFlagSet("run prj args", flag.ExitOnError) + argPrjFile := commonArgs.String("p", "capideploy.json", "Capideploy project jsonnet file path") + argVerbosity := commonArgs.Bool("v", false, "Verbose debug output") + argNumberOfRepetitions := commonArgs.Int("n", 1, "Number of repetitions") + argShowProjectDetails := commonArgs.Bool("s", false, "Show project details (may contain sensitive info)") + argIgnoreAttachedVolumes := commonArgs.Bool("i", false, "Ignore attached volumes on instance delete") + + cmdStartTs := time.Now() + + throttle := time.Tick(time.Second) // One call per second, to avoid error 429 on 
openstack/aws/azure calls + const maxWorkerThreads int = 50 + var logChan = make(chan l.LogMsg, maxWorkerThreads*5) + var sem = make(chan int, maxWorkerThreads) + var errChan chan error + var parseErr error + errorsExpected := 1 + //var prjPair *prj.ProjectPair + var project *prj.Project + //var fullPrjPath string + var prjErr error + + singleThreadCommands := map[string]SingleThreadCmdHandler{ + CmdListDeploymentResources: nil, + CmdCreateFloatingIps: nil, + CmdDeleteFloatingIps: nil, + CmdCreateSecurityGroups: nil, + CmdDeleteSecurityGroups: nil, + CmdCreateNetworking: nil, + CmdDeleteNetworking: nil, + } + + if _, ok := singleThreadCommands[os.Args[1]]; ok { + parseErr = commonArgs.Parse(os.Args[2:]) + } else { + parseErr = commonArgs.Parse(os.Args[3:]) + } + if parseErr != nil { + log.Fatalf(parseErr.Error()) + } + + project, prjErr = prj.LoadProject(*argPrjFile) + if prjErr != nil { + log.Fatalf(prjErr.Error()) + } + + deployProvider, deployProviderErr := provider.DeployProviderFactory(project, context.TODO(), *argVerbosity) + if deployProviderErr != nil { + log.Fatalf(deployProviderErr.Error()) + } + singleThreadCommands[CmdListDeploymentResources] = deployProvider.ListDeploymentResources + singleThreadCommands[CmdCreateFloatingIps] = deployProvider.CreateFloatingIps + singleThreadCommands[CmdDeleteFloatingIps] = deployProvider.DeleteFloatingIps + singleThreadCommands[CmdCreateSecurityGroups] = deployProvider.CreateSecurityGroups + singleThreadCommands[CmdDeleteSecurityGroups] = deployProvider.DeleteSecurityGroups + singleThreadCommands[CmdCreateNetworking] = deployProvider.CreateNetworking + singleThreadCommands[CmdDeleteNetworking] = deployProvider.DeleteNetworking + + if cmdHandler, ok := singleThreadCommands[os.Args[1]]; ok { + errChan = make(chan error, errorsExpected) + sem <- 1 + go func() { + logMsg, err := cmdHandler() + logChan <- logMsg + errChan <- err + <-sem + }() + } else if os.Args[1] == CmdCreateInstances || + os.Args[1] == CmdDeleteInstances || + os.Args[1] == CmdCreateSnapshotImages || + os.Args[1] == CmdCreateInstancesFromSnapshotImages || + os.Args[1] == CmdDeleteSnapshotImages { + nicknames, err := getNicknamesArg("instances") + if err != nil { + log.Fatalf(err.Error()) + } + instances, err := filterByNickname(nicknames, project.Instances, "instance") + if err != nil { + log.Fatalf(err.Error()) + } + + errorsExpected = len(instances) + errChan = make(chan error, errorsExpected) + + usedFlavors := map[string]string{} + usedImages := map[string]bool{} + if os.Args[1] == CmdCreateInstances || + os.Args[1] == CmdCreateInstancesFromSnapshotImages { + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + if err != nil { + log.Fatal(logMsgBastionIp) + } + + // Make sure image/flavor is supported + usedKeypairs := map[string]struct{}{} + for _, instDef := range instances { + usedFlavors[instDef.FlavorName] = "" + usedImages[instDef.ImageId] = false + usedKeypairs[instDef.RootKeyName] = struct{}{} + } + logMsg, err := deployProvider.HarvestInstanceTypesByFlavorNames(usedFlavors) + logChan <- logMsg + DumpLogChan(logChan) + if err != nil { + log.Fatalf(err.Error()) + } + + logMsg, err = deployProvider.HarvestImageIds(usedImages) + logChan <- logMsg + DumpLogChan(logChan) + if err != nil { + log.Fatalf(err.Error()) + } + + // Make sure the keypairs are there + logMsg, err = deployProvider.VerifyKeypairs(usedKeypairs) + logChan <- logMsg + DumpLogChan(logChan) + if err != nil { + log.Fatalf(err.Error()) + } + + fmt.Printf("Creating instances, 
consider clearing known_hosts to avoid ssh complaints:\n") + for _, i := range instances { + fmt.Printf("ssh-keygen -f ~/.ssh/known_hosts -R %s;\n", i.BestIpAddress()) + } + } + + switch os.Args[1] { + case CmdCreateInstances: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + if err != nil { + log.Fatal(logMsgBastionIp) + } + for iNickname := range instances { + <-throttle + sem <- 1 + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { + logMsg, err := deployProvider.CreateInstanceAndWaitForCompletion( + iNickname, + usedFlavors[project.Instances[iNickname].FlavorName], + project.Instances[iNickname].ImageId) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname) + } + case CmdDeleteInstances: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + if err != nil { + log.Fatal(logMsgBastionIp) + } + for iNickname := range instances { + <-throttle + sem <- 1 + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { + logMsg, err := deployProvider.DeleteInstance(iNickname, *argIgnoreAttachedVolumes) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname) + } + case CmdCreateSnapshotImages: + for iNickname := range instances { + <-throttle + sem <- 1 + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { + logMsg, err := deployProvider.CreateSnapshotImage(iNickname) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname) + } + case CmdCreateInstancesFromSnapshotImages: + for iNickname := range instances { + <-throttle + sem <- 1 + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { + logMsg, err := deployProvider.CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname, + usedFlavors[project.Instances[iNickname].FlavorName]) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname) + } + case CmdDeleteSnapshotImages: + for iNickname := range instances { + <-throttle + sem <- 1 + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { + logMsg, err := deployProvider.DeleteSnapshotImage(iNickname) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname) + } + default: + log.Fatalf("unknown create/delete instance command:" + os.Args[1]) + } + } else if os.Args[1] == CmdPingInstances || + os.Args[1] == CmdInstallServices || + os.Args[1] == CmdConfigServices || + os.Args[1] == CmdStartServices || + os.Args[1] == CmdStopServices { + nicknames, err := getNicknamesArg("instances") + if err != nil { + log.Fatalf(err.Error()) + } + + instances, err := filterByNickname(nicknames, project.Instances, "instance") + if err != nil { + log.Fatalf(err.Error()) + } + + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + if err != nil { + log.Fatal(logMsgBastionIp) + } + + errorsExpected = len(instances) + errChan = make(chan error, len(instances)) + for _, iDef := range instances { + <-throttle + sem <- 1 + go func(prj *prj.Project, logChan chan l.LogMsg, errChan chan error, iDef *prj.InstanceDef) { + var logMsg l.LogMsg + var finalErr error + switch os.Args[1] { + case CmdPingInstances: + logMsg, finalErr = ping(project.SshConfig, iDef.BestIpAddress(), *argVerbosity, *argNumberOfRepetitions) + + case CmdInstallServices: + // Make sure ping passes + logMsg, finalErr = 
ping(project.SshConfig, iDef.BestIpAddress(), *argVerbosity, 5) + + // If ping passed, it's ok to move on + if finalErr == nil { + logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Install, iDef.Service.Env, *argVerbosity) + } + + case CmdConfigServices: + logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Config, iDef.Service.Env, *argVerbosity) + + case CmdStartServices: + logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Start, iDef.Service.Env, *argVerbosity) + + case CmdStopServices: + logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Stop, iDef.Service.Env, *argVerbosity) + + default: + log.Fatalf("unknown service command: %s", os.Args[1]) + } + + logChan <- logMsg + errChan <- finalErr + <-sem + }(project, logChan, errChan, iDef) + } + + } else if os.Args[1] == CmdCreateVolumes || os.Args[1] == CmdAttachVolumes || os.Args[1] == CmdDetachVolumes || os.Args[1] == CmdDeleteVolumes { + nicknames, err := getNicknamesArg("instances") + if err != nil { + log.Fatalf(err.Error()) + } + + instances, err := filterByNickname(nicknames, project.Instances, "instance") + if err != nil { + log.Fatalf(err.Error()) + } + + volCount := 0 + for _, iDef := range instances { + volCount += len(iDef.Volumes) + } + if volCount == 0 { + fmt.Printf("No volumes to create/attach/detach/delete\n") + os.Exit(0) + } + errorsExpected = volCount + errChan = make(chan error, volCount) + for iNickname, iDef := range instances { + for volNickname := range iDef.Volumes { + <-throttle + sem <- 1 + switch os.Args[1] { + case CmdCreateVolumes: + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { + logMsg, err := deployProvider.CreateVolume(iNickname, volNickname) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname, volNickname) + case CmdAttachVolumes: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + if err != nil { + log.Fatal(logMsgBastionIp) + } + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { + logMsg, err := deployProvider.AttachVolume(iNickname, volNickname) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname, volNickname) + case CmdDetachVolumes: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + if err != nil { + log.Fatal(logMsgBastionIp) + } + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { + logMsg, err := deployProvider.DetachVolume(iNickname, volNickname) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname, volNickname) + case CmdDeleteVolumes: + go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { + logMsg, err := deployProvider.DeleteVolume(iNickname, volNickname) + logChan <- logMsg + errChan <- err + <-sem + }(project, logChan, errChan, iNickname, volNickname) + default: + log.Fatalf("unknown command: %s", os.Args[1]) + } + } + } + } else { + log.Fatalf("unknown command: %s", os.Args[1]) + } + + finalCmdErr := waitForWorkers(errorsExpected, errChan, logChan) + + // // Save updated project template, it may have some new ids and timestamps + // if prjErr = 
prjPair.Template.SaveProject(fullPrjPath); prjErr != nil { + // log.Fatalf(prjErr.Error()) + // } + + if *argShowProjectDetails { + prjJsonBytes, err := json.MarshalIndent(project, "", " ") + if err != nil { + log.Fatalf("cannot show project json: %s", err.Error()) + } + fmt.Printf("%s\n", string(prjJsonBytes)) + } + + if finalCmdErr > 0 { + os.Exit(finalCmdErr) + } + + fmt.Printf("%s %sOK%s, elapsed %.3fs\n", os.Args[1], l.LogColorGreen, l.LogColorReset, time.Since(cmdStartTs).Seconds()) +} diff --git a/pkg/l/log_builder.go b/pkg/l/log_builder.go new file mode 100755 index 0000000..9e85f4c --- /dev/null +++ b/pkg/l/log_builder.go @@ -0,0 +1,74 @@ +package l + +import ( + "encoding/json" + "fmt" + "strings" + "time" +) + +type LogBuilder struct { + Sb *strings.Builder + IsVerbose bool + Header string + StartTs time.Time +} + +type LogMsg string + +const LogColorReset string = "\033[0m" +const LogColorRed string = "\033[31m" +const LogColorGreen string = "\033[32m" + +func NewLogBuilder(header string, isVerbose bool) *LogBuilder { + lb := LogBuilder{Sb: &strings.Builder{}, IsVerbose: isVerbose, Header: header, StartTs: time.Now()} + if lb.IsVerbose { + lb.Sb.WriteString("\n===============================================\n") + lb.Sb.WriteString(fmt.Sprintf("%s : started\n", lb.Header)) + } else { + lb.Sb.WriteString(fmt.Sprintf("%s : ", lb.Header)) + } + return &lb +} + +func AddLogMsg(sb *strings.Builder, logMsg LogMsg) { + sb.WriteString(string(logMsg)) +} + +func (lb *LogBuilder) AddObject(content string, o any) { + if !lb.IsVerbose { + return + } + lb.Sb.WriteString(fmt.Sprintf("%s\n", content)) + if o == nil { + lb.Sb.WriteString("nil") + return + } + b, err := json.Marshal(o) + if err == nil { + lb.Sb.WriteString(string(b) + "\n") + } else { + lb.Sb.WriteString(fmt.Sprintf("cannot marshal %T: %s", o, err.Error())) + } +} + +func (lb *LogBuilder) Add(content string) { + if !lb.IsVerbose { + return + } + lb.Sb.WriteString(fmt.Sprintf("%s\n", content)) +} + +func (lb *LogBuilder) Complete(err error) (LogMsg, error) { + if lb.IsVerbose { + lb.Sb.WriteString(fmt.Sprintf("%s : ", lb.Header)) + } + lb.Sb.WriteString(fmt.Sprintf("elapsed %.3fs, ", time.Since(lb.StartTs).Seconds())) + if err == nil { + lb.Sb.WriteString(LogColorGreen + "OK" + LogColorReset) + } else { + lb.Sb.WriteString(LogColorRed + err.Error() + LogColorReset) + } + lb.Sb.WriteString("\n") + return LogMsg(lb.Sb.String()), err +} diff --git a/pkg/l/util.go b/pkg/l/util.go new file mode 100644 index 0000000..42a8c86 --- /dev/null +++ b/pkg/l/util.go @@ -0,0 +1,14 @@ +package l + +import ( + "runtime" + "strings" +) + +func CurFuncName() string { + pc := make([]uintptr, 15) + n := runtime.Callers(2, pc) + frames := runtime.CallersFrames(pc[:n]) + frame, _ := frames.Next() + return frame.Function[strings.LastIndex(frame.Function, "/")+1:] +} diff --git a/pkg/prj/project.go b/pkg/prj/project.go new file mode 100755 index 0000000..556e4cc --- /dev/null +++ b/pkg/prj/project.go @@ -0,0 +1,503 @@ +package prj + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/capillariesio/capillaries-deploy/pkg/rexec" + "github.com/google/go-jsonnet" +) + +type InstancePurpose string + +const ( + InstancePurposeBastion InstancePurpose = "CAPIDEPLOY.INTERNAL.PURPOSE_BASTION" + InstancePurposeCassandra InstancePurpose = "CAPIDEPLOY.INTERNAL.PURPOSE_CASSANDRA" + InstancePurposeDaemon InstancePurpose = "CAPIDEPLOY.INTERNAL.PURPOSE_DAEMON" + InstancePurposeRabbitmq InstancePurpose = 
"CAPIDEPLOY.INTERNAL.PURPOSE_RABBITMQ" + InstancePurposePrometheus InstancePurpose = "CAPIDEPLOY.INTERNAL.PURPOSE_PROMETHEUS" +) + +type ExecTimeouts struct { + CreateInstance int `json:"create_instance"` + DeleteInstance int `json:"delete_instance"` + CreateNatGateway int `json:"create_nat_gateway"` + DeleteNatGateway int `json:"delete_nat_gateway"` + CreateNetwork int `json:"create_network"` + AttachVolume int `json:"attach_volume"` + DetachVolume int `json:"detach_volume"` + CreateImage int `json:"create_image"` + StopInstance int `json:"stop_instance"` +} + +func (t *ExecTimeouts) InitDefaults() { + if t.CreateInstance == 0 { + t.CreateInstance = 120 + } + if t.DeleteInstance == 0 { + t.DeleteInstance = 600 + } + if t.CreateNatGateway == 0 { + t.CreateNatGateway = 180 // It really may take that long + } + if t.DeleteNatGateway == 0 { + t.DeleteNatGateway = 180 // It may take AWS a while + } + if t.CreateNetwork == 0 { + t.CreateNetwork = 120 + } + if t.AttachVolume == 0 { + t.AttachVolume = 30 + } + if t.DetachVolume == 0 { + t.DetachVolume = 30 + } + if t.CreateImage == 0 { + t.CreateImage = 600 + } + if t.StopInstance == 0 { + t.StopInstance = 300 + } +} + +type SecurityGroupRuleDef struct { + Desc string `json:"desc"` // human-readable + //Id string `json:"id"` // guid + Protocol string `json:"protocol"` // tcp + Ethertype string `json:"ethertype"` // IPv4 + RemoteIp string `json:"remote_ip"` // 0.0.0.0/0 + Port int `json:"port"` // 22 + Direction string `json:"direction"` // ingress +} + +type SecurityGroupDef struct { + Name string `json:"name"` + //Id string `json:"id"` + Rules []*SecurityGroupRuleDef `json:"rules"` +} + +// func (sg *SecurityGroupDef) Clean() { +// sg.Id = "" +// for _, r := range sg.Rules { +// r.Id = "" +// } +// } + +type PrivateSubnetDef struct { + Name string `json:"name"` + //Id string `json:"id"` + Cidr string `json:"cidr"` + RouteTableToNatgwName string `json:"route_table_to_nat_gateway_name"` // AWS only + AvailabilityZone string `json:"availability_zone"` // AWS only + //RouteTableToNat string `json:"route_table_to_nat"` // AWS only +} + +// AWS-specific +type PublicSubnetDef struct { + Name string `json:"name"` + Cidr string `json:"cidr"` + AvailabilityZone string `json:"availability_zone"` + NatGatewayName string `json:"nat_gateway_name"` + NatGatewayExternalIpName string `json:"nat_gateway_external_ip_address_name"` + //Id string //`json:"id"` + //NatGatewayId string //`json:"nat_gateway_id"` + //NatGatewayExternalIp string //`json:"nat_gateway_public_ip"` +} + +type RouterDef struct { + Name string `json:"name"` + //Id string `json:"id"` +} + +type NetworkDef struct { + Name string `json:"name"` + //Id string `json:"id"` + Cidr string `json:"cidr"` + PrivateSubnet PrivateSubnetDef `json:"private_subnet"` + PublicSubnet PublicSubnetDef `json:"public_subnet"` + Router RouterDef `json:"router"` +} + +type VolumeDef struct { + Name string `json:"name"` + MountPoint string `json:"mount_point"` + Size int `json:"size"` + Type string `json:"type"` + Permissions int `json:"permissions"` + Owner string `json:"owner"` + AvailabilityZone string `json:"availability_zone"` + //VolumeId string `json:"id"` + //Device string `json:"device"` + //BlockDeviceId string `json:"block_device_id"` +} + +type ServiceCommandsDef struct { + Install []string `json:"install"` + Config []string `json:"config"` + Start []string `json:"start"` + Stop []string `json:"stop"` +} +type ServiceDef struct { + Env map[string]string `json:"env"` + Cmd ServiceCommandsDef `json:"cmd"` +} + 
+type UserDef struct { + Name string `json:"name"` + PublicKeyPath string `json:"public_key_path"` +} +type PrivateKeyDef struct { + Name string `json:"name"` + PrivateKeyPath string `json:"private_key_path"` +} +type InstanceDef struct { + Purpose string `json:"purpose"` + InstName string `json:"inst_name"` + //SecurityGroupNickname string `json:"security_group"` + SecurityGroupName string `json:"security_group_name"` + RootKeyName string `json:"root_key_name"` + IpAddress string `json:"ip_address"` + ExternalIpAddressName string `json:"external_ip_address_name,omitempty"` // Populated for bastion only + ExternalIpAddress string `json:"external_ip_address"` // Output only, populated for bastion only + FlavorName string `json:"flavor"` + ImageId string `json:"image_id"` + SubnetName string `json:"subnet_name"` + Volumes map[string]*VolumeDef `json:"volumes,omitempty"` + Service ServiceDef `json:"service"` + AssociatedInstanceProfile string `json:"associated_instance_profile"` + //SubnetType string `json:"subnet_type"` + //Id string `json:"id"` + //SnapshotImageId string `json:"snapshot_image_id"` + //UsesSshConfigExternalIpAddress bool `json:"uses_ssh_config_external_ip_address,omitempty"` +} + +func (iDef *InstanceDef) BestIpAddress() string { + if iDef.ExternalIpAddressName != "" { + if iDef.ExternalIpAddress == "" { + return "you-did-not-call-ensurebastionip" + } + return iDef.ExternalIpAddress + } + return iDef.IpAddress +} + +// func (iDef *InstanceDef) Clean() { +// iDef.Id = "" +// for _, volAttachDef := range iDef.Volumes { +// volAttachDef.Device = "" +// volAttachDef.BlockDeviceId = "" +// // Do not clean volAttachDef.VolumeId, it should be handled by delete_volumes +// } +// } + +type Project struct { + DeploymentName string `json:"deployment_name"` + SshConfig *rexec.SshConfigDef `json:"ssh_config"` + Timeouts ExecTimeouts `json:"timeouts"` + EnvVariablesUsed []string `json:"env_variables_used"` + SecurityGroups map[string]*SecurityGroupDef `json:"security_groups"` + Network NetworkDef `json:"network"` + Instances map[string]*InstanceDef `json:"instances"` + DeployProviderName string `json:"deploy_provider_name"` +} + +func (p *Project) InitDefaults() { + p.Timeouts.InitDefaults() +} + +const DeployProviderAws string = "aws" + +type ProjectPair struct { + // Template Project + Live Project + // ProjectFileDirPath string +} + +// func (prjPair *ProjectPair) SetSecurityGroupId(sgNickname string, newId string) { +// prjPair.Template.SecurityGroups[sgNickname].Id = newId +// prjPair.Live.SecurityGroups[sgNickname].Id = newId +// } + +// func (prjPair *ProjectPair) SetSecurityGroupRuleId(sgNickname string, ruleIdx int, newId string) { +// prjPair.Template.SecurityGroups[sgNickname].Rules[ruleIdx].Id = newId +// prjPair.Live.SecurityGroups[sgNickname].Rules[ruleIdx].Id = newId +// } + +// func (prjPair *ProjectPair) CleanSecurityGroup(sgNickname string) { +// prjPair.Template.SecurityGroups[sgNickname].Clean() +// prjPair.Live.SecurityGroups[sgNickname].Clean() +// } + +// func (prjPair *ProjectPair) SetNetworkId(newId string) { +// prjPair.Template.Network.Id = newId +// prjPair.Live.Network.Id = newId +// } + +// func (prjPair *ProjectPair) SetRouterId(newId string) { +// prjPair.Template.Network.Router.Id = newId +// prjPair.Live.Network.Router.Id = newId +// } + +// func (prjPair *ProjectPair) SetNatGatewayId(newId string) { +// prjPair.Template.Network.PublicSubnet.NatGatewayId = newId +// prjPair.Live.Network.PublicSubnet.NatGatewayId = newId +// } + +// func (prjPair 
*ProjectPair) SetRouteTableToNat(newId string) { +// prjPair.Template.Network.PrivateSubnet.RouteTableToNat = newId +// prjPair.Live.Network.PrivateSubnet.RouteTableToNat = newId +// } + +// func (prjPair *ProjectPair) SetPublicSubnetNatGatewayExternalIp(newIp string) { +// prjPair.Template.Network.PublicSubnet.NatGatewayExternalIp = newIp +// prjPair.Live.Network.PublicSubnet.NatGatewayExternalIp = newIp +// } + +// func (prjPair *ProjectPair) SetPrivateSubnetId(newId string) { +// prjPair.Template.Network.PrivateSubnet.Id = newId +// prjPair.Live.Network.PrivateSubnet.Id = newId +// } + +// func (prjPair *ProjectPair) SetPublicSubnetId(newId string) { +// prjPair.Template.Network.PublicSubnet.Id = newId +// prjPair.Live.Network.PublicSubnet.Id = newId +// } + +// func (prjPair *ProjectPair) SetVolumeId(iNickname string, volNickname string, newId string) { +// prjPair.Template.Instances[iNickname].Volumes[volNickname].VolumeId = newId +// prjPair.Live.Instances[iNickname].Volumes[volNickname].VolumeId = newId +// } + +// func (prjPair *ProjectPair) SetAttachedVolumeDevice(iNickname string, volNickname string, device string) { +// prjPair.Template.Instances[iNickname].Volumes[volNickname].Device = device +// prjPair.Live.Instances[iNickname].Volumes[volNickname].Device = device +// } + +// func (prjPair *ProjectPair) SetVolumeBlockDeviceId(iNickname string, volNickname string, newId string) { +// prjPair.Template.Instances[iNickname].Volumes[volNickname].BlockDeviceId = newId +// prjPair.Live.Instances[iNickname].Volumes[volNickname].BlockDeviceId = newId +// } + +// func (prjPair *ProjectPair) CleanInstance(iNickname string) { +// prjPair.Template.Instances[iNickname].Clean() +// prjPair.Live.Instances[iNickname].Clean() +// } + +// func (prjPair *ProjectPair) SetInstanceId(iNickname string, newId string) { +// prjPair.Template.Instances[iNickname].Id = newId +// prjPair.Live.Instances[iNickname].Id = newId +// } + +// func (prjPair *ProjectPair) SetInstanceSnapshotImageId(iNickname string, newId string) { +// prjPair.Template.Instances[iNickname].SnapshotImageId = newId +// prjPair.Live.Instances[iNickname].SnapshotImageId = newId +// } + +func (prj *Project) validate() error { + // Check instance presence and uniqueness: hostnames, ip addresses, security groups + hostnameMap := map[string]struct{}{} + internalIpMap := map[string]struct{}{} + bastionExternalIpInstanceNickname := "" + for iNickname, iDef := range prj.Instances { + if iDef.InstName == "" { + return fmt.Errorf("instance %s has empty InstName", iNickname) + } + if _, ok := hostnameMap[iDef.InstName]; ok { + return fmt.Errorf("instances share InstName %s", iDef.InstName) + } + hostnameMap[iDef.InstName] = struct{}{} + + if iDef.IpAddress == "" { + return fmt.Errorf("instance %s has empty ip address", iNickname) + } + if _, ok := internalIpMap[iDef.IpAddress]; ok { + return fmt.Errorf("instances share internal ip %s", iDef.IpAddress) + } + internalIpMap[iDef.IpAddress] = struct{}{} + + if iDef.ExternalIpAddressName != "" { + if iDef.ExternalIpAddressName != prj.SshConfig.BastionExternalIpAddressName { + return fmt.Errorf("instance %s has unexpected external ip name %s, expected %s", iNickname, iDef.ExternalIpAddressName, prj.SshConfig.BastionExternalIpAddressName) + } + if bastionExternalIpInstanceNickname != "" { + return fmt.Errorf("instances %s,%s share external ip address %s", iNickname, bastionExternalIpInstanceNickname, prj.SshConfig.BastionExternalIpAddressName) + } + bastionExternalIpInstanceNickname = iNickname + } + + 
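// Only one instance (the bastion/jumphost) may own the ssh external ip; the nickname recorded above is checked again after this loop. + 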
// Security groups + if iDef.SecurityGroupName == "" { + return fmt.Errorf("instance %s has empty security group name", iNickname) + } + + sgFound := false + for _, sgDef := range prj.SecurityGroups { + if sgDef.Name == iDef.SecurityGroupName { + sgFound = true + break + } + } + if !sgFound { + return fmt.Errorf("instance %s has invalid security group %s", iNickname, iDef.SecurityGroupName) + } + + // External ip address + + if iDef.ExternalIpAddressName != "" { + if iDef.ExternalIpAddressName != prj.SshConfig.BastionExternalIpAddressName && iDef.ExternalIpAddressName != prj.Network.PublicSubnet.NatGatewayExternalIpName { + return fmt.Errorf("instance %s has invalid external ip address name %s, expected %s or %s", iNickname, iDef.ExternalIpAddressName, prj.SshConfig.BastionExternalIpAddressName, prj.Network.PublicSubnet.NatGatewayExternalIpName) + } + } + } + + // Need at least one floating ip address + if bastionExternalIpInstanceNickname == "" { + return fmt.Errorf("none of the instances is using ssh_config_external_ip, at least one must have it") + } + + scriptsMap := map[string]bool{} + if err := rexec.HarvestAllEmbeddedFilesPaths("", scriptsMap); err != nil { + return err + } + missingScriptsMap := map[string]struct{}{} + for _, iDef := range prj.Instances { + allInstanceScripts := append(append(append(iDef.Service.Cmd.Install, iDef.Service.Cmd.Config...), iDef.Service.Cmd.Start...), iDef.Service.Cmd.Stop...) + for _, scriptPath := range allInstanceScripts { + if _, ok := scriptsMap[scriptPath]; !ok { + missingScriptsMap[scriptPath] = struct{}{} + } else { + scriptsMap[scriptPath] = true + } + } + } + + // Verify that all scripts mentioned in the project are present + if len(missingScriptsMap) > 0 { + missingScripts := make([]string, len(missingScriptsMap)) + i := 0 + for scriptPath := range missingScriptsMap { + missingScripts[i] = scriptPath + i++ + } + return fmt.Errorf("cannot find embedded script(s): %s", strings.Join(missingScripts, ",")) + } + + // Vice versa: verify all existing scripts are used + unusedScripts := make([]string, 0) + for scriptPath, isUsed := range scriptsMap { + if !isUsed { + unusedScripts = append(unusedScripts, scriptPath) + } + } + if len(unusedScripts) > 0 { + return fmt.Errorf("the following embedded scripts are not used in this project: %s", strings.Join(unusedScripts, ",")) + } + + return nil +} + +func LoadProject(prjFile string) (*Project, error) { + prjFullPath, err := filepath.Abs(prjFile) + if err != nil { + return nil, fmt.Errorf("cannot get absolute path of %s: %s", prjFile, err.Error()) + } + + if _, err := os.Stat(prjFullPath); err != nil { + return nil, fmt.Errorf("cannot find project file [%s]: [%s]", prjFullPath, err.Error()) + } + + vm := jsonnet.MakeVM() + prjString, err := vm.EvaluateFile(prjFile) + if err != nil { + return nil, err + } + + // prjBytes, err := os.ReadFile(prjFullPath) + // if err != nil { + // return nil, "", fmt.Errorf("cannot read project file %s: %s", prjFullPath, err.Error()) + // } + + //prjPair := ProjectPair{} + + // Read project + + // err = json.Unmarshal(prjBytes, &prjPair.Template) + // if err != nil { + // return nil, "", fmt.Errorf("cannot parse project file %s: %s", prjFullPath, err.Error()) + // } + + // prjString := string(prjBytes) + + envVars := map[string]string{} + missingVars := make([]string, 0) + r := regexp.MustCompile(`\{(CAPIDEPLOY[_A-Z0-9]+)\}`) + matches := r.FindAllStringSubmatch(prjString, -1) + for _, v := range matches { + envVar := v[1] + envVars[envVar] =
os.Getenv(envVar) + if envVars[envVar] == "" { + missingVars = append(missingVars, envVar) + } + } + + if len(missingVars) > 0 { + return nil, fmt.Errorf("cannot load deployment project, missing env variables:\n%v", strings.Join(missingVars, "\n")) + } + + // Replace env vars + + // Revert unescaping in parameter values caused by JSON - we want to preserve `\n` and `\"` + escapeReplacer := strings.NewReplacer("\n", "\\n", `"`, `\"`) + for k, v := range envVars { + prjString = strings.ReplaceAll(prjString, fmt.Sprintf("{%s}", k), escapeReplacer.Replace(v)) + } + + // Hacky way to provide bastion ip + // prjString = strings.ReplaceAll(prjString, "{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}", prjPair.Template.SshConfig.BastionExternalIp) + + // Re-deserialize from prjString, now with replaced params + + project := Project{} + if err := json.Unmarshal([]byte(prjString), &project); err != nil { + return nil, fmt.Errorf("cannot parse project file with replaced vars %s: %s", prjFullPath, err.Error()) + } + + if project.DeployProviderName != DeployProviderAws { + return nil, fmt.Errorf("cannot parse deploy provider name %s, expected [%s]", + project.DeployProviderName, + DeployProviderAws) + } + + // Defaults + + project.InitDefaults() + + if err := project.validate(); err != nil { + return nil, fmt.Errorf("cannot load project file %s: %s", prjFullPath, err.Error()) + } + + return &project, nil +} + +// func (prj *Project) SaveProject(fullPrjPath string) error { +// prjJsonBytes, err := json.MarshalIndent(prj, "", " ") +// if err != nil { +// return err +// } + +// fPrj, err := os.Create(fullPrjPath) +// if err != nil { +// return err +// } +// defer fPrj.Close() +// if _, err := fPrj.WriteString(string(prjJsonBytes)); err != nil { +// return err +// } +// return fPrj.Sync() +// } diff --git a/pkg/provider/aws_floating_ips.go b/pkg/provider/aws_floating_ips.go new file mode 100644 index 0000000..982b2fa --- /dev/null +++ b/pkg/provider/aws_floating_ips.go @@ -0,0 +1,141 @@ +package provider + +import ( + "context" + "fmt" + "strings" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func ensureFloatingIp(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, ipName string) (string, error) { + existingIp, _, _, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(ec2Client, goCtx, lb, ipName) + if err != nil { + return "", err + } + if existingIp != "" { + return existingIp, nil + } + return cldaws.AllocateFloatingIpByName(ec2Client, goCtx, tags, lb, ipName) +} + +func (p *AwsDeployProvider) CreateFloatingIps() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + bastionIpName := p.GetCtx().Project.SshConfig.BastionExternalIpAddressName + bastionIpAddress, err := ensureFloatingIp(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, bastionIpName) + if err != nil { + return lb.Complete(err) + } + + p.GetCtx().Project.SshConfig.BastionExternalIp = bastionIpAddress + + // Tell the user about the bastion IP + reportPublicIp(p.GetCtx().Project) + + natgwIpName := p.GetCtx().Project.Network.PublicSubnet.NatGatewayExternalIpName + _, err = ensureFloatingIp(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, natgwIpName) + if err != nil { + return lb.Complete(err) + } + + return lb.Complete(nil) +} + +func releaseFloatingIpIfNotAllocated(ec2Client *ec2.Client, goCtx
context.Context, lb *l.LogBuilder, ipName string) error { + existingIp, existingIpAllocationId, existingIpAssociatedInstance, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(ec2Client, goCtx, lb, ipName) + if err != nil { + return err + } + if existingIp == "" { + return fmt.Errorf("cannot release ip named %s, it was not allocated", ipName) + } + if existingIpAssociatedInstance != "" { + return fmt.Errorf("cannot release ip named %s, it is associated with instance %s", ipName, existingIpAssociatedInstance) + } + return cldaws.ReleaseFloatingIpByAllocationId(ec2Client, goCtx, lb, existingIpAllocationId) +} + +func (p *AwsDeployProvider) DeleteFloatingIps() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + bastionIpName := p.GetCtx().Project.SshConfig.BastionExternalIpAddressName + err := releaseFloatingIpIfNotAllocated(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, bastionIpName) + if err != nil { + return lb.Complete(err) + } + + err = releaseFloatingIpIfNotAllocated(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PublicSubnet.NatGatewayExternalIpName) + if err != nil { + return lb.Complete(err) + } + //p.GetCtx().PrjPair.SetPublicSubnetNatGatewayExternalIp("") + + return lb.Complete(nil) +} + +// func (p *Project) SetSshBastionExternalIp(ipName string, newIp string) { +// //prjPair.Template.SshConfig.BastionExternalIp = newIp +// p.SshConfig.BastionExternalIp = newIp + +// // for _, iDef := range prjPair.Template.Instances { +// // if iDef.ExternalIpAddressName == ipName { +// // iDef.ExternalIpAddress = newIp +// // } +// // } +// for _, iDef := range p.Instances { +// if iDef.ExternalIpAddressName == ipName { +// iDef.ExternalIpAddress = newIp +// } + +// // In env variables +// replaceMap := map[string]string{} +// for varName, varValue := range iDef.Service.Env { +// if strings.Contains(varValue, "{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}") { +// replaceMap[varName] = strings.ReplaceAll(varValue, "{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}", newIp) +// } +// } +// for varName, varValue := range replaceMap { +// iDef.Service.Env[varName] = varValue +// } +// } + +// } + +func (p *AwsDeployProvider) PopulateInstanceExternalAddressByName() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + ipAddressName := p.GetCtx().Project.SshConfig.BastionExternalIpAddressName + ipAddress, _, _, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, ipAddressName) + if err != nil { + return lb.Complete(err) + } + + if ipAddress == "" { + return lb.Complete(fmt.Errorf("ip address %s was not allocated, did you call create_floating_ips?", ipAddressName)) + } + + // Updates project: ssh config + p.GetCtx().Project.SshConfig.BastionExternalIp = ipAddress + + // Updates project: instances + for _, iDef := range p.GetCtx().Project.Instances { + if iDef.ExternalIpAddressName == ipAddressName { + iDef.ExternalIpAddress = ipAddress + } + + // In env variables + replaceMap := map[string]string{} + for varName, varValue := range iDef.Service.Env { + if strings.Contains(varValue, "{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}") { + replaceMap[varName] = strings.ReplaceAll(varValue, "{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}", ipAddress) + } + } + for varName, varValue := range replaceMap { + iDef.Service.Env[varName] = varValue + } + } + + return lb.Complete(nil) +} diff --git a/pkg/provider/aws_instances.go
b/pkg/provider/aws_instances.go new file mode 100644 index 0000000..830f2f9 --- /dev/null +++ b/pkg/provider/aws_instances.go @@ -0,0 +1,386 @@ +package provider + +import ( + "context" + "fmt" + "strings" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" +) + +func (p *AwsDeployProvider) HarvestInstanceTypesByFlavorNames(flavorMap map[string]string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + for flavorName := range flavorMap { + instanceType, err := cldaws.GetInstanceType(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, flavorName) + if err != nil { + return lb.Complete(err) + } + flavorMap[flavorName] = instanceType + } + return lb.Complete(nil) +} + +func (p *AwsDeployProvider) HarvestImageIds(imageMap map[string]bool) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + for imageId := range imageMap { + _, _, err := cldaws.GetImageInfoById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageId) + if err != nil { + return lb.Complete(err) + } + imageMap[imageId] = true + } + return lb.Complete(nil) +} + +func (p *AwsDeployProvider) VerifyKeypairs(keypairMap map[string]struct{}) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + for keypairName := range keypairMap { + err := cldaws.VerifyKeypair(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, keypairName) + if err != nil { + return lb.Complete(err) + } + } + return lb.Complete(nil) +} + +func getInstanceSubnetId(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string) (string, error) { + subnetName := p.GetCtx().Project.Instances[iNickname].SubnetName + + subnetId, err := cldaws.GetSubnetIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, subnetName) + if err != nil { + return "", err + } + + if subnetId == "" { + return "", fmt.Errorf("requested instance %s should be created in subnet %s, but this subnet does not exist yet, did you run create_networking?", iNickname, subnetName) + } + + return subnetId, nil +} + +func getInstanceSecurityGroupId(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string) (string, error) { + sgName := p.GetCtx().Project.Instances[iNickname].SecurityGroupName + + sgId, err := cldaws.GetSecurityGroupIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, sgName) + if err != nil { + return "", err + } + + if sgId == "" { + return "", fmt.Errorf("requested instance %s should be created in security group %s, but it does not exist yet, did you run create_security_groups?", iNickname, sgName) + } + + return sgId, nil +} + +func internalCreate(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string, instanceTypeString string, imageId string, blockDeviceMappings []types.BlockDeviceMapping, subnetId string, securityGroupId string) error { + instName := p.GetCtx().Project.Instances[iNickname].InstName + + // Check if the instance already exists + + instanceId, foundInstanceStateByName, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instName) + if err != nil { + return err + } + + // If floating ip is being requested (it's a bastion instance), but it's already assigned, fail + + externalIpAddressName := p.GetCtx().Project.Instances[iNickname].ExternalIpAddressName + var externalIpAddress string + if externalIpAddressName != "" { +
foundExternalIpAddress, _, associatedInstanceId, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, externalIpAddressName) + if err != nil { + return err + } + if associatedInstanceId != "" && associatedInstanceId != instanceId { + return fmt.Errorf("cannot create instance %s, floating ip %s is already assigned, see instance %s", instName, externalIpAddressName, associatedInstanceId) + } + externalIpAddress = foundExternalIpAddress + } + + if instanceId != "" { + if foundInstanceStateByName == types.InstanceStateNameRunning || foundInstanceStateByName == types.InstanceStateNamePending { + // Assuming it's the right instance, return ok + return nil + } else if foundInstanceStateByName != types.InstanceStateNameTerminated { + return fmt.Errorf("instance %s(%s) already there and has invalid state %s", instName, instanceId, foundInstanceStateByName) + } + } + + instanceId, err = cldaws.CreateInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + instanceTypeString, + imageId, + instName, + p.GetCtx().Project.Instances[iNickname].IpAddress, + securityGroupId, + p.GetCtx().Project.Instances[iNickname].RootKeyName, + subnetId, + blockDeviceMappings, + p.GetCtx().Project.Timeouts.CreateInstance) + if err != nil { + return err + } + + if externalIpAddress != "" { + _, err = cldaws.AssignAwsFloatingIp(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instanceId, externalIpAddress) + if err != nil { + return err + } + } + + if p.GetCtx().Project.Instances[iNickname].AssociatedInstanceProfile != "" { + err = cldaws.AssociateInstanceProfile(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instanceId, p.GetCtx().Project.Instances[iNickname].AssociatedInstanceProfile) + if err != nil { + return err + } + } + + return nil +} + +func (p *AwsDeployProvider) CreateInstanceAndWaitForCompletion(iNickname string, flavorId string, imageId string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + + subnetId, err := getInstanceSubnetId(p, lb, iNickname) + if err != nil { + return lb.Complete(err) + } + + sgId, err := getInstanceSecurityGroupId(p, lb, iNickname) + if err != nil { + return lb.Complete(err) + } + + return lb.Complete(internalCreate(p, lb, iNickname, flavorId, imageId, nil, subnetId, sgId)) +} + +func getAttachedVolumeDeviceByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volName string) (string, error) { + foundVolIdByName, err := cldaws.GetVolumeIdByName(ec2Client, goCtx, lb, volName) + if err != nil { + return "", err + } + + if foundVolIdByName == "" { + return "", fmt.Errorf("volume %s not found, cannot check if it has device name for it; have you removed the volume before detaching it?", volName) + } + + foundDevice, _, err := cldaws.GetVolumeAttachedDeviceById(ec2Client, goCtx, lb, foundVolIdByName) + if err != nil { + return "", err + } + + return foundDevice, nil +} + +func getAttachedVolumes(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, volumeDefMap map[string]*prj.VolumeDef) ([]string, error) { + attachedVols := make([]string, 0) + for volNickname, volDef := range volumeDefMap { + volDevice, err := getAttachedVolumeDeviceByName(ec2Client, goCtx, lb, volDef.Name) + if err != nil { + return []string{}, err + } + if volDevice != "" { + attachedVols = append(attachedVols, fmt.Sprintf("%s(%s)", volNickname, volDevice)) + } + } + return attachedVols, nil +} + +func (p *AwsDeployProvider) DeleteInstance(iNickname string, 
ignoreAttachedVolumes bool) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + + if !ignoreAttachedVolumes { + attachedVols, err := getAttachedVolumes(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].Volumes) + if err != nil { + return lb.Complete(err) + } + + if len(attachedVols) > 0 { + return lb.Complete(fmt.Errorf("cannot delete instance %s, detach volumes first: %s", iNickname, strings.Join(attachedVols, ","))) + } + } + + instName := p.GetCtx().Project.Instances[iNickname].InstName + + foundId, foundState, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instName) + if err != nil { + return lb.Complete(err) + } + + if foundId != "" && foundState == types.InstanceStateNameTerminated { + lb.Add(fmt.Sprintf("will not delete instance %s, already terminated", iNickname)) + return lb.Complete(nil) + } else if foundId == "" { + lb.Add(fmt.Sprintf("will not delete instance %s, instance not found", iNickname)) + return lb.Complete(nil) + } + + return lb.Complete(cldaws.DeleteInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundId, p.GetCtx().Project.Timeouts.DeleteInstance)) +} + +func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + + imageName := p.GetCtx().Project.Instances[iNickname].InstName + + foundImageId, foundImageState, _, err := cldaws.GetImageInfoByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageName) + if err != nil { + return lb.Complete(err) + } + + if foundImageId != "" && foundImageState != "" && foundImageState != types.ImageStateDeregistered { + return lb.Complete(fmt.Errorf("cannot create snapshot image %s, delete/deregister existing image %s first", imageName, foundImageId)) + } + + attachedVols, err := getAttachedVolumes(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].Volumes) + if err != nil { + return lb.Complete(err) + } + + if len(attachedVols) > 0 { + return lb.Complete(fmt.Errorf("cannot create snapshot image from instance %s, detach volumes first: %s", iNickname, strings.Join(attachedVols, ","))) + } + + foundInstanceId, foundInstanceState, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].InstName) + if err != nil { + return lb.Complete(err) + } + + if foundInstanceId == "" { + return lb.Complete(fmt.Errorf("cannot create snapshot image from instance %s, instance not found", iNickname)) + } + + if foundInstanceState != types.InstanceStateNameRunning && + foundInstanceState != types.InstanceStateNameStopped { + return lb.Complete(fmt.Errorf("cannot create snapshot image from instance %s, instance state is %s, expected running or stopped", iNickname, foundInstanceState)) + } + + if foundInstanceState != types.InstanceStateNameStopped { + err = cldaws.StopInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundInstanceId, p.GetCtx().Project.Timeouts.StopInstance) + if err != nil { + return lb.Complete(err) + } + } + + imageId, err := cldaws.CreateImageFromInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + p.GetCtx().Project.Instances[iNickname].InstName, + foundInstanceId, + p.GetCtx().Project.Timeouts.CreateImage) + if err != nil { + return lb.Complete(err) + } + + _, blockDeviceMappings, err := cldaws.GetImageInfoById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx,
lb, imageId) + if err != nil { + return lb.Complete(err) + } + + // Tag each EBS snapshot in the block device mappings so it appears in the list of billed items + for _, mapping := range blockDeviceMappings { + if mapping.Ebs != nil { + if mapping.Ebs.SnapshotId != nil && *mapping.Ebs.SnapshotId != "" { + if err := cldaws.TagResource(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, *mapping.Ebs.SnapshotId, p.GetCtx().Project.Instances[iNickname].InstName, p.GetCtx().Tags); err != nil { + return lb.Complete(err) + } + } + } + } + + return lb.Complete(nil) +} + +// aws ec2 run-instances --region "us-east-1" --image-id ami-0bfdcfac85eb09d46 --count 1 --instance-type c7g.large --key-name $CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME --subnet-id subnet-09e2ba71bb1a5df94 --security-group-id sg-090b9d1ef7a1d1914 --private-ip-address 10.5.1.10 +// aws ec2 associate-address --instance-id i-0c4b32d20a1671b1e --public-ip 54.86.220.208 +func (p *AwsDeployProvider) CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname string, flavorId string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + + subnetId, err := getInstanceSubnetId(p, lb, iNickname) + if err != nil { + return lb.Complete(err) + } + + sgId, err := getInstanceSecurityGroupId(p, lb, iNickname) + if err != nil { + return lb.Complete(err) + } + + imageName := p.GetCtx().Project.Instances[iNickname].InstName + foundImageId, foundImageState, blockDeviceMappings, err := cldaws.GetImageInfoByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageName) + if err != nil { + return lb.Complete(err) + } + + if foundImageId == "" { + return lb.Complete(fmt.Errorf("cannot create instance for %s: snapshot image %s not found", iNickname, imageName)) + } + + if foundImageState != types.ImageStateAvailable { + return lb.Complete(fmt.Errorf("cannot create instance for %s from snapshot image %s in invalid state %s", iNickname, imageName, foundImageState)) + } + + isSnapshotIdFound := false + for _, mapping := range blockDeviceMappings { + if mapping.Ebs != nil { + if mapping.Ebs.SnapshotId != nil && *mapping.Ebs.SnapshotId != "" { + isSnapshotIdFound = true + } + } + } + + if !isSnapshotIdFound { + return lb.Complete(fmt.Errorf("cannot create instance from image %s/%s, image snapshot not found", iNickname, flavorId)) + } + + return lb.Complete(internalCreate(p, lb, iNickname, flavorId, foundImageId, blockDeviceMappings, subnetId, sgId)) +} + +func (p *AwsDeployProvider) DeleteSnapshotImage(iNickname string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + + imageName := p.GetCtx().Project.Instances[iNickname].InstName + foundImageId, foundImageState, blockDeviceMappings, err := cldaws.GetImageInfoByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageName) + if err != nil { + return lb.Complete(err) + } + + if foundImageId == "" { + return lb.Complete(fmt.Errorf("cannot delete snapshot image %s for %s: image not found", imageName, iNickname)) + } + + if foundImageState == types.ImageStateDeregistered { + lb.Add(fmt.Sprintf("will not delete image for %s, already deregistered", iNickname)) + return lb.Complete(nil) + } + + snapshotId := "" + for _, mapping := range blockDeviceMappings { + if mapping.Ebs != nil { + if mapping.Ebs.SnapshotId != nil && *mapping.Ebs.SnapshotId != "" { + snapshotId = *mapping.Ebs.SnapshotId + } + } + } + + err = cldaws.DeregisterImage(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundImageId) + if err != nil { + return lb.Complete(err) + } +
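+ // Note: deregistering an AMI does not delete its backing EBS snapshot, so we delete it explicitly below to avoid paying for orphaned snapshot storage.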
+ // Now we can delete the snapshot + if snapshotId != "" { + err := cldaws.DeleteSnapshot(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, snapshotId) + if err != nil { + return lb.Complete(err) + } + } + + return lb.Complete(nil) +} diff --git a/pkg/provider/aws_networking.go b/pkg/provider/aws_networking.go new file mode 100644 index 0000000..911d46b --- /dev/null +++ b/pkg/provider/aws_networking.go @@ -0,0 +1,332 @@ +package provider + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" +) + +func ensureAwsVpc(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, networkDef *prj.NetworkDef, timeout int) (string, error) { + foundVpcIdByName, err := cldaws.GetVpcIdByName(ec2Client, goCtx, lb, networkDef.Name) + if err != nil { + return "", err + } + if foundVpcIdByName != "" { + return foundVpcIdByName, nil + } + return cldaws.CreateVpc(ec2Client, goCtx, tags, lb, networkDef.Name, networkDef.Cidr, timeout) +} + +func ensureAwsPrivateSubnet(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, networkId string, subnetDef *prj.PrivateSubnetDef) (string, error) { + foundSubnetIdByName, err := cldaws.GetSubnetIdByName(ec2Client, goCtx, lb, subnetDef.Name) + if err != nil { + return "", err + } + if foundSubnetIdByName != "" { + return foundSubnetIdByName, nil + } + return cldaws.CreateSubnet(ec2Client, goCtx, tags, lb, networkId, subnetDef.Name, subnetDef.Cidr, subnetDef.AvailabilityZone) +} + +func ensureAwsPublicSubnet(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, networkId string, subnetDef *prj.PublicSubnetDef) (string, error) { + foundSubnetIdByName, err := cldaws.GetSubnetIdByName(ec2Client, goCtx, lb, subnetDef.Name) + if err != nil { + return "", err + } + if foundSubnetIdByName != "" { + return foundSubnetIdByName, nil + } + + return cldaws.CreateSubnet(ec2Client, goCtx, tags, lb, networkId, subnetDef.Name, subnetDef.Cidr, subnetDef.AvailabilityZone) +} + +func ensureNatGatewayAndRoutePrivateSubnet(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, networkId string, publicSubnetId string, publicSubnetDef *prj.PublicSubnetDef, privateSubnetId string, privateSubnetDef *prj.PrivateSubnetDef, createNatGatewayTimeout int) error { + _, natGatewayPublicIpAllocationId, _, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(ec2Client, goCtx, lb, publicSubnetDef.NatGatewayExternalIpName) + if err != nil { + return err + } + + // Get NAT gateway by name, create one if needed + + natGatewayName := publicSubnetDef.NatGatewayName + natGatewayId, foundNatGatewayStateByName, err := cldaws.GetNatGatewayIdAndStateByName(ec2Client, goCtx, lb, natGatewayName) + if err != nil { + return err + } + + if natGatewayId != "" && foundNatGatewayStateByName != types.NatGatewayStateDeleted { + if foundNatGatewayStateByName != types.NatGatewayStateAvailable { + return fmt.Errorf("cannot create nat gateway %s, it is already created and has invalid state %s", natGatewayName, foundNatGatewayStateByName) + } + } else { + natGatewayId, err = cldaws.CreateNatGateway(ec2Client, goCtx, tags, lb, natGatewayName, + publicSubnetId, + natGatewayPublicIpAllocationId, + createNatGatewayTimeout) + if err != nil { + return 
err + } + } + + // Create new route table id for this vpc + + routeTableId, err := cldaws.CreateRouteTableForVpc(ec2Client, goCtx, tags, lb, privateSubnetDef.RouteTableToNatgwName, networkId) + if err != nil { + return err + } + + // Associate this route table with the private subnet + + rtAssocId, err := cldaws.AssociateRouteTableWithSubnet(ec2Client, goCtx, lb, routeTableId, privateSubnetId) + if err != nil { + return err + } + + lb.Add(fmt.Sprintf("associated route table %s with private subnet %s: %s", routeTableId, privateSubnetId, rtAssocId)) + + // Add a record to a route table: tell all outbound 0.0.0.0/0 traffic to go through this nat gateway: + + if err := cldaws.CreateNatGatewayRoute(ec2Client, goCtx, lb, routeTableId, "0.0.0.0/0", natGatewayId); err != nil { + return err + } + + lb.Add(fmt.Sprintf("route table %s in private subnet %s points to nat gateway %s", routeTableId, privateSubnetId, natGatewayId)) + + return nil +} + +func ensureInternetGatewayAndRoutePublicSubnet(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, + routerName string, + networkId string, publicSubnetId string, publicSubnetDef *prj.PublicSubnetDef) error { + + // Get internet gateway (router) by name, create if needed + + var routerId string + foundRouterIdByName, err := cldaws.GetInternetGatewayIdByName(ec2Client, goCtx, lb, routerName) + if err != nil { + return err + } + + if foundRouterIdByName != "" { + routerId = foundRouterIdByName + } else { + routerId, err = cldaws.CreateInternetGateway(ec2Client, goCtx, tags, lb, routerName) + if err != nil { + return err + } + } + + // Is this internet gateway (router) attached to a vpc? + + attachedVpcId, _, err := cldaws.GetInternetGatewayVpcAttachmentById(ec2Client, goCtx, lb, routerId) + if err != nil { + return err + } + + // Attach if needed + + if attachedVpcId == "" { + if err := cldaws.AttachInternetGatewayToVpc(ec2Client, goCtx, lb, routerId, networkId); err != nil { + return err + } + } else if attachedVpcId != networkId { + return fmt.Errorf("internet gateway (router) %s seems to be attached to a wrong vpc %s already", routerName, attachedVpcId) + } + + // Obtain route table id for this vpc (it was automatically created for us and marked as 'main') + + routeTableId, err := cldaws.GetVpcDefaultRouteTable(ec2Client, goCtx, lb, networkId) + if err != nil { + return err + } + + // (optional) tag this route table for operator's convenience + + routeTableName := publicSubnetDef.Name + "_vpc_default_rt" + if err := cldaws.TagResource(ec2Client, goCtx, lb, routeTableId, routeTableName, nil); err != nil { + return err + } + + // Associate this default (main) route table with the public subnet + + assocId, err := cldaws.AssociateRouteTableWithSubnet(ec2Client, goCtx, lb, routeTableId, publicSubnetId) + if err != nil { + return err + } + lb.Add(fmt.Sprintf("associated route table %s with public subnet %s: %s", routeTableId, publicSubnetId, assocId)) + + // Add a record to a route table: tell all outbound 0.0.0.0/0 traffic to go through this internet gateway: + + if err := cldaws.CreateInternetGatewayRoute(ec2Client, goCtx, lb, routeTableId, "0.0.0.0/0", routerId); err != nil { + return err + } + lb.Add(fmt.Sprintf("route table %s in public subnet %s points to internet gateway (router) %s", routeTableId, publicSubnetId, routerId)) + + return nil +} + +func detachAndDeleteInternetGateway(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, internetGatewayName string) error { + foundId, err := 
cldaws.GetInternetGatewayIdByName(ec2Client, goCtx, lb, internetGatewayName) + if err != nil { + return err + } + + if foundId == "" { + lb.Add(fmt.Sprintf("will not delete internet gateway (router) %s, nothing to delete", internetGatewayName)) + return nil + } + + // Is it attached to a vpc? If yes, detach it. + + attachedVpcId, attachmentState, err := cldaws.GetInternetGatewayVpcAttachmentById(ec2Client, goCtx, lb, foundId) + if err != nil { + return err + } + + // NOTE: for unknown reason, I am getting "available" instead of "attached" here, so let's embrace it + if attachedVpcId != "" && + (attachmentState == types.AttachmentStatusAttached || attachmentState == types.AttachmentStatusAttaching || string(attachmentState) == "available") { + + // This may potentially throw: + // Network vpc-... has some mapped public address(es). Please unmap those public address(es) before detaching the gateway. + // if we do not wait for NAT gateway to be deleted completely + if err := cldaws.DetachInternetGatewayFromVpc(ec2Client, goCtx, lb, foundId, attachedVpcId); err != nil { + return err + } + lb.Add(fmt.Sprintf("detached internet gateway (router) %s from vpc %s", foundId, attachedVpcId)) + } else { + lb.Add(fmt.Sprintf("internet gateway (router) %s was not attached, no need to detach", foundId)) + } + + // Delete + return cldaws.DeleteInternetGateway(ec2Client, goCtx, lb, foundId) +} + +func checkAndDeleteNatGateway(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, natGatewayName string, timeout int) error { + foundId, foundState, err := cldaws.GetNatGatewayIdAndStateByName(ec2Client, goCtx, lb, natGatewayName) + if err != nil { + return err + } + + if foundId == "" || foundState == types.NatGatewayStateDeleted { + lb.Add(fmt.Sprintf("will not delete nat gateway %s, nothing to delete", natGatewayName)) + return nil + } + + return cldaws.DeleteNatGateway(ec2Client, goCtx, lb, foundId, timeout) +} + +func deleteAwsSubnet(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, subnetName string) error { + foundId, err := cldaws.GetSubnetIdByName(ec2Client, goCtx, lb, subnetName) + if err != nil { + return err + } + if foundId == "" { + lb.Add(fmt.Sprintf("will not delete subnet %s, nothing to delete", subnetName)) + return nil + } + + return cldaws.DeleteSubnet(ec2Client, goCtx, lb, foundId) +} + +func checkAndDeleteAwsVpcWithRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, vpcName string, privateSubnetName string, privateSubnetRouteTableToNatgwName string) error { + foundVpcId, err := cldaws.GetVpcIdByName(ec2Client, goCtx, lb, vpcName) + if err != nil { + return err + } + + if foundVpcId == "" { + lb.Add(fmt.Sprintf("will not delete vpc %s, nothing to delete", vpcName)) + return nil + } + + // Delete the route table pointing to natgw (if we don't, AWS will consider them as dependencies and will not delete vpc) + foundRouteTableId, foundAttachedVpcId, err := cldaws.GetRouteTableByName(ec2Client, goCtx, lb, privateSubnetRouteTableToNatgwName) + if err != nil { + return err + } + if foundRouteTableId != "" { + if foundAttachedVpcId != "" && foundAttachedVpcId != foundVpcId { + return fmt.Errorf("cannot delete route table %s, it is attached to an unexpected vpc %s instead of %s", privateSubnetRouteTableToNatgwName, foundAttachedVpcId, foundVpcId) + } + if err := cldaws.DeleteRouteTable(ec2Client, goCtx, lb, foundRouteTableId); err != nil { + return err + } + } + + return cldaws.DeleteVpc(ec2Client, goCtx, lb, foundVpcId) +} + +func (p *AwsDeployProvider) 
CreateNetworking() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + vpcId, err := ensureAwsVpc(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, &p.GetCtx().Project.Network, p.GetCtx().Project.Timeouts.CreateNetwork) + if err != nil { + return lb.Complete(err) + } + + privateSubnetId, err := ensureAwsPrivateSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, vpcId, &p.GetCtx().Project.Network.PrivateSubnet) + if err != nil { + return lb.Complete(err) + } + + publicSubnetId, err := ensureAwsPublicSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + vpcId, &p.GetCtx().Project.Network.PublicSubnet) + if err != nil { + return lb.Complete(err) + } + + err = ensureInternetGatewayAndRoutePublicSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + p.GetCtx().Project.Network.Router.Name, + vpcId, publicSubnetId, &p.GetCtx().Project.Network.PublicSubnet) + if err != nil { + return lb.Complete(err) + } + + err = ensureNatGatewayAndRoutePrivateSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + vpcId, + publicSubnetId, &p.GetCtx().Project.Network.PublicSubnet, + privateSubnetId, &p.GetCtx().Project.Network.PrivateSubnet, + p.GetCtx().Project.Timeouts.CreateNatGateway) + if err != nil { + return lb.Complete(err) + } + + return lb.Complete(nil) +} + +func (p *AwsDeployProvider) DeleteNetworking() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + err := checkAndDeleteNatGateway(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PublicSubnet.NatGatewayName, p.GetCtx().Project.Timeouts.DeleteNatGateway) + if err != nil { + return lb.Complete(err) + } + + err = detachAndDeleteInternetGateway(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.Router.Name) + if err != nil { + return lb.Complete(err) + } + + err = deleteAwsSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PublicSubnet.Name) + if err != nil { + return lb.Complete(err) + } + + err = deleteAwsSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PrivateSubnet.Name) + if err != nil { + return lb.Complete(err) + } + + err = checkAndDeleteAwsVpcWithRouteTable(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.Name, p.GetCtx().Project.Network.PrivateSubnet.Name, p.GetCtx().Project.Network.PrivateSubnet.RouteTableToNatgwName) + if err != nil { + return lb.Complete(err) + } + + return lb.Complete(nil) +} diff --git a/pkg/provider/aws_security_group.go b/pkg/provider/aws_security_group.go new file mode 100644 index 0000000..6211353 --- /dev/null +++ b/pkg/provider/aws_security_group.go @@ -0,0 +1,79 @@ +package provider + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" +) + +func createAwsSecurityGroup(ec2Client *ec2.Client, goCtx context.Context, tags map[string]string, lb *l.LogBuilder, sgDef *prj.SecurityGroupDef, vpcId string) error { + groupId, err := cldaws.GetSecurityGroupIdByName(ec2Client, goCtx, lb, sgDef.Name) + if err != nil { + return err + } + + if groupId == "" { + groupId, err = cldaws.CreateSecurityGroup(ec2Client, goCtx, tags, lb, sgDef.Name, vpcId) + if err != nil { + return err + } + } + + for _, rule := range sgDef.Rules { + err := 
cldaws.AuthorizeSecurityGroupIngress(ec2Client, goCtx, lb, groupId, rule.Protocol, int32(rule.Port), rule.RemoteIp) + if err != nil { + return err + } + } + return nil +} + +func (p *AwsDeployProvider) CreateSecurityGroups() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + vpcId, err := cldaws.GetVpcIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.Name) + if err != nil { + return lb.Complete(err) + } + + if vpcId == "" { + return lb.Complete(fmt.Errorf("cannot create security groups, vpc %s does not exist", p.GetCtx().Project.Network.Name)) + } + + for _, sgDef := range p.GetCtx().Project.SecurityGroups { + err := createAwsSecurityGroup(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, sgDef, vpcId) + if err != nil { + return lb.Complete(err) + } + } + return lb.Complete(nil) +} + +func deleteAwsSecurityGroup(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, sgDef *prj.SecurityGroupDef) error { + foundId, err := cldaws.GetSecurityGroupIdByName(ec2Client, goCtx, lb, sgDef.Name) + if err != nil { + return err + } + + if foundId == "" { + lb.Add(fmt.Sprintf("will not delete security group %s, nothing to delete", sgDef.Name)) + return nil + } + + return cldaws.DeleteSecurityGroup(ec2Client, goCtx, lb, foundId) +} + +func (p *AwsDeployProvider) DeleteSecurityGroups() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + for _, sgDef := range p.GetCtx().Project.SecurityGroups { + err := deleteAwsSecurityGroup(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, sgDef) + if err != nil { + return lb.Complete(err) + } + } + return lb.Complete(nil) +} diff --git a/pkg/provider/aws_volumes.go b/pkg/provider/aws_volumes.go new file mode 100644 index 0000000..b2ab25e --- /dev/null +++ b/pkg/provider/aws_volumes.go @@ -0,0 +1,197 @@ +package provider + +import ( + "fmt" + "sort" + "strings" + + "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" + "github.com/capillariesio/capillaries-deploy/pkg/rexec" +) + +func (p *AwsDeployProvider) CreateVolume(iNickname string, volNickname string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + if err != nil { + return lb.Complete(err) + } + + if foundVolIdByName != "" { + lb.Add(fmt.Sprintf("volume %s(%s) already there", volDef.Name, foundVolIdByName)) + return lb.Complete(nil) + } + + _, err = cldaws.CreateVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, volDef.Name, volDef.AvailabilityZone, int32(volDef.Size), volDef.Type) + if err != nil { + return lb.Complete(err) + } + + return lb.Complete(nil) +} + +// AWS hell https://stackoverflow.com/questions/70205661/correctly-specifying-device-name-for-ebs-volume-while-attaching-to-an-ec2-instan +func volNicknameToAwsSuggestedDeviceName(volumes map[string]*prj.VolumeDef, volNickname string) string { + // Sorted list of vol nicknames + volNicknames := make([]string, len(volumes)) + volCount := 0 + for volNickname := range volumes { + volNicknames[volCount] = volNickname + volCount++ + } + sort.Slice(volNicknames, func(i, j int) bool { return volNicknames[i] > volNicknames[j] }) + 
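// Example with hypothetical volume nicknames "data" and "log": the descending sort yields ("log", "data"), so "log" maps to /dev/sdf and "data" to /dev/sdg. +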
volDeviceSuffix := 'f' + for i := 0; i < len(volNicknames); i++ { + if volNicknames[i] == volNickname { + return "/dev/sd" + string(volDeviceSuffix) + } + volDeviceSuffix++ + } + return "invalid-device-for-vol-" + volNickname +} + +// Not used anymore, hopefully +// func awsFinalDeviceNameOld(suggestedDeviceName string) string { +// return strings.ReplaceAll(suggestedDeviceName, "/dev/sd", "/dev/xvd") +// } + +func awsFinalDeviceNameNitro(suggestedDeviceName string) string { + // See what lsblk shows for your case. + // This is very hacky, but I didn't spend time to do it the right way + deviceNameReplacer := strings.NewReplacer( + "/dev/sdf", "/dev/nvme1n1", + "/dev/sdg", "/dev/nvme2n1", + "/dev/sdh", "/dev/nvme3n1") + return deviceNameReplacer.Replace(suggestedDeviceName) +} + +func (p *AwsDeployProvider) AttachVolume(iNickname string, volNickname string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] + + if volDef.MountPoint == "" || volDef.Permissions == 0 || volDef.Owner == "" { + return lb.Complete(fmt.Errorf("empty parameter not allowed: volDef.MountPoint (%s), volDef.Permissions (%d), volDef.Owner (%s)", volDef.MountPoint, volDef.Permissions, volDef.Owner)) + } + + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + if err != nil { + return lb.Complete(err) + } + + if foundVolIdByName == "" { + return lb.Complete(fmt.Errorf("cannot attach volume %s: volume not found, did you run create_volumes?", volDef.Name)) + } + + foundDevice, foundAttachmentState, err := cldaws.GetVolumeAttachedDeviceById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName) + if err != nil { + return lb.Complete(err) + } + + if foundDevice != "" { + if foundAttachmentState == types.VolumeAttachmentStateAttached { + return lb.Complete(nil) + } else { + return lb.Complete(fmt.Errorf("cannot attach volume %s: it's already attached to device %s, but has invalid attachment state %s", volDef.Name, foundDevice, foundAttachmentState)) + } + } + + suggestedDevice := volNicknameToAwsSuggestedDeviceName(p.GetCtx().Project.Instances[iNickname].Volumes, volNickname) + + // Attach + + foundInstanceIdByName, _, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].InstName) + if err != nil { + return lb.Complete(err) + } + + _, err = cldaws.AttachVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName, foundInstanceIdByName, suggestedDevice, p.GetCtx().Project.Timeouts.AttachVolume) + if err != nil { + return lb.Complete(err) + } + + // Mount + + deviceBlockId, er := rexec.ExecSshAndReturnLastLine( + p.GetCtx().Project.SshConfig, + p.GetCtx().Project.Instances[iNickname].BestIpAddress(), + fmt.Sprintf("%s\ninit_volume_attachment %s %s %d '%s'", + cldaws.InitVolumeAttachmentFunc, + awsFinalDeviceNameNitro(suggestedDevice), // AWS final device here + volDef.MountPoint, + volDef.Permissions, + volDef.Owner)) + lb.Add(er.ToString()) + if er.Error != nil { + return lb.Complete(fmt.Errorf("cannot mount volume %s to instance %s: %s", volNickname, iNickname, er.Error.Error())) + } + + if deviceBlockId == "" || strings.HasPrefix(deviceBlockId, "Error") { + return lb.Complete(fmt.Errorf("cannot mount volume %s to instance %s, returned blockDeviceId is: %s", volNickname, iNickname, deviceBlockId)) + } + + return lb.Complete(nil) +} + +func (p *AwsDeployProvider) DetachVolume(iNickname string, volNickname string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + volDef :=
p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] + + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + if err != nil { + return lb.Complete(err) + } + + if foundVolIdByName == "" { + lb.Add(fmt.Sprintf("volume %s not found, nothing to detach", volDef.Name)) + return lb.Complete(nil) + } + + foundDevice, _, err := cldaws.GetVolumeAttachedDeviceById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName) + if err != nil { + return lb.Complete(err) + } + + if foundDevice == "" { + lb.Add(fmt.Sprintf("volume %s not mounted, nothing to detach", volDef.Name)) + return lb.Complete(nil) + } + + // Unmount + + er := rexec.ExecSsh( + p.GetCtx().Project.SshConfig, + p.GetCtx().Project.Instances[iNickname].BestIpAddress(), + fmt.Sprintf("sudo umount -d %s", volDef.MountPoint), map[string]string{}) + lb.Add(er.ToString()) + if er.Error != nil { + return lb.Complete(fmt.Errorf("cannot umount volume %s on instance %s: %s", volNickname, iNickname, er.Error.Error())) + } + + foundInstanceIdByName, _, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].InstName) + if err != nil { + return lb.Complete(err) + } + + // Detach + + return lb.Complete(cldaws.DetachVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName, foundInstanceIdByName, foundDevice, p.GetCtx().Project.Timeouts.DetachVolume)) +} + +func (p *AwsDeployProvider) DeleteVolume(iNickname string, volNickname string) (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + + volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + if err != nil { + return lb.Complete(err) + } + + if foundVolIdByName == "" { + lb.Add(fmt.Sprintf("volume %s not found, nothing to delete", volDef.Name)) + return lb.Complete(nil) + } + + return lb.Complete(cldaws.DeleteVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName)) +} diff --git a/pkg/provider/deploy_provider.go b/pkg/provider/deploy_provider.go new file mode 100644 index 0000000..90a2be0 --- /dev/null +++ b/pkg/provider/deploy_provider.go @@ -0,0 +1,117 @@ +package provider + +import ( + "context" + "fmt" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" +) + +type AwsCtx struct { + Config aws.Config + Ec2Client *ec2.Client + TaggingClient *resourcegroupstaggingapi.Client +} + +const TagCapiDeploy string = "CapiDeploy" + +type DeployCtx struct { + //PrjPair *prj.ProjectPair + Project *prj.Project + GoCtx context.Context + IsVerbose bool + Aws *AwsCtx + Tags map[string]string +} +type DeployProvider interface { + GetCtx() *DeployCtx + ListDeploymentResources() (l.LogMsg, error) + CreateFloatingIps() (l.LogMsg, error) + DeleteFloatingIps() (l.LogMsg, error) + CreateSecurityGroups() (l.LogMsg, error) + DeleteSecurityGroups() (l.LogMsg, error) + CreateNetworking() (l.LogMsg, error) + DeleteNetworking() (l.LogMsg, error) + HarvestInstanceTypesByFlavorNames(flavorMap map[string]string) (l.LogMsg, error) + HarvestImageIds(imageMap map[string]bool) 
(l.LogMsg, error) + VerifyKeypairs(keypairMap map[string]struct{}) (l.LogMsg, error) + CreateInstanceAndWaitForCompletion(iNickname string, flavorId string, imageId string) (l.LogMsg, error) + DeleteInstance(iNickname string, ignoreAttachedVolumes bool) (l.LogMsg, error) + CreateSnapshotImage(iNickname string) (l.LogMsg, error) + CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname string, flavorId string) (l.LogMsg, error) + DeleteSnapshotImage(iNickname string) (l.LogMsg, error) + CreateVolume(iNickname string, volNickname string) (l.LogMsg, error) + AttachVolume(iNickname string, volNickname string) (l.LogMsg, error) + DetachVolume(iNickname string, volNickname string) (l.LogMsg, error) + DeleteVolume(iNickname string, volNickname string) (l.LogMsg, error) + PopulateInstanceExternalAddressByName() (l.LogMsg, error) +} + +type AwsDeployProvider struct { + Ctx *DeployCtx +} + +func (p *AwsDeployProvider) GetCtx() *DeployCtx { + return p.Ctx +} + +func DeployProviderFactory(project *prj.Project, goCtx context.Context, isVerbose bool) (DeployProvider, error) { + if project.DeployProviderName == prj.DeployProviderAws { + cfg, err := config.LoadDefaultConfig(goCtx) + if err != nil { + return nil, err + } + + return &AwsDeployProvider{ + Ctx: &DeployCtx{ + Project: project, + GoCtx: goCtx, + IsVerbose: isVerbose, + Tags: map[string]string{TagCapiDeploy: project.DeploymentName}, + Aws: &AwsCtx{ + Ec2Client: ec2.NewFromConfig(cfg), + TaggingClient: resourcegroupstaggingapi.NewFromConfig(cfg), + }, + }, + }, nil + } + return nil, fmt.Errorf("unsupported deploy provider %s", project.DeployProviderName) +} + +func reportPublicIp(prj *prj.Project) { + fmt.Printf(` +Public IP reserved, now you can use it for SSH jumphost in your ~/.ssh/config: + +Host %s + User %s + StrictHostKeyChecking=no + UserKnownHostsFile=/dev/null + IdentityFile %s + +Also, you may find it convenient to use in your commands: + +export BASTION_IP=%s + +`, + prj.SshConfig.BastionExternalIp, + prj.SshConfig.User, + prj.SshConfig.PrivateKeyPath, + prj.SshConfig.BastionExternalIp) +} + +func (p *AwsDeployProvider) ListDeploymentResources() (l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + resources, err := cldaws.GetResourcesByTag(p.GetCtx().Aws.TaggingClient, p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Aws.Config.Region, TagCapiDeploy, p.Ctx.Project.DeploymentName) + if err != nil { + return lb.Complete(err) + } + fmt.Printf("%s\n", strings.Join(resources, "\n")) + return lb.Complete(nil) +} diff --git a/pkg/rexec/embedded.go b/pkg/rexec/embedded.go new file mode 100755 index 0000000..cb602c6 --- /dev/null +++ b/pkg/rexec/embedded.go @@ -0,0 +1,52 @@ +package rexec + +import ( + "embed" + "fmt" + "io/fs" + + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +//go:embed scripts/* +var embeddedScriptsFs embed.FS + +func ExecEmbeddedScriptsOnInstance(sshConfig *SshConfigDef, ipAddress string, embeddedScriptPaths []string, envVars map[string]string, isVerbose bool) (l.LogMsg, error) { + lb := l.NewLogBuilder(fmt.Sprintf("ExecEmbeddedScriptsOnInstance: %s on %s", embeddedScriptPaths, ipAddress), isVerbose) + + if len(embeddedScriptPaths) == 0 { + lb.Add(fmt.Sprintf("no commands to execute on %s", ipAddress)) + return lb.Complete(nil) + } + for _, embeddedScriptPath := range embeddedScriptPaths { + if err := execEmbeddedScriptOnInstance(sshConfig, lb, ipAddress, embeddedScriptPath, []string{}, envVars, isVerbose); err != nil { + return lb.Complete(err) + } + } + return 
lb.Complete(nil) +} + +func HarvestAllEmbeddedFilesPaths(curDirPath string, harvestedPathsMap map[string]bool) error { + return fs.WalkDir(embeddedScriptsFs, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() { + harvestedPathsMap[path] = false + } + return nil + }) +} + +func execEmbeddedScriptOnInstance(sshConfig *SshConfigDef, lb *l.LogBuilder, ipAddress string, embeddedScriptPath string, params []string, envVars map[string]string, isVerbose bool) error { + cmdBytes, err := embeddedScriptsFs.ReadFile(embeddedScriptPath) + if err != nil { + return err + } + er := ExecSsh(sshConfig, ipAddress, string(cmdBytes), envVars) + lb.Add(er.ToString()) + if er.Error != nil { + return fmt.Errorf("cannot execute script %s on %s: %s", embeddedScriptPath, ipAddress, er.Error.Error()) + } + return nil +} diff --git a/pkg/rexec/exec_ssh.go b/pkg/rexec/exec_ssh.go new file mode 100755 index 0000000..4a79a21 --- /dev/null +++ b/pkg/rexec/exec_ssh.go @@ -0,0 +1,207 @@ +package rexec + +import ( + "bytes" + "fmt" + "net" + "strings" + "time" + + "github.com/capillariesio/capillaries-deploy/pkg/l" + "golang.org/x/crypto/ssh" +) + +type ExecResult struct { + Cmd string + Stdout string + Stderr string + Elapsed float64 + Error error +} + +func (er *ExecResult) ToString() string { + var errString string + if er.Error != nil { + errString = er.Error.Error() + } + return fmt.Sprintf(` +----------------------- +cmd: +%s +stdout: +%s +stderr: +%s +error: +%s +remote cmd elapsed:%0.3f +----------------------- +`, er.Cmd, er.Stdout, er.Stderr, errString, er.Elapsed) +} + +type SshConfigDef struct { + BastionExternalIpAddressName string `json:"bastion_external_ip_address_name"` + BastionExternalIp string `json:"bastion_external_ip_address"` // Output only + Port int `json:"port"` + User string `json:"user"` + PrivateKeyPath string `json:"private_key_path"` +} + +type TunneledSshClient struct { + ProxySshClient *ssh.Client + TunneledTcpConn net.Conn + TunneledSshConn ssh.Conn + SshClient *ssh.Client +} + +func (tsc *TunneledSshClient) Close() { + if tsc.SshClient != nil { + tsc.SshClient.Close() + } + if tsc.TunneledSshConn != nil { + tsc.TunneledSshConn.Close() + } + if tsc.TunneledTcpConn != nil { + tsc.TunneledTcpConn.Close() + } + if tsc.ProxySshClient != nil { + tsc.ProxySshClient.Close() + } +} + +// Our jumphost implementation +func NewTunneledSshClient(sshConfig *SshConfigDef, ipAddress string) (*TunneledSshClient, error) { + bastionSshClientConfig, err := NewSshClientConfig( + sshConfig.User, + sshConfig.PrivateKeyPath) + if err != nil { + return nil, err + } + + bastionUrl := fmt.Sprintf("%s:%d", sshConfig.BastionExternalIp, sshConfig.Port) + + tsc := TunneledSshClient{} + + if ipAddress == sshConfig.BastionExternalIp { + // Go directly to bastion + tsc.SshClient, err = ssh.Dial("tcp", bastionUrl, bastionSshClientConfig) + if err != nil { + return nil, fmt.Errorf("dial direct to bastion %s failed: %s", bastionUrl, err.Error()) + } + } else { + // Dial twice + tsc.ProxySshClient, err = ssh.Dial("tcp", bastionUrl, bastionSshClientConfig) + if err != nil { + return nil, fmt.Errorf("dial to bastion proxy %s failed: %s", bastionUrl, err.Error()) + } + + internalUrl := fmt.Sprintf("%s:%d", ipAddress, sshConfig.Port) + + tsc.TunneledTcpConn, err = tsc.ProxySshClient.Dial("tcp", internalUrl) + if err != nil { + return nil, fmt.Errorf("dial to internal URL %s failed: %s", internalUrl, err.Error()) + } + + tunneledSshClientConfig, err := NewSshClientConfig( + 
sshConfig.User, + sshConfig.PrivateKeyPath) + if err != nil { + return nil, err + } + var chans <-chan ssh.NewChannel + var reqs <-chan *ssh.Request + tsc.TunneledSshConn, chans, reqs, err = ssh.NewClientConn(tsc.TunneledTcpConn, internalUrl, tunneledSshClientConfig) + if err != nil { + return nil, fmt.Errorf("cannot establish ssh connection via TCP tunnel to internal URL %s: %s", internalUrl, err.Error()) + } + + tsc.SshClient = ssh.NewClient(tsc.TunneledSshConn, chans, reqs) + } + + return &tsc, nil +} + +func ExecSsh(sshConfig *SshConfigDef, ipAddress string, cmd string, envVars map[string]string) ExecResult { + cmdBuilder := strings.Builder{} + for k, v := range envVars { + if strings.Contains(v, " ") { + cmdBuilder.WriteString(fmt.Sprintf("%s='%s'\n", k, v)) + } else { + cmdBuilder.WriteString(fmt.Sprintf("%s=%s\n", k, v)) + } + } + cmdBuilder.WriteString(cmd) + + tsc, err := NewTunneledSshClient(sshConfig, ipAddress) + if err != nil { + return ExecResult{cmdBuilder.String(), "", "", 0, err} + } + defer tsc.Close() + + session, err := tsc.SshClient.NewSession() + if err != nil { + return ExecResult{cmdBuilder.String(), "", "", 0, fmt.Errorf("cannot create session for %s: %s", ipAddress, err.Error())} + } + defer session.Close() + + var stdout, stderr bytes.Buffer + session.Stdout = &stdout + session.Stderr = &stderr + + // TODO: it would be nice to have an execution timeout + + runStartTime := time.Now() + err = session.Run(cmdBuilder.String()) + elapsed := time.Since(runStartTime).Seconds() + if err == nil { + if len(stderr.String()) > 0 { + err = fmt.Errorf("%s", stderr.String()) + } + } else { + if len(stderr.String()) > 0 { + // Add first string of stderr to the error + s := strings.Split(stderr.String(), "\n") + err = fmt.Errorf("%s;%s", err.Error(), s[0]) + } + } + + er := ExecResult{cmd, stdout.String(), stderr.String(), elapsed, err} + return er +} + +func ExecCommandOnInstance(sshConfig *SshConfigDef, ipAddress string, cmd string, isVerbose bool) (l.LogMsg, error) { + lb := l.NewLogBuilder(fmt.Sprintf("ExecCommandOnInstance: %s - %s", ipAddress, cmd), isVerbose) + er := ExecSsh(sshConfig, ipAddress, cmd, map[string]string{}) + lb.Add(er.ToString()) + if er.Error != nil { + return lb.Complete(er.Error) + } + return lb.Complete(nil) +} + +// Used for file transfer +func ExecSshForClient(sshClient *ssh.Client, cmd string) (string, string, error) { + session, err := sshClient.NewSession() + if err != nil { + return "", "", fmt.Errorf("cannot create session for %s: %s", sshClient.RemoteAddr(), err.Error()) + } + defer session.Close() + + var stdout, stderr bytes.Buffer + session.Stdout = &stdout + session.Stderr = &stderr + if err := session.Run(cmd); err != nil { + return stdout.String(), stderr.String(), fmt.Errorf("cannot execute '%s' at %s: %s (stderr: %s)", cmd, sshClient.RemoteAddr(), err.Error(), stderr.String()) + } + return stdout.String(), stderr.String(), nil +} + +// Used on volume attachment +func ExecSshAndReturnLastLine(sshConfig *SshConfigDef, ipAddress string, cmd string) (string, ExecResult) { + er := ExecSsh(sshConfig, ipAddress, cmd, map[string]string{}) + if er.Error != nil { + return "", er + } + lines := strings.Split(strings.Trim(er.Stdout, "\n "), "\n") + return strings.TrimSpace(lines[len(lines)-1]), er +} diff --git a/pkg/rexec/scripts/ca/install.sh b/pkg/rexec/scripts/ca/install.sh new file mode 100644 index 0000000..5c73739 --- /dev/null +++ b/pkg/rexec/scripts/ca/install.sh @@ -0,0 +1,22 @@ +if [ "$SSH_USER" = "" ]; then + echo Error, missing:
SSH_USER=ubuntu + exit 1 +fi + +if [ "$CAPILLARIES_RELEASE_URL" = "" ]; then + echo Error, missing: CAPILLARIES_RELEASE_URL=https://capillaries-release.s3.us-east-1.amazonaws.com/latest + exit 1 +fi + +rm -fR /home/$SSH_USER/ca +mkdir -p /home/$SSH_USER/ca +cd /home/$SSH_USER/ca +curl -LOs $CAPILLARIES_RELEASE_URL/ca/ca.tgz +if [ "$?" -ne "0" ]; then + echo "Cannot download ca from $CAPILLARIES_RELEASE_URL/ca/ca.tgz to /home/$SSH_USER/ca" + exit $? +fi + +tar xvzf ca.tgz +sudo chmod 644 * +rm ca.tgz diff --git a/pkg/rexec/scripts/cassandra/config.sh b/pkg/rexec/scripts/cassandra/config.sh new file mode 100755 index 0000000..34f38f8 --- /dev/null +++ b/pkg/rexec/scripts/cassandra/config.sh @@ -0,0 +1,137 @@ +# https://www.jamescoyle.net/how-to/2448-create-a-simple-cassandra-cluster-with-3-nodes +# https://www.digitalocean.com/community/tutorials/how-to-install-cassandra-and-run-a-single-node-cluster-on-ubuntu-22-04 +# https://youkudbhelper.wordpress.com/2020/05/17/cassandradaemon-java731-cannot-start-node-if-snitchs-data-center-dc1-differs-from-previous-data-center-datacenter1/ +# https://stackoverflow.com/questions/38961502/cannot-start-cassandra-snitchs-datacenter-differs-from-previous + +if [ "$CASSANDRA_SEEDS" = "" ]; then + echo Error, missing: export CASSANDRA_SEEDS=10.5.0.11,10.5.0.12,10.5.0.13,10.5.0.14 + exit 1 +fi +if [ "$CASSANDRA_IP" = "" ]; then + echo Error, missing: export CASSANDRA_IP=10.5.0.11 + exit 1 +fi + +if [ "$NVME_REGEX" = "" ]; then + echo Error, missing: export NVME_REGEX="nvme[0-9]n[0-9] 558.8G" + exit 1 +fi + +if [[ "$NVME_REGEX" != nvme* ]]; then + echo Error, NVME_REGEX has unexpected format $NVME_REGEX + exit 1 +fi + +if [ "$(sudo systemctl status cassandra | grep running)" != "" ]; then + >&2 echo Cassandra is running, stop it before configuring it + exit 1 +fi + +sudo sed -i -e "s~seeds:[\: \"a-zA-Z0-9\.,]*~seeds: $CASSANDRA_SEEDS~g" /etc/cassandra/cassandra.yaml +sudo sed -i -e "s~listen_address:[\: \"a-zA-Z0-9\.]*~listen_address: $CASSANDRA_IP~g" /etc/cassandra/cassandra.yaml +sudo sed -i -e "s~rpc_address:[\: \"a-zA-Z0-9\.]*~rpc_address: $CASSANDRA_IP~g" /etc/cassandra/cassandra.yaml +sudo sed -i -e "s~endpoint_snitch:[\: \"a-zA-Z0-9\.]*~endpoint_snitch: SimpleSnitch~g" /etc/cassandra/cassandra.yaml +#sudo sed -i -e "s~prepared_statements_cache_size:[ a-zA-Z0-9]*~prepared_statements_cache_size: 500MiB~g" /etc/cassandra/cassandra.yaml + +# Data on attached volume. Comment out to store data on the ephemeral instance volume at /var/lib/cassandra/data. +#sudo sed -i -e "s~- /var/lib/cassandra/data~- /data/d~g" /etc/cassandra/cassandra.yaml +#sudo sed -i -e "s~- /var/lib/cassandra/data~- /mnt/ramdisk/data~g" /etc/cassandra/cassandra.yaml +sudo sed -i -e "s~- /var/lib/cassandra/data~~g" /etc/cassandra/cassandra.yaml +# One disk or two disks (Cassandra instances can have one ore two nvme drives) +if [ -d "/data1" ]; then + sudo sed -i -e "s~data_file_directories:[^\n]*~data_file_directories: [ /data0/d, /data1/d ]~g" /etc/cassandra/cassandra.yaml +else + sudo sed -i -e "s~data_file_directories:[^\n]*~data_file_directories: [ /data0/d ]~g" /etc/cassandra/cassandra.yaml +fi + +# Commitlog on attached volume. Comment out to store commitlog on the ephemeral instance volume at /var/lib/cassandra/commitlog. 
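+# Optional sanity check, for illustration, once the directory seds above and below have run
+# (expected output shown for a two-NVMe flavor):
+#   grep -E "commitlog_directory|data_file_directories" /etc/cassandra/cassandra.yaml
+#   data_file_directories: [ /data0/d, /data1/d ]
+#   commitlog_directory: /data0/c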
+#sudo sed -i -e "s~/var/lib/cassandra/commitlog~/data/c~g" /etc/cassandra/cassandra.yaml
+#sudo sed -i -e "s~/var/lib/cassandra/commitlog~/mnt/ramdisk/commitlog~g" /etc/cassandra/cassandra.yaml
+#sudo sed -i -e "s~/var/lib/cassandra/commitlog~~g" /etc/cassandra/cassandra.yaml
+sudo sed -i -e "s~commitlog_directory:[^\n]*~commitlog_directory: /data0/c~g" /etc/cassandra/cassandra.yaml
+
+# Minimal number of vnodes, we do not need elasticity
+sudo sed -i -e "s~num_tokens:[ 0-9]*~num_tokens: 1~g" /etc/cassandra/cassandra.yaml
+
+# No redundancy
+sudo sed -i -e "s~allocate_tokens_for_local_replication_factor: [ 0-9]*~allocate_tokens_for_local_replication_factor: 1~g" /etc/cassandra/cassandra.yaml
+
+# If provided, use initial token list to decrease cluster starting time
+if [ "$INITIAL_TOKEN" != "" ]; then
+  sudo sed -i -e "s~[ #]*initial_token:[^\n]*~initial_token: $INITIAL_TOKEN~g" /etc/cassandra/cassandra.yaml
+fi
+
+# In test env, give the Cassandra coordinator enough time to complete the write (cassandra.yaml write_request_timeout_in_ms)
+# so there is no doubt that the coordinator is the bottleneck,
+# and make sure the client timeout is greater than (not equal to) that to avoid gocql error "no response received from cassandra within timeout period".
+# In prod environments, increasing write_request_timeout_in_ms and the corresponding client timeout is not a solution.
+sudo sed -i -e "s~write_request_timeout_in_ms:[ ]*[0-9]*~write_request_timeout_in_ms: 10000~g" /etc/cassandra/cassandra.yaml
+
+# Experimenting with key cache size
+# Default is min(5% of the heap, 100MB), make it bigger (does not help)
+# sudo sed -i -e "s~key_cache_size_in_mb:[ 0-9]*~key_cache_size_in_mb: 1000~g" /etc/cassandra/cassandra.yaml
+# Do not store keys longer than 120s (does not help)
+#sudo sed -i -e "s~key_cache_save_period:[ 0-9]*~key_cache_save_period: 120~g" /etc/cassandra/cassandra.yaml
+
+sudo rm -fR /var/lib/cassandra/data/*
+sudo rm -fR /var/lib/cassandra/commitlog/*
+if [ -d "/data0" ]; then
+  sudo rm -fR /data0/*
+fi
+if [ -d "/data1" ]; then
+  sudo rm -fR /data1/*
+fi
+sudo rm -fR /var/lib/cassandra/saved_caches/*
+
+# To avoid "Cannot start node if snitch's data center (dc1) differs from previous data center (datacenter1)"
+# error, keep using dc and rack variables as they are (dc1,rack1) in /etc/cassandra/cassandra-rackdc.properties
+# but ignore the dc - it's a testing env
+echo 'JVM_OPTS="$JVM_OPTS -Dcassandra.ignore_dc=true"' | sudo tee -a /etc/cassandra/cassandra-env.sh
+
+# We do not need this config file, delete it
+sudo rm -f /etc/cassandra/cassandra-topology.properties
+
+# No need to logrotate, Cassandra uses logback, configure it conservatively
+sudo sed -i -e "s~<maxFileSize>[^<]*</maxFileSize>~<maxFileSize>10MB</maxFileSize>~g" /etc/cassandra/logback.xml
+sudo sed -i -e "s~<totalSizeCap>[^<]*</totalSizeCap>~<totalSizeCap>1GB</totalSizeCap>~g" /etc/cassandra/logback.xml
+
+mount_device(){
+  local mount_dir="/data"$1
+  local device_name=$2
+  echo Mounting $device_name at $mount_dir
+  if [ "$(lsblk -f | grep -E $device_name'[ ]+xfs')" == "" ]; then
+    echo Formatting partition
+    sudo mkfs -t xfs /dev/$device_name
+  else
+    echo Partition already formatted
+  fi
+  if [ ! -d "$mount_dir" ]; then
+    echo Creating $mount_dir
+    sudo mkdir $mount_dir
+  else
+    echo $mount_dir already created
+  fi
+  if [ "$(lsblk -f | grep $mount_dir)" == "" ]; then
+    echo Mounting...
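+    # At this point the device is formatted (xfs) and the mount dir exists, but nothing is mounted on it yet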
+    sudo mount /dev/$device_name $mount_dir
+  else
+    echo Already mounted
+  fi
+  sudo chown cassandra $mount_dir
+  sudo chmod 777 $mount_dir;
+}
+
+# "nvme[0-9]n[0-9] 558.8G"
+# "loop[0-9] [0-9.]+M"
+device_number=0
+lsblk | awk '{print $1,$4}' | grep -E "$NVME_REGEX" | awk '{print $1}' |
+while read -r device_name; do
+  mount_device $device_number $device_name
+  device_number=$((device_number+1))
+done
+
+sudo systemctl start cassandra
+if [ "$?" -ne "0" ]; then
+  echo Cannot start cassandra, exiting
+  exit 1
+fi
diff --git a/pkg/rexec/scripts/cassandra/install.sh b/pkg/rexec/scripts/cassandra/install.sh
new file mode 100755
index 0000000..1b4095e
--- /dev/null
+++ b/pkg/rexec/scripts/cassandra/install.sh
@@ -0,0 +1,143 @@
+if [ "$JMX_EXPORTER_VERSION" = "" ]; then
+  echo Error, missing: JMX_EXPORTER_VERSION=0.20.0
+  exit 1
+fi
+
+echo "deb https://debian.cassandra.apache.org 41x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list
+# apt-key is deprecated but still works, just silence it
+curl -s https://downloads.apache.org/cassandra/KEYS | sudo apt-key add - 2>/dev/null
+
+# To avoid "Key is stored in legacy trusted.gpg keyring" in stderr
+cd /etc/apt
+sudo cp trusted.gpg trusted.gpg.d
+cd ~
+
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
+
+#iostat
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y sysstat
+
+# Cassandra requires Java 8
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jdk openjdk-8-jre
+
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y cassandra
+
+sudo systemctl status cassandra
+if [ "$?" -ne "0" ]; then
+  echo Bad cassandra service status, exiting
+  exit 1
+fi
+
+# JMX Exporter
+curl -LOs https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/$JMX_EXPORTER_VERSION/jmx_prometheus_javaagent-$JMX_EXPORTER_VERSION.jar
+if [ "$?" -ne "0" ]; then
+  echo Cannot download JMX exporter, exiting
+  exit 1
+fi
+sudo mv jmx_prometheus_javaagent-$JMX_EXPORTER_VERSION.jar /usr/share/cassandra/lib/
+sudo chown cassandra /usr/share/cassandra/lib/jmx_prometheus_javaagent-$JMX_EXPORTER_VERSION.jar
+
+# JMX Exporter config
+cat > jmx_exporter.yml << 'endmsgmarker'
+lowercaseOutputLabelNames: true
+lowercaseOutputName: true
+whitelistObjectNames: ["org.apache.cassandra.metrics:*"]
+# ColumnFamily is an alias for Table metrics
+blacklistObjectNames: ["org.apache.cassandra.metrics:type=ColumnFamily,*"]
+rules:
+# Generic gauges with 0-2 labels
+- pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=(\S*)><>Value
+  name: cassandra_$1_$5
+  type: GAUGE
+  labels:
+    "$1": "$4"
+    "$2": "$3"
+
+#
+# Emulate Prometheus 'Summary' metrics for the exported 'Histogram's.
+#
+# TotalLatency is the sum of all latencies since server start
+#
+- pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=(.+)?(?:Total)(Latency)><>Count
+  name: cassandra_$1_$5$6_seconds_sum
+  type: UNTYPED
+  labels:
+    "$1": "$4"
+    "$2": "$3"
+  # Convert microseconds to seconds
+  valueFactor: 0.000001
+
+- pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=((?:.+)?(?:Latency))><>Count
+  name: cassandra_$1_$5_seconds_count
+  type: UNTYPED
+  labels:
+    "$1": "$4"
+    "$2": "$3"
+
+- pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=(.+)><>Count
+  name: cassandra_$1_$5_count
+  type: UNTYPED
+  labels:
+    "$1": "$4"
+    "$2": "$3"
+
+- pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=((?:.+)?(?:Latency))><>(\d+)thPercentile
+  name: cassandra_$1_$5_seconds
+  type: GAUGE
+  labels:
+    "$1": "$4"
+    "$2": "$3"
+    quantile: "0.$6"
+  # Convert microseconds to seconds
+  valueFactor: 0.000001
+
+- pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=(.+)><>(\d+)thPercentile
+  name: cassandra_$1_$5
+  type: GAUGE
+  labels:
+    "$1": "$4"
+    "$2": "$3"
+    quantile: "0.$6"
+endmsgmarker
+sudo mv jmx_exporter.yml /etc/cassandra/
+sudo chown cassandra /etc/cassandra/jmx_exporter.yml
+
+# Let Cassandra know about JMX Exporter and config
+echo 'JVM_OPTS="$JVM_OPTS -javaagent:/usr/share/cassandra/lib/jmx_prometheus_javaagent-'$JMX_EXPORTER_VERSION'.jar=7070:/etc/cassandra/jmx_exporter.yml"' | sudo tee -a /etc/cassandra/cassandra-env.sh
+
+# For now stop it. We will reconfigure it anyway
+sudo systemctl stop cassandra
+
+
+# RAM disk size in GB
+# export RAM_DISK_SIZE=$(awk '/MemFree/ { printf "%.0f\n", $2/1024/2 }' /proc/meminfo)
+# echo $RAM_DISK_SIZE
+# sudo mkdir /mnt/ramdisk
+# sudo chmod 777 /mnt/ramdisk
+# sudo mount -t tmpfs -o size="$RAM_DISK_SIZE"m myramdisk /mnt/ramdisk
+# if [ "$?" -ne "0" ]; then
+#   echo Cannot mount ramdisk, exiting
+#   exit $?
+# fi
+
+# It requires cassandra user, so do not run it before you install Cassandra
+# This will mount NVME devices to /data0, /data1, etc
+# mount_device(){
+#   mount_dir="/data"$1
+#   device_name=$2
+#   sudo mkfs -t xfs /dev/$device_name
+#   sudo mkdir $mount_dir
+#   sudo mount /dev/$device_name $mount_dir
+#   sudo chown cassandra $mount_dir
+#   sudo chmod 777 $mount_dir;
+# }
+
+# # "nvme[0-9]n[0-9] 558.8G"
+# # "loop[0-9] [0-9.]+M"
+# device_number=0
+# lsblk | awk '{print $1,$4}' | grep -E "$NVME_REGEX" | awk '{print $1}' |
+# while read -r device_name; do
+#   mount_device $device_number $device_name
+#   device_number=$((device_number+1))
+# done
+
+# echo $device_number disks matching: $NVME_REGEX
diff --git a/pkg/rexec/scripts/cassandra/start.sh b/pkg/rexec/scripts/cassandra/start.sh
new file mode 100755
index 0000000..657f803
--- /dev/null
+++ b/pkg/rexec/scripts/cassandra/start.sh
@@ -0,0 +1 @@
+sudo systemctl start cassandra
\ No newline at end of file
diff --git a/pkg/rexec/scripts/cassandra/stop.sh b/pkg/rexec/scripts/cassandra/stop.sh
new file mode 100755
index 0000000..f6e4896
--- /dev/null
+++ b/pkg/rexec/scripts/cassandra/stop.sh
@@ -0,0 +1 @@
+sudo systemctl stop cassandra
\ No newline at end of file
diff --git a/pkg/rexec/scripts/common/iam_aws_credentials.sh b/pkg/rexec/scripts/common/iam_aws_credentials.sh
new file mode 100644
index 0000000..bdf7bd5
--- /dev/null
+++ b/pkg/rexec/scripts/common/iam_aws_credentials.sh
@@ -0,0 +1,31 @@
+if [ "$SSH_USER" = "" ]; then
+  echo Error, missing: SSH_USER=ubuntu
+  exit 1
+fi
+
+# Not used, see associated instance profiles
+# if [ "$S3_IAM_USER_AWS_ACCESS_KEY_ID" = "" ]; then
+#   echo Error, missing: S3_IAM_USER_AWS_ACCESS_KEY_ID=AK...
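+# (Access keys are not needed here: the bastion and daemon instances get S3 access
+# from the associated_instance_profile configured in sample.jsonnet, served via
+# instance metadata, so only region/output config is written below.)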
+# exit 1 +# fi +# if [ "$S3_IAM_USER_AWS_SECRET_ACCESS_KEY" = "" ]; then +# echo Error, missing: S3_IAM_USER_AWS_SECRET_ACCESS_KEY=... +# exit 1 +# fi + +if [ "$S3_AWS_DEFAULT_REGION" = "" ]; then + echo Error, missing: S3_AWS_DEFAULT_REGION=us-east-1 + exit 1 +fi + +# Credentials and config for S3 access only +rm -fR /home/$SSH_USER/.aws +mkdir -p /home/$SSH_USER/.aws + +# sudo echo "[default]" > /home/$SSH_USER/.aws/credentials +# sudo echo "aws_access_key_id=$S3_IAM_USER_AWS_ACCESS_KEY_ID" >> /home/$SSH_USER/.aws/credentials +# sudo echo "aws_secret_access_key=$S3_IAM_USER_AWS_SECRET_ACCESS_KEY" >> /home/$SSH_USER/.aws/credentials + +sudo echo "[default]" > /home/$SSH_USER/.aws/config +sudo echo "region=$S3_AWS_DEFAULT_REGION" >> /home/$SSH_USER/.aws/config +sudo echo "output=json" >> /home/$SSH_USER/.aws/config diff --git a/pkg/rexec/scripts/common/increase_ssh_connection_limit.sh b/pkg/rexec/scripts/common/increase_ssh_connection_limit.sh new file mode 100755 index 0000000..2ca1bba --- /dev/null +++ b/pkg/rexec/scripts/common/increase_ssh_connection_limit.sh @@ -0,0 +1,19 @@ +# Default ssh connection limit from one client is 10, increase it +sudo sed -i -e "s~[# ]*MaxStartups[ ]*[0-9:]*~MaxStartups 10000~g" /etc/ssh/sshd_config +sudo sed -i -e "s~[# ]*MaxSessions[ ]*[0-9]*~MaxSessions 10000~g" /etc/ssh/sshd_config +sudo systemctl daemon-reload + +# The stuff below not required for Ubuntu 24.04, because: +# https://discourse.ubuntu.com/t/sshd-now-uses-socket-based-activation-ubuntu-22-10-and-later/30189 +# "In Ubuntu 24.04 LTS these settings are no longer migrated, but the port and address settings are pulled dynamically from sshd.conf via a systemd generator." +# Actually, it's beter not to mess with this piece of config at all. + +# Since kinetic, Ubuntu doesn't honour /etc/ssh/sshd_config +# (https://discourse.ubuntu.com/t/sshd-now-uses-socket-based-activation-ubuntu-22-10-and-later/30189/8) +# Since I can't find how to change MaxStartups for ssh.socket, let's roll back to ssh.service: +# Ignore stderr, this cmd has a habit of throwing "Synchronizing state of ssh.service with SysV service script with..." +#sudo systemctl enable --now ssh.service 2>/dev/null +# Ignore stderr, this cmd has a habit of throwing " Removed ..." +#sudo systemctl disable --now ssh.socket 2>/dev/null +# Now it's ok to reload (with ssh.socket we get "Unit sshd.service could not be found.") +#sudo systemctl reload sshd \ No newline at end of file diff --git a/pkg/rexec/scripts/common/replace_nameserver.sh b/pkg/rexec/scripts/common/replace_nameserver.sh new file mode 100755 index 0000000..a5fa3fd --- /dev/null +++ b/pkg/rexec/scripts/common/replace_nameserver.sh @@ -0,0 +1,16 @@ +# Disable it before changing DNS server, otherwise it may start updating +sudo systemctl stop unattended-upgrades + +# We are about to remove DNS server 127.0.0.53 that knows this host. 
Just save it in /etc/hosts +echo 127.0.0.1 $(hostname) | sudo tee -a /etc/hosts + +# Replace DNS server, default 127.0.0.53 knows nothing +sudo sed -i "s/nameserver[ ]*[0-9.]*/nameserver 8.8.8.8/" /etc/resolv.conf + +sudo resolvectl flush-caches + +sudo DEBIAN_FRONTEND=noninteractive apt-get -y update + +# Utilities for checking cloud performance, feel free to comment this out +# sudo DEBIAN_FRONTEND=noninteractive apt-get install -y iperf +# sudo DEBIAN_FRONTEND=noninteractive apt-get install -y sysbench \ No newline at end of file diff --git a/pkg/rexec/scripts/daemon/config.sh b/pkg/rexec/scripts/daemon/config.sh new file mode 100755 index 0000000..20d87d4 --- /dev/null +++ b/pkg/rexec/scripts/daemon/config.sh @@ -0,0 +1,72 @@ +# Make it as idempotent as possible, it can be called over and over + +if [ "$CASSANDRA_HOSTS" = "" ]; then + echo Error, missing: CASSANDRA_HOSTS='["10.5.0.11","10.5.0.12","10.5.0.13"]' + exit 1 +fi +if [ "$AMQP_URL" = "" ]; then + echo Error, missing: AMQP_URL=amqp://guest:guest@10.5.0.5/ + exit 1 +fi +if [ "$SSH_USER" = "" ]; then + echo Error, missing: SSH_USER=ubuntu + exit 1 +fi + +pkill -2 capidaemon +processid=$(pgrep capidaemon) +if [ "$processid" != "" ]; then + echo Trying pkill -9... + pkill -9 capidaemon 2> /dev/null + processid=$(pgrep capidaemon) + if [ "$processid" != "" ]; then + echo pkill -9 did not kill + exit 9 + fi +fi + +ENV_CONFIG_FILE=/home/$SSH_USER/bin/capidaemon.json + +sed -i -e 's~"url":[ ]*"[a-zA-Z0-9@\.:\/\-_$ ]*"~"url": "'"$AMQP_URL"'"~g' $ENV_CONFIG_FILE +sed -i -e 's~"hosts":[ ]*\[[0-9a-zA-Z\.\,\-_ "]*\]~"hosts": '$CASSANDRA_HOSTS"~g" $ENV_CONFIG_FILE +sed -i -e 's~"python_interpreter_path":[ ]*"[a-zA-Z0-9]*"~"python_interpreter_path": "python3"~g' $ENV_CONFIG_FILE +sed -i -e 's~"level":[ ]*"[a-zA-Z]*"~"level": "info"~g' $ENV_CONFIG_FILE + +#echo "Patching config to use ca at /home/"$SSH_USER"/ca" +#sed -i -e 's~"ca_path":[ ]*"[^\"]*"~"ca_path":"/home/'$SSH_USER'/ca"~g' $ENV_CONFIG_FILE +# If you want to use Ubuntu CA store: +#sed -i -e 's~"ca_path":[ ]*"[a-zA-Z0-9\.\/\-_]*"~"ca_path":"/usr/local/share/ca-certificates"~g' $ENV_CONFIG_FILE + + +# For our perf testing purposes, decrease latency at the expense of the message queue load +# sed -i -e 's~"dead_letter_ttl":[ ]*[0-9]*~"dead_letter_ttl": 100~g' $ENV_CONFIG_FILE + +# If you use your test Cassandra setup up to the limit, try to avoid "Operation timed out - received only 0 responses" +# Make replication factor at least 2 to make reads more available, 1 for faster writes +# https://stackoverflow.com/questions/38231621/cassandra-operation-timed-out +sed -i -e "s~\"keyspace_replication_config\":[ ]*\"[^\"]*\"~\"keyspace_replication_config\": \"{'class':'SimpleStrategy', 'replication_factor':1}\"~g" $ENV_CONFIG_FILE + +# In test env, give enough time to Cassandra coordinator to complete the write (cassandra.yaml write_request_timeout_in_ms) +# so there is no doubt that coordinator is the bottleneck, +# and make sure client time out is more (not equal) than that to avoid gocql error "no response received from cassandra within timeout period". +# In prod environments, increasing write_request_timeout_in_ms and corresponding client timeout is not a solution. 
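+# For illustration only - a hypothetical fragment of capidaemon.json after this script
+# has patched it (the key names are the ones patched by the seds in this script, the
+# surrounding JSON structure is assumed):
+#   "url": "amqp://guest:guest@10.5.0.5/",
+#   "hosts": ["10.5.0.11","10.5.0.12","10.5.0.13"],
+#   "timeout": 15000,
+#   "writer_workers": 4    (when DAEMON_DB_WRITERS=4)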
+sed -i -e "s~\"timeout\":[ ]*[0-9]*~\"timeout\": 15000~g" $ENV_CONFIG_FILE + +# Default number writer workers may be pretty aggressive, +# watch for "Operation timed out - received only 0 responses" on writes, throttle it down to 10 or lower if needed +if [ "$DAEMON_DB_WRITERS" != "" ]; then + sed -i -e "s~\"writer_workers\":[ 0-9]*~\"writer_workers\": $DAEMON_DB_WRITERS~g" $ENV_CONFIG_FILE +fi + +# Thread pool size - number of workers handling RabbitMQ messages - is about using daemon instance CPU resources +if [ "$DAEMON_THREAD_POOL_SIZE" != "" ]; then + sed -i -e "s~\"thread_pool_size\":[ ]*[0-9]*~\"thread_pool_size\": $DAEMON_THREAD_POOL_SIZE~g" $ENV_CONFIG_FILE +fi + +sudo rm -fR /var/log/capidaemon +sudo mkdir /var/log/capidaemon +sudo chmod 777 /var/log/capidaemon +sudo chmod 744 /home/$SSH_USER/bin/capidaemon + +/home/$SSH_USER/bin/capidaemon >> /var/log/capidaemon/capidaemon.log 2>&1 & + diff --git a/pkg/rexec/scripts/daemon/install.sh b/pkg/rexec/scripts/daemon/install.sh new file mode 100644 index 0000000..3878156 --- /dev/null +++ b/pkg/rexec/scripts/daemon/install.sh @@ -0,0 +1,38 @@ +# Add all used Python modules here +# No need to install venv or pip, just proceed with python3-xyz +sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get install -y python3-dateutil + +CAPI_BINARY=capidaemon + +if [ "$SSH_USER" = "" ]; then + echo Error, missing: SSH_USER=ubuntu + exit 1 +fi + +if [ "$CAPILLARIES_RELEASE_URL" = "" ]; then + echo Error, missing: CAPILLARIES_RELEASE_URL=https://capillaries-release.s3.us-east-1.amazonaws.com/latest + exit 1 +fi + +if [ "$OS_ARCH" = "" ]; then + echo Error, missing: $CAPIDEPLOY_OS_ARCH=linux/amd64 + exit 1 +fi + +if [ ! -d /home/$SSH_USER/bin ]; then + mkdir -p /home/$SSH_USER/bin +fi + +cd /home/$SSH_USER/bin +curl -LOs $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.gz +if [ "$?" -ne "0" ]; then + echo "Cannot download $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.gz to /home/$SSH_USER/bin" + exit $? +fi +curl -LOs $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.json +if [ "$?" -ne "0" ]; then + echo "Cannot download from $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.json to /home/$SSH_USER/bin" + exit $? +fi +gzip -d -f $CAPI_BINARY.gz +chmod 744 $CAPI_BINARY diff --git a/pkg/rexec/scripts/daemon/start.sh b/pkg/rexec/scripts/daemon/start.sh new file mode 100755 index 0000000..3e9d082 --- /dev/null +++ b/pkg/rexec/scripts/daemon/start.sh @@ -0,0 +1,9 @@ +if [ "$SSH_USER" = "" ]; then + echo Error, missing: SSH_USER=ubuntu + exit 1 +fi + +processid=$(pgrep capidaemon) +if [ "$processid" = "" ]; then + /home/$SSH_USER/bin/capidaemon >> /var/log/capidaemon/capidaemon.log 2>&1 & +fi \ No newline at end of file diff --git a/pkg/rexec/scripts/daemon/stop.sh b/pkg/rexec/scripts/daemon/stop.sh new file mode 100755 index 0000000..d3e2c24 --- /dev/null +++ b/pkg/rexec/scripts/daemon/stop.sh @@ -0,0 +1,11 @@ +pkill -2 capidaemon +processid=$(pgrep capidaemon) +if [ "$processid" != "" ]; then + echo Trying pkill -9... 
+ pkill -9 capidaemon 2> /dev/null + processid=$(pgrep capidaemon) + if [ "$processid" != "" ]; then + echo pkill -9 did not kill + exit 9 + fi +fi \ No newline at end of file diff --git a/pkg/rexec/scripts/logrotate/config_bastion.sh b/pkg/rexec/scripts/logrotate/config_bastion.sh new file mode 100755 index 0000000..bd30c15 --- /dev/null +++ b/pkg/rexec/scripts/logrotate/config_bastion.sh @@ -0,0 +1,32 @@ +# Make it as idempotent as possible, it can be called over and over + +# Logrotate +LOGROTATE_CONFIG_FILE=/etc/logrotate.d/capidaemon_logrotate.conf + +sudo rm -f $LOGROTATE_CONFIG_FILE +sudo tee $LOGROTATE_CONFIG_FILE < /mnt/capi_log + endscript +} +EOF + +sudo systemctl restart logrotate + +# Logrotate/Cron +# Make sure less /etc/cron.daily/logrotate has something like this (should be installed by logrotate installer): +# #!/bin/sh +# /usr/sbin/logrotate -s /var/lib/logrotate/logrotate.status /etc/logrotate.conf +# EXITVALUE=$? +# if [ $EXITVALUE != 0 ]; then +# /usr/bin/logger -t logrotate "ALERT exited abnormally with [$EXITVALUE]" +# fi +# exit 0 diff --git a/pkg/rexec/scripts/logrotate/config_capidaemon.sh b/pkg/rexec/scripts/logrotate/config_capidaemon.sh new file mode 100755 index 0000000..7de32df --- /dev/null +++ b/pkg/rexec/scripts/logrotate/config_capidaemon.sh @@ -0,0 +1,32 @@ +# Make it as idempotent as possible, it can be called over and over + +# Logrotate +LOGROTATE_CONFIG_FILE=/etc/logrotate.d/capidaemon_logrotate.conf + +sudo rm -f $LOGROTATE_CONFIG_FILE +sudo tee $LOGROTATE_CONFIG_FILE < /var/log/capidaemon + endscript +} +EOF + +sudo systemctl restart logrotate + +# Logrotate/Cron +# Make sure less /etc/cron.daily/logrotate has something like this (should be installed by logrotate installer): +# #!/bin/sh +# /usr/sbin/logrotate -s /var/lib/logrotate/logrotate.status /etc/logrotate.conf +# EXITVALUE=$? +# if [ $EXITVALUE != 0 ]; then +# /usr/bin/logger -t logrotate "ALERT exited abnormally with [$EXITVALUE]" +# fi +# exit 0 diff --git a/pkg/rexec/scripts/logrotate/start.sh b/pkg/rexec/scripts/logrotate/start.sh new file mode 100644 index 0000000..71b8050 --- /dev/null +++ b/pkg/rexec/scripts/logrotate/start.sh @@ -0,0 +1,2 @@ +sudo systemctl start logrotate +sudo systemctl start logrotate.timer \ No newline at end of file diff --git a/pkg/rexec/scripts/logrotate/stop.sh b/pkg/rexec/scripts/logrotate/stop.sh new file mode 100644 index 0000000..b0ce563 --- /dev/null +++ b/pkg/rexec/scripts/logrotate/stop.sh @@ -0,0 +1,2 @@ +sudo systemctl stop logrotate.timer +sudo systemctl stop logrotate \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/config_prometheus_reverse_proxy.sh b/pkg/rexec/scripts/nginx/config_prometheus_reverse_proxy.sh new file mode 100755 index 0000000..12fd1c7 --- /dev/null +++ b/pkg/rexec/scripts/nginx/config_prometheus_reverse_proxy.sh @@ -0,0 +1,36 @@ +# nginx reverse proxy +# https://www.digitalocean.com/community/tutorials/how-to-configure-nginx-as-a-reverse-proxy-on-ubuntu-22-04 + +if [ "$PROMETHEUS_IP" = "" ]; then + echo Error, missing: PROMETHEUS_IP=10.5.0.4 + exit 1 +fi + +PROMETHEUS_CONFIG_FILE=/etc/nginx/sites-available/prometheus +if [ -f "$PROMETHEUS_CONFIG_FILE" ]; then + sudo rm -f $PROMETHEUS_CONFIG_FILE +fi + +sudo tee $PROMETHEUS_CONFIG_FILE </dev/null +if [ "$?" -ne "0" ]; then + echo nginx config error, exiting + exit $? 
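+  # (For reference: the site definition streamed into $PROMETHEUS_CONFIG_FILE above is a
+  # standard nginx reverse-proxy server block; a minimal hypothetical sketch, not the
+  # verbatim original:
+  #   server {
+  #     listen 9090;
+  #     include /etc/nginx/includes/allowed_ips.conf;
+  #     location / { proxy_pass http://$PROMETHEUS_IP:9090; }
+  #   }
+  # )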
+fi + +sudo systemctl restart nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/config_rabbitmq_reverse_proxy.sh b/pkg/rexec/scripts/nginx/config_rabbitmq_reverse_proxy.sh new file mode 100755 index 0000000..9adcb23 --- /dev/null +++ b/pkg/rexec/scripts/nginx/config_rabbitmq_reverse_proxy.sh @@ -0,0 +1,32 @@ +# nginx reverse proxy +# https://www.digitalocean.com/community/tutorials/how-to-configure-nginx-as-a-reverse-proxy-on-ubuntu-22-04 + +if [ "$RABBITMQ_IP" = "" ]; then + echo Error, missing: RABBITMQ_IP=10.5.0.5 + exit 1 +fi + +RABBITMQ_CONFIG_FILE=/etc/nginx/sites-available/rabbitmq +sudo rm -f $RABBITMQ_CONFIG_FILE + +sudo tee $RABBITMQ_CONFIG_FILE </dev/null +if [ "$?" -ne "0" ]; then + echo nginx config error, exiting + exit $? +fi + +sudo systemctl restart nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/config_ui.sh b/pkg/rexec/scripts/nginx/config_ui.sh new file mode 100755 index 0000000..449f553 --- /dev/null +++ b/pkg/rexec/scripts/nginx/config_ui.sh @@ -0,0 +1,38 @@ +if [ "$SSH_USER" = "" ]; then + echo Error, missing: SSH_USER=ubuntu + exit 1 +fi + +UI_CONFIG_FILE=/etc/nginx/sites-available/ui +if [ -f "$UI_CONFIG_FILE" ]; then + sudo rm -f $UI_CONFIG_FILE +fi + +sudo tee $UI_CONFIG_FILE </dev/null +if [ "$?" -ne "0" ]; then + echo nginx config error, exiting + exit $? +fi + +sudo systemctl restart nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/config_webapi_reverse_proxy.sh b/pkg/rexec/scripts/nginx/config_webapi_reverse_proxy.sh new file mode 100755 index 0000000..95f2a25 --- /dev/null +++ b/pkg/rexec/scripts/nginx/config_webapi_reverse_proxy.sh @@ -0,0 +1,41 @@ +# nginx reverse proxy +# https://www.digitalocean.com/community/tutorials/how-to-configure-nginx-as-a-reverse-proxy-on-ubuntu-22-04 + +if [ "$INTERNAL_WEBAPI_PORT" = "" ]; then + echo Error, missing: INTERNAL_WEBAPI_PORT=6543 + exit 1 +fi +if [ "$EXTERNAL_WEBAPI_PORT" = "" ]; then + echo Error, missing: EXTERNAL_WEBAPI_PORT=6544 + exit 1 +fi + +CONFIG_FILE=/etc/nginx/sites-available/webapi +if [ -f "$CONFIG_FILE" ]; then + sudo rm -f $CONFIG_FILE +fi + + +sudo tee $CONFIG_FILE </dev/null +if [ "$?" -ne "0" ]; then + echo nginx config error, exiting + exit $? +fi + +sudo systemctl restart nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/config_whitelist.sh b/pkg/rexec/scripts/nginx/config_whitelist.sh new file mode 100755 index 0000000..6759539 --- /dev/null +++ b/pkg/rexec/scripts/nginx/config_whitelist.sh @@ -0,0 +1,22 @@ +if [ "$BASTION_ALLOWED_IPS" = "" ]; then + echo Error, missing: BASTION_ALLOWED_IPS=1.2.3.4/24,5.6.7.8/16 + exit 1 +fi + +if [ ! 
-d "/etc/nginx/includes" ]; then + sudo mkdir /etc/nginx/includes +fi + +WHITELIST_CONFIG_FILE=/etc/nginx/includes/allowed_ips.conf + +if [ -f "$WHITELIST_CONFIG_FILE" ]; then + sudo rm $WHITELIST_CONFIG_FILE +fi +sudo touch $WHITELIST_CONFIG_FILE + +IFS=',' read -ra CIDR <<< "$BASTION_ALLOWED_IPS" +for i in "${CIDR[@]}"; do + echo "allow $i;" | sudo tee -a $WHITELIST_CONFIG_FILE +done +echo "deny all;" | sudo tee -a $WHITELIST_CONFIG_FILE + diff --git a/pkg/rexec/scripts/nginx/install.sh b/pkg/rexec/scripts/nginx/install.sh new file mode 100755 index 0000000..947916f --- /dev/null +++ b/pkg/rexec/scripts/nginx/install.sh @@ -0,0 +1,15 @@ +# https://www.digitalocean.com/community/tutorials/how-to-configure-nginx-as-a-reverse-proxy-on-ubuntu-22-04 + +sudo DEBIAN_FRONTEND=noninteractive apt-get -y install nginx + +# Remove nginx stub site +sudo rm -f /etc/nginx/sites-enabled/default + +# nginx has a habit to write "syntax is ok" to stderr. Ignore it and rely on the exit code +sudo nginx -t 2>/dev/null +if [ "$?" -ne "0" ]; then + echo nginx config error, exiting + exit $? +fi + +sudo systemctl restart nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/start.sh b/pkg/rexec/scripts/nginx/start.sh new file mode 100755 index 0000000..d84d2e4 --- /dev/null +++ b/pkg/rexec/scripts/nginx/start.sh @@ -0,0 +1 @@ +sudo systemctl start nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/nginx/stop.sh b/pkg/rexec/scripts/nginx/stop.sh new file mode 100755 index 0000000..c6b874d --- /dev/null +++ b/pkg/rexec/scripts/nginx/stop.sh @@ -0,0 +1 @@ +sudo systemctl stop nginx \ No newline at end of file diff --git a/pkg/rexec/scripts/prometheus/config_node_exporter.sh b/pkg/rexec/scripts/prometheus/config_node_exporter.sh new file mode 100755 index 0000000..2692c6a --- /dev/null +++ b/pkg/rexec/scripts/prometheus/config_node_exporter.sh @@ -0,0 +1,32 @@ +# Make it as idempotent as possible, it can be called over and over + +# Prometheus node exporter +# https://www.digitalocean.com/community/tutorials/how-to-install-prometheus-on-ubuntu-16-04 + +PROMETHEUS_NODE_EXPORTER_SERVICE_FILE=/etc/systemd/system/node_exporter.service + +sudo rm -f $PROMETHEUS_NODE_EXPORTER_SERVICE_FILE + +sudo tee $PROMETHEUS_NODE_EXPORTER_SERVICE_FILE < /dev/null +if [ "$?" -ne "0" ]; then + echo localhost:9100/metrics + exit $? +fi diff --git a/pkg/rexec/scripts/prometheus/config_server.sh b/pkg/rexec/scripts/prometheus/config_server.sh new file mode 100755 index 0000000..3cd32af --- /dev/null +++ b/pkg/rexec/scripts/prometheus/config_server.sh @@ -0,0 +1,57 @@ +# Prometheus server (assuming node exporter also running on it) +# https://www.digitalocean.com/community/tutorials/how-to-install-prometheus-on-ubuntu-16-04 + +if [ "$PROMETHEUS_TARGETS" = "" ]; then + echo "Error, missing: PROMETHEUS_TARGETS=\'localhost:9100\',\'10.5.1.10:9100\'" + exit 1 +fi + +sudo systemctl stop prometheus 2>/dev/null + +PROMETHEUS_YAML_FILE=/etc/prometheus/prometheus.yml + +sudo rm -f $PROMETHEUS_YAML_FILE + +sudo tee $PROMETHEUS_YAML_FILE </dev/null + +if [ "$(uname -p)" == "x86_64" ]; then +ARCH=amd64 +else +ARCH=arm64 +fi + +# Download node exporter +EXPORTER_DL_FILE=node_exporter-$PROMETHEUS_NODE_EXPORTER_VERSION.linux-$ARCH +cd ~ +sudo rm -f $EXPORTER_DL_FILE.tar.gz +echo Downloading https://github.com/prometheus/node_exporter/releases/download/v$PROMETHEUS_NODE_EXPORTER_VERSION/$EXPORTER_DL_FILE.tar.gz ... 
+curl -LOs https://github.com/prometheus/node_exporter/releases/download/v$PROMETHEUS_NODE_EXPORTER_VERSION/$EXPORTER_DL_FILE.tar.gz +if [ "$?" -ne "0" ]; then + echo Cannot download, exiting + exit $? +fi +tar xvf $EXPORTER_DL_FILE.tar.gz + +sudo cp $EXPORTER_DL_FILE/node_exporter /usr/local/bin +sudo chown node_exporter:node_exporter /usr/local/bin/node_exporter + +rm -rf $EXPORTER_DL_FILE.tar.gz $EXPORTER_DL_FILE + diff --git a/pkg/rexec/scripts/prometheus/install_server.sh b/pkg/rexec/scripts/prometheus/install_server.sh new file mode 100755 index 0000000..4233e58 --- /dev/null +++ b/pkg/rexec/scripts/prometheus/install_server.sh @@ -0,0 +1,54 @@ +if [ "$PROMETHEUS_VERSION" = "" ]; then + echo Error, missing: PROMETHEUS_VERSION=2.41.0 + exit 1 +fi + +# Create users +sudo useradd --no-create-home --shell /bin/false prometheus + +# Before we download the Prometheus binaries, create the necessary directories for storing Prometheus’ files and data. Following standard Linux conventions, we’ll create a directory in /etc for Prometheus’ configuration files and a directory in /var/lib for its data. +sudo mkdir /etc/prometheus +sudo mkdir /var/lib/prometheus + +# Now, set the user and group ownership on the new directories to the prometheus user. +sudo chown prometheus:prometheus /etc/prometheus +sudo chown prometheus:prometheus /var/lib/prometheus + +if [ "$(uname -p)" == "x86_64" ]; then +ARCH=amd64 +else +ARCH=arm64 +fi + +# Downloading Prometheus +PROMETHEUS_DL_FILE=prometheus-$PROMETHEUS_VERSION.linux-$ARCH +cd ~ +sudo rm -f $PROMETHEUS_DL_FILE.gz +echo Downloading https://github.com/prometheus/prometheus/releases/download/v$PROMETHEUS_VERSION/$PROMETHEUS_DL_FILE.tar.gz +curl -LOs https://github.com/prometheus/prometheus/releases/download/v$PROMETHEUS_VERSION/$PROMETHEUS_DL_FILE.tar.gz +if [ "$?" -ne "0" ]; then + echo Cannot download, exiting + exit $? +fi +tar xvf $PROMETHEUS_DL_FILE.tar.gz + +# Copy the two binaries to the /usr/local/bin directory. + +sudo cp $PROMETHEUS_DL_FILE/prometheus /usr/local/bin/ +sudo cp $PROMETHEUS_DL_FILE/promtool /usr/local/bin/ + +# Set the user and group ownership on the binaries to the prometheus user created in Step 1. +sudo chown prometheus:prometheus /usr/local/bin/prometheus +sudo chown prometheus:prometheus /usr/local/bin/promtool + +# Copy the consoles and console_libraries directories to /etc/prometheus. +sudo cp -r $PROMETHEUS_DL_FILE/consoles /etc/prometheus +sudo cp -r $PROMETHEUS_DL_FILE/console_libraries /etc/prometheus + +# Set the user and group ownership on the directories to the prometheus user. Using the -R flag will ensure that ownership is set on the files inside the directory as well. +sudo chown -R prometheus:prometheus /etc/prometheus/consoles +sudo chown -R prometheus:prometheus /etc/prometheus/console_libraries + +# Lastly, remove the leftover files from your home directory as they are no longer needed. 
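+# Optionally verify the installed binaries, e.g.:
+#   /usr/local/bin/prometheus --version
+#   /usr/local/bin/promtool check config /etc/prometheus/prometheus.yml   (after config_server.sh has written it)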
+rm -rf $PROMETHEUS_DL_FILE.tar.gz $PROMETHEUS_DL_FILE
+
diff --git a/pkg/rexec/scripts/prometheus/start_server.sh b/pkg/rexec/scripts/prometheus/start_server.sh
new file mode 100755
index 0000000..7be750a
--- /dev/null
+++ b/pkg/rexec/scripts/prometheus/start_server.sh
@@ -0,0 +1 @@
+sudo systemctl start prometheus
diff --git a/pkg/rexec/scripts/prometheus/stop_server.sh b/pkg/rexec/scripts/prometheus/stop_server.sh
new file mode 100755
index 0000000..d0aef59
--- /dev/null
+++ b/pkg/rexec/scripts/prometheus/stop_server.sh
@@ -0,0 +1 @@
+sudo systemctl stop prometheus
diff --git a/pkg/rexec/scripts/rabbitmq/config.sh b/pkg/rexec/scripts/rabbitmq/config.sh
new file mode 100755
index 0000000..ed1a182
--- /dev/null
+++ b/pkg/rexec/scripts/rabbitmq/config.sh
@@ -0,0 +1,40 @@
+if [ "$RABBITMQ_ADMIN_NAME" = "" ]; then
+  echo Error, missing: RABBITMQ_ADMIN_NAME=...
+  exit 1
+fi
+if [ "$RABBITMQ_ADMIN_PASS" = "" ]; then
+  echo Error, missing: RABBITMQ_ADMIN_PASS=...
+  exit 1
+fi
+if [ "$RABBITMQ_USER_NAME" = "" ]; then
+  echo Error, missing: RABBITMQ_USER_NAME=...
+  exit 1
+fi
+if [ "$RABBITMQ_USER_PASS" = "" ]; then
+  echo Error, missing: RABBITMQ_USER_PASS=...
+  exit 1
+fi
+
+# Make sure it's started
+sudo systemctl start rabbitmq-server
+
+# Enable mgmt console
+sudo rabbitmq-plugins list
+sudo rabbitmq-plugins enable rabbitmq_management
+
+# Console user mgmt
+sudo rabbitmqctl add_user $RABBITMQ_ADMIN_NAME $RABBITMQ_ADMIN_PASS 2>/dev/null
+sudo rabbitmqctl set_user_tags $RABBITMQ_ADMIN_NAME administrator
+sudo rabbitmqctl set_permissions -p / $RABBITMQ_ADMIN_NAME ".*" ".*" ".*"
+sudo rabbitmqctl list_users
+sudo rabbitmqctl delete_user guest 2>/dev/null
+
+# Capillaries uses this account
+sudo rabbitmqctl add_user $RABBITMQ_USER_NAME $RABBITMQ_USER_PASS 2>/dev/null
+sudo rabbitmqctl set_permissions -p / $RABBITMQ_USER_NAME ".*" ".*" ".*"
+
+curl -s http://localhost:15672
+if [ "$?" -ne "0" ]; then
+  echo Cannot check localhost:15672
+  exit 1
+fi diff --git a/pkg/rexec/scripts/rabbitmq/install.sh b/pkg/rexec/scripts/rabbitmq/install.sh new file mode 100755 index 0000000..a4b6ded --- /dev/null +++ b/pkg/rexec/scripts/rabbitmq/install.sh @@ -0,0 +1,115 @@ +sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:rabbitmq/rabbitmq-erlang +sudo DEBIAN_FRONTEND=noninteractive apt-get update -y + +# Erlang from https://launchpad.net/~rabbitmq/+archive/ubuntu/rabbitmq-erlang +ERLANG_VER=1:26.2.5-1rmq1ppa1~ubuntu24.04.1 +sudo DEBIAN_FRONTEND=noninteractive apt-get -y install erlang-base=$ERLANG_VER \ + erlang-asn1=$ERLANG_VER erlang-crypto=$ERLANG_VER erlang-eldap=$ERLANG_VER erlang-ftp=$ERLANG_VER erlang-inets=$ERLANG_VER \ + erlang-mnesia=$ERLANG_VER erlang-os-mon=$ERLANG_VER erlang-parsetools=$ERLANG_VER erlang-public-key=$ERLANG_VER \ + erlang-runtime-tools=$ERLANG_VER erlang-snmp=$ERLANG_VER erlang-ssl=$ERLANG_VER \ + erlang-syntax-tools=$ERLANG_VER erlang-tftp=$ERLANG_VER erlang-tools=$ERLANG_VER erlang-xmerl=$ERLANG_VER + +# RabbitMQ server +RABBITMQ_VER=3.12.1-1ubuntu1 +sudo DEBIAN_FRONTEND=noninteractive apt-get -y --fix-missing install rabbitmq-server=$RABBITMQ_VER + +# https://www.cherryservers.com/blog/how-to-install-and-start-using-rabbitmq-on-ubuntu-22-04 + +# sudo DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg apt-transport-https + +# curl -1sLf "https://keys.openpgp.org/vks/v1/by-fingerprint/0A9AF2115F4687BD29803A206B73A36E6026DFCA" | sudo gpg --dearmor | sudo tee /usr/share/keyrings/com.rabbitmq.team.gpg > /dev/null +# curl -1sLf "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf77f1eda57ebb1cc" | sudo gpg --dearmor | sudo tee /usr/share/keyrings/net.launchpad.ppa.rabbitmq.erlang.gpg > /dev/null +# curl -1sLf "https://packagecloud.io/rabbitmq/rabbitmq-server/gpgkey" | sudo gpg --dearmor | sudo tee /usr/share/keyrings/io.packagecloud.rabbitmq.gpg > /dev/null + +# # Use RabbitMQ "jammy" release for Ubuntu 22.04: +# sudo tee /etc/apt/sources.list.d/rabbitmq.list < /dev/null +# ## Community mirror of Cloudsmith: modern Erlang repository +# curl -1sLf https://github.com/rabbitmq/signing-keys/releases/download/3.0/cloudsmith.rabbitmq-erlang.E495BB49CC4BBE5B.key | sudo gpg --dearmor | sudo tee /usr/share/keyrings/rabbitmq.E495BB49CC4BBE5B.gpg > /dev/null +# ## Community mirror of Cloudsmith: RabbitMQ repository +# curl -1sLf https://github.com/rabbitmq/signing-keys/releases/download/3.0/cloudsmith.rabbitmq-server.9F4587F226208342.key | sudo gpg --dearmor | sudo tee /usr/share/keyrings/rabbitmq.9F4587F226208342.gpg > /dev/null + +# ## Add apt repositories maintained by Team RabbitMQ +# sudo tee /etc/apt/sources.list.d/rabbitmq.list < /dev/null +# curl -1sLf "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf77f1eda57ebb1cc" | sudo gpg --dearmor | sudo tee /usr/share/keyrings/net.launchpad.ppa.rabbitmq.erlang.gpg > /dev/null + +# sudo DEBIAN_FRONTEND=noninteractive apt-get install apt-transport-https + +# sudo tee /etc/apt/sources.list.d/rabbitmq.list <> /mnt/capi_log/capiwebapi.log 2>&1 & + diff --git a/pkg/rexec/scripts/webapi/install.sh b/pkg/rexec/scripts/webapi/install.sh new file mode 100644 index 0000000..4526aa2 --- /dev/null +++ b/pkg/rexec/scripts/webapi/install.sh @@ -0,0 +1,34 @@ +CAPI_BINARY=capiwebapi + +if [ "$SSH_USER" = "" ]; then + echo Error, missing: SSH_USER=ubuntu + exit 1 +fi + +if [ "$CAPILLARIES_RELEASE_URL" = "" ]; then + echo Error, missing: CAPILLARIES_RELEASE_URL=https://capillaries-release.s3.us-east-1.amazonaws.com/latest + exit 1 +fi + +if [ "$OS_ARCH" = "" ]; then + 
echo Error, missing: OS_ARCH=linux/amd64
+  exit 1
+fi
+
+if [ ! -d /home/$SSH_USER/bin ]; then
+  mkdir -p /home/$SSH_USER/bin
+fi
+
+cd /home/$SSH_USER/bin
+curl -LOs $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.gz
+if [ "$?" -ne "0" ]; then
+  echo "Cannot download $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.gz to /home/$SSH_USER/bin"
+  exit 1
+fi
+curl -LOs $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.json
+if [ "$?" -ne "0" ]; then
+  echo "Cannot download from $CAPILLARIES_RELEASE_URL/$OS_ARCH/$CAPI_BINARY.json to /home/$SSH_USER/bin"
+  exit 1
+fi
+gzip -d -f $CAPI_BINARY.gz
+chmod 744 $CAPI_BINARY
diff --git a/pkg/rexec/scripts/webapi/start.sh b/pkg/rexec/scripts/webapi/start.sh
new file mode 100644
index 0000000..1641dee
--- /dev/null
+++ b/pkg/rexec/scripts/webapi/start.sh
@@ -0,0 +1,9 @@
+if [ "$SSH_USER" = "" ]; then
+  echo Error, missing: SSH_USER=ubuntu
+  exit 1
+fi
+
+processid=$(pgrep capiwebapi)
+if [ "$processid" = "" ]; then
+  /home/$SSH_USER/bin/capiwebapi >> /mnt/capi_log/capiwebapi.log 2>&1 &
+fi
\ No newline at end of file
diff --git a/pkg/rexec/scripts/webapi/stop.sh b/pkg/rexec/scripts/webapi/stop.sh
new file mode 100644
index 0000000..d64aba6
--- /dev/null
+++ b/pkg/rexec/scripts/webapi/stop.sh
@@ -0,0 +1,5 @@
+pkill -2 capiwebapi
+processid=$(pgrep capiwebapi)
+if [ "$processid" != "" ]; then
+  pkill -9 capiwebapi
+fi
\ No newline at end of file
diff --git a/pkg/rexec/ssh.go b/pkg/rexec/ssh.go
new file mode 100644
index 0000000..232341a
--- /dev/null
+++ b/pkg/rexec/ssh.go
@@ -0,0 +1,92 @@
+package rexec
+
+import (
+	"crypto/x509"
+	"encoding/pem"
+	"errors"
+	"fmt"
+	"net"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"golang.org/x/crypto/ssh"
+)
+
+func signerFromPem(pemBytes []byte) (ssh.Signer, error) {
+	// read pem block
+	pemBlock, _ := pem.Decode(pemBytes)
+	if pemBlock == nil {
+		return nil, errors.New("cannot decode pem block, no key found")
+	}
+
+	// NOTE handle key encrypted with password here if needed, x509.DecryptPEMBlock is obsolete
+
+	// generate signer instance from plain key
+	signer, err := ssh.ParsePrivateKey(pemBytes)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse plain private key %s", err.Error())
+	}
+
+	return signer, nil
+}
+
+func parsePemBlock(block *pem.Block) (any, error) {
+	switch block.Type {
+	case "RSA PRIVATE KEY":
+		key, err := x509.ParsePKCS1PrivateKey(block.Bytes)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse PKCS1 private key %s", err.Error())
+		}
+		return key, nil
+	case "EC PRIVATE KEY":
+		key, err := x509.ParseECPrivateKey(block.Bytes)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse EC private key %s", err.Error())
+		}
+		return key, nil
+	case "DSA PRIVATE KEY":
+		key, err := ssh.ParseDSAPrivateKey(block.Bytes)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse DSA private key %s", err.Error())
+		}
+		return key, nil
+	default:
+		return nil, fmt.Errorf("cannot parse private key, unsupported key type %s", block.Type)
+	}
+}
+
+func NewSshClientConfig(user string, privateKeyPath string) (*ssh.ClientConfig, error) {
+	keyPath := privateKeyPath
+	if strings.HasPrefix(keyPath, "~/") {
+		homeDir, _ := os.UserHomeDir()
+		keyPath = filepath.Join(homeDir, keyPath[2:])
+	}
+	pemBytes, err := os.ReadFile(keyPath)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read private key file %s: %s", keyPath, err.Error())
+	}
+
+	signer, err := signerFromPem(pemBytes)
+	if err != nil {
+		return nil, err
+	}
+
+	return &ssh.ClientConfig{
+		Timeout: 10 * time.Second,
+		User:    user,
+		Auth: []ssh.AuthMethod{
+			ssh.PublicKeys(signer),
+		},
+		HostKeyCallback: func(hostname string, remote net.Addr, key ssh.PublicKey) error {
+			// use known_hosts file if you care about host validation
+			return nil
+		},
+	}, nil
+}
diff --git a/sample.jsonnet b/sample.jsonnet
new file mode 100644
index 0000000..3440b75
--- /dev/null
+++ b/sample.jsonnet
@@ -0,0 +1,552 @@
+{
+  // Variables to play with
+
+  local dep_name = 'sampleaws001', // Can be any combination of alphanumeric characters. Make it unique.
+  local subnet_availability_zone = 'us-east-1a', // AWS-specific
+  local deployment_flavor_power = 'aws.arm64.c7g.8', // 1. aws or azure, 2. amd64 or arm64, 3. Flavor family, 4. Number of cores in Cassandra nodes. Daemon instances get a quarter of that.
+
+  // Cassandra cluster size - 4,8,16
+  local cassandra_total_nodes = 4,
+
+  // You probably will not change anything below this line
+
+  // max: daemon_cores*1.5 (which is the same as cassandra cores / 4 * 1.5)
+  local DEFAULT_DAEMON_THREAD_POOL_SIZE = std.toString(std.parseInt(std.split(deployment_flavor_power,".")[3]) / 4 * 1.5),
+
+  // Depends on cassandra latency, reasonable values are 5-20. Let it be:
+  // - max perf (->100% CPU): cassandra cores / 2: 8->4 16->8 32->16 64->32
+  // - conservative: cassandra cores / 4: 8->2 16->4 32->8 64->16
+  local DEFAULT_DAEMON_DB_WRITERS = std.toString(std.parseInt(std.split(deployment_flavor_power,".")[3]) / 2),
+
+  // If tasks are CPU-intensive (Python calc), make it equal to cassandra_total_nodes, otherwise cassandra_total_nodes/2 may be enough
+  local daemon_total_instances = cassandra_total_nodes,
+
+  // It's very unlikely that you need to change anything below this line
+
+  local architecture = std.split(deployment_flavor_power,".")[1], // amd64 or arm64
+  local os_arch = 'linux/' + architecture,
+
+  // Network
+  local vpc_cidr = '10.5.0.0/16', // AWS only
+  local private_subnet_cidr = '10.5.0.0/24',
+  local public_subnet_cidr = '10.5.1.0/24', // AWS only
+
+  // Internal IPs
+  local internal_bastion_ip = '10.5.1.10',
+  local prometheus_ip = '10.5.0.4',
+  local rabbitmq_ip = '10.5.0.5',
+  local daemon_ips =
+    if daemon_total_instances == 2 then ['10.5.0.101', '10.5.0.102']
+    else if daemon_total_instances == 4 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104']
+    else if daemon_total_instances == 8 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104', '10.5.0.105', '10.5.0.106', '10.5.0.107', '10.5.0.108']
+    else if daemon_total_instances == 16 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104', '10.5.0.105', '10.5.0.106', '10.5.0.107', '10.5.0.108', '10.5.0.109', '10.5.0.110', '10.5.0.111', '10.5.0.112', '10.5.0.113', '10.5.0.114', '10.5.0.115', '10.5.0.116']
+    else [],
+  local cassandra_ips =
+    if cassandra_total_nodes == 4 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14']
+    else if cassandra_total_nodes == 8 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14', '10.5.0.15', '10.5.0.16', '10.5.0.17', '10.5.0.18']
+    else if cassandra_total_nodes == 16 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14', '10.5.0.15', '10.5.0.16', '10.5.0.17', '10.5.0.18', '10.5.0.19', '10.5.0.20', '10.5.0.21', '10.5.0.22', '10.5.0.23', '10.5.0.24', '10.5.0.25', '10.5.0.26']
+    else [],
+
+  // Cassandra-specific
+  local cassandra_tokens = // Initial tokens to speedup bootstrapping
+    if cassandra_total_nodes == 4 then ['-9223372036854775808', '-4611686018427387904', '0', '4611686018427387904']
+
else if cassandra_total_nodes == 8 then ['-9223372036854775808', '-6917529027641081856', '-4611686018427387904', '-2305843009213693952', '0', '2305843009213693952', '4611686018427387904', '6917529027641081856'] + else if cassandra_total_nodes == 16 then ['-9223372036854775808','-8070450532247928832','-6917529027641081856','-5764607523034234880','-4611686018427387904','-3458764513820540928','-2305843009213693952','-1152921504606846976','0','1152921504606846976','2305843009213693952','3458764513820540928','4611686018427387904','5764607523034234880','6917529027641081856','8070450532247928832'] + else [], + local cassandra_seeds = std.join(',', cassandra_ips), // Used by cassandra nodes, all are seeds to avoid bootstrapping + local cassandra_hosts = "'[\"" + std.join('","', cassandra_ips) + "\"]'", // Used by daemons "'[\"10.5.0.11\",\"10.5.0.12\",\"10.5.0.13\",\"10.5.0.14\",\"10.5.0.15\",\"10.5.0.16\",\"10.5.0.17\",\"10.5.0.18\"]'", + + // Instances + local instance_image_id = + if architecture == 'arm64' then 'ami-01296213d823247f2' // ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-arm64-server-20240615 + else if architecture == 'amd64' then 'ami-02f9afd340e6c0065' // ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-20240606 + else 'unknown-architecture-unknown-image', + + // local instance_image_id = + // if architecture == 'arm64' then 'ami-09b2701695676705d'// ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-arm64-server-20240117 // 'ami-064b469793e32e5d2' ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-arm64-server-20230904 + // else if architecture == 'amd64' then 'ami-0d8583a0d8d6dd14f' //ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-amd64-server-20230714 + // else 'unknown-architecture-unknown-image', + + local instance_flavor = getFromMap({ + 'aws.amd64.c5a.4': {cassandra:'c5ad.xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.medium', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, + 'aws.amd64.c5a.8': {cassandra:'c5ad.2xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.large', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, + 'aws.amd64.c5a.16': {cassandra:'c5ad.4xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.xlarge', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, + 'aws.amd64.c5a.32': {cassandra:'c5ad.8xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] 558.8G', daemon: 'c6a.2xlarge', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, + 'aws.amd64.c5a.64': {cassandra:'c5ad.16xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]T', daemon: 'c6a.4xlarge', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, + 'aws.arm64.c7g.4': {cassandra:'c7gd.xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c7g.medium', rabbitmq: 'c7g.medium', prometheus: 'c7g.medium', bastion: 'c7g.large'}, + 'aws.arm64.c7g.8': {cassandra:'c7gd.2xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c7g.large', rabbitmq: 'c7g.medium', prometheus: 'c7g.medium', bastion: 'c7g.large'}, + 'aws.arm64.c7g.16': {cassandra:'c7gd.4xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] 884.8G', daemon: 'c7g.xlarge', rabbitmq: 'c7g.medium', prometheus: 'c7g.medium', bastion: 'c7g.large'}, + 'aws.arm64.c7g.32': {cassandra:'c7gd.8xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] 1.7T', daemon: 'c7g.2xlarge', rabbitmq: 'c7g.medium', prometheus: 'c7g.medium', bastion: 'c7g.large'}, + 'aws.arm64.c7g.64': {cassandra:'c7gd.16xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] 1.7T', daemon: 
'c7g.4xlarge', rabbitmq: 'c7g.medium', prometheus: 'c7g.medium', bastion: 'c7g.large'} + }, deployment_flavor_power), + + // Volumes + local volume_availability_zone = subnet_availability_zone, // Keep it simple + + // Prometheus and exporters versions + local prometheus_node_exporter_version = '1.6.0', + local prometheus_server_version = '2.45.0', + local jmx_exporter_version = '0.20.0', + + // Used by Prometheus "\\'localhost:9100\\',\\'10.5.1.10:9100\\',\\'10.5.0.5:9100\\',\\'10.5.0.11:9100\\'...", + local prometheus_targets = std.format("\\'localhost:9100\\',\\'%s:9100\\',\\'%s:9100\\',", [internal_bastion_ip, rabbitmq_ip]) + // Prometheus node exporter + "\\'" + std.join(":9100\\',\\'", cassandra_ips) + ":9100\\'," + // Prometheus node exporter + "\\'" + std.join(":7070\\',\\'", cassandra_ips) + ":7070\\'," + // JMX exporter + "\\'" + std.join(":9100\\',\\'", daemon_ips) + ":9100\\'", // Prometheus node exporter + + deployment_name: dep_name, + deploy_provider_name: std.split(deployment_flavor_power,".")[0], + + ssh_config: { + bastion_external_ip_address_name: dep_name + '_bastion_external_ip_name', + // external_ip_address: '', + port: 22, + user: '{CAPIDEPLOY_SSH_USER}', + private_key_path: '{CAPIDEPLOY_SSH_PRIVATE_KEY_PATH}', + }, + timeouts: { + }, + + network: { + name: dep_name + '_network', + cidr: vpc_cidr, + private_subnet: { + name: dep_name + '_private_subnet', + route_table_to_nat_gateway_name: dep_name + '_private_subnet_rt_to_natgw', + cidr: private_subnet_cidr, + availability_zone: subnet_availability_zone, + }, + public_subnet: { + name: dep_name + '_public_subnet', + cidr: public_subnet_cidr, + availability_zone: subnet_availability_zone, + nat_gateway_name: dep_name + '_natgw', + nat_gateway_external_ip_address_name: dep_name + '_natgw_external_ip_name', + }, + router: { // aka AWS internet gateway + name: dep_name + '_router', + }, + }, + security_groups: { + bastion: { + name: dep_name + '_bastion_security_group', + rules: [ + { + desc: 'SSH', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: '0.0.0.0/0', + port: 22, + direction: 'ingress', + }, + { + desc: 'Prometheus UI reverse proxy', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: '0.0.0.0/0', + port: 9090, + direction: 'ingress', + }, + { + desc: 'Prometheus node exporter', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 9100, + direction: 'ingress', + }, + { + desc: 'RabbitMQ UI reverse proxy', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: '0.0.0.0/0', + port: 15672, + direction: 'ingress', + }, + { + desc: 'rsyslog receiver', + protocol: 'udp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 514, + direction: 'ingress', + }, + { + desc: 'Capillaries external webapi', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: '0.0.0.0/0', + port: 6544, + direction: 'ingress', + }, + { + desc: 'Capillaries UI nginx', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: '0.0.0.0/0', + port: 80, + direction: 'ingress', + }, + ], + }, + internal: { + name: dep_name + '_internal_security_group', + rules: [ + { + desc: 'SSH', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 22, + direction: 'ingress', + }, + { + desc: 'Prometheus UI internal', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 9090, + direction: 'ingress', + }, + { + desc: 'Prometheus node exporter', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 9100, + direction: 'ingress', + }, + { + desc: 'JMX exporter', + 
protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 7070, + direction: 'ingress', + }, + { + desc: 'Cassandra JMX', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 7199, + direction: 'ingress', + }, + { + desc: 'Cassandra cluster comm', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 7000, + direction: 'ingress', + }, + { + desc: 'Cassandra API', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 9042, + direction: 'ingress', + }, + { + desc: 'RabbitMQ API', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 5672, + direction: 'ingress', + }, + { + desc: 'RabbitMQ UI', + protocol: 'tcp', + ethertype: 'IPv4', + remote_ip: $.network.cidr, + port: 15672, + direction: 'ingress', + }, + ], + }, + }, + + // Only alphanumeric characters allowed in instance names! No underscores, no dashes, no dots, no spaces - nada. + + local bastion_instance = { + bastion: { + purpose: 'CAPIDEPLOY.INTERNAL.PURPOSE_BASTION', + inst_name: dep_name + '-bastion', + root_key_name: '{CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME}', + ip_address: internal_bastion_ip, + external_ip_address_name: $.ssh_config.bastion_external_ip_address_name, + flavor: instance_flavor.bastion, + image_id: instance_image_id, + security_group_name: $.security_groups.bastion.name, + subnet_name: $.network.public_subnet.name, + associated_instance_profile: '{CAPIDEPLOY_INSTANCE_PROFILE_WITH_S3_ACCESS}', + volumes: { + 'log': { + name: dep_name + '_log', + availability_zone: volume_availability_zone, + mount_point: '/mnt/capi_log', + size: 10, + type: 'gp2', // No need for a top-spedd drive + permissions: 777, + owner: $.ssh_config.user, + }, + }, + service: { + env: { + CAPILLARIES_RELEASE_URL: '{CAPIDEPLOY_CAPILLARIES_RELEASE_URL}', + OS_ARCH: os_arch, + // S3_IAM_USER_AWS_ACCESS_KEY_ID: '{CAPIDEPLOY_S3_IAM_USER_AWS_ACCESS_KEY_ID}', + // S3_IAM_USER_AWS_SECRET_ACCESS_KEY: '{CAPIDEPLOY_S3_IAM_USER_AWS_SECRET_ACCESS_KEY}', + S3_AWS_DEFAULT_REGION: '{CAPIDEPLOY_S3_AWS_DEFAULT_REGION}', + AMQP_URL: 'amqp://{CAPIDEPLOY_RABBITMQ_USER_NAME}:{CAPIDEPLOY_RABBITMQ_USER_PASS}@' + rabbitmq_ip + '/', + CASSANDRA_HOSTS: cassandra_hosts, + PROMETHEUS_IP: prometheus_ip, + PROMETHEUS_NODE_EXPORTER_VERSION: prometheus_node_exporter_version, + RABBITMQ_IP: rabbitmq_ip, + SSH_USER: $.ssh_config.user, + NETWORK_CIDR: $.network.cidr, + BASTION_ALLOWED_IPS: '{CAPIDEPLOY_BASTION_ALLOWED_IPS}', + EXTERNAL_IP_ADDRESS: '{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}', // internal: capideploy populates it from ssh_config.external_ip_address after loading project file; used by webui and webapi config.sh + EXTERNAL_WEBAPI_PORT: '{CAPIDEPLOY_EXTERNAL_WEBAPI_PORT}', + INTERNAL_WEBAPI_PORT: '6543', + }, + cmd: { + install: [ + 'scripts/common/replace_nameserver.sh', + 'scripts/common/increase_ssh_connection_limit.sh', + 'scripts/prometheus/install_node_exporter.sh', + 'scripts/nginx/install.sh', + 'scripts/ca/install.sh', + 'scripts/common/iam_aws_credentials.sh', + 'scripts/toolbelt/install.sh', + 'scripts/webapi/install.sh', + 'scripts/ui/install.sh', + ], + config: [ + 'scripts/prometheus/config_node_exporter.sh', + 'scripts/rsyslog/config_catchall_log_receiver.sh', + 'scripts/logrotate/config_bastion.sh', + 'scripts/toolbelt/config.sh', + 'scripts/webapi/config.sh', + 'scripts/ui/config.sh', + 'scripts/nginx/config_whitelist.sh', + 'scripts/nginx/config_ui.sh', + 'scripts/nginx/config_webapi_reverse_proxy.sh', + 
'scripts/nginx/config_prometheus_reverse_proxy.sh', + 'scripts/nginx/config_rabbitmq_reverse_proxy.sh', + ], + start: [ + 'scripts/rsyslog/start.sh', + 'scripts/logrotate/start.sh', + 'scripts/webapi/start.sh', + 'scripts/nginx/start.sh', + ], + stop: [ + 'scripts/webapi/stop.sh', + 'scripts/nginx/stop.sh', + 'scripts/logrotate/stop.sh', + 'scripts/rsyslog/stop.sh', + ], + }, + }, + }, + }, + + local rabbitmq_instance = { + rabbitmq: { + purpose: 'CAPIDEPLOY.INTERNAL.PURPOSE_RABBITMQ', + inst_name: dep_name + '-rabbitmq', + root_key_name: '{CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME}', + ip_address: rabbitmq_ip, + flavor: instance_flavor.rabbitmq, + image_id: instance_image_id, + security_group_name: $.security_groups.internal.name, + subnet_name: $.network.private_subnet.name, + service: { + env: { + INTERNAL_BASTION_IP: internal_bastion_ip, + PROMETHEUS_NODE_EXPORTER_VERSION: prometheus_node_exporter_version, + RABBITMQ_ADMIN_NAME: '{CAPIDEPLOY_RABBITMQ_ADMIN_NAME}', + RABBITMQ_ADMIN_PASS: '{CAPIDEPLOY_RABBITMQ_ADMIN_PASS}', + RABBITMQ_USER_NAME: '{CAPIDEPLOY_RABBITMQ_USER_NAME}', + RABBITMQ_USER_PASS: '{CAPIDEPLOY_RABBITMQ_USER_PASS}', + }, + cmd: { + install: [ + 'scripts/common/replace_nameserver.sh', + 'scripts/prometheus/install_node_exporter.sh', + 'scripts/rabbitmq/install.sh', + ], + config: [ + 'scripts/prometheus/config_node_exporter.sh', + 'scripts/rabbitmq/config.sh', + 'scripts/rsyslog/config_rabbitmq_log_sender.sh', + ], + start: [ + 'scripts/rabbitmq/start.sh', + ], + stop: [ + 'scripts/rabbitmq/stop.sh', + ], + }, + }, + }, + }, + + local prometheus_instance = { + prometheus: { + purpose: 'CAPIDEPLOY.INTERNAL.PURPOSE_PROMETHEUS', + inst_name: dep_name + '-prometheus', + root_key_name: '{CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME}', + ip_address: prometheus_ip, + flavor: instance_flavor.prometheus, + image_id: instance_image_id, + security_group_name: $.security_groups.internal.name, + subnet_name: $.network.private_subnet.name, + service: { + env: { + PROMETHEUS_NODE_EXPORTER_VERSION: prometheus_node_exporter_version, + PROMETHEUS_TARGETS: prometheus_targets, + PROMETHEUS_VERSION: prometheus_server_version, + }, + cmd: { + install: [ + 'scripts/common/replace_nameserver.sh', + 'scripts/prometheus/install_server.sh', + 'scripts/prometheus/install_node_exporter.sh', + ], + config: [ + 'scripts/prometheus/config_server.sh', + 'scripts/prometheus/config_node_exporter.sh', + ], + start: [ + 'scripts/prometheus/start_server.sh', + ], + stop: [ + 'scripts/prometheus/stop_server.sh', + ], + }, + }, + }, + }, + + local cass_instances = { + [e.nickname]: { + purpose: 'CAPIDEPLOY.INTERNAL.PURPOSE_CASSANDRA', + inst_name: e.inst_name, + root_key_name: '{CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME}', + ip_address: e.ip_address, + flavor: instance_flavor.cassandra, + image_id: instance_image_id, + security_group_name: $.security_groups.internal.name, + subnet_name: $.network.private_subnet.name, + service: { + env: { + INTERNAL_BASTION_IP: internal_bastion_ip, + CASSANDRA_IP: e.ip_address, + CASSANDRA_SEEDS: cassandra_seeds, + INITIAL_TOKEN: e.token, + PROMETHEUS_NODE_EXPORTER_VERSION: prometheus_node_exporter_version, + JMX_EXPORTER_VERSION: jmx_exporter_version, + NVME_REGEX: instance_flavor.cass_nvme_regex, + }, + cmd: { + install: [ + 'scripts/common/replace_nameserver.sh', + 'scripts/prometheus/install_node_exporter.sh', + 'scripts/cassandra/install.sh', + ], + config: [ + 'scripts/prometheus/config_node_exporter.sh', + 'scripts/cassandra/config.sh', + 
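// Presumably for the same reason as the capidaemon sender below: configure the rsyslog sender only after cassandra/config.sh, so the Cassandra log file already exists. +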
'scripts/rsyslog/config_cassandra_log_sender.sh', + ], + start: [ + 'scripts/cassandra/start.sh', + 'scripts/rsyslog/restart.sh', // Clumsy, but required on AWS machines; otherwise the log is not picked up when it appears. + ], + stop: [ + 'scripts/cassandra/stop.sh', + ], + }, + }, + } + for e in std.mapWithIndex(function(i, v) { + nickname: std.format('cass%03d', i + 1), + inst_name: dep_name + '-' + self.nickname, + token: cassandra_tokens[i], + ip_address: v, + }, cassandra_ips) + }, + + local daemon_instances = { + [e.nickname]: { + purpose: 'CAPIDEPLOY.INTERNAL.PURPOSE_DAEMON', + inst_name: e.inst_name, + root_key_name: '{CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME}', + ip_address: e.ip_address, + flavor: instance_flavor.daemon, + image_id: instance_image_id, + security_group_name: $.security_groups.internal.name, + subnet_name: $.network.private_subnet.name, + associated_instance_profile: '{CAPIDEPLOY_INSTANCE_PROFILE_WITH_S3_ACCESS}', + service: { + env: { + INTERNAL_BASTION_IP: internal_bastion_ip, + CAPILLARIES_RELEASE_URL: '{CAPIDEPLOY_CAPILLARIES_RELEASE_URL}', + OS_ARCH: os_arch, + // S3_IAM_USER_AWS_ACCESS_KEY_ID: '{CAPIDEPLOY_S3_IAM_USER_AWS_ACCESS_KEY_ID}', + // S3_IAM_USER_AWS_SECRET_ACCESS_KEY: '{CAPIDEPLOY_S3_IAM_USER_AWS_SECRET_ACCESS_KEY}', + S3_AWS_DEFAULT_REGION: '{CAPIDEPLOY_S3_AWS_DEFAULT_REGION}', + AMQP_URL: 'amqp://{CAPIDEPLOY_RABBITMQ_USER_NAME}:{CAPIDEPLOY_RABBITMQ_USER_PASS}@' + rabbitmq_ip + '/', + CASSANDRA_HOSTS: cassandra_hosts, + DAEMON_THREAD_POOL_SIZE: DEFAULT_DAEMON_THREAD_POOL_SIZE, + DAEMON_DB_WRITERS: DEFAULT_DAEMON_DB_WRITERS, + PROMETHEUS_NODE_EXPORTER_VERSION: prometheus_node_exporter_version, + SSH_USER: $.ssh_config.user, + }, + cmd: { + install: [ + 'scripts/common/replace_nameserver.sh', + 'scripts/prometheus/install_node_exporter.sh', + 'scripts/common/iam_aws_credentials.sh', + 'scripts/ca/install.sh', + 'scripts/daemon/install.sh', + ], + config: [ + 'scripts/logrotate/config_capidaemon.sh', + 'scripts/prometheus/config_node_exporter.sh', + 'scripts/daemon/config.sh', + 'scripts/rsyslog/config_capidaemon_log_sender.sh', // This should go after daemon/config.sh, otherwise the rsyslog sender does not pick up /var/log/capidaemon/capidaemon.log + ], + start: [ + 'scripts/daemon/start.sh', + 'scripts/rsyslog/restart.sh', // Clumsy, but required on AWS machines; otherwise the log is not picked up when it appears. + ], + stop: [ + 'scripts/daemon/stop.sh', + ], + }, + }, + } + for e in std.mapWithIndex(function(i, v) { + nickname: std.format('daemon%03d', i + 1), + inst_name: dep_name + '-' + self.nickname, + ip_address: v, + }, daemon_ips) + }, + + instances: bastion_instance + rabbitmq_instance + prometheus_instance + cass_instances + daemon_instances, + + // Return the value at key k if it is non-empty, otherwise a placeholder string. + local getFromMap = function(m, k) + if std.length(m[k]) > 0 then m[k] else 'unknown-key-' + k, + + // Two-level variant of getFromMap for nested maps. + local getFromDoubleMap = function(m, k1, k2) + if std.length(m[k1]) > 0 then + if std.length(m[k1][k2]) > 0 then m[k1][k2] else 'no-key-' + k2 + else 'unknown-key-' + k1, +} +
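// Hypothetical usage of the helpers above, for illustration only (not part of the deployment logic): getFromMap({a: 'x'}, 'a') yields 'x'; getFromMap({a: ''}, 'a') yields 'unknown-key-a'; getFromDoubleMap({a: {b: ''}}, 'a', 'b') yields 'no-key-b'.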