From 57c8a8ae21a229cb91aba83322362b597202cd0f Mon Sep 17 00:00:00 2001 From: KH <96801545+kleineshertz@users.noreply.github.com> Date: Sat, 27 Jul 2024 14:27:14 -0700 Subject: [PATCH] 2024 07 chained deploy cmds (#4) * Chain deploy commands * Replace env variables before loading jsonnet * Readme: IAM mgmt details --- .vscode/launch.json | 7 +- 1_deploy.sh | 4 +- README.md | 784 ++++++++++++++------------ pkg/cld/cldaws/networking.go | 28 +- pkg/cld/resource.go | 14 +- pkg/cmd/capideploy/capideploy.go | 697 +++++------------------ pkg/l/log_builder.go | 8 +- pkg/prj/project.go | 53 +- pkg/provider/aws_deployments.go | 43 ++ pkg/provider/aws_floating_ips.go | 48 +- pkg/provider/aws_instances.go | 94 +-- pkg/provider/aws_networking.go | 97 ++-- pkg/provider/aws_provider.go | 40 ++ pkg/provider/aws_security_group.go | 26 +- pkg/provider/aws_volumes.go | 73 ++- pkg/provider/deploy_provider.go | 657 ++++++++++++++++++--- pkg/provider/deploy_provider_impl.go | 74 +++ pkg/rexec/exec_ssh.go | 6 +- pkg/rexec/scripts/daemon/config.sh | 33 +- pkg/rexec/scripts/daemon/stop.sh | 35 +- pkg/rexec/scripts/rabbitmq/install.sh | 2 +- pkg/rexec/ssh.go | 43 +- sample.jsonnet | 44 +- 23 files changed, 1658 insertions(+), 1252 deletions(-) create mode 100644 pkg/provider/aws_deployments.go create mode 100644 pkg/provider/aws_provider.go create mode 100644 pkg/provider/deploy_provider_impl.go diff --git a/.vscode/launch.json b/.vscode/launch.json index 80b5c93..18c2081 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -11,11 +11,10 @@ "mode": "debug", "cwd": "${workspaceFolder}", "program": "${workspaceFolder}/pkg/cmd/capideploy/capideploy.go", - "envFile": "${env:HOME}/capideploy_aws.rc", + "envFile": "${env:HOME}/capideploy_aws.env", "args": [ - "delete_networking", - "-p=sample.jsonnet", - "-v" + "check_cassandra_status", + "-p=sample.jsonnet", "-v" ] }, ] diff --git a/1_deploy.sh b/1_deploy.sh index 2a72ec4..a6e5883 100755 --- a/1_deploy.sh +++ b/1_deploy.sh @@ -40,7 +40,7 @@ if ! grep -q "$BASTION_IP" ~/.ssh/config; then echo " User $CAPIDEPLOY_SSH_USER" | tee -a ~/.ssh/config echo " StrictHostKeyChecking=no" | tee -a ~/.ssh/config echo " UserKnownHostsFile=/dev/null" | tee -a ~/.ssh/config - echo " IdentityFile $CAPIDEPLOY_SSH_PRIVATE_KEY_PATH" | tee -a ~/.ssh/config + echo " IdentityFile $CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_PRIVATE_KEY_OR_PATH" | tee -a ~/.ssh/config fi set -x @@ -65,7 +65,7 @@ set -x ./capideploy config_services "bastion,rabbitmq,prometheus,daemon*" -p sample.jsonnet -v >> deploy.log #./capideploy config_services "bastion" -p sample.jsonnet -v >> deploy.log -ssh -o StrictHostKeyChecking=no -i $CAPIDEPLOY_SSH_PRIVATE_KEY_PATH -J $BASTION_IP $CAPIDEPLOY_SSH_USER@10.5.0.11 'nodetool describecluster;nodetool status' +ssh -o StrictHostKeyChecking=no -i $CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_PRIVATE_KEY_OR_PATH -J $BASTION_IP $CAPIDEPLOY_SSH_USER@10.5.0.11 'nodetool describecluster;nodetool status' duration=$SECONDS echo "$(($duration / 60))m $(($duration % 60))s elapsed." diff --git a/README.md b/README.md index 38cde59..ed273bb 100644 --- a/README.md +++ b/README.md @@ -1,357 +1,427 @@ -# IAM settings - -You can run capideploy under your AWS root account, but this is generally discouraged. Chances are you want to run capideploy as some IAM user, or even better, let's pretend that capideploy is executed by some third party or a temporary contractor. You want to grant that third party some specific permissions that allow that third party to create Capillaries deployment in your AWS workspace. 
Giving a third party access to your AWS resources is a standard practice and the recommended way to do that is to use IAM roles. This section discusses the AWS IAM preparation steps to create the necessary role structure. Basic familiarity with AWS console is required. - -## Users and groups - -Let's assume all capideploy activities are performed on behalf of an IAM user named `UserCapideployOperator`. As a first step, create this user in `IAM->Users` section of AWS console. In `IAM->User groups`, create a group `GroupCapideployOperators` and add `UserCapideployOperator` to it. - -Create credentials for `UserCapideployOperator` and save them in UserCapideployOperator.rc: -``` -export AWS_ACCESS_KEY_ID=AK... -export AWS_SECRET_ACCESS_KEY=... -export AWS_DEFAULT_REGION=us-east-1 -``` - -If you want to run capideploy unnder this account (not under some SaaS provider account as described below), run this .rc file before running capideploy, so AWS SDK can use those credentials. - -## Policies and roles - -### PolicyAccessCapillariesTestbucket and RoleAccessCapillariesTestbucket - -Your AWS deployment will need to read and write files from/to S3 bucket. As per [Capillaries S3 instructions](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md), we assume that you already have an S3 bucket for your future Capillaries deployment, let's assume the name of the bucket is `capillaries-testbucket` (in fact, it will be more like `acmme-corp-prod-files`) and it has `Block all public access` setting on (assuming you do not want strangers to see your files). And here is the key difference: -- Capillaries test S3 bucket access described in that doc uses user-based access model (bucket policy explicitly gives the user `arn:aws:iam:::user/UserAccessCapillariesTestbucket` access to the bucket); -- capideploy S3 bucket access model uses a separate policy and a separate role with this policy attached, and Capillaries instances can assume that role. 
- -In `IAM->Policies`, let's create a policy `PolicyAccessCapillariesTestbucket` that allows access to the bucket we will be using: - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": "s3:ListBucket", - "Resource": "arn:aws:s3:::capillaries-testbucket" - }, - { - "Effect": "Allow", - "Action": [ - "s3:DeleteObject", - "s3:GetObject", - "s3:PutObject" - ], - "Resource": "arn:aws:s3:::capillaries-testbucket/*" - } - ] -} -``` - -In `IAM->Roles`, create a role `RoleAccessCapillariesTestbucket` with `Trusted entity type` set to `AWS Service` and: -- attach the newly created `PolicyAccessCapillariesTestbucket` to it (`Permissions` tab); -- under `Trust relationships`, make sure that ec2 service can assume this role: - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Service": "ec2.amazonaws.com" - }, - "Action": "sts:AssumeRole" - } - ] -} -``` - -Please note that, since we created therole with `Trusted entity type` set to `AWS Service`, `RoleAccessCapillariesTestbucket` has two ARNa, as a role and as an instance profile: - -| Name type | Name | -| - | - | -| ARN | arn:aws:iam:::role/RoleAccessCapillariesTestbucket | -| Instance profile ARN | arn:aws:iam:::instance-profile/RoleAccessCapillariesTestbucket | - -Run the following command as AWS root or as `UserCapideployOperator` (if you have already assigned `iam:GetInstanceProfile` permission to it, see below): - -``` -$ aws iam get-instance-profile --instance-profile-name RoleAccessCapillariesTestbucket -``` - -The result shows that role `RoleAccessCapillariesTestbucket` is "wrapped" by instance profile `RoleAccessCapillariesTestbucket`. - -### PolicyCapideployOperator - -As we agreed above, `UserCapideployOperator` (who potentially can be a third party), needs only a very restricted set of permissions. 
This user will need permissions to do two major things: -- create/delete AWS resources (networks, subnets, instances etc) that will provide infrastructure to run Capillaries binaries and Cassandra cluster -- give created instances permission to read/write config/data files from/to S3 bucket - -In IAM->Policies, create a customer-managed policy PolicyCapideployOperator: - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:AllocateAddress", - "ec2:AssociateAddress", - "ec2:AssociateIamInstanceProfile", - "ec2:AssociateRouteTable", - "ec2:AttachInternetGateway", - "ec2:AttachVolume", - "ec2:AuthorizeSecurityGroupIngress", - "ec2:CreateImage", - "ec2:CreateInternetGateway", - "ec2:CreateNatGateway", - "ec2:CreateRoute", - "ec2:CreateRouteTable", - "ec2:CreateSecurityGroup", - "ec2:CreateSubnet", - "ec2:CreateTags", - "ec2:CreateVolume", - "ec2:CreateVpc", - "ec2:DeleteInternetGateway", - "ec2:DeleteNatGateway", - "ec2:DeleteRouteTable", - "ec2:DeleteSecurityGroup", - "ec2:DeleteSnapshot", - "ec2:DeleteSubnet", - "ec2:DeleteVolume", - "ec2:DeleteVpc", - "ec2:DeregisterImage", - "ec2:DescribeAddresses", - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceTypes", - "ec2:DescribeInternetGateways", - "ec2:DescribeKeyPairs", - "ec2:DescribeNatGateways", - "ec2:DescribeRouteTables", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSnapshots", - "ec2:DescribeSubnets", - "ec2:DescribeTags", - "ec2:DescribeVolumes", - "ec2:DescribeVpcs", - "ec2:DetachInternetGateway", - "ec2:DetachVolume", - "ec2:ReleaseAddress", - "ec2:RunInstances", - "ec2:TerminateInstances", - "iam:GetInstanceProfile", - "tag:GetResources" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": "iam:PassRole", - "Resource": "arn:aws:iam:::role/RoleAccessCapillariesTestbucket" - } - ] -} -``` - -The first part is obvious: it lists all AWS API calls performed by capideploy. As for the second part,it adds PassRole permission for `RoleAccessCapillariesTestbucket` created above. Without this permission, `AssociateIamInstanceProfile` call (that tells AWS to allow instances to access the bucket) will fail. - -Just in case - to list all AWS API calls used by capideploy, run: -```shell -grep -r -e "ec2Client\.[A-Za-z]*" --include "*.go" -grep -r -e "tClient\.[A-Za-z]*" --include "*.go" -``` - -## Attach PolicyCapideployOperator to GroupCapideployOperators - -In `IAM->User groups->GroupCapideployOperators->Permissions`, attach `PolicyCapideployOperator`. - -# IAM Settings - SaaS scenario - -capideploy can be executed by a third-party, like some SaaS provider or a contractor who needs access to your AWS resources. If you have to do that, the following additional settings are required. Assuming "you" are the "customer" of the SaaS provider. - -## SaaS user - -In SaaS provider console `IAM->Users`, create a new user `UserSaasCapideployOperator`. This will be the account capideply will be running under. Create credentials for `UserSaasCapideployOperator` and save them in UserSaasCapideployOperator.rc: -``` -export AWS_ACCESS_KEY_ID=AK... -export AWS_SECRET_ACCESS_KEY=... -export AWS_DEFAULT_REGION=us-east-1 -``` - -If you want to run capideploy unnder this SaaS account (not under your `UserCapideployOperator` account as described above), run this .rc file before running capideploy, so AWS SDK can use those credentials. 
- -## SaaS policy - -In SaaS provider console `IAM->Policies`, create a new policy `PolicySaasCapideployOperator` as follows: -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:AllocateAddress", - "ec2:AssociateAddress", - "ec2:AssociateIamInstanceProfile", - "ec2:AssociateRouteTable", - "ec2:AttachInternetGateway", - "ec2:AttachVolume", - "ec2:AuthorizeSecurityGroupIngress", - "ec2:CreateImage", - "ec2:CreateInternetGateway", - "ec2:CreateNatGateway", - "ec2:CreateRoute", - "ec2:CreateRouteTable", - "ec2:CreateSecurityGroup", - "ec2:CreateSubnet", - "ec2:CreateTags", - "ec2:CreateVolume", - "ec2:CreateVpc", - "ec2:DeleteInternetGateway", - "ec2:DeleteNatGateway", - "ec2:DeleteRouteTable", - "ec2:DeleteSecurityGroup", - "ec2:DeleteSnapshot", - "ec2:DeleteSubnet", - "ec2:DeleteVolume", - "ec2:DeleteVpc", - "ec2:DeregisterImage", - "ec2:DescribeAddresses", - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceTypes", - "ec2:DescribeInternetGateways", - "ec2:DescribeKeyPairs", - "ec2:DescribeNatGateways", - "ec2:DescribeRouteTables", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSnapshots", - "ec2:DescribeSubnets", - "ec2:DescribeTags", - "ec2:DescribeVolumes", - "ec2:DescribeVpcs", - "ec2:DetachInternetGateway", - "ec2:DetachVolume", - "ec2:ReleaseAddress", - "ec2:RunInstances", - "ec2:TerminateInstances", - "iam:GetInstanceProfile", - "tag:GetResources", - "iam:PassRole", - "sts:AssumeRole" - ], - "Resource": "*" - } - ] -} -``` - -This policy is very similar to your `PolicyCapideployOperator`, but there are two important differences: -- it allows `iam:PassRole` for *all* resources (because SaaS provider user will work with many customers, it will need access not only to your `arn:aws:iam:::role/RoleAccessCapillariesTestbucket`, but to all relevant roles from many customers) -- it allows `sts:AssumeRole`, capideploy will call AWS API `AssumeRole("arn:aws:iam:::role/RoleCapideployOperator", externalId)` when establishing an AWS service session, so it will create/delete all resources on your (``) behalf. - -Attach `PolicySaasCapideployOperator` to `UserSaasCapideployOperator`. - -## SaaS customer - trust UserSaasCapideployOperator - -In your AWS console's `IAM->Roles->RoleCapideployOperator->Trusted relationships`, add: -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "AWS": "arn:aws:iam:::user/UserSaasCapideployOperator" - }, - "Action": "sts:AssumeRole", - "Condition": { - "StringEquals": { - "sts:ExternalId": "someExternalId" - } - } - } - ] -} -``` - -This will allow `UserSaasCapideployOperator` to perform all actions listed in your (customer's) `PolicySaasCapideployOperator` on your (customer's) AWS resources. - -## capideploy SaaS parameters - -If you want to run capideploy as SaaS provider's `UserSaasCapideployOperator`, make sure to specify `-r` and `-e` parameters, for example: -```shell -./capideploy list_deployment_resources -p sample.jsonnet -r arn:aws:iam:::role/RoleCapideployOperator -e someExternalId -``` - -They will tell capideploy to assume the specified role before performing any action, so it will look like someone from your AWS account performs them. 
- -# Environment variables used by Capideploy - -Sample .rc file to run before Capildeploy contains variables used in the .jsonnet file: -``` -# SSH access to EC2 instances -export CAPIDEPLOY_SSH_USER=ubuntu -# Name of the keypair stored at AWS -export CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME=sampledeployment005-root-key -# Exported PEM file with private SSH key from the AWS keypair -export CAPIDEPLOY_SSH_PRIVATE_KEY_PATH=/home/johndoe/.ssh/sampledeployment005_rsa - -# NGINX IP address filter: your IP address(es) or cidr(s), for example: "135.23.0.0/16,136.104.0.21" -export CAPIDEPLOY_BASTION_ALLOWED_IPS="..." -export CAPIDEPLOY_EXTERNAL_WEBAPI_PORT=6544 - -# This is where capideploy takes Capillaries binaries from, -# see https://github.com/capillariesio/capillaries/blob/main/binaries_upload.sh -export CAPIDEPLOY_CAPILLARIES_RELEASE_URL=https://capillaries-release.s3.us-east-1.amazonaws.com/latest - -# RabbitMQ admin access (RabbitMQ Mgmt UI) -export CAPIDEPLOY_RABBITMQ_ADMIN_NAME=... -export CAPIDEPLOY_RABBITMQ_ADMIN_PASS=... - -# RabbitMQ user access (used by Capillaries components to talk to RabbitMQ) -export CAPIDEPLOY_RABBITMQ_USER_NAME=... -export CAPIDEPLOY_RABBITMQ_USER_PASS=... - -# ~/.aws/config: default/region (without it, AWS API will not locate S3 buckets, it goes to /home/$SSH_USER/.aws/config) -export CAPIDEPLOY_S3_AWS_DEFAULT_REGION=us-east-1 - -# Capideploy will use this instance profile when creating instances that need access to S3 bucket -export CAPIDEPLOY_INSTANCE_PROFILE_WITH_S3_ACCESS=RoleAccessCapillariesTestbucket -``` - -# Create deployment - -Run `1_deploy.sh`. If everything goes well, it will create a Capillaries deployment accessible at BASTION_IP address returned by `1_deploy.sh` (capideploy does not use DNS, so you will have to access your deployment by IP address). - -# Processing data using created deployment - -[Capillaries repository](https://github.com/capillariesio/capillaries) has a few tests that are ready to run in the cloud deployment: -- [lookup quicktest S3](https://github.com/capillariesio/capillaries/tree/main/test/code/lookup/quicktest_s3): run `1_create_data_s3.sh` and `2_one_run_cloud.sh` -- [Fannie Mae quicktest S3](https://github.com/capillariesio/capillaries/tree/main/test/code/fannie_mae/quicktest_s3): run `1_copy_data_s3.sh` and `2_one_run_cloud.sh` -- [Fannie Mae bigtest](https://github.com/capillariesio/capillaries/tree/main/test/code/fannie_mae/bigtest): run `1_copy_data.sh` and `2_one_run_cloud.sh` -- [Portfolio bigtest](https://github.com/capillariesio/capillaries/tree/main/test/code/portfolio/bigtest): run `1_create_data.sh` and `2_one_run_cloud.sh` - -You will probably have to run these tests using `UserAccessCapillariesTestbucket` IAM user as per [Capillaries S3 instructions](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md): that user should have access to the S3 bucket to upload/download config/data files. - -Please note that in order to run these tests or your own scripts in your newly created deployment you only need access to the S3 bucket and HTTP access to the bastion host (which should allow HTTP access from all machines matching CAPIDEPLOY_BASTION_ALLOWED_IPS address or cidr). `UserCapideployOperator` user is not involved at this point. 
- -In general, you can start a Capillaries run in your deployment via REST API as follows: - -```shell -CAPILLARIES_AWS_TESTBUCKET=capillaries-testbucket -keyspace="lookup_quicktest_s3" -cfgS3=s3://$CAPILLARIES_AWS_TESTBUCKET/capi_cfg/lookup_quicktest -outS3=s3://$CAPILLARIES_AWS_TESTBUCKET/capi_out/lookup_quicktest -scriptFile=$cfgS3/script.json -paramsFile=$cfgS3/script_params_one_run_s3.json -webapiUrl=http://$BASTION_IP:6544 -startNodes=read_orders,read_order_items -curl -s -w "\n" -d '{"script_uri":"'$scriptFile'", "script_params_uri":"'$paramsFile'", "start_nodes":"'$startNodes'"}' -H "Content-Type: application/json" -X POST $webapiUrl"/ks/$keyspace/run" -``` - -# Delete deployment - -To delete all AWS resources that your deployment uses, run `5-undeploy.sh`. \ No newline at end of file +# IAM settings + +This section makes an attempt to cover all possible aspects of IAM access management for Capillaries and Capillaries deployment. It requires some understanding of how AWS resource access management works. + +## AWS identities involved + +There are two sides to this story: +- AWS identities used by Capillaries binaries (webapi, daemon) after the deploymenti is complete ("AWS: who is trying to access S3 buckets with Capillaries data and configuration files?") +- AWS identities used to create a deployment ("AWS: who runs capideploy?") + +Let's start with IAM access management for Capillaries binaries. + +### AWS identity for Capillaries binaries + +Capillaries binaries running in your AWS deployment will need to read and write files from/to S3 bucket. If you paid close attention to [Capillaries S3 setup for integration tests](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md), you probably remember that we used a dedicated test IAM user `UserAccessCapillariesTestbucket` and provided its AWS credentials whin building [test Docker images](https://github.com/capillariesio/capillaries/blob/main/docker-compose.yml). Bucket policy explicitly gives the user `arn:aws:iam:::user/UserAccessCapillariesTestbucket` access to the buckets. + +For production environment, it make more sense to use AWS feature called `instance profile`: it allows binaries running on specific ec2 instances to access specified AWS resources (S3 buckets, in our case) without using user credentials. capideploy S3 bucket access model will use a separate policy and a separate role with this policy attached, and Capillaries instances can assume that role using instance profile mechanism. + +### AWS identity for deployment creation and maintenance + +Which AWS identity should be used to run capideploy? There are some options. + +1. Run capideploy under your AWS root account, but this is generally discouraged. It's a trivial case and we will not be considering it. + +2. Run capideploy under some dedicated IAM account `UserCapideployOperator` that has permissions to create and maintain all kinds of required AWS resources. + +3. Pretend that capideploy is executed by some third party that does not have an IAM account within your AWS account. You want to grant that third party some specific permissions that allow that third party to create Capillaries deployment in your AWS account workspace. Giving a third party access to your AWS resources is a standard practice and the recommended way to do that is to use IAM roles. 
We will be using `PolicySaasCapideployOperator` attached to `UserSaasCapideployOperator` on the `SaaS` side, and we will ask the customer's AWS account to trust `UserSaasCapideployOperator` with permissions to create and maintain AWS resources within the customer's AWS account.
+
+The rest of the `IAM settings` section discusses the AWS IAM preparation steps to create the necessary role structure for options 2 and 3. Basic familiarity with the AWS console is required. Throughout this document we will be referring to two different AWS accounts:
+- customer's ("your") AWS account: the account that will be billed by Amazon for all created AWS resources;
+- "SaaS" AWS account: this account will use its `UserSaasCapideployOperator` user to assume a role within the customer's (your) AWS account; no resources will be created within the "SaaS" AWS account.
+
+## Users (customer's AWS account)
+
+Let's assume all capideploy activities (creation and maintenance of AWS resources) are performed on behalf of an IAM user named `UserCapideployOperator`. As a first step, create this user in the `IAM->Users` section of the customer's (your) AWS console.
+
+Create credentials for `UserCapideployOperator` and save them in UserCapideployOperator.rc:
+```
+export AWS_ACCESS_KEY_ID=AK...
+export AWS_SECRET_ACCESS_KEY=...
+export AWS_DEFAULT_REGION=us-east-1
+```
+
+If you want to run capideploy under this account (not under some SaaS provider account as described below), source this .rc file before running capideploy, so the AWS SDK can use those credentials.
+
+## Policies and roles (customer's AWS account)
+
+### PolicyAccessCapillariesTestbucket and RoleAccessCapillariesTestbucket
+
+This section discusses the steps required to implement the `instance profile`-based S3 bucket access mentioned above.
+
+Capillaries binaries running in your AWS deployment will need to read and write files from/to an S3 bucket. As per [Capillaries S3 instructions](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md), we assume that you already have an S3 bucket for your future Capillaries deployment; let's assume the name of the bucket is `capillaries-testbucket` (in practice it will be more like `acmme-corp-prod-files`) and that it has the `Block all public access` setting on (assuming you do not want strangers to see your files).
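+To make the effect of the instance profile concrete, here is a minimal sketch (not code from this repository) of how a Capillaries binary running on an instance launched with `RoleAccessCapillariesTestbucket` could reach the bucket using aws-sdk-go-v2. No access keys are configured anywhere: the default credential chain falls back to the EC2 instance metadata service and obtains temporary credentials for the attached role. The bucket name and region are the sample values used in this README.
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/service/s3"
+)
+
+func main() {
+	ctx := context.TODO()
+	// No explicit credentials: on an instance with the RoleAccessCapillariesTestbucket
+	// instance profile, the default chain gets temporary role credentials from the
+	// instance metadata service.
+	cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion("us-east-1"))
+	if err != nil {
+		log.Fatal(err)
+	}
+	out, err := s3.NewFromConfig(cfg).ListObjectsV2(ctx, &s3.ListObjectsV2Input{
+		Bucket: aws.String("capillaries-testbucket"),
+	})
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("objects visible via the instance profile: %d\n", len(out.Contents))
+}
+```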
+ +In `IAM->Policies`, let's create a policy `PolicyAccessCapillariesTestbucket` that allows access to the bucket we will be using: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "s3:ListBucket", + "Resource": "arn:aws:s3:::capillaries-testbucket" + }, + { + "Effect": "Allow", + "Action": [ + "s3:DeleteObject", + "s3:GetObject", + "s3:PutObject" + ], + "Resource": "arn:aws:s3:::capillaries-testbucket/*" + } + ] +} +``` + +In `IAM->Roles`, create a role `RoleAccessCapillariesTestbucket` with `Trusted entity type` set to `AWS Service` and: +- attach the newly created `PolicyAccessCapillariesTestbucket` to it (`Permissions` tab); +- under `Trust relationships`, make sure that ec2 service can assume this role: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": "ec2.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] +} +``` + +Please note that, since we created therole with `Trusted entity type` set to `AWS Service`, `RoleAccessCapillariesTestbucket` has two ARNs, as a role and as an instance profile: + +| Name type | Name | +| - | - | +| ARN | arn:aws:iam:::role/RoleAccessCapillariesTestbucket | +| Instance profile ARN | arn:aws:iam:::instance-profile/RoleAccessCapillariesTestbucket | + +Run the following command as AWS root or as `UserCapideployOperator` (if you have already assigned `iam:GetInstanceProfile` permission to it, see below): + +``` +$ aws iam get-instance-profile --instance-profile-name RoleAccessCapillariesTestbucket +``` + +The result shows that role `RoleAccessCapillariesTestbucket` is "wrapped" by instance profile `RoleAccessCapillariesTestbucket`. + +### PolicyCapideployOperator (customer's AWS account) + +As we agreed above, `UserCapideployOperator` (who potentially can be a third party), needs only a very restricted set of permissions. 
This user will need permissions to do two major things: +- create/delete AWS resources (networks, subnets, instances etc) that will provide infrastructure to run Capillaries binaries and Cassandra cluster +- give created instances permission to read/write config/data files from/to S3 bucket + +In IAM->Policies, create a customer-managed policy PolicyCapideployOperator: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AllocateAddress", + "ec2:AssociateAddress", + "ec2:AssociateIamInstanceProfile", + "ec2:AssociateRouteTable", + "ec2:AttachInternetGateway", + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CreateImage", + "ec2:CreateInternetGateway", + "ec2:CreateNatGateway", + "ec2:CreateRoute", + "ec2:CreateRouteTable", + "ec2:CreateSecurityGroup", + "ec2:CreateSubnet", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:CreateVpc", + "ec2:DeleteInternetGateway", + "ec2:DeleteNatGateway", + "ec2:DeleteRouteTable", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteSubnet", + "ec2:DeleteVolume", + "ec2:DeleteVpc", + "ec2:DeregisterImage", + "ec2:DescribeAddresses", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypes", + "ec2:DescribeInternetGateways", + "ec2:DescribeKeyPairs", + "ec2:DescribeNatGateways", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DescribeVpcs", + "ec2:DetachInternetGateway", + "ec2:DetachVolume", + "ec2:ReleaseAddress", + "ec2:RunInstances", + "ec2:TerminateInstances", + "iam:GetInstanceProfile", + "tag:GetResources" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": "iam:PassRole", + "Resource": "arn:aws:iam:::role/RoleAccessCapillariesTestbucket" + } + ] +} +``` + +The first part is obvious: it lists all AWS API calls performed by capideploy. As for the second part,it adds PassRole permission for `RoleAccessCapillariesTestbucket` created above. Without this permission, `AssociateIamInstanceProfile` call (that tells AWS to allow instances to access the bucket) will fail. + +Just in case - to list all AWS API calls used by capideploy, run: +```shell +grep -r -e "ec2Client\.[A-Za-z]*" --include "*.go" +grep -r -e "tClient\.[A-Za-z]*" --include "*.go" +``` + +## Attach PolicyCapideployOperator to UserCapideployOperators (customer's AWS account) + +In `IAM->Users->UserCapideployOperators->Permissions`, attach `PolicyCapideployOperator`. + +# IAM Settings - SaaS scenario + +This section is relevant only for those who decide to use the third IAM scenario with `UserSaasCapideployOperator` and it assumes "you" are the "customer" of the SaaS provider and you give this SaaS provider access to your AWS resources. + +## SaaS user (SaaS AWS account) + +In SaaS provider console `IAM->Users`, create a new user `UserSaasCapideployOperator`. This will be the account capideply will be running under. Create credentials for `UserSaasCapideployOperator` and save them in UserSaasCapideployOperator.rc: +``` +export AWS_ACCESS_KEY_ID=AK... +export AWS_SECRET_ACCESS_KEY=... +export AWS_DEFAULT_REGION=us-east-1 +``` + +If you want to run capideploy unnder this SaaS account (not under your `UserCapideployOperator` account as described above), run this .rc file before running capideploy, so AWS SDK can use those credentials. 
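+As a quick sanity check (a hypothetical snippet, not part of capideploy), you can confirm which identity the AWS SDK resolves from those environment variables before handing them to capideploy; `sts:GetCallerIdentity` needs no extra permissions:
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/service/sts"
+)
+
+func main() {
+	ctx := context.TODO()
+	// LoadDefaultConfig picks up AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and the
+	// region from the environment, i.e. from the sourced UserSaasCapideployOperator.rc.
+	cfg, err := config.LoadDefaultConfig(ctx)
+	if err != nil {
+		log.Fatal(err)
+	}
+	out, err := sts.NewFromConfig(cfg).GetCallerIdentity(ctx, &sts.GetCallerIdentityInput{})
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("capideploy would run as: %s\n", *out.Arn)
+}
+```
+
+The printed ARN should end with `user/UserSaasCapideployOperator`; if it does not, the wrong .rc file was sourced.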
+ +## SaaS policy (SaaS AWS account) + +In SaaS provider console `IAM->Policies`, create a new policy `PolicySaasCapideployOperator` as follows: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AllocateAddress", + "ec2:AssociateAddress", + "ec2:AssociateIamInstanceProfile", + "ec2:AssociateRouteTable", + "ec2:AttachInternetGateway", + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CreateImage", + "ec2:CreateInternetGateway", + "ec2:CreateNatGateway", + "ec2:CreateRoute", + "ec2:CreateRouteTable", + "ec2:CreateSecurityGroup", + "ec2:CreateSubnet", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:CreateVpc", + "ec2:DeleteInternetGateway", + "ec2:DeleteNatGateway", + "ec2:DeleteRouteTable", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteSubnet", + "ec2:DeleteVolume", + "ec2:DeleteVpc", + "ec2:DeregisterImage", + "ec2:DescribeAddresses", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypes", + "ec2:DescribeInternetGateways", + "ec2:DescribeKeyPairs", + "ec2:DescribeNatGateways", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DescribeVpcs", + "ec2:DetachInternetGateway", + "ec2:DetachVolume", + "ec2:ReleaseAddress", + "ec2:RunInstances", + "ec2:TerminateInstances", + "iam:GetInstanceProfile", + "tag:GetResources", + "iam:PassRole", + "sts:AssumeRole" + ], + "Resource": "*" + } + ] +} +``` + +This policy is very similar to your `PolicyCapideployOperator`, but there are two important differences: +- it allows `iam:PassRole` for *all* resources (because SaaS provider user will work with many customers, it will need access not only to your `arn:aws:iam:::role/RoleAccessCapillariesTestbucket`, but to all relevant roles from many customers) +- it allows `sts:AssumeRole`, capideploy will call AWS API `AssumeRole("arn:aws:iam:::role/RoleCapideployOperator", externalId)` when establishing an AWS service session, so it will create/delete all resources on your (``) behalf. + +Attach `PolicySaasCapideployOperator` to `UserSaasCapideployOperator`. + +## Trust UserSaasCapideployOperator (customer's AWS account) + +In your AWS console's `IAM->Roles->RoleCapideployOperator->Trusted relationships`, add: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "AWS": "arn:aws:iam:::user/UserSaasCapideployOperator" + }, + "Action": "sts:AssumeRole", + "Condition": { + "StringEquals": { + "sts:ExternalId": "someExternalId" + } + } + } + ] +} +``` + +This will allow `UserSaasCapideployOperator` to perform all actions listed in your (customer's) `PolicySaasCapideployOperator` on your (customer's) AWS resources. + +## capideploy SaaS parameters + +If you want to run capideploy as SaaS provider's `UserSaasCapideployOperator`, make sure to set these environment variables: +```shell +export CAPIDEPLOY_AWS_ROLE_TO_ASSUME_ARN="arn:aws:iam:::role/RoleCapideployOperator" +export CAPIDEPLOY_AWS_ROLE_TO_ASSUME_EXTERNAL_ID="..." +``` + +They will tell capideploy to assume the specified role before performing any action, so it will look like someone from customer's (your) AWS account performs them. If you are not sure about what external-id is, there are a lot of AWS-related articles that cover it. In two words: random GUID is good enough. 
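+capideploy's actual session setup lives in `pkg/provider`, but the mechanism these two variables drive is ordinary aws-sdk-go-v2 role assumption. Below is a hedged sketch of what assuming the customer's role with an external id looks like; the role and variable names are the ones used in this README, not taken from the code.
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
+	"github.com/aws/aws-sdk-go-v2/service/sts"
+)
+
+func main() {
+	ctx := context.TODO()
+	roleArn := os.Getenv("CAPIDEPLOY_AWS_ROLE_TO_ASSUME_ARN")
+	externalId := os.Getenv("CAPIDEPLOY_AWS_ROLE_TO_ASSUME_EXTERNAL_ID")
+
+	// Base config: the SaaS provider's own UserSaasCapideployOperator credentials.
+	cfg, err := config.LoadDefaultConfig(ctx)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Wrap the credential provider so every subsequent AWS call is made under the
+	// customer's RoleCapideployOperator, gated by the external id.
+	assumeRole := stscreds.NewAssumeRoleProvider(sts.NewFromConfig(cfg), roleArn,
+		func(o *stscreds.AssumeRoleOptions) {
+			if externalId != "" {
+				o.ExternalID = aws.String(externalId)
+			}
+		})
+	cfg.Credentials = aws.NewCredentialsCache(assumeRole)
+
+	ident, err := sts.NewFromConfig(cfg).GetCallerIdentity(ctx, &sts.GetCallerIdentityInput{})
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("acting as: %s\n", *ident.Arn)
+}
+```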
+ +# Environment variables used by Capideploy + +Sample capideploy_aws.rc file to run before Capildeploy contains variables used in the .jsonnet file: +``` +# Variables used in jsonnet + + +# Alphanumeric characters only. Make it unique. +export CAPIDEPLOY_DEPLOYMENT_NAME="sampleaws001" +# Subnets, volumes and instances created here +export CAPIDEPLOY_SUBNET_AVAILABILITY_ZONE="us-east-1c" +# 1. aws or azure, 2. amd64 or arm64, 3. Flavor family, 4. Number of cores in Cassandra nodes. Daemon cores are 4 times less. +export CAPIDEPLOY_DEPLOYMENT_FLAVOR_POWER="aws.arm64.c7g.8" +# Cassandra cluster size - 4,8,16,32 +export CAPIDEPLOY_CASSANDRA_CLUSTER_SIZE="4" + +# SSH access to EC2 instances +export CAPIDEPLOY_SSH_USER=ubuntu +# Name of the keypair stored at AWS +export CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME=sampledeployment005-root-key +# Exported PEM file with private SSH key from the AWS keypair: either a file (/home/johndoe/.ssh/sampledeployment005_rsa) or PEM key contents +export CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_PRIVATE_KEY_OR_PATH="-----BEGIN RSA PRIVATE KEY-----...-----END RSA PRIVATE KEY-----" + +# NGINX IP address filter: your IP address(es) or cidr(s), for example: "135.23.0.0/16,136.104.0.21" +export CAPIDEPLOY_BASTION_ALLOWED_IPS="..." + +# This is where capideploy takes Capillaries binaries from, see https://github.com/capillariesio/capillaries/blob/main/binaries_upload.sh +export CAPIDEPLOY_CAPILLARIES_RELEASE_URL=https://capillaries-release.s3.us-east-1.amazonaws.com/latest + +# RabbitMQ admin access (RabbitMQ Mgmt UI), can be anything +export CAPIDEPLOY_RABBITMQ_ADMIN_NAME=... +export CAPIDEPLOY_RABBITMQ_ADMIN_PASS=... + +# RabbitMQ user access (used by Capillaries components to talk to RabbitMQ), can be anything +export CAPIDEPLOY_RABBITMQ_USER_NAME=... +export CAPIDEPLOY_RABBITMQ_USER_PASS=... + +# Goes to /home/$SSH_USER/.aws/config: default/region (without it, AWS API called by Capillaries binaries will not locate S3 buckets) +export CAPIDEPLOY_S3_AWS_DEFAULT_REGION=us-east-1 + +# Capideploy will use this instance profile when creating instances that need access to S3 bucket +export CAPIDEPLOY_AWS_INSTANCE_PROFILE_WITH_S3_ACCESS=RoleAccessCapillariesTestbucket + + +# Variables not used in jsonnet, but used by capideploy binaries. It's just more convenient to use env variables instead of cmd parameters + + +# These two variables are required only for the arn:aws:iam:::user/UserSaasCapideployOperator scenario. +# If CAPIDEPLOY_AWS_ROLE_TO_ASSUME_ARN is empty, capideploy runs under arn:aws:iam:::user/UserCapideployOperator +# ARN of the role to assume, if needed +export CAPIDEPLOY_AWS_ROLE_TO_ASSUME_ARN="arn:aws:iam::...:role/RoleCapideployOperator" +# External id of the role to assume, can be empty. If CAPIDEPLOY_AWS_ROLE_TO_ASSUME_ARN is specified, it is recommended to use external id +export CAPIDEPLOY_AWS_ROLE_TO_ASSUME_EXTERNAL_ID="..." + + +# Variables not used in jsonnet, but used by AWS SDK called from capideploy binaries + + +# arn:aws:iam:::user/UserCapideployOperator or arn:aws:iam:::user/UserSaasCapideployOperator +export AWS_ACCESS_KEY_ID=AK... +export AWS_SECRET_ACCESS_KEY=... +export AWS_DEFAULT_REGION=us-east-1 +``` + +# Create deployment + +Run + +``` +source ~/capideploy_aws.rc +./capideploy deployment_create -p sample.jsonnet -v > deploy.log +``` + +If everything goes well, it will create a Capillaries deployment accessible at BASTION_IP address (see deploy.log). capideploy does not use DNS, so you will have to access your deployment by IP address. 
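+Per the PR title, environment variables are now replaced before the jsonnet template is loaded, so everything exported above ends up in the concrete project definition that `deployment_create` works from. The sketch below only illustrates that substitute-then-evaluate idea; the real placeholder syntax and helpers in `pkg/prj` may differ (the `{{ CAPIDEPLOY_... }}` pattern here is an assumption):
+
+```go
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"regexp"
+
+	"github.com/google/go-jsonnet"
+)
+
+func main() {
+	// Read the template as plain text so environment references can be replaced
+	// before jsonnet evaluation.
+	b, err := os.ReadFile("sample.jsonnet")
+	if err != nil {
+		log.Fatal(err)
+	}
+	re := regexp.MustCompile(`\{\{\s*(CAPIDEPLOY_[A-Z0-9_]+)\s*\}\}`)
+	expanded := re.ReplaceAllStringFunc(string(b), func(m string) string {
+		return os.Getenv(re.FindStringSubmatch(m)[1])
+	})
+
+	vm := jsonnet.MakeVM()
+	projectJson, err := vm.EvaluateAnonymousSnippet("sample.jsonnet", expanded)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(projectJson)
+}
+```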
+ +# Processing data using created deployment + +[Capillaries repository](https://github.com/capillariesio/capillaries) has a few tests that are ready to run in the cloud deployment: +- [lookup quicktest S3](https://github.com/capillariesio/capillaries/blob/main/main/test/code/lookup/quicktest_s3): run `test_one_run_cloud.sh` +- [Fannie Mae quicktest S3](https://github.com/capillariesio/capillaries/blob/main/main/test/code/fannie_mae/quicktest_s3): run `test_one_run_cloud.sh` +- [Fannie Mae bigtest](https://github.com/capillariesio/capillaries/blob/main/main/test/code/fannie_mae/bigtest_cloud): run `test_one_run.sh` +- [Portfolio bigtest](https://github.com/capillariesio/capillaries/blob/main/main/test/code/portfolio/bigtest_cloud): run `test_one_run.sh` + +You will probably have to run these tests using `UserAccessCapillariesTestbucket` IAM user as per [Capillaries S3 instructions](https://github.com/capillariesio/capillaries/blob/main/doc/s3.md): that user should have access to the S3 bucket to upload/download config/data files. + +Please note that in order to run these tests or your own scripts in your newly created deployment you only need access to the S3 bucket and HTTP access to the bastion host (which should allow HTTP access from all machines matching CAPIDEPLOY_BASTION_ALLOWED_IPS address or cidr). `UserCapideployOperator` or `UserSaasCapideployOperator` users are NOT involved at this point. + +In general, you can start a Capillaries run in your deployment via REST API as follows: + +```shell +CAPILLARIES_AWS_TESTBUCKET=capillaries-testbucket +keyspace="lookup_quicktest_s3" +cfgS3=s3://$CAPILLARIES_AWS_TESTBUCKET/capi_cfg/lookup_quicktest +outS3=s3://$CAPILLARIES_AWS_TESTBUCKET/capi_out/lookup_quicktest +scriptFile=$cfgS3/script.json +paramsFile=$cfgS3/script_params_one_run_s3.json +webapiUrl=http://$BASTION_IP:6544 +startNodes=read_orders,read_order_items +curl -s -w "\n" -d '{"script_uri":"'$scriptFile'", "script_params_uri":"'$paramsFile'", "start_nodes":"'$startNodes'"}' -H "Content-Type: application/json" -X POST $webapiUrl"/ks/$keyspace/run" +``` + +# Delete deployment + +To delete all AWS resources that your deployment uses, run +``` +source ~/capideploy_aws.rc +./capideploy deployment_delete -p sample.jsonnet -v -i > undeploy.log +``` \ No newline at end of file diff --git a/pkg/cld/cldaws/networking.go b/pkg/cld/cldaws/networking.go index 04cc2aa..c55d7d5 100644 --- a/pkg/cld/cldaws/networking.go +++ b/pkg/cld/cldaws/networking.go @@ -304,17 +304,22 @@ func CreateRouteTableForVpc(ec2Client *ec2.Client, goCtx context.Context, tags m return *out.RouteTable.RouteTableId, nil } -func GetRouteTableByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableName string) (string, string, error) { +func GetRouteTableByName(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableName string) (string, string, string, error) { out, err := ec2Client.DescribeRouteTables(goCtx, &ec2.DescribeRouteTablesInput{ Filters: []types.Filter{{Name: aws.String("tag:Name"), Values: []string{routeTableName}}}}) lb.AddObject(fmt.Sprintf("DescribeRouteTable(tag:Name=%s)", routeTableName), out) if err != nil { - return "", "", fmt.Errorf("cannot find route table %s: %s", routeTableName, err.Error()) + return "", "", "", fmt.Errorf("cannot find route table %s: %s", routeTableName, err.Error()) } if len(out.RouteTables) == 0 { - return "", "", nil + return "", "", "", nil } - return *out.RouteTables[0].RouteTableId, *out.RouteTables[0].VpcId, nil + + var 
associatedSubnetId string + if len(out.RouteTables[0].Associations) > 0 { + associatedSubnetId = *out.RouteTables[0].Associations[0].SubnetId + } + return *out.RouteTables[0].RouteTableId, *out.RouteTables[0].VpcId, associatedSubnetId, nil } func DeleteRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, routeTableId string) error { @@ -434,9 +439,9 @@ func DetachInternetGatewayFromVpc(ec2Client *ec2.Client, goCtx context.Context, return nil } -func GetVpcDefaultRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, vpcId string) (string, error) { +func GetVpcDefaultRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogBuilder, vpcId string) (string, string, error) { if vpcId == "" { - return "", fmt.Errorf("empty parameter not allowed: vpcId (%s)", vpcId) + return "", "", fmt.Errorf("empty parameter not allowed: vpcId (%s)", vpcId) } out, err := ec2Client.DescribeRouteTables(goCtx, &ec2.DescribeRouteTablesInput{ Filters: []types.Filter{ @@ -444,11 +449,16 @@ func GetVpcDefaultRouteTable(ec2Client *ec2.Client, goCtx context.Context, lb *l {Name: aws.String("vpc-id"), Values: []string{vpcId}}}}) lb.AddObject(fmt.Sprintf("DescribeRouteTables(association.main=true,vpc-id=%s)", vpcId), out) if err != nil { - return "", fmt.Errorf("cannot obtain default (main) route table for vpc %s: %s", vpcId, err.Error()) + return "", "", fmt.Errorf("cannot obtain default (main) route table for vpc %s: %s", vpcId, err.Error()) } if len(out.RouteTables) == 0 { - return "", fmt.Errorf("cannot obtain default (main) route table for vpc %s: no route tables returned", vpcId) + return "", "", fmt.Errorf("cannot obtain default (main) route table for vpc %s: no route tables returned", vpcId) + } + + var associatedSubnetId string + if len(out.RouteTables[0].Associations) > 0 && out.RouteTables[0].Associations[0].SubnetId != nil { + associatedSubnetId = *out.RouteTables[0].Associations[0].SubnetId } - return *out.RouteTables[0].RouteTableId, nil + return *out.RouteTables[0].RouteTableId, associatedSubnetId, nil } diff --git a/pkg/cld/resource.go b/pkg/cld/resource.go index 760947e..f975e71 100644 --- a/pkg/cld/resource.go +++ b/pkg/cld/resource.go @@ -15,13 +15,13 @@ const DeploymentOperatorTagName string = "DeploymentOperator" const DeploymentOperatorTagValue string = "capideploy" type Resource struct { - DeploymentName string - Svc string - Type string - Id string - Name string - State string - BilledState ResourceBilledState + DeploymentName string `json:"deployment_name"` + Svc string `json:"svc"` + Type string `json:"type"` + Id string `json:"id"` + Name string `json:"name"` + State string `json:"state"` + BilledState ResourceBilledState `json:"billed_state"` } func (r *Resource) String() string { diff --git a/pkg/cmd/capideploy/capideploy.go b/pkg/cmd/capideploy/capideploy.go index 398128c..ccd5963 100644 --- a/pkg/cmd/capideploy/capideploy.go +++ b/pkg/cmd/capideploy/capideploy.go @@ -2,117 +2,17 @@ package main import ( "context" - "encoding/json" "flag" "fmt" "log" "os" - "reflect" - "regexp" "strings" - "time" "github.com/capillariesio/capillaries-deploy/pkg/cld" - "github.com/capillariesio/capillaries-deploy/pkg/l" "github.com/capillariesio/capillaries-deploy/pkg/prj" "github.com/capillariesio/capillaries-deploy/pkg/provider" - "github.com/capillariesio/capillaries-deploy/pkg/rexec" ) -const ( - CmdListDeployments string = "list_deployments" - CmdListDeploymentResources string = "list_deployment_resources" - CmdCreateFloatingIps string = 
"create_floating_ips" - CmdDeleteFloatingIps string = "delete_floating_ips" - CmdCreateSecurityGroups string = "create_security_groups" - CmdDeleteSecurityGroups string = "delete_security_groups" - CmdCreateNetworking string = "create_networking" - CmdDeleteNetworking string = "delete_networking" - CmdCreateVolumes string = "create_volumes" - CmdDeleteVolumes string = "delete_volumes" - CmdCreateInstances string = "create_instances" - CmdDeleteInstances string = "delete_instances" - CmdAttachVolumes string = "attach_volumes" - CmdDetachVolumes string = "detach_volumes" - CmdUploadFiles string = "upload_files" - CmdDownloadFiles string = "download_files" - CmdInstallServices string = "install_services" - CmdConfigServices string = "config_services" - CmdStartServices string = "start_services" - CmdStopServices string = "stop_services" - CmdPingInstances string = "ping_instances" - CmdCreateSnapshotImages string = "create_snapshot_images" - CmdCreateInstancesFromSnapshotImages string = "create_instances_from_snapshot_images" - CmdDeleteSnapshotImages string = "delete_snapshot_images" -) - -type SingleThreadCmdHandler func() (l.LogMsg, error) - -func DumpLogChan(logChan chan l.LogMsg) { - for len(logChan) > 0 { - msg := <-logChan - fmt.Println(string(msg)) - } -} - -func getNicknamesArg(entityName string) (string, error) { - if len(os.Args) < 3 { - return "", fmt.Errorf("not enough args, expected comma-separated list of %s or '*'", entityName) - } - if len(os.Args[2]) == 0 { - return "", fmt.Errorf("bad arg, expected comma-separated list of %s or '*'", entityName) - } - return os.Args[2], nil -} - -func filterByNickname[GenericDef prj.InstanceDef](nicknames string, sourceMap map[string]*GenericDef, entityName string) (map[string]*GenericDef, error) { - var defMap map[string]*GenericDef - rawNicknames := strings.Split(nicknames, ",") - defMap = map[string]*GenericDef{} - for _, rawNickname := range rawNicknames { - if strings.Contains(rawNickname, "*") { - matchFound := false - reNickname := regexp.MustCompile("^" + strings.ReplaceAll(rawNickname, "*", "[a-zA-Z0-9]*") + "$") - for fgNickname, fgDef := range sourceMap { - if reNickname.MatchString(fgNickname) { - matchFound = true - defMap[fgNickname] = fgDef - } - } - if !matchFound { - return nil, fmt.Errorf("no match found for %s '%s', available definitions: %s", entityName, rawNickname, reflect.ValueOf(sourceMap).MapKeys()) - } - } else { - fgDef, ok := sourceMap[rawNickname] - if !ok { - return nil, fmt.Errorf("definition for %s '%s' not found, available definitions: %s", entityName, rawNickname, reflect.ValueOf(sourceMap).MapKeys()) - } - defMap[rawNickname] = fgDef - } - } - return defMap, nil -} - -func waitForWorkers(errorsExpected int, errChan chan error, logChan chan l.LogMsg) int { - finalCmdErr := 0 - for errorsExpected > 0 { - select { - case cmdErr := <-errChan: - if cmdErr != nil { - finalCmdErr = 1 - fmt.Fprintf(os.Stderr, "%s\n", cmdErr.Error()) - } - errorsExpected-- - case msg := <-logChan: - fmt.Println(msg) - } - } - - DumpLogChan(logChan) - - return finalCmdErr -} - func usage(flagset *flag.FlagSet) { fmt.Printf(` Capillaries deploy @@ -124,51 +24,69 @@ Commands: %s -p %s -p %s -p + %s -p %s -p - %s -p - %s -p - %s -p - %s -p - %s -p - %s -p - %s -p - %s -p -n - %s -p - %s -p - %s -p - %s -p - %s -p - %s -p - %s -p -`, - CmdListDeployments, - CmdListDeploymentResources, - CmdCreateFloatingIps, - CmdDeleteFloatingIps, - CmdCreateSecurityGroups, - CmdDeleteSecurityGroups, - CmdCreateNetworking, - CmdDeleteNetworking, - - 
CmdCreateVolumes, - CmdAttachVolumes, - CmdDetachVolumes, - CmdDeleteVolumes, - - CmdCreateInstances, - CmdDeleteInstances, - CmdPingInstances, + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p - CmdInstallServices, - CmdConfigServices, - CmdStartServices, - CmdStopServices, + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p -n + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p + %s -p - CmdCreateSnapshotImages, - CmdCreateInstancesFromSnapshotImages, - CmdDeleteSnapshotImages, + %s -p +`, + provider.CmdDeploymentCreate, + provider.CmdDeploymentCreateImages, + provider.CmdDeploymentRestoreInstances, + provider.CmdDeploymentDeleteImages, + provider.CmdDeploymentDelete, + + provider.CmdListDeployments, + provider.CmdListDeploymentResources, + + provider.CmdCreateFloatingIps, + provider.CmdDeleteFloatingIps, + provider.CmdCreateSecurityGroups, + provider.CmdDeleteSecurityGroups, + provider.CmdCreateNetworking, + provider.CmdDeleteNetworking, + + provider.CmdCreateVolumes, + provider.CmdAttachVolumes, + provider.CmdDetachVolumes, + provider.CmdDeleteVolumes, + + provider.CmdCreateInstances, + provider.CmdDeleteInstances, + provider.CmdPingInstances, + + provider.CmdInstallServices, + provider.CmdConfigServices, + provider.CmdStartServices, + provider.CmdStopServices, + + provider.CmdCreateSnapshotImages, + provider.CmdCreateInstancesFromSnapshotImages, + provider.CmdDeleteSnapshotImages, + + provider.CmdCheckCassStatus, ) if flagset != nil { fmt.Printf("\nParameters:\n") @@ -176,450 +94,123 @@ Commands: } } -// func createProject(templatePath string) error { -// vm := jsonnet.MakeVM() -// json, err := vm.EvaluateFile(templatePath) -// if err != nil { -// return err -// } -// fmt.Println(json) -// return nil -// } - -func ping(sshConfig *rexec.SshConfigDef, ipAddress string, verbosity bool, numberOfRepetitions int) (l.LogMsg, error) { - var err error - var logMsg l.LogMsg - - repetitions := 1 - if numberOfRepetitions > 1 { - repetitions = numberOfRepetitions - } - - lb := l.NewLogBuilder(l.CurFuncName()+" "+ipAddress, verbosity) - - for { - logMsg, err = rexec.ExecCommandOnInstance(sshConfig, ipAddress, "id", verbosity) - lb.Add(string(logMsg)) - repetitions-- - if err == nil || repetitions == 0 { - break - } - lb.Add(err.Error()) - time.Sleep(5 * time.Second) - } - - return lb.Complete(err) -} - func main() { if len(os.Args) <= 1 { usage(nil) os.Exit(1) } - // if os.Args[1] == CmdCreateProject { - // createPrjArgs := flag.NewFlagSet("create prj args", flag.ExitOnError) - // argTemplateFile := createPrjArgs.String("t", "capideploy.jsonnet", "Capideploy project template jsonnet file path") - - // if len(os.Args) <= 2 { - // usage(createPrjArgs) - // os.Exit(1) - // } - // parseErr := createPrjArgs.Parse(os.Args[2:]) - // if parseErr != nil { - // log.Fatalf(parseErr.Error()) - // } - // createPrjErr := createProject(*argTemplateFile) - // if createPrjErr != nil { - // log.Fatalf(createPrjErr.Error()) - // } - // os.Exit(0) - // } - commonArgs := flag.NewFlagSet("run prj args", flag.ExitOnError) - argPrjFile := commonArgs.String("p", "capideploy.json", "Capideploy project jsonnet file path") + argPrjFile := commonArgs.String("p", "capideploy.jsonnet", "Capideploy project jsonnet file path") argVerbosity := commonArgs.Bool("v", false, "Verbose debug output") - argNumberOfRepetitions := commonArgs.Int("n", 1, "Number of repetitions") + argNumberOfRepetitions := commonArgs.Int("n", 50, "Number of repetitions") argShowProjectDetails := commonArgs.Bool("s", false, "Show project details (may contain 
sensitive info)") argIgnoreAttachedVolumes := commonArgs.Bool("i", false, "Ignore attached volumes on instance delete") - argAssumeRole := commonArgs.String("r", "", "A role from another AWS account to assume, act like a third-party service") - argAssumeRoleExternalId := commonArgs.String("e", "", "When a role from another AWS account is assumed, use this external-id (optional, but encouraged)") - - cmdStartTs := time.Now() - - throttle := time.Tick(time.Second) // One call per second, to avoid error 429 on openstack/aws/azure calls - const maxWorkerThreads int = 50 - var logChan = make(chan l.LogMsg, maxWorkerThreads*5) - var sem = make(chan int, maxWorkerThreads) - var errChan chan error - var parseErr error - errorsExpected := 1 - //var prjPair *prj.ProjectPair - var project *prj.Project - //var fullPrjPath string - var prjErr error - singleThreadCommands := map[string]SingleThreadCmdHandler{ - CmdCreateFloatingIps: nil, - CmdDeleteFloatingIps: nil, - CmdCreateSecurityGroups: nil, - CmdDeleteSecurityGroups: nil, - CmdCreateNetworking: nil, - CmdDeleteNetworking: nil, - } - - if _, ok := singleThreadCommands[os.Args[1]]; ok { - parseErr = commonArgs.Parse(os.Args[2:]) - } else { - parseErr = commonArgs.Parse(os.Args[3:]) + cmd := os.Args[1] + nicknames := "" + parseFromArgIdx := 2 + if provider.IsCmdRequiresNicknames(cmd) { + if len(os.Args) <= 2 { + usage(commonArgs) + os.Exit(1) + } + nicknames = os.Args[2] + parseFromArgIdx = 3 } + parseErr := commonArgs.Parse(os.Args[parseFromArgIdx:]) if parseErr != nil { log.Fatalf(parseErr.Error()) } + var project *prj.Project + var prjErr error project, prjErr = prj.LoadProject(*argPrjFile) if prjErr != nil { log.Fatalf(prjErr.Error()) } - deployProvider, deployProviderErr := provider.DeployProviderFactory(project, context.TODO(), &provider.AssumeRoleConfig{RoleArn: *argAssumeRole, ExternalId: *argAssumeRoleExternalId}, *argVerbosity) + // Unbuffered channels: write immediately to stdout/stderr/file/whatever + cOut := make(chan string) + cErr := make(chan string) + cDone := make(chan int) + go func(cOut <-chan string, cErr <-chan string, cDone <-chan int) { + for { + select { + case strOut := <-cOut: + fmt.Fprintf(os.Stdout, "%s\n", strOut) + case strErr := <-cErr: + fmt.Fprintf(os.Stderr, "%s\n", strErr) + case <-cDone: + return + } + } + }(cOut, cErr, cDone) + + deployProvider, deployProviderErr := provider.DeployProviderFactory(project, context.TODO(), + &provider.AssumeRoleConfig{ + RoleArn: os.Getenv("CAPIDEPLOY_AWS_ROLE_TO_ASSUME_ARN"), + ExternalId: os.Getenv("CAPIDEPLOY_AWS_ROLE_TO_ASSUME_EXTERNAL_ID")}, + *argVerbosity, cOut, cErr) if deployProviderErr != nil { + cDone <- 0 log.Fatalf(deployProviderErr.Error()) } - if os.Args[1] == CmdListDeployments { - mapResourceCount, logMsg, err := deployProvider.ListDeployments() - fmt.Println(logMsg) - if err != nil { - log.Fatalf(err.Error()) - } - - deploymentStrings := make([]string, len(mapResourceCount)) - deploymentIdx := 0 - totalResourceCount := 0 - for deploymentName, deploymentResCount := range mapResourceCount { - deploymentStrings[deploymentIdx] = fmt.Sprintf("%s,%d", deploymentName, deploymentResCount) - deploymentIdx++ - totalResourceCount += deploymentResCount - } - fmt.Printf("%s\n", strings.Join(deploymentStrings, "\n")) - fmt.Printf("Deployments: %d, resources: %d\n", len(mapResourceCount), totalResourceCount) - os.Exit(0) - } else if os.Args[1] == CmdListDeploymentResources { - resources, logMsg, err := deployProvider.ListDeploymentResources() - fmt.Println(logMsg) - if err != 
nil { - log.Fatalf(err.Error()) - } - resourceStrings := make([]string, len(resources)) - activeCount := 0 - for resIdx, res := range resources { - resourceStrings[resIdx] = res.String() - if res.BilledState != cld.ResourceBilledStateTerminated { - activeCount++ - } - } - fmt.Printf("%s\n", strings.Join(resourceStrings, "\n")) - fmt.Printf("Total: %d, potentially billed: %d\n", len(resources), activeCount) - } - - singleThreadCommands[CmdCreateFloatingIps] = deployProvider.CreateFloatingIps - singleThreadCommands[CmdDeleteFloatingIps] = deployProvider.DeleteFloatingIps - singleThreadCommands[CmdCreateSecurityGroups] = deployProvider.CreateSecurityGroups - singleThreadCommands[CmdDeleteSecurityGroups] = deployProvider.DeleteSecurityGroups - singleThreadCommands[CmdCreateNetworking] = deployProvider.CreateNetworking - singleThreadCommands[CmdDeleteNetworking] = deployProvider.DeleteNetworking - - if cmdHandler, ok := singleThreadCommands[os.Args[1]]; ok { - errChan = make(chan error, errorsExpected) - sem <- 1 - go func() { - logMsg, err := cmdHandler() - logChan <- logMsg - errChan <- err - <-sem - }() - } else if os.Args[1] == CmdCreateInstances || - os.Args[1] == CmdDeleteInstances || - os.Args[1] == CmdCreateSnapshotImages || - os.Args[1] == CmdCreateInstancesFromSnapshotImages || - os.Args[1] == CmdDeleteSnapshotImages { - nicknames, err := getNicknamesArg("instances") - if err != nil { - log.Fatalf(err.Error()) - } - instances, err := filterByNickname(nicknames, project.Instances, "instance") - if err != nil { - log.Fatalf(err.Error()) - } - - errorsExpected = len(instances) - errChan = make(chan error, errorsExpected) - - usedFlavors := map[string]string{} - usedImages := map[string]bool{} - if os.Args[1] == CmdCreateInstances || - os.Args[1] == CmdCreateInstancesFromSnapshotImages { - logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() - if err != nil { - log.Fatal(logMsgBastionIp) - } - - // Make sure image/flavor is supported - usedKeypairs := map[string]struct{}{} - for _, instDef := range instances { - usedFlavors[instDef.FlavorName] = "" - usedImages[instDef.ImageId] = false - usedKeypairs[instDef.RootKeyName] = struct{}{} - } - logMsg, err := deployProvider.HarvestInstanceTypesByFlavorNames(usedFlavors) - logChan <- logMsg - DumpLogChan(logChan) - if err != nil { - log.Fatalf(err.Error()) - } - - logMsg, err = deployProvider.HarvestImageIds(usedImages) - logChan <- logMsg - DumpLogChan(logChan) - if err != nil { - log.Fatalf(err.Error()) - } - - // Make sure the keypairs are there - logMsg, err = deployProvider.VerifyKeypairs(usedKeypairs) - logChan <- logMsg - DumpLogChan(logChan) - if err != nil { - log.Fatalf(err.Error()) - } - - fmt.Printf("Creating instances, consider clearing known_hosts to avoid ssh complaints:\n") - for _, i := range instances { - fmt.Printf("ssh-keygen -f ~/.ssh/known_hosts -R %s;\n", i.BestIpAddress()) - } - } - - switch os.Args[1] { - case CmdCreateInstances: - logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() - if err != nil { - log.Fatal(logMsgBastionIp) - } - for iNickname := range instances { - <-throttle - sem <- 1 - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { - logMsg, err := deployProvider.CreateInstanceAndWaitForCompletion( - iNickname, - usedFlavors[project.Instances[iNickname].FlavorName], - project.Instances[iNickname].ImageId) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname) - } - case 
CmdDeleteInstances: - logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() - if err != nil { - log.Fatal(logMsgBastionIp) - } - for iNickname := range instances { - <-throttle - sem <- 1 - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { - logMsg, err := deployProvider.DeleteInstance(iNickname, *argIgnoreAttachedVolumes) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname) - } - case CmdCreateSnapshotImages: - for iNickname := range instances { - <-throttle - sem <- 1 - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { - logMsg, err := deployProvider.CreateSnapshotImage(iNickname) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname) - } - case CmdCreateInstancesFromSnapshotImages: - for iNickname := range instances { - <-throttle - sem <- 1 - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { - logMsg, err := deployProvider.CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname, - usedFlavors[project.Instances[iNickname].FlavorName]) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname) - } - case CmdDeleteSnapshotImages: - for iNickname := range instances { - <-throttle - sem <- 1 - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string) { - logMsg, err := deployProvider.DeleteSnapshotImage(iNickname) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname) - } - default: - log.Fatalf("unknown create/delete instance command:" + os.Args[1]) - } - } else if os.Args[1] == CmdPingInstances || - os.Args[1] == CmdInstallServices || - os.Args[1] == CmdConfigServices || - os.Args[1] == CmdStartServices || - os.Args[1] == CmdStopServices { - nicknames, err := getNicknamesArg("instances") - if err != nil { - log.Fatalf(err.Error()) - } - - instances, err := filterByNickname(nicknames, project.Instances, "instance") - if err != nil { - log.Fatalf(err.Error()) - } - - logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() - if err != nil { - log.Fatal(logMsgBastionIp) - } - - errorsExpected = len(instances) - errChan = make(chan error, len(instances)) - for _, iDef := range instances { - <-throttle - sem <- 1 - go func(prj *prj.Project, logChan chan l.LogMsg, errChan chan error, iDef *prj.InstanceDef) { - var logMsg l.LogMsg - var finalErr error - switch os.Args[1] { - case CmdPingInstances: - logMsg, finalErr = ping(project.SshConfig, iDef.BestIpAddress(), *argVerbosity, *argNumberOfRepetitions) - - case CmdInstallServices: - // Make sure ping passes - logMsg, finalErr = ping(project.SshConfig, iDef.BestIpAddress(), *argVerbosity, 5) - - // If ping passed, it's ok to move on - if finalErr == nil { - logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Install, iDef.Service.Env, *argVerbosity) - } - - case CmdConfigServices: - logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Config, iDef.Service.Env, *argVerbosity) - - case CmdStartServices: - logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Start, iDef.Service.Env, *argVerbosity) - - case CmdStopServices: - logMsg, finalErr = rexec.ExecEmbeddedScriptsOnInstance(project.SshConfig, iDef.BestIpAddress(), 
iDef.Service.Cmd.Stop, iDef.Service.Env, *argVerbosity) - - default: - log.Fatalf("unknown service command:" + os.Args[1]) - } - - logChan <- logMsg - errChan <- finalErr - <-sem - }(project, logChan, errChan, iDef) - } - - } else if os.Args[1] == CmdCreateVolumes || os.Args[1] == CmdAttachVolumes || os.Args[1] == CmdDetachVolumes || os.Args[1] == CmdDeleteVolumes { - nicknames, err := getNicknamesArg("instances") - if err != nil { - log.Fatalf(err.Error()) - } - - instances, err := filterByNickname(nicknames, project.Instances, "instance") - if err != nil { - log.Fatalf(err.Error()) - } - - volCount := 0 - for _, iDef := range instances { - volCount += len(iDef.Volumes) - } - if volCount == 0 { - fmt.Printf("No volumes to create/attach/detach/delete") - os.Exit(0) - } - errorsExpected = volCount - errChan = make(chan error, volCount) - for iNickname, iDef := range instances { - for volNickname := range iDef.Volumes { - <-throttle - sem <- 1 - switch os.Args[1] { - case CmdCreateVolumes: - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { - logMsg, err := deployProvider.CreateVolume(iNickname, volNickname) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname, volNickname) - case CmdAttachVolumes: - logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() - if err != nil { - log.Fatal(logMsgBastionIp) - } - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { - logMsg, err := deployProvider.AttachVolume(iNickname, volNickname) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname, volNickname) - case CmdDetachVolumes: - logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() - if err != nil { - log.Fatal(logMsgBastionIp) - } - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { - logMsg, err := deployProvider.DetachVolume(iNickname, volNickname) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname, volNickname) - case CmdDeleteVolumes: - go func(project *prj.Project, logChan chan l.LogMsg, errChan chan error, iNickname string, volNickname string) { - logMsg, err := deployProvider.DeleteVolume(iNickname, volNickname) - logChan <- logMsg - errChan <- err - <-sem - }(project, logChan, errChan, iNickname, volNickname) - default: - log.Fatalf("unknown command:" + os.Args[1]) + if len(os.Args) >= 3 { + nicknames = os.Args[2] + } + + if nicknames == "" && provider.IsCmdRequiresNicknames(cmd) { + usage(commonArgs) + log.Fatalf("nicknames argument expected but missing") + } + + var finalErr error + if cmd == provider.CmdListDeployments { + mapResourceCount, err := deployProvider.ListDeployments(cOut, cErr) + if err == nil { + sb := strings.Builder{} + totalDeployments := 0 + totalResources := 0 + for deploymentName, resCount := range mapResourceCount { + sb.WriteString(fmt.Sprintf("%s,%d\n", deploymentName, resCount)) + totalDeployments++ + totalResources += resCount + } + sb.WriteString(fmt.Sprintf("Deployments: %d, resources %d", totalDeployments, totalResources)) + cOut <- sb.String() + } + finalErr = err + } else if cmd == provider.CmdListDeploymentResources { + resources, err := deployProvider.ListDeploymentResources(cOut, cErr) + if err == nil { + sb := strings.Builder{} + billedResources := 0 + for _, res := range resources { + sb.WriteString(fmt.Sprintf("%s\n", 
res.String())) + if res.BilledState == cld.ResourceBilledStateActive { + billedResources++ } } + sb.WriteString(fmt.Sprintf("Resources: %d, billed %d", len(resources), billedResources)) + cOut <- sb.String() } + finalErr = err } else { - log.Fatalf("unknown command:" + os.Args[1]) + finalErr = deployProvider.ExecCmdWithNoResult(cmd, nicknames, &provider.ExecArgs{IgnoreAttachedVolumes: *argIgnoreAttachedVolumes, Verbosity: *argVerbosity, NumberOfRepetitions: *argNumberOfRepetitions, ShowProjectDetails: *argShowProjectDetails}, cOut, cErr) } - finalCmdErr := waitForWorkers(errorsExpected, errChan, logChan) + cDone <- 0 - // // Save updated project template, it may have some new ids and timestamps - // if prjErr = prjPair.Template.SaveProject(fullPrjPath); prjErr != nil { - // log.Fatalf(prjErr.Error()) - // } - - if *argShowProjectDetails { - prjJsonBytes, err := json.MarshalIndent(project, "", " ") - if err != nil { - log.Fatalf("cannot show project json: %s", err.Error()) - } - fmt.Printf("%s\n", string(prjJsonBytes)) - } - - if finalCmdErr > 0 { - os.Exit(finalCmdErr) + if finalErr != nil { + os.Exit(1) } - - fmt.Printf("%s %sOK%s, elapsed %.3fs\n", os.Args[1], l.LogColorGreen, l.LogColorReset, time.Since(cmdStartTs).Seconds()) + os.Exit(0) } + +/* +TODO: +-r and -e go to env variables +ssh key from file to env variable and NewSshClientConfig changed accordingly +*/ diff --git a/pkg/l/log_builder.go b/pkg/l/log_builder.go index 9e85f4c..4eaf1fa 100755 --- a/pkg/l/log_builder.go +++ b/pkg/l/log_builder.go @@ -31,10 +31,6 @@ func NewLogBuilder(header string, isVerbose bool) *LogBuilder { return &lb } -func AddLogMsg(sb *strings.Builder, logMsg LogMsg) { - sb.WriteString(string(logMsg)) -} - func (lb *LogBuilder) AddObject(content string, o any) { if !lb.IsVerbose { return @@ -59,6 +55,10 @@ func (lb *LogBuilder) Add(content string) { lb.Sb.WriteString(fmt.Sprintf("%s\n", content)) } +func (lb *LogBuilder) AddAlways(content string) { + lb.Sb.WriteString(fmt.Sprintf("%s\n", content)) +} + func (lb *LogBuilder) Complete(err error) (LogMsg, error) { if lb.IsVerbose { lb.Sb.WriteString(fmt.Sprintf("%s : ", lb.Header)) diff --git a/pkg/prj/project.go b/pkg/prj/project.go index 556e4cc..3e9bab9 100755 --- a/pkg/prj/project.go +++ b/pkg/prj/project.go @@ -150,10 +150,7 @@ type UserDef struct { Name string `json:"name"` PublicKeyPath string `json:"public_key_path"` } -type PrivateKeyDef struct { - Name string `json:"name"` - PrivateKeyPath string `json:"private_key_path"` -} + type InstanceDef struct { Purpose string `json:"purpose"` InstName string `json:"inst_name"` @@ -198,11 +195,11 @@ type Project struct { DeploymentName string `json:"deployment_name"` SshConfig *rexec.SshConfigDef `json:"ssh_config"` Timeouts ExecTimeouts `json:"timeouts"` - EnvVariablesUsed []string `json:"env_variables_used"` SecurityGroups map[string]*SecurityGroupDef `json:"security_groups"` Network NetworkDef `json:"network"` Instances map[string]*InstanceDef `json:"instances"` DeployProviderName string `json:"deploy_provider_name"` + // EnvVariablesUsed []string `json:"env_variables_used"` } func (p *Project) InitDefaults() { @@ -412,32 +409,16 @@ func LoadProject(prjFile string) (*Project, error) { return nil, fmt.Errorf("cannot find project file [%s]: [%s]", prjFullPath, err.Error()) } - vm := jsonnet.MakeVM() - prjString, err := vm.EvaluateFile(prjFile) + byteJsonnet, err := os.ReadFile(prjFullPath) if err != nil { return nil, err } - - // prjBytes, err := os.ReadFile(prjFullPath) - // if err != nil { - // return nil, 
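// A self-contained sketch of the substitution flow this LoadProject change introduces
// (the replacement code continues just below): find {CAPIDEPLOY_*} placeholders, substitute
// environment variable values, write the result to a temp file, and only then let the
// jsonnet VM evaluate it. The helper and placeholder names here are made up for illustration;
// LoadProject itself additionally escapes newlines/quotes in substituted values and collects
// all missing variables before failing.
package main

import (
	"fmt"
	"os"
	"regexp"
	"strings"

	jsonnet "github.com/google/go-jsonnet"
)

func expandAndEvaluate(jsonnetSrc string) (string, error) {
	r := regexp.MustCompile(`\{(CAPIDEPLOY[_A-Z0-9]+)\}`)
	for _, m := range r.FindAllStringSubmatch(jsonnetSrc, -1) {
		envVar := m[1]
		val := os.Getenv(envVar)
		if val == "" {
			return "", fmt.Errorf("missing env variable %s", envVar)
		}
		jsonnetSrc = strings.ReplaceAll(jsonnetSrc, fmt.Sprintf("{%s}", envVar), val)
	}

	// Jsonnet is evaluated from a temp file that already has the env values baked in.
	fTemp, err := os.CreateTemp("", "capideploy_jsonnet")
	if err != nil {
		return "", err
	}
	defer os.Remove(fTemp.Name())
	if _, err := fTemp.WriteString(jsonnetSrc); err != nil {
		fTemp.Close()
		return "", err
	}
	fTemp.Close()

	vm := jsonnet.MakeVM()
	return vm.EvaluateFile(fTemp.Name())
}

func main() {
	// CAPIDEPLOY_DEPLOYMENT_NAME is only a placeholder in the shape the regex matches,
	// not necessarily a variable sample.jsonnet actually uses.
	out, err := expandAndEvaluate(`{ deployment_name: "{CAPIDEPLOY_DEPLOYMENT_NAME}" }`)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(out)
}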
"", fmt.Errorf("cannot read project file %s: %s", prjFullPath, err.Error()) - // } - - //prjPair := ProjectPair{} - - // Read project - - // err = json.Unmarshal(prjBytes, &prjPair.Template) - // if err != nil { - // return nil, "", fmt.Errorf("cannot parse project file %s: %s", prjFullPath, err.Error()) - // } - - // prjString := string(prjBytes) + strJsonnet := string(byteJsonnet) envVars := map[string]string{} missingVars := make([]string, 0) r := regexp.MustCompile(`\{(CAPIDEPLOY[_A-Z0-9]+)\}`) - matches := r.FindAllStringSubmatch(prjString, -1) + matches := r.FindAllStringSubmatch(strJsonnet, -1) for _, v := range matches { envVar := v[1] envVars[envVar] = os.Getenv(envVar) @@ -447,7 +428,7 @@ func LoadProject(prjFile string) (*Project, error) { } if len(missingVars) > 0 { - return nil, fmt.Errorf("cannot load deployment project, missing env variables:\n%v", strings.Join(missingVars, "\n")) + return nil, fmt.Errorf("cannot load deployment project from %s, missing env variables: %s\n", prjFullPath, strings.Join(missingVars, "\n")) } // Replace env vars @@ -455,13 +436,27 @@ func LoadProject(prjFile string) (*Project, error) { // Revert unescaping in parameter values caused by JSON - we want to preserve `\n"` and `\"` escapeReplacer := strings.NewReplacer("\n", "\\n", `"`, `\"`) for k, v := range envVars { - prjString = strings.ReplaceAll(prjString, fmt.Sprintf("{%s}", k), escapeReplacer.Replace(v)) + strJsonnet = strings.ReplaceAll(strJsonnet, fmt.Sprintf("{%s}", k), escapeReplacer.Replace(v)) } - // Hacky way to provide bastion ip - // prjString = strings.ReplaceAll(prjString, "{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}", prjPair.Template.SshConfig.BastionExternalIp) + fTemp, err := os.CreateTemp("", "capideploy_jsonnet") + if err != nil { + return nil, err + } + _, err = fTemp.WriteString(strJsonnet) + if err != nil { + fTemp.Close() + return nil, err + } + fTemp.Close() + + // Run jsonnet engine agains a file with replaced env vars - // Re-deserialize forom prjString, now with replaced params + vm := jsonnet.MakeVM() + prjString, err := vm.EvaluateFile(fTemp.Name()) + if err != nil { + return nil, err + } project := Project{} if err := json.Unmarshal([]byte(prjString), &project); err != nil { diff --git a/pkg/provider/aws_deployments.go b/pkg/provider/aws_deployments.go new file mode 100644 index 0000000..d446687 --- /dev/null +++ b/pkg/provider/aws_deployments.go @@ -0,0 +1,43 @@ +package provider + +import ( + "github.com/aws/aws-sdk-go-v2/aws" + taggingTypes "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types" + "github.com/capillariesio/capillaries-deploy/pkg/cld" + "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" + "github.com/capillariesio/capillaries-deploy/pkg/l" +) + +func (p *AwsDeployProvider) listDeployments() (map[string]int, l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) + resources, err := cldaws.GetResourcesByTag(p.DeployCtx.Aws.TaggingClient, p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Aws.Config.Region, + []taggingTypes.TagFilter{{Key: aws.String(cld.DeploymentOperatorTagName), Values: []string{cld.DeploymentOperatorTagValue}}}, false) + if err != nil { + logMsg, err := lb.Complete(err) + return nil, logMsg, err + } + deploymentResCount := map[string]int{} + for _, res := range resources { + if deploymentNameCount, ok := deploymentResCount[res.DeploymentName]; ok { + deploymentResCount[res.DeploymentName] = deploymentNameCount + 1 + } else { + 
deploymentResCount[res.DeploymentName] = 1 + } + } + logMsg, _ := lb.Complete(nil) + return deploymentResCount, logMsg, nil +} + +func (p *AwsDeployProvider) listDeploymentResources() ([]*cld.Resource, l.LogMsg, error) { + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) + resources, err := cldaws.GetResourcesByTag(p.DeployCtx.Aws.TaggingClient, p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Aws.Config.Region, + []taggingTypes.TagFilter{ + {Key: aws.String(cld.DeploymentOperatorTagName), Values: []string{cld.DeploymentOperatorTagValue}}, + {Key: aws.String(cld.DeploymentNameTagName), Values: []string{p.DeployCtx.Project.DeploymentName}}}, true) + if err != nil { + logMsg, err := lb.Complete(err) + return nil, logMsg, err + } + logMsg, _ := lb.Complete(nil) + return resources, logMsg, nil +} diff --git a/pkg/provider/aws_floating_ips.go b/pkg/provider/aws_floating_ips.go index 982b2fa..c2aaece 100644 --- a/pkg/provider/aws_floating_ips.go +++ b/pkg/provider/aws_floating_ips.go @@ -22,21 +22,37 @@ func ensureFloatingIp(ec2Client *ec2.Client, goCtx context.Context, tags map[str } func (p *AwsDeployProvider) CreateFloatingIps() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - bastionIpName := p.GetCtx().Project.SshConfig.BastionExternalIpAddressName - bastionIpAddress, err := ensureFloatingIp(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, bastionIpName) + bastionIpName := p.DeployCtx.Project.SshConfig.BastionExternalIpAddressName + bastionIpAddress, err := ensureFloatingIp(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, bastionIpName) if err != nil { return lb.Complete(err) } - p.GetCtx().Project.SshConfig.BastionExternalIp = bastionIpAddress + p.DeployCtx.Project.SshConfig.BastionExternalIp = bastionIpAddress // Tell the user about the bastion IP - reportPublicIp(p.GetCtx().Project) + lb.AddAlways(fmt.Sprintf(` +Public IP reserved, now you can use it for SSH jumphost in your ~/.ssh/config: - natgwIpName := p.GetCtx().Project.Network.PublicSubnet.NatGatewayExternalIpName - _, err = ensureFloatingIp(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, natgwIpName) +Host %s +User %s +StrictHostKeyChecking=no +UserKnownHostsFile=/dev/null +IdentityFile + +Also, you may find it convenient to use in your commands: + +export BASTION_IP=%s + +`, + p.DeployCtx.Project.SshConfig.BastionExternalIp, + p.DeployCtx.Project.SshConfig.User, + p.DeployCtx.Project.SshConfig.BastionExternalIp)) + + natgwIpName := p.DeployCtx.Project.Network.PublicSubnet.NatGatewayExternalIpName + _, err = ensureFloatingIp(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, natgwIpName) if err != nil { return lb.Complete(err) } @@ -59,15 +75,15 @@ func releaseFloatingIpIfNotAllocated(ec2Client *ec2.Client, goCtx context.Contex } func (p *AwsDeployProvider) DeleteFloatingIps() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - bastionIpName := p.GetCtx().Project.SshConfig.BastionExternalIpAddressName - err := releaseFloatingIpIfNotAllocated(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, bastionIpName) + bastionIpName := p.DeployCtx.Project.SshConfig.BastionExternalIpAddressName + err := releaseFloatingIpIfNotAllocated(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, bastionIpName) if err != nil { return lb.Complete(err) } - err = 
releaseFloatingIpIfNotAllocated(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PublicSubnet.NatGatewayExternalIpName) + err = releaseFloatingIpIfNotAllocated(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.PublicSubnet.NatGatewayExternalIpName) if err != nil { return lb.Complete(err) } @@ -105,9 +121,9 @@ func (p *AwsDeployProvider) DeleteFloatingIps() (l.LogMsg, error) { // } func (p *AwsDeployProvider) PopulateInstanceExternalAddressByName() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) - ipAddressName := p.GetCtx().Project.SshConfig.BastionExternalIpAddressName - ipAddress, _, _, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, ipAddressName) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) + ipAddressName := p.DeployCtx.Project.SshConfig.BastionExternalIpAddressName + ipAddress, _, _, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, ipAddressName) if err != nil { return lb.Complete(err) } @@ -117,10 +133,10 @@ func (p *AwsDeployProvider) PopulateInstanceExternalAddressByName() (l.LogMsg, e } // Updates project: ssh config - p.GetCtx().Project.SshConfig.BastionExternalIp = ipAddress + p.DeployCtx.Project.SshConfig.BastionExternalIp = ipAddress // Updates project: instances - for _, iDef := range p.GetCtx().Project.Instances { + for _, iDef := range p.DeployCtx.Project.Instances { if iDef.ExternalIpAddressName == ipAddressName { iDef.ExternalIpAddress = ipAddress } diff --git a/pkg/provider/aws_instances.go b/pkg/provider/aws_instances.go index 830f2f9..1c059a6 100644 --- a/pkg/provider/aws_instances.go +++ b/pkg/provider/aws_instances.go @@ -13,10 +13,10 @@ import ( ) func (p *AwsDeployProvider) HarvestInstanceTypesByFlavorNames(flavorMap map[string]string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) for flavorName := range flavorMap { - instanceType, err := cldaws.GetInstanceType(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, flavorName) + instanceType, err := cldaws.GetInstanceType(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, flavorName) if err != nil { return lb.Complete(err) } @@ -26,10 +26,10 @@ func (p *AwsDeployProvider) HarvestInstanceTypesByFlavorNames(flavorMap map[stri } func (p *AwsDeployProvider) HarvestImageIds(imageMap map[string]bool) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) for imageId := range imageMap { - _, _, err := cldaws.GetImageInfoById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageId) + _, _, err := cldaws.GetImageInfoById(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, imageId) if err != nil { return lb.Complete(err) } @@ -39,10 +39,10 @@ func (p *AwsDeployProvider) HarvestImageIds(imageMap map[string]bool) (l.LogMsg, } func (p *AwsDeployProvider) VerifyKeypairs(keypairMap map[string]struct{}) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) for keypairName := range keypairMap { - err := cldaws.VerifyKeypair(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, keypairName) + err := cldaws.VerifyKeypair(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, keypairName) if err != nil { return lb.Complete(err) } @@ 
-51,9 +51,9 @@ func (p *AwsDeployProvider) VerifyKeypairs(keypairMap map[string]struct{}) (l.Lo } func getInstanceSubnetId(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string) (string, error) { - subnetName := p.GetCtx().Project.Instances[iNickname].SubnetName + subnetName := p.DeployCtx.Project.Instances[iNickname].SubnetName - subnetId, err := cldaws.GetSubnetIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, subnetName) + subnetId, err := cldaws.GetSubnetIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, subnetName) if err != nil { return "", err } @@ -66,9 +66,9 @@ func getInstanceSubnetId(p *AwsDeployProvider, lb *l.LogBuilder, iNickname strin } func getInstanceSecurityGroupId(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string) (string, error) { - sgName := p.GetCtx().Project.Instances[iNickname].SecurityGroupName + sgName := p.DeployCtx.Project.Instances[iNickname].SecurityGroupName - sgId, err := cldaws.GetSecurityGroupIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, sgName) + sgId, err := cldaws.GetSecurityGroupIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, sgName) if err != nil { return "", err } @@ -81,21 +81,21 @@ func getInstanceSecurityGroupId(p *AwsDeployProvider, lb *l.LogBuilder, iNicknam } func internalCreate(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string, instanceTypeString string, imageId string, blockDeviceMappings []types.BlockDeviceMapping, subnetId string, securityGroupId string) error { - instName := p.GetCtx().Project.Instances[iNickname].InstName + instName := p.DeployCtx.Project.Instances[iNickname].InstName // Check if the instance already exists - instanceId, foundInstanceStateByName, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instName) + instanceId, foundInstanceStateByName, err := cldaws.GetInstanceIdAndStateByHostName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, instName) if err != nil { return err } // If floating ip is being requested (it's a bastion instance), but it's already assigned, fail - externalIpAddressName := p.GetCtx().Project.Instances[iNickname].ExternalIpAddressName + externalIpAddressName := p.DeployCtx.Project.Instances[iNickname].ExternalIpAddressName var externalIpAddress string if externalIpAddressName != "" { - foundExternalIpAddress, _, associatedInstanceId, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, externalIpAddressName) + foundExternalIpAddress, _, associatedInstanceId, err := cldaws.GetPublicIpAddressAllocationAssociatedInstanceByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, externalIpAddressName) if err != nil { return err } @@ -114,29 +114,29 @@ func internalCreate(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string, in } } - instanceId, err = cldaws.CreateInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + instanceId, err = cldaws.CreateInstance(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, instanceTypeString, imageId, instName, - p.GetCtx().Project.Instances[iNickname].IpAddress, + p.DeployCtx.Project.Instances[iNickname].IpAddress, securityGroupId, - p.GetCtx().Project.Instances[iNickname].RootKeyName, + p.DeployCtx.Project.Instances[iNickname].RootKeyName, subnetId, blockDeviceMappings, - p.GetCtx().Project.Timeouts.CreateInstance) + p.DeployCtx.Project.Timeouts.CreateInstance) if err != nil { return err } if externalIpAddress != "" { - _, err = 
cldaws.AssignAwsFloatingIp(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instanceId, externalIpAddress) + _, err = cldaws.AssignAwsFloatingIp(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, instanceId, externalIpAddress) if err != nil { return err } } - if p.GetCtx().Project.Instances[iNickname].AssociatedInstanceProfile != "" { - err = cldaws.AssociateInstanceProfile(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instanceId, p.GetCtx().Project.Instances[iNickname].AssociatedInstanceProfile) + if p.DeployCtx.Project.Instances[iNickname].AssociatedInstanceProfile != "" { + err = cldaws.AssociateInstanceProfile(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, instanceId, p.DeployCtx.Project.Instances[iNickname].AssociatedInstanceProfile) if err != nil { return err } @@ -146,7 +146,7 @@ func internalCreate(p *AwsDeployProvider, lb *l.LogBuilder, iNickname string, in } func (p *AwsDeployProvider) CreateInstanceAndWaitForCompletion(iNickname string, flavorId string, imageId string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.DeployCtx.IsVerbose) subnetId, err := getInstanceSubnetId(p, lb, iNickname) if err != nil { @@ -194,10 +194,10 @@ func getAttachedVolumes(ec2Client *ec2.Client, goCtx context.Context, lb *l.LogB } func (p *AwsDeployProvider) DeleteInstance(iNickname string, ignoreAttachedVolumes bool) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.DeployCtx.IsVerbose) if !ignoreAttachedVolumes { - attachedVols, err := getAttachedVolumes(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].Volumes) + attachedVols, err := getAttachedVolumes(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Instances[iNickname].Volumes) if err != nil { return lb.Complete(err) } @@ -207,9 +207,9 @@ func (p *AwsDeployProvider) DeleteInstance(iNickname string, ignoreAttachedVolum } } - instName := p.GetCtx().Project.Instances[iNickname].InstName + instName := p.DeployCtx.Project.Instances[iNickname].InstName - foundId, foundState, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, instName) + foundId, foundState, err := cldaws.GetInstanceIdAndStateByHostName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, instName) if err != nil { return lb.Complete(err) } @@ -222,15 +222,15 @@ func (p *AwsDeployProvider) DeleteInstance(iNickname string, ignoreAttachedVolum return lb.Complete(nil) } - return lb.Complete(cldaws.DeleteInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundId, p.GetCtx().Project.Timeouts.DeleteInstance)) + return lb.Complete(cldaws.DeleteInstance(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundId, p.DeployCtx.Project.Timeouts.DeleteInstance)) } func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.DeployCtx.IsVerbose) - imageName := p.GetCtx().Project.Instances[iNickname].InstName + imageName := p.DeployCtx.Project.Instances[iNickname].InstName - foundImageId, foundImageState, _, err := cldaws.GetImageInfoByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageName) + foundImageId, foundImageState, _, err := cldaws.GetImageInfoByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, imageName) if err 
!= nil { return lb.Complete(err) } @@ -239,7 +239,7 @@ func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, err return lb.Complete(fmt.Errorf("cannot create snaphost image %s, delete/deregister existing image %s first", imageName, foundImageId)) } - attachedVols, err := getAttachedVolumes(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].Volumes) + attachedVols, err := getAttachedVolumes(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Instances[iNickname].Volumes) if err != nil { return lb.Complete(err) } @@ -248,7 +248,7 @@ func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, err return lb.Complete(fmt.Errorf("cannot create snapshot image from instance %s, detach volumes first: %s", iNickname, strings.Join(attachedVols, ","))) } - foundInstanceId, foundInstanceState, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].InstName) + foundInstanceId, foundInstanceState, err := cldaws.GetInstanceIdAndStateByHostName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Instances[iNickname].InstName) if err != nil { return lb.Complete(err) } @@ -263,21 +263,21 @@ func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, err } if foundInstanceState != types.InstanceStateNameStopped { - err = cldaws.StopInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundInstanceId, p.GetCtx().Project.Timeouts.StopInstance) + err = cldaws.StopInstance(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundInstanceId, p.DeployCtx.Project.Timeouts.StopInstance) if err != nil { return lb.Complete(err) } } - imageId, err := cldaws.CreateImageFromInstance(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, - p.GetCtx().Project.Instances[iNickname].InstName, + imageId, err := cldaws.CreateImageFromInstance(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, + p.DeployCtx.Project.Instances[iNickname].InstName, foundInstanceId, - p.GetCtx().Project.Timeouts.CreateImage) + p.DeployCtx.Project.Timeouts.CreateImage) if err != nil { return lb.Complete(err) } - _, blockDeviceMappings, err := cldaws.GetImageInfoById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageId) + _, blockDeviceMappings, err := cldaws.GetImageInfoById(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, imageId) if err != nil { return lb.Complete(err) } @@ -286,7 +286,7 @@ func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, err for _, mapping := range blockDeviceMappings { if mapping.Ebs != nil { if mapping.Ebs.SnapshotId != nil && *mapping.Ebs.SnapshotId != "" { - cldaws.TagResource(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, *mapping.Ebs.SnapshotId, p.GetCtx().Project.Instances[iNickname].InstName, p.GetCtx().Tags) + cldaws.TagResource(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, *mapping.Ebs.SnapshotId, p.DeployCtx.Project.Instances[iNickname].InstName, p.DeployCtx.Tags) if err != nil { return lb.Complete(err) } @@ -300,7 +300,7 @@ func (p *AwsDeployProvider) CreateSnapshotImage(iNickname string) (l.LogMsg, err // aws ec2 run-instances --region "us-east-1" --image-id ami-0bfdcfac85eb09d46 --count 1 --instance-type c7g.large --key-name $CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_NAME --subnet-id subnet-09e2ba71bb1a5df94 --security-group-id sg-090b9d1ef7a1d1914 --private-ip-address 10.5.1.10 // aws ec2 associate-address --instance-id i-0c4b32d20a1671b1e --public-ip 
54.86.220.208 func (p *AwsDeployProvider) CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname string, flavorId string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.DeployCtx.IsVerbose) subnetId, err := getInstanceSubnetId(p, lb, iNickname) if err != nil { @@ -312,8 +312,8 @@ func (p *AwsDeployProvider) CreateInstanceFromSnapshotImageAndWaitForCompletion( return lb.Complete(err) } - imageName := p.GetCtx().Project.Instances[iNickname].InstName - foundImageId, foundImageState, blockDeviceMappings, err := cldaws.GetImageInfoByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageName) + imageName := p.DeployCtx.Project.Instances[iNickname].InstName + foundImageId, foundImageState, blockDeviceMappings, err := cldaws.GetImageInfoByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, imageName) if err != nil { return lb.Complete(err) } @@ -343,16 +343,16 @@ func (p *AwsDeployProvider) CreateInstanceFromSnapshotImageAndWaitForCompletion( } func (p *AwsDeployProvider) DeleteSnapshotImage(iNickname string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName()+":"+iNickname, p.DeployCtx.IsVerbose) - imageName := p.GetCtx().Project.Instances[iNickname].InstName - foundImageId, foundImageState, blockDeviceMappings, err := cldaws.GetImageInfoByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, imageName) + imageName := p.DeployCtx.Project.Instances[iNickname].InstName + foundImageId, foundImageState, blockDeviceMappings, err := cldaws.GetImageInfoByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, imageName) if err != nil { return lb.Complete(err) } if foundImageId == "" { - return lb.Complete(fmt.Errorf("cannot delete snapshot image %s for %s that is not found", imageName, iNickname)) + return lb.Complete(nil) } if foundImageState == types.ImageStateDeregistered { @@ -369,14 +369,14 @@ func (p *AwsDeployProvider) DeleteSnapshotImage(iNickname string) (l.LogMsg, err } } - err = cldaws.DeregisterImage(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundImageId) + err = cldaws.DeregisterImage(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundImageId) if err != nil { return lb.Complete(err) } // Now we can delete the snapshot if snapshotId != "" { - err := cldaws.DeleteSnapshot(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, snapshotId) + err := cldaws.DeleteSnapshot(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, snapshotId) if err != nil { return lb.Complete(err) } diff --git a/pkg/provider/aws_networking.go b/pkg/provider/aws_networking.go index 911d46b..381f598 100644 --- a/pkg/provider/aws_networking.go +++ b/pkg/provider/aws_networking.go @@ -73,29 +73,44 @@ func ensureNatGatewayAndRoutePrivateSubnet(ec2Client *ec2.Client, goCtx context. 
} } - // Create new route table id for this vpc - - routeTableId, err := cldaws.CreateRouteTableForVpc(ec2Client, goCtx, tags, lb, privateSubnetDef.RouteTableToNatgwName, networkId) + routeTableId, associatedVpcId, associatedSubnetId, err := cldaws.GetRouteTableByName(ec2Client, goCtx, lb, privateSubnetDef.RouteTableToNatgwName) if err != nil { return err } - // Associate this route table with the private subnet + if associatedVpcId != "" && associatedVpcId != networkId { + return fmt.Errorf("cannot use existing route table %s(%s), it's already associated with wrong network id %s", privateSubnetDef.RouteTableToNatgwName, routeTableId, associatedVpcId) + } - rtAssocId, err := cldaws.AssociateRouteTableWithSubnet(ec2Client, goCtx, lb, routeTableId, privateSubnetId) - if err != nil { - return err + if associatedSubnetId != "" && associatedSubnetId != privateSubnetId { + return fmt.Errorf("cannot use existing route table %s(%s), it's already associated with wrong subnet id %s", privateSubnetDef.RouteTableToNatgwName, routeTableId, associatedSubnetId) } - lb.Add(fmt.Sprintf("associated route table %s with private subnet %s: %s", routeTableId, privateSubnetId, rtAssocId)) + // Create new route table id for this vpc + + if routeTableId == "" { + routeTableId, err = cldaws.CreateRouteTableForVpc(ec2Client, goCtx, tags, lb, privateSubnetDef.RouteTableToNatgwName, networkId) + if err != nil { + return err + } - // Add a record to a route table: tell all outbound 0.0.0.0/0 traffic to go through this nat gateway: + // Associate this route table with the private subnet - if err := cldaws.CreateNatGatewayRoute(ec2Client, goCtx, lb, routeTableId, "0.0.0.0/0", natGatewayId); err != nil { - return err - } + rtAssocId, err := cldaws.AssociateRouteTableWithSubnet(ec2Client, goCtx, lb, routeTableId, privateSubnetId) + if err != nil { + return err + } + + lb.Add(fmt.Sprintf("associated route table %s with private subnet %s: %s", routeTableId, privateSubnetId, rtAssocId)) - lb.Add(fmt.Sprintf("route table %s in private subnet %s points to nat gateway %s", routeTableId, privateSubnetId, natGatewayId)) + // Add a record to a route table: tell all outbound 0.0.0.0/0 traffic to go through this nat gateway: + + if err := cldaws.CreateNatGatewayRoute(ec2Client, goCtx, lb, routeTableId, "0.0.0.0/0", natGatewayId); err != nil { + return err + } + + lb.Add(fmt.Sprintf("route table %s in private subnet %s points to nat gateway %s", routeTableId, privateSubnetId, natGatewayId)) + } return nil } @@ -140,7 +155,7 @@ func ensureInternetGatewayAndRoutePublicSubnet(ec2Client *ec2.Client, goCtx cont // Obtain route table id for this vpc (it was automatically created for us and marked as 'main') - routeTableId, err := cldaws.GetVpcDefaultRouteTable(ec2Client, goCtx, lb, networkId) + routeTableId, associatedSubnetId, err := cldaws.GetVpcDefaultRouteTable(ec2Client, goCtx, lb, networkId) if err != nil { return err } @@ -152,13 +167,19 @@ func ensureInternetGatewayAndRoutePublicSubnet(ec2Client *ec2.Client, goCtx cont return err } - // Associate this default (main) route table with the public subnet + // Associate this default (main) route table with the public subnet if needed - assocId, err := cldaws.AssociateRouteTableWithSubnet(ec2Client, goCtx, lb, routeTableId, publicSubnetId) - if err != nil { - return err + if associatedSubnetId != "" && associatedSubnetId != publicSubnetId { + return fmt.Errorf("cannot asociate default route table %s with public subnet %s because it's already associated with %s", routeTableId, 
publicSubnetId, associatedSubnetId) + } + + if associatedSubnetId == "" { + assocId, err := cldaws.AssociateRouteTableWithSubnet(ec2Client, goCtx, lb, routeTableId, publicSubnetId) + if err != nil { + return err + } + lb.Add(fmt.Sprintf("associated route table %s with public subnet %s: %s", routeTableId, publicSubnetId, assocId)) } - lb.Add(fmt.Sprintf("associated route table %s with public subnet %s: %s", routeTableId, publicSubnetId, assocId)) // Add a record to a route table: tell all outbound 0.0.0.0/0 traffic to go through this internet gateway: @@ -246,7 +267,7 @@ func checkAndDeleteAwsVpcWithRouteTable(ec2Client *ec2.Client, goCtx context.Con } // Delete the route table pointing to natgw (if we don't, AWS will consider them as dependencies and will not delete vpc) - foundRouteTableId, foundAttachedVpcId, err := cldaws.GetRouteTableByName(ec2Client, goCtx, lb, privateSubnetRouteTableToNatgwName) + foundRouteTableId, foundAttachedVpcId, _, err := cldaws.GetRouteTableByName(ec2Client, goCtx, lb, privateSubnetRouteTableToNatgwName) if err != nil { return err } @@ -263,36 +284,36 @@ func checkAndDeleteAwsVpcWithRouteTable(ec2Client *ec2.Client, goCtx context.Con } func (p *AwsDeployProvider) CreateNetworking() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - vpcId, err := ensureAwsVpc(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, &p.GetCtx().Project.Network, p.GetCtx().Project.Timeouts.CreateNetwork) + vpcId, err := ensureAwsVpc(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, &p.DeployCtx.Project.Network, p.DeployCtx.Project.Timeouts.CreateNetwork) if err != nil { return lb.Complete(err) } - privateSubnetId, err := ensureAwsPrivateSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, vpcId, &p.GetCtx().Project.Network.PrivateSubnet) + privateSubnetId, err := ensureAwsPrivateSubnet(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, vpcId, &p.DeployCtx.Project.Network.PrivateSubnet) if err != nil { return lb.Complete(err) } - publicSubnetId, err := ensureAwsPublicSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, - vpcId, &p.GetCtx().Project.Network.PublicSubnet) + publicSubnetId, err := ensureAwsPublicSubnet(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, + vpcId, &p.DeployCtx.Project.Network.PublicSubnet) if err != nil { return lb.Complete(err) } - err = ensureInternetGatewayAndRoutePublicSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, - p.GetCtx().Project.Network.Router.Name, - vpcId, publicSubnetId, &p.GetCtx().Project.Network.PublicSubnet) + err = ensureInternetGatewayAndRoutePublicSubnet(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, + p.DeployCtx.Project.Network.Router.Name, + vpcId, publicSubnetId, &p.DeployCtx.Project.Network.PublicSubnet) if err != nil { return lb.Complete(err) } - err = ensureNatGatewayAndRoutePrivateSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, + err = ensureNatGatewayAndRoutePrivateSubnet(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, vpcId, - publicSubnetId, &p.GetCtx().Project.Network.PublicSubnet, - privateSubnetId, &p.GetCtx().Project.Network.PrivateSubnet, - p.GetCtx().Project.Timeouts.CreateNatGateway) + publicSubnetId, &p.DeployCtx.Project.Network.PublicSubnet, + privateSubnetId, &p.DeployCtx.Project.Network.PrivateSubnet, + 
p.DeployCtx.Project.Timeouts.CreateNatGateway) if err != nil { return lb.Complete(err) } @@ -301,29 +322,29 @@ func (p *AwsDeployProvider) CreateNetworking() (l.LogMsg, error) { } func (p *AwsDeployProvider) DeleteNetworking() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - err := checkAndDeleteNatGateway(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PublicSubnet.NatGatewayName, p.GetCtx().Project.Timeouts.DeleteNatGateway) + err := checkAndDeleteNatGateway(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.PublicSubnet.NatGatewayName, p.DeployCtx.Project.Timeouts.DeleteNatGateway) if err != nil { return lb.Complete(err) } - err = detachAndDeleteInternetGateway(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.Router.Name) + err = detachAndDeleteInternetGateway(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.Router.Name) if err != nil { return lb.Complete(err) } - err = deleteAwsSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PublicSubnet.Name) + err = deleteAwsSubnet(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.PublicSubnet.Name) if err != nil { return lb.Complete(err) } - err = deleteAwsSubnet(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.PrivateSubnet.Name) + err = deleteAwsSubnet(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.PrivateSubnet.Name) if err != nil { return lb.Complete(err) } - err = checkAndDeleteAwsVpcWithRouteTable(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.Name, p.GetCtx().Project.Network.PrivateSubnet.Name, p.GetCtx().Project.Network.PrivateSubnet.RouteTableToNatgwName) + err = checkAndDeleteAwsVpcWithRouteTable(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.Name, p.DeployCtx.Project.Network.PrivateSubnet.Name, p.DeployCtx.Project.Network.PrivateSubnet.RouteTableToNatgwName) if err != nil { return lb.Complete(err) } diff --git a/pkg/provider/aws_provider.go b/pkg/provider/aws_provider.go new file mode 100644 index 0000000..30df3c6 --- /dev/null +++ b/pkg/provider/aws_provider.go @@ -0,0 +1,40 @@ +package provider + +import ( + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" + "github.com/capillariesio/capillaries-deploy/pkg/cld" +) + +// AWS-specific + +type AwsCtx struct { + Config aws.Config + Ec2Client *ec2.Client + TaggingClient *resourcegroupstaggingapi.Client +} + +// Everything below is generic. 
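// Before the generic part continues: a hedged sketch of how the AwsCtx fields defined above
// are typically populated with aws-sdk-go-v2 (the real wiring lives in DeployProviderFactory
// in deploy_provider.go and may differ, e.g. when an assume-role configuration is supplied).
// newAwsCtx is a hypothetical helper name; it assumes the SDK config package is imported as
// "github.com/aws/aws-sdk-go-v2/config" alongside the imports already present in this file.

func newAwsCtx(goCtx context.Context) (*AwsCtx, error) {
	cfg, err := config.LoadDefaultConfig(goCtx)
	if err != nil {
		return nil, err
	}
	return &AwsCtx{
		Config:        cfg,
		Ec2Client:     ec2.NewFromConfig(cfg),
		TaggingClient: resourcegroupstaggingapi.NewFromConfig(cfg),
	}, nil
}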
This type will support DeployProvider (public) and deployProviderImpl (internal) + +type AwsDeployProvider struct { + DeployCtx *DeployCtx +} + +func (p *AwsDeployProvider) getDeployCtx() *DeployCtx { + return p.DeployCtx +} + +// DeployProvider implementation + +func (p *AwsDeployProvider) ListDeployments(cOut chan<- string, cErr chan<- string) (map[string]int, error) { + return genericListDeployments(p, cOut, cErr) +} + +func (p *AwsDeployProvider) ListDeploymentResources(cOut chan<- string, cErr chan<- string) ([]*cld.Resource, error) { + return genericListDeploymentResources(p, cOut, cErr) +} + +func (p *AwsDeployProvider) ExecCmdWithNoResult(cmd string, nicknames string, execArgs *ExecArgs, cOut chan<- string, cErr chan<- string) error { + return genericExecCmdWithNoResult(p, cmd, nicknames, execArgs, cOut, cErr) +} diff --git a/pkg/provider/aws_security_group.go b/pkg/provider/aws_security_group.go index 6211353..c07159c 100644 --- a/pkg/provider/aws_security_group.go +++ b/pkg/provider/aws_security_group.go @@ -21,31 +21,31 @@ func createAwsSecurityGroup(ec2Client *ec2.Client, goCtx context.Context, tags m if err != nil { return err } - } - for _, rule := range sgDef.Rules { - err := cldaws.AuthorizeSecurityGroupIngress(ec2Client, goCtx, lb, groupId, rule.Protocol, int32(rule.Port), rule.RemoteIp) - if err != nil { - return err + for _, rule := range sgDef.Rules { + err := cldaws.AuthorizeSecurityGroupIngress(ec2Client, goCtx, lb, groupId, rule.Protocol, int32(rule.Port), rule.RemoteIp) + if err != nil { + return err + } } } return nil } func (p *AwsDeployProvider) CreateSecurityGroups() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - vpcId, err := cldaws.GetVpcIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Network.Name) + vpcId, err := cldaws.GetVpcIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Network.Name) if err != nil { return lb.Complete(err) } if vpcId == "" { - return lb.Complete(fmt.Errorf("cannot create security groups, vpc %s does not exist", p.GetCtx().Project.Network.Name)) + return lb.Complete(fmt.Errorf("cannot create security groups, vpc %s does not exist", p.DeployCtx.Project.Network.Name)) } - for _, sgDef := range p.GetCtx().Project.SecurityGroups { - err := createAwsSecurityGroup(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, sgDef, vpcId) + for _, sgDef := range p.DeployCtx.Project.SecurityGroups { + err := createAwsSecurityGroup(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, sgDef, vpcId) if err != nil { return lb.Complete(err) } @@ -68,9 +68,9 @@ func deleteAwsSecurityGroup(ec2Client *ec2.Client, goCtx context.Context, lb *l. 
} func (p *AwsDeployProvider) DeleteSecurityGroups() (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) - for _, sgDef := range p.GetCtx().Project.SecurityGroups { - err := deleteAwsSecurityGroup(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, sgDef) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) + for _, sgDef := range p.DeployCtx.Project.SecurityGroups { + err := deleteAwsSecurityGroup(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, sgDef) if err != nil { return lb.Complete(err) } diff --git a/pkg/provider/aws_volumes.go b/pkg/provider/aws_volumes.go index b2ab25e..4c98610 100644 --- a/pkg/provider/aws_volumes.go +++ b/pkg/provider/aws_volumes.go @@ -13,10 +13,10 @@ import ( ) func (p *AwsDeployProvider) CreateVolume(iNickname string, volNickname string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] - foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + volDef := p.DeployCtx.Project.Instances[iNickname].Volumes[volNickname] + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, volDef.Name) if err != nil { return lb.Complete(err) } @@ -26,7 +26,7 @@ func (p *AwsDeployProvider) CreateVolume(iNickname string, volNickname string) ( return lb.Complete(nil) } - _, err = cldaws.CreateVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, p.GetCtx().Tags, lb, volDef.Name, volDef.AvailabilityZone, int32(volDef.Size), volDef.Type) + _, err = cldaws.CreateVolume(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, p.DeployCtx.Tags, lb, volDef.Name, volDef.AvailabilityZone, int32(volDef.Size), volDef.Type) if err != nil { return lb.Complete(err) } @@ -70,51 +70,48 @@ func awsFinalDeviceNameNitro(suggestedDeviceName string) string { } func (p *AwsDeployProvider) AttachVolume(iNickname string, volNickname string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] + volDef := p.DeployCtx.Project.Instances[iNickname].Volumes[volNickname] if volDef.MountPoint == "" || volDef.Permissions == 0 || volDef.Owner == "" { return lb.Complete(fmt.Errorf("empty parameter not allowed: volDef.MountPoint (%s), volDef.Permissions (%d), volDef.Owner (%s)", volDef.MountPoint, volDef.Permissions, volDef.Owner)) } - foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, volDef.Name) if err != nil { return lb.Complete(err) } - foundDevice, foundAttachmentState, err := cldaws.GetVolumeAttachedDeviceById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName) + foundDevice, foundAttachmentState, err := cldaws.GetVolumeAttachedDeviceById(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundVolIdByName) if err != nil { return lb.Complete(err) } - if foundDevice != "" { - if foundAttachmentState == types.VolumeAttachmentStateAttached { - return lb.Complete(nil) - } else { - return lb.Complete(fmt.Errorf("cannot attach volume %s: it's already attached to device %s, but has invalid attachment state %s", volDef.Name, foundDevice, foundAttachmentState)) - } + if foundDevice != "" && 
foundAttachmentState != types.VolumeAttachmentStateAttached { + return lb.Complete(fmt.Errorf("cannot attach volume %s: it's already attached to device %s, but has invalid attachment state %s", volDef.Name, foundDevice, foundAttachmentState)) } - suggestedDevice := volNicknameToAwsSuggestedDeviceName(p.GetCtx().Project.Instances[iNickname].Volumes, volNickname) - - // Attach + suggestedDevice := volNicknameToAwsSuggestedDeviceName(p.DeployCtx.Project.Instances[iNickname].Volumes, volNickname) - foundInstanceIdByName, _, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].InstName) - if err != nil { - return lb.Complete(err) - } + if foundDevice == "" { + // Attach + foundInstanceIdByName, _, err := cldaws.GetInstanceIdAndStateByHostName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Instances[iNickname].InstName) + if err != nil { + return lb.Complete(err) + } - _, err = cldaws.AttachVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName, foundInstanceIdByName, suggestedDevice, p.GetCtx().Project.Timeouts.AttachVolume) - if err != nil { - return lb.Complete(err) + _, err = cldaws.AttachVolume(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundVolIdByName, foundInstanceIdByName, suggestedDevice, p.DeployCtx.Project.Timeouts.AttachVolume) + if err != nil { + return lb.Complete(err) + } } // Mount deviceBlockId, er := rexec.ExecSshAndReturnLastLine( - p.GetCtx().Project.SshConfig, - p.GetCtx().Project.Instances[iNickname].BestIpAddress(), + p.DeployCtx.Project.SshConfig, + p.DeployCtx.Project.Instances[iNickname].BestIpAddress(), fmt.Sprintf("%s\ninit_volume_attachment %s %s %d '%s'", cldaws.InitVolumeAttachmentFunc, awsFinalDeviceNameNitro(suggestedDevice), // AWS final device here @@ -134,11 +131,11 @@ func (p *AwsDeployProvider) AttachVolume(iNickname string, volNickname string) ( } func (p *AwsDeployProvider) DetachVolume(iNickname string, volNickname string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] + volDef := p.DeployCtx.Project.Instances[iNickname].Volumes[volNickname] - foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, volDef.Name) if err != nil { return lb.Complete(err) } @@ -148,7 +145,7 @@ func (p *AwsDeployProvider) DetachVolume(iNickname string, volNickname string) ( return lb.Complete(nil) } - foundDevice, _, err := cldaws.GetVolumeAttachedDeviceById(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName) + foundDevice, _, err := cldaws.GetVolumeAttachedDeviceById(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundVolIdByName) if err != nil { return lb.Complete(err) } @@ -161,29 +158,29 @@ func (p *AwsDeployProvider) DetachVolume(iNickname string, volNickname string) ( // Unmount er := rexec.ExecSsh( - p.GetCtx().Project.SshConfig, - p.GetCtx().Project.Instances[iNickname].BestIpAddress(), + p.DeployCtx.Project.SshConfig, + p.DeployCtx.Project.Instances[iNickname].BestIpAddress(), fmt.Sprintf("sudo umount -d %s", volDef.MountPoint), map[string]string{}) lb.Add(er.ToString()) if er.Error != nil { return lb.Complete(fmt.Errorf("cannot umount volume %s on instance %s: %s", volNickname, iNickname, 
er.Error.Error())) } - foundInstanceIdByName, _, err := cldaws.GetInstanceIdAndStateByHostName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Project.Instances[iNickname].InstName) + foundInstanceIdByName, _, err := cldaws.GetInstanceIdAndStateByHostName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, p.DeployCtx.Project.Instances[iNickname].InstName) if err != nil { return lb.Complete(err) } // Detach - return lb.Complete(cldaws.DetachVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName, foundInstanceIdByName, foundDevice, p.GetCtx().Project.Timeouts.DetachVolume)) + return lb.Complete(cldaws.DetachVolume(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundVolIdByName, foundInstanceIdByName, foundDevice, p.DeployCtx.Project.Timeouts.DetachVolume)) } func (p *AwsDeployProvider) DeleteVolume(iNickname string, volNickname string) (l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) + lb := l.NewLogBuilder(l.CurFuncName(), p.DeployCtx.IsVerbose) - volDef := p.GetCtx().Project.Instances[iNickname].Volumes[volNickname] - foundVolIdByName, err := cldaws.GetVolumeIdByName(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, volDef.Name) + volDef := p.DeployCtx.Project.Instances[iNickname].Volumes[volNickname] + foundVolIdByName, err := cldaws.GetVolumeIdByName(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, volDef.Name) if err != nil { return lb.Complete(err) } @@ -193,5 +190,5 @@ func (p *AwsDeployProvider) DeleteVolume(iNickname string, volNickname string) ( return lb.Complete(nil) } - return lb.Complete(cldaws.DeleteVolume(p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, foundVolIdByName)) + return lb.Complete(cldaws.DeleteVolume(p.DeployCtx.Aws.Ec2Client, p.DeployCtx.GoCtx, lb, foundVolIdByName)) } diff --git a/pkg/provider/deploy_provider.go b/pkg/provider/deploy_provider.go index 978c4cd..bc24a53 100644 --- a/pkg/provider/deploy_provider.go +++ b/pkg/provider/deploy_provider.go @@ -2,77 +2,203 @@ package provider import ( "context" + "encoding/json" "fmt" + "reflect" + "regexp" + "strings" + "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials/stscreds" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" - taggingTypes "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types" "github.com/aws/aws-sdk-go-v2/service/sts" "github.com/capillariesio/capillaries-deploy/pkg/cld" - "github.com/capillariesio/capillaries-deploy/pkg/cld/cldaws" "github.com/capillariesio/capillaries-deploy/pkg/l" "github.com/capillariesio/capillaries-deploy/pkg/prj" + "github.com/capillariesio/capillaries-deploy/pkg/rexec" ) -type AwsCtx struct { - Config aws.Config - Ec2Client *ec2.Client - TaggingClient *resourcegroupstaggingapi.Client +const ( + CmdDeploymentCreate string = "deployment_create" + CmdDeploymentCreateImages string = "deployment_create_images" + CmdDeploymentRestoreInstances string = "deployment_restore_instances" + CmdDeploymentDeleteImages string = "deployment_delete_images" + CmdDeploymentDelete string = "deployment_delete" + CmdListDeployments string = "list_deployments" + CmdListDeploymentResources string = "list_deployment_resources" + CmdCreateFloatingIps string = "create_floating_ips" + CmdDeleteFloatingIps string = "delete_floating_ips" + CmdCreateSecurityGroups string = "create_security_groups" + CmdDeleteSecurityGroups string = "delete_security_groups" + CmdCreateNetworking string = 
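// A small sketch of what the new chained ("deployment_*") commands expand to: look the
// command up in combinedCmdCallSeqMap (defined further below in this file) and walk the
// sequence the same way genericExecCmdWithNoResult does, printing each step instead of
// executing it. printCombinedCmd is a hypothetical helper used only for illustration.

func printCombinedCmd(cmd string) {
	seq, ok := combinedCmdCallSeqMap[cmd]
	if !ok {
		fmt.Printf("%s is not a combined command, it runs as a single step\n", cmd)
		return
	}
	for i, call := range seq {
		onFail := "stop on failure"
		if call.OnFail == IgnoreFail {
			onFail = "ignore failure"
		}
		fmt.Printf("%d. %s %q (%s)\n", i+1, call.Cmd, call.Nicknames, onFail)
	}
}

// printCombinedCmd(CmdDeploymentCreate), for example, would list create_floating_ips,
// create_networking, create_security_groups, then create_volumes / create_instances /
// ping_instances for "*", and so on, in the order declared in the map.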
"create_networking" + CmdDeleteNetworking string = "delete_networking" + CmdCreateVolumes string = "create_volumes" + CmdDeleteVolumes string = "delete_volumes" + CmdCreateInstances string = "create_instances" + CmdDeleteInstances string = "delete_instances" + CmdAttachVolumes string = "attach_volumes" + CmdDetachVolumes string = "detach_volumes" + CmdUploadFiles string = "upload_files" + CmdDownloadFiles string = "download_files" + CmdInstallServices string = "install_services" + CmdConfigServices string = "config_services" + CmdStartServices string = "start_services" + CmdStopServices string = "stop_services" + CmdPingInstances string = "ping_instances" + CmdCreateSnapshotImages string = "create_snapshot_images" + CmdCreateInstancesFromSnapshotImages string = "create_instances_from_snapshot_images" + CmdDeleteSnapshotImages string = "delete_snapshot_images" + CmdCheckCassStatus string = "check_cassandra_status" +) + +type StopOnFailType int + +const ( + StopOnFail StopOnFailType = iota + IgnoreFail +) + +type ExecArgs struct { + IgnoreAttachedVolumes bool + Verbosity bool + NumberOfRepetitions int + ShowProjectDetails bool +} + +type CombinedCmdCall struct { + Cmd string + Nicknames string + OnFail StopOnFailType +} + +var combinedCmdCallSeqMap map[string][]CombinedCmdCall = map[string][]CombinedCmdCall{ + CmdDeploymentCreate: { + {CmdCreateFloatingIps, "", StopOnFail}, + {CmdCreateNetworking, "", StopOnFail}, + {CmdCreateSecurityGroups, "", StopOnFail}, + {CmdCreateVolumes, "*", StopOnFail}, + {CmdCreateInstances, "*", StopOnFail}, + {CmdPingInstances, "*", StopOnFail}, + {CmdAttachVolumes, "bastion", StopOnFail}, + {CmdInstallServices, "bastion", StopOnFail}, + {CmdInstallServices, "rabbitmq,prometheus,daemon*,cass*", StopOnFail}, + {CmdStopServices, "cass*", StopOnFail}, + {CmdConfigServices, "cass*", StopOnFail}, + {CmdConfigServices, "bastion,rabbitmq,prometheus,daemon*", StopOnFail}, + {CmdCheckCassStatus, "", StopOnFail}}, + CmdDeploymentCreateImages: { + {CmdStopServices, "*", IgnoreFail}, + {CmdDetachVolumes, "bastion", StopOnFail}, + {CmdCreateSnapshotImages, "*", StopOnFail}, + {CmdDeleteInstances, "*", StopOnFail}}, + CmdDeploymentRestoreInstances: { + {CmdCreateInstancesFromSnapshotImages, "*", StopOnFail}, + {CmdPingInstances, "*", StopOnFail}, + {CmdAttachVolumes, "bastion", StopOnFail}, + {CmdStartServices, "*", StopOnFail}, + {CmdStopServices, "cass*", StopOnFail}, + {CmdConfigServices, "cass*", StopOnFail}}, + CmdDeploymentDeleteImages: { + {CmdDeleteSnapshotImages, "*", StopOnFail}}, + CmdDeploymentDelete: { + {CmdDeleteSnapshotImages, "*", StopOnFail}, + {CmdStopServices, "*", IgnoreFail}, + {CmdDetachVolumes, "bastion", StopOnFail}, + {CmdDeleteInstances, "*", IgnoreFail}, + {CmdDeleteVolumes, "*", IgnoreFail}, + {CmdDeleteSecurityGroups, "*", IgnoreFail}, + {CmdDeleteNetworking, "*", IgnoreFail}, + {CmdDeleteFloatingIps, "*", IgnoreFail}}} + +func IsCmdRequiresNicknames(cmd string) bool { + return cmd == CmdCreateVolumes || + cmd == CmdDeleteVolumes || + cmd == CmdCreateInstances || + cmd == CmdDeleteInstances || + cmd == CmdAttachVolumes || + cmd == CmdDetachVolumes || + cmd == CmdUploadFiles || + cmd == CmdDownloadFiles || + cmd == CmdInstallServices || + cmd == CmdConfigServices || + cmd == CmdStartServices || + cmd == CmdStopServices || + cmd == CmdPingInstances || + cmd == CmdCreateSnapshotImages || + cmd == CmdCreateInstancesFromSnapshotImages || + cmd == CmdDeleteSnapshotImages } type DeployCtx struct { - //PrjPair *prj.ProjectPair Project *prj.Project GoCtx 
context.Context IsVerbose bool - Aws *AwsCtx Tags map[string]string + // AWS members: + Aws *AwsCtx + // Azure members: } + type DeployProvider interface { - GetCtx() *DeployCtx - ListDeployments() (map[string]int, l.LogMsg, error) - ListDeploymentResources() ([]*cld.Resource, l.LogMsg, error) - CreateFloatingIps() (l.LogMsg, error) - DeleteFloatingIps() (l.LogMsg, error) - CreateSecurityGroups() (l.LogMsg, error) - DeleteSecurityGroups() (l.LogMsg, error) - CreateNetworking() (l.LogMsg, error) - DeleteNetworking() (l.LogMsg, error) - HarvestInstanceTypesByFlavorNames(flavorMap map[string]string) (l.LogMsg, error) - HarvestImageIds(imageMap map[string]bool) (l.LogMsg, error) - VerifyKeypairs(keypairMap map[string]struct{}) (l.LogMsg, error) - CreateInstanceAndWaitForCompletion(iNickname string, flavorId string, imageId string) (l.LogMsg, error) - DeleteInstance(iNickname string, ignoreAttachedVolumes bool) (l.LogMsg, error) - CreateSnapshotImage(iNickname string) (l.LogMsg, error) - CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname string, flavorId string) (l.LogMsg, error) - DeleteSnapshotImage(iNickname string) (l.LogMsg, error) - CreateVolume(iNickname string, volNickname string) (l.LogMsg, error) - AttachVolume(iNickname string, volNickname string) (l.LogMsg, error) - DetachVolume(iNickname string, volNickname string) (l.LogMsg, error) - DeleteVolume(iNickname string, volNickname string) (l.LogMsg, error) - PopulateInstanceExternalAddressByName() (l.LogMsg, error) + ListDeployments(cOut chan<- string, cErr chan<- string) (map[string]int, error) + ListDeploymentResources(cOut chan<- string, cErr chan<- string) ([]*cld.Resource, error) + ExecCmdWithNoResult(cmd string, nicknames string, execArgs *ExecArgs, cOut chan<- string, cErr chan<- string) error } -type AwsDeployProvider struct { - Ctx *DeployCtx +func genericListDeployments(p deployProviderImpl, cOut chan<- string, cErr chan<- string) (map[string]int, error) { + mapResourceCount, logMsg, err := p.listDeployments() + cOut <- string(logMsg) + if err != nil { + cErr <- err.Error() + } + return mapResourceCount, err } -func (p *AwsDeployProvider) GetCtx() *DeployCtx { - return p.Ctx +func genericListDeploymentResources(p deployProviderImpl, cOut chan<- string, cErr chan<- string) ([]*cld.Resource, error) { + resources, logMsg, err := p.listDeploymentResources() + cOut <- string(logMsg) + if err != nil { + cErr <- err.Error() + } + return resources, err +} + +func genericExecCmdWithNoResult(p deployProviderImpl, cmd string, nicknames string, execArgs *ExecArgs, cOut chan<- string, cErr chan<- string) error { + if combinedCmdCallSeq, ok := combinedCmdCallSeqMap[cmd]; ok { + for _, combinedCmdCallSeq := range combinedCmdCallSeq { + err := execSimpleParallelCmd(p, combinedCmdCallSeq.Cmd, combinedCmdCallSeq.Nicknames, execArgs, cOut, cErr) + if err != nil && combinedCmdCallSeq.OnFail == StopOnFail { + return err + } + } + return nil + } else { + return execSimpleParallelCmd(p, cmd, nicknames, execArgs, cOut, cErr) + } } type AssumeRoleConfig struct { - RoleArn string - ExternalId string + // AWS members: + RoleArn string `json:"role_arn"` + ExternalId string `json:"external_id"` + // Azure members: } -func DeployProviderFactory(project *prj.Project, goCtx context.Context, assumeRoleCfg *AssumeRoleConfig, isVerbose bool) (DeployProvider, error) { +func DeployProviderFactory(project *prj.Project, goCtx context.Context, assumeRoleCfg *AssumeRoleConfig, isVerbose bool, cOut chan<- string, cErr chan<- string) (DeployProvider, error) 
{ if project.DeployProviderName == prj.DeployProviderAws { cfg, err := config.LoadDefaultConfig(goCtx) if err != nil { + cErr <- err.Error() + return nil, err + } + + callerIdentityOutBefore, err := sts.NewFromConfig(cfg).GetCallerIdentity(goCtx, &sts.GetCallerIdentityInput{}) + if err != nil { + err = fmt.Errorf("cannot get caller identity before assuming role %s: %s", assumeRoleCfg.RoleArn, err.Error()) + cErr <- err.Error() return nil, err } @@ -81,12 +207,29 @@ func DeployProviderFactory(project *prj.Project, goCtx context.Context, assumeRo func(o *stscreds.AssumeRoleOptions) { o.ExternalID = aws.String(assumeRoleCfg.ExternalId) o.RoleSessionName = "third-party-capideploy-assumes-role-provided-by-customer" + o.Duration = time.Duration(1 * time.Hour) }) cfg.Credentials = aws.NewCredentialsCache(creds) + + callerIdentityOutAfter, err := sts.NewFromConfig(cfg).GetCallerIdentity(goCtx, &sts.GetCallerIdentityInput{}) + if err != nil { + err = fmt.Errorf("cannot get caller identity after assuming role %s: %s", assumeRoleCfg.RoleArn, err.Error()) + cErr <- err.Error() + return nil, err + } + + if *callerIdentityOutBefore.Arn == *callerIdentityOutAfter.Arn { + err = fmt.Errorf("cannot proceed with the same caller identity after assuming role %s: %s", assumeRoleCfg.RoleArn, *callerIdentityOutAfter.Arn) + cErr <- err.Error() + return nil, err + } + cOut <- fmt.Sprintf("Caller identity (role assumed): %s", *callerIdentityOutAfter.Arn) + } else { + cOut <- fmt.Sprintf("Caller identity (no role assumed): %s", *callerIdentityOutBefore.Arn) } return &AwsDeployProvider{ - Ctx: &DeployCtx{ + DeployCtx: &DeployCtx{ Project: project, GoCtx: goCtx, IsVerbose: isVerbose, @@ -103,56 +246,412 @@ func DeployProviderFactory(project *prj.Project, goCtx context.Context, assumeRo return nil, fmt.Errorf("unsupported deploy provider %s", project.DeployProviderName) } -func reportPublicIp(prj *prj.Project) { - fmt.Printf(` -Public IP reserved, now you can use it for SSH jumphost in your ~/.ssh/config: +const MaxWorkerThreads int = 50 -Host %s - User %s - StrictHostKeyChecking=no - UserKnownHostsFile=/dev/null - IdentityFile %s +type SingleThreadCmdHandler func() (l.LogMsg, error) -Also, you may find it convenient to use in your commands: +func pingOneHost(sshConfig *rexec.SshConfigDef, ipAddress string, verbosity bool, numberOfRepetitions int) (l.LogMsg, error) { + var err error + var logMsg l.LogMsg -export BASTION_IP=%s + repetitions := 1 + if numberOfRepetitions > 1 { + repetitions = numberOfRepetitions + } -`, - prj.SshConfig.BastionExternalIp, - prj.SshConfig.User, - prj.SshConfig.PrivateKeyPath, - prj.SshConfig.BastionExternalIp) -} -func (p *AwsDeployProvider) ListDeployments() (map[string]int, l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) - resources, err := cldaws.GetResourcesByTag(p.GetCtx().Aws.TaggingClient, p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Aws.Config.Region, - []taggingTypes.TagFilter{{Key: aws.String(cld.DeploymentOperatorTagName), Values: []string{cld.DeploymentOperatorTagValue}}}, false) - if err != nil { - logMsg, err := lb.Complete(err) - return nil, logMsg, err + lb := l.NewLogBuilder(l.CurFuncName()+" "+ipAddress, verbosity) + + for { + logMsg, err = rexec.ExecCommandOnInstance(sshConfig, ipAddress, "id", verbosity) + lb.Add(string(logMsg)) + repetitions-- + if err == nil || repetitions == 0 { + break + } + lb.Add(err.Error()) + time.Sleep(5 * time.Second) } - deploymentResCount := map[string]int{} - for _, res := range resources { - 
if deploymentNameCount, ok := deploymentResCount[res.DeploymentName]; ok { - deploymentResCount[res.DeploymentName] = deploymentNameCount + 1 + + return lb.Complete(err) +} + +func filterByNickname[GenericDef prj.InstanceDef](nicknames string, sourceMap map[string]*GenericDef, entityName string) (map[string]*GenericDef, error) { + var defMap map[string]*GenericDef + rawNicknames := strings.Split(nicknames, ",") + defMap = map[string]*GenericDef{} + for _, rawNickname := range rawNicknames { + if strings.Contains(rawNickname, "*") { + matchFound := false + reNickname := regexp.MustCompile("^" + strings.ReplaceAll(rawNickname, "*", "[a-zA-Z0-9]*") + "$") + for fgNickname, fgDef := range sourceMap { + if reNickname.MatchString(fgNickname) { + matchFound = true + defMap[fgNickname] = fgDef + } + } + if !matchFound { + return nil, fmt.Errorf("no match found for %s '%s', available definitions: %s", entityName, rawNickname, reflect.ValueOf(sourceMap).MapKeys()) + } } else { - deploymentResCount[res.DeploymentName] = 1 + fgDef, ok := sourceMap[rawNickname] + if !ok { + return nil, fmt.Errorf("definition for %s '%s' not found, available definitions: %s", entityName, rawNickname, reflect.ValueOf(sourceMap).MapKeys()) + } + defMap[rawNickname] = fgDef } } - logMsg, _ := lb.Complete(nil) - return deploymentResCount, logMsg, nil + return defMap, nil } -func (p *AwsDeployProvider) ListDeploymentResources() ([]*cld.Resource, l.LogMsg, error) { - lb := l.NewLogBuilder(l.CurFuncName(), p.GetCtx().IsVerbose) - resources, err := cldaws.GetResourcesByTag(p.GetCtx().Aws.TaggingClient, p.GetCtx().Aws.Ec2Client, p.GetCtx().GoCtx, lb, p.GetCtx().Aws.Config.Region, - []taggingTypes.TagFilter{ - {Key: aws.String(cld.DeploymentOperatorTagName), Values: []string{cld.DeploymentOperatorTagValue}}, - {Key: aws.String(cld.DeploymentNameTagName), Values: []string{p.Ctx.Project.DeploymentName}}}, true) - if err != nil { - logMsg, err := lb.Complete(err) - return nil, logMsg, err +func execSimpleParallelCmd(deployProvider deployProviderImpl, cmd string, nicknames string, execArgs *ExecArgs, cOut chan<- string, cErr chan<- string) error { + cmdStartTs := time.Now() + throttle := time.NewTicker(time.Second) // One call per second, to avoid error 429 on openstack/aws/azure calls + var sem = make(chan int, MaxWorkerThreads) + var errChan chan error + var errorsExpected int + + singleThreadNoResultCommands := map[string]SingleThreadCmdHandler{ + CmdCreateFloatingIps: deployProvider.CreateFloatingIps, + CmdDeleteFloatingIps: deployProvider.DeleteFloatingIps, + CmdCreateSecurityGroups: deployProvider.CreateSecurityGroups, + CmdDeleteSecurityGroups: deployProvider.DeleteSecurityGroups, + CmdCreateNetworking: deployProvider.CreateNetworking, + CmdDeleteNetworking: deployProvider.DeleteNetworking, + CmdCheckCassStatus: deployProvider.CheckCassStatus, + } + + if cmdHandler, ok := singleThreadNoResultCommands[cmd]; ok { + if cmd == CmdCheckCassStatus { + // We need Cassandra node ip addresses populated + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + } + errorsExpected = 1 + errChan = make(chan error, errorsExpected) + sem <- 1 + go func() { + logMsg, err := cmdHandler() + cOut <- string(logMsg) + errChan <- err + <-sem + }() + } else if cmd == CmdCreateInstances || + cmd == CmdDeleteInstances || + cmd == CmdCreateSnapshotImages || + cmd == CmdCreateInstancesFromSnapshotImages || + cmd == CmdDeleteSnapshotImages 
{ + if len(nicknames) == 0 { + err := fmt.Errorf("not enough args, expected comma-separated list of instances or '*'") + cErr <- err.Error() + return err + } + + instances, err := filterByNickname(nicknames, deployProvider.getDeployCtx().Project.Instances, "instance") + if err != nil { + cErr <- err.Error() + return err + } + + errorsExpected = len(instances) + errChan = make(chan error, errorsExpected) + + usedFlavors := map[string]string{} + usedImages := map[string]bool{} + if cmd == CmdCreateInstances || + cmd == CmdCreateInstancesFromSnapshotImages { + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + + // Make sure image/flavor is supported + usedKeypairs := map[string]struct{}{} + for _, instDef := range instances { + usedFlavors[instDef.FlavorName] = "" + usedImages[instDef.ImageId] = false + usedKeypairs[instDef.RootKeyName] = struct{}{} + } + logMsg, err := deployProvider.HarvestInstanceTypesByFlavorNames(usedFlavors) + cOut <- string(logMsg) + if err != nil { + cErr <- err.Error() + return err + } + + logMsg, err = deployProvider.HarvestImageIds(usedImages) + cOut <- string(logMsg) + if err != nil { + cErr <- err.Error() + return err + } + + // Make sure the keypairs are there + logMsg, err = deployProvider.VerifyKeypairs(usedKeypairs) + cOut <- string(logMsg) + if err != nil { + cErr <- err.Error() + return err + } + + cOut <- "Creating instances, consider clearing known_hosts to avoid ssh complaints:" + for _, i := range instances { + cOut <- fmt.Sprintf("ssh-keygen -f ~/.ssh/known_hosts -R %s;", i.BestIpAddress()) + } + } + + switch cmd { + case CmdCreateInstances: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + for iNickname := range instances { + <-throttle.C + sem <- 1 + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string) { + logMsg, err := deployProvider.CreateInstanceAndWaitForCompletion( + iNickname, + usedFlavors[deployProvider.getDeployCtx().Project.Instances[iNickname].FlavorName], + deployProvider.getDeployCtx().Project.Instances[iNickname].ImageId) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname) + } + case CmdDeleteInstances: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + for iNickname := range instances { + <-throttle.C + sem <- 1 + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string) { + logMsg, err := deployProvider.DeleteInstance(iNickname, execArgs.IgnoreAttachedVolumes) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname) + } + case CmdCreateSnapshotImages: + for iNickname := range instances { + <-throttle.C + sem <- 1 + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string) { + logMsg, err := deployProvider.CreateSnapshotImage(iNickname) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname) + } + case CmdCreateInstancesFromSnapshotImages: + for iNickname := range instances { + <-throttle.C + sem <- 1 + go func(project *prj.Project, logChan chan<- string, errChan 
chan<- error, iNickname string) { + logMsg, err := deployProvider.CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname, + usedFlavors[deployProvider.getDeployCtx().Project.Instances[iNickname].FlavorName]) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname) + } + case CmdDeleteSnapshotImages: + for iNickname := range instances { + <-throttle.C + sem <- 1 + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string) { + logMsg, err := deployProvider.DeleteSnapshotImage(iNickname) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname) + } + default: + err := fmt.Errorf("unknown create/delete instance command %s", cmd) + cErr <- err.Error() + return err + } + } else if cmd == CmdPingInstances || + cmd == CmdInstallServices || + cmd == CmdConfigServices || + cmd == CmdStartServices || + cmd == CmdStopServices { + if len(nicknames) == 0 { + err := fmt.Errorf("not enough args, expected comma-separated list of instances or '*'") + cErr <- err.Error() + return err + } + + instances, err := filterByNickname(nicknames, deployProvider.getDeployCtx().Project.Instances, "instance") + if err != nil { + cErr <- err.Error() + return err + } + + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + + errorsExpected = len(instances) + errChan = make(chan error, len(instances)) + for _, iDef := range instances { + <-throttle.C + sem <- 1 + go func(prj *prj.Project, logChan chan<- string, errChan chan<- error, iDef *prj.InstanceDef) { + var logMsg l.LogMsg + var err error + switch cmd { + case CmdPingInstances: + logMsg, err = pingOneHost(deployProvider.getDeployCtx().Project.SshConfig, iDef.BestIpAddress(), execArgs.Verbosity, execArgs.NumberOfRepetitions) + + case CmdInstallServices: + // Make sure ping passes + logMsg, err = pingOneHost(deployProvider.getDeployCtx().Project.SshConfig, iDef.BestIpAddress(), execArgs.Verbosity, 5) + + // If ping passed, it's ok to move on + if err == nil { + logMsg, err = rexec.ExecEmbeddedScriptsOnInstance(deployProvider.getDeployCtx().Project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Install, iDef.Service.Env, execArgs.Verbosity) + } + + case CmdConfigServices: + logMsg, err = rexec.ExecEmbeddedScriptsOnInstance(deployProvider.getDeployCtx().Project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Config, iDef.Service.Env, execArgs.Verbosity) + + case CmdStartServices: + logMsg, err = rexec.ExecEmbeddedScriptsOnInstance(deployProvider.getDeployCtx().Project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Start, iDef.Service.Env, execArgs.Verbosity) + + case CmdStopServices: + logMsg, err = rexec.ExecEmbeddedScriptsOnInstance(deployProvider.getDeployCtx().Project.SshConfig, iDef.BestIpAddress(), iDef.Service.Cmd.Stop, iDef.Service.Env, execArgs.Verbosity) + + default: + err = fmt.Errorf("unknown service command:%s", cmd) + } + + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iDef) + } + + } else if cmd == CmdCreateVolumes || cmd == CmdAttachVolumes || cmd == CmdDetachVolumes || cmd == CmdDeleteVolumes { + if len(nicknames) == 0 { + err := fmt.Errorf("not enough args, expected comma-separated list of instances or '*'") + cErr <- err.Error() + return err + } + + instances, err := filterByNickname(nicknames, 
deployProvider.getDeployCtx().Project.Instances, "instance") + if err != nil { + cErr <- err.Error() + return err + } + + volCount := 0 + for _, iDef := range instances { + volCount += len(iDef.Volumes) + } + if volCount == 0 { + fmt.Printf("No volumes to create/attach/detach/delete") + return nil + } + errorsExpected = volCount + errChan = make(chan error, volCount) + for iNickname, iDef := range instances { + for volNickname := range iDef.Volumes { + <-throttle.C + sem <- 1 + switch cmd { + case CmdCreateVolumes: + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string, volNickname string) { + logMsg, err := deployProvider.CreateVolume(iNickname, volNickname) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname, volNickname) + case CmdAttachVolumes: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string, volNickname string) { + logMsg, err := deployProvider.AttachVolume(iNickname, volNickname) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname, volNickname) + case CmdDetachVolumes: + logMsgBastionIp, err := deployProvider.PopulateInstanceExternalAddressByName() + cOut <- string(logMsgBastionIp) + if err != nil { + cErr <- err.Error() + return err + } + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string, volNickname string) { + logMsg, err := deployProvider.DetachVolume(iNickname, volNickname) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname, volNickname) + case CmdDeleteVolumes: + go func(project *prj.Project, logChan chan<- string, errChan chan<- error, iNickname string, volNickname string) { + logMsg, err := deployProvider.DeleteVolume(iNickname, volNickname) + logChan <- string(logMsg) + errChan <- err + <-sem + }(deployProvider.getDeployCtx().Project, cOut, errChan, iNickname, volNickname) + default: + err := fmt.Errorf("unknown cmd %s", cmd) + cErr <- err.Error() + return err + } + } + } + } else { + err := fmt.Errorf("unknown cmd %s", cmd) + cErr <- err.Error() + return err } - logMsg, _ := lb.Complete(nil) - return resources, logMsg, nil + + // Wait for all workers to finish + + var finalCmdErr error + for errorsExpected > 0 { + cmdErr := <-errChan + if cmdErr != nil { + cErr <- cmdErr.Error() + finalCmdErr = cmdErr + } + errorsExpected-- + } + + if execArgs.ShowProjectDetails { + prjJsonBytes, err := json.MarshalIndent(deployProvider.getDeployCtx().Project, "", " ") + if err != nil { + return fmt.Errorf("cannot show project json: %s", err.Error()) + } + cOut <- string(prjJsonBytes) + } + + if finalCmdErr != nil { + cOut <- fmt.Sprintf("%s %sERROR%s, elapsed %.3fs", cmd, l.LogColorRed, l.LogColorReset, time.Since(cmdStartTs).Seconds()) + } else { + cOut <- fmt.Sprintf("%s %sOK%s, elapsed %.3fs", cmd, l.LogColorGreen, l.LogColorReset, time.Since(cmdStartTs).Seconds()) + } + + return finalCmdErr } diff --git a/pkg/provider/deploy_provider_impl.go b/pkg/provider/deploy_provider_impl.go new file mode 100644 index 0000000..89ba10f --- /dev/null +++ b/pkg/provider/deploy_provider_impl.go @@ -0,0 +1,74 @@ +package provider + +import ( + "fmt" + "regexp" + "strings" + + 
"github.com/capillariesio/capillaries-deploy/pkg/cld" + "github.com/capillariesio/capillaries-deploy/pkg/l" + "github.com/capillariesio/capillaries-deploy/pkg/prj" + "github.com/capillariesio/capillaries-deploy/pkg/rexec" +) + +type deployProviderImpl interface { + getDeployCtx() *DeployCtx + listDeployments() (map[string]int, l.LogMsg, error) + listDeploymentResources() ([]*cld.Resource, l.LogMsg, error) + CreateFloatingIps() (l.LogMsg, error) + DeleteFloatingIps() (l.LogMsg, error) + CreateSecurityGroups() (l.LogMsg, error) + DeleteSecurityGroups() (l.LogMsg, error) + CreateNetworking() (l.LogMsg, error) + DeleteNetworking() (l.LogMsg, error) + HarvestInstanceTypesByFlavorNames(flavorMap map[string]string) (l.LogMsg, error) + HarvestImageIds(imageMap map[string]bool) (l.LogMsg, error) + VerifyKeypairs(keypairMap map[string]struct{}) (l.LogMsg, error) + CreateInstanceAndWaitForCompletion(iNickname string, flavorId string, imageId string) (l.LogMsg, error) + DeleteInstance(iNickname string, ignoreAttachedVolumes bool) (l.LogMsg, error) + CreateSnapshotImage(iNickname string) (l.LogMsg, error) + CreateInstanceFromSnapshotImageAndWaitForCompletion(iNickname string, flavorId string) (l.LogMsg, error) + DeleteSnapshotImage(iNickname string) (l.LogMsg, error) + CreateVolume(iNickname string, volNickname string) (l.LogMsg, error) + AttachVolume(iNickname string, volNickname string) (l.LogMsg, error) + DetachVolume(iNickname string, volNickname string) (l.LogMsg, error) + DeleteVolume(iNickname string, volNickname string) (l.LogMsg, error) + PopulateInstanceExternalAddressByName() (l.LogMsg, error) + CheckCassStatus() (l.LogMsg, error) +} + +func isAllNodesJoined(strOut string, instances map[string]*prj.InstanceDef) error { + missingIps := make([]string, 0) + for _, iDef := range instances { + if iDef.Purpose == string(prj.InstancePurposeCassandra) { + re := regexp.MustCompile(`UN ` + iDef.IpAddress) + matches := re.FindAllString(strOut, -1) + if len(matches) == 0 { + missingIps = append(missingIps, iDef.IpAddress) + } + } + } + if len(missingIps) > 0 { + return fmt.Errorf("nodes did not join cassandra cluster: %s", strings.Join(missingIps, ",")) + } + return nil +} + +func (p *AwsDeployProvider) CheckCassStatus() (l.LogMsg, error) { + for _, iDef := range p.DeployCtx.Project.Instances { + if iDef.Purpose == string(prj.InstancePurposeCassandra) { + logMsg, err := rexec.ExecCommandOnInstance(p.DeployCtx.Project.SshConfig, iDef.IpAddress, "nodetool describecluster;nodetool status", true) + if err == nil { + // All Cassandra nodes must have "UN $cassNodeIp" + err = isAllNodesJoined(string(logMsg), p.DeployCtx.Project.Instances) + } + if p.DeployCtx.IsVerbose { + return logMsg, err + } else { + return "", err + } + } + } + + return "", fmt.Errorf("cannot find even a single cassandra node") +} diff --git a/pkg/rexec/exec_ssh.go b/pkg/rexec/exec_ssh.go index 4a79a21..e8b5cd0 100755 --- a/pkg/rexec/exec_ssh.go +++ b/pkg/rexec/exec_ssh.go @@ -44,7 +44,7 @@ type SshConfigDef struct { BastionExternalIp string `json:"bastion_external_ip_address"` // Output only Port int `json:"port"` User string `json:"user"` - PrivateKeyPath string `json:"private_key_path"` + PrivateKeyOrPath string `json:"private_key_or_path"` } type TunneledSshClient struct { @@ -73,7 +73,7 @@ func (tsc *TunneledSshClient) Close() { func NewTunneledSshClient(sshConfig *SshConfigDef, ipAddress string) (*TunneledSshClient, error) { bastionSshClientConfig, err := NewSshClientConfig( sshConfig.User, - sshConfig.PrivateKeyPath) + 
sshConfig.PrivateKeyOrPath) if err != nil { return nil, err } @@ -104,7 +104,7 @@ func NewTunneledSshClient(sshConfig *SshConfigDef, ipAddress string) (*TunneledS tunneledSshClientConfig, err := NewSshClientConfig( sshConfig.User, - sshConfig.PrivateKeyPath) + sshConfig.PrivateKeyOrPath) if err != nil { return nil, err } diff --git a/pkg/rexec/scripts/daemon/config.sh b/pkg/rexec/scripts/daemon/config.sh index 20d87d4..a161255 100755 --- a/pkg/rexec/scripts/daemon/config.sh +++ b/pkg/rexec/scripts/daemon/config.sh @@ -13,18 +13,35 @@ if [ "$SSH_USER" = "" ]; then exit 1 fi +wait (){ + counter=0 + while [ "$(pgrep capidaemon)" != "" ]; do + counter=$((counter+1)) + if [[ "$counter" -gt 60 ]]; then + break + fi + sleep 1 + done +} + pkill -2 capidaemon -processid=$(pgrep capidaemon) -if [ "$processid" != "" ]; then - echo Trying pkill -9... - pkill -9 capidaemon 2> /dev/null - processid=$(pgrep capidaemon) - if [ "$processid" != "" ]; then - echo pkill -9 did not kill +wait + +if [ "$(pgrep capidaemon)" == "" ]; then + echo pkill -2 succeeded +else + echo pkill -2 failed + pkill -9 capidaemon 2>&1 + wait + if [ "$(pgrep capidaemon)" == "" ]; then + echo pkill -9 succeeded + else + echo pkill -9 failed exit 9 - fi + fi fi + ENV_CONFIG_FILE=/home/$SSH_USER/bin/capidaemon.json sed -i -e 's~"url":[ ]*"[a-zA-Z0-9@\.:\/\-_$ ]*"~"url": "'"$AMQP_URL"'"~g' $ENV_CONFIG_FILE diff --git a/pkg/rexec/scripts/daemon/stop.sh b/pkg/rexec/scripts/daemon/stop.sh index d3e2c24..4e746ff 100755 --- a/pkg/rexec/scripts/daemon/stop.sh +++ b/pkg/rexec/scripts/daemon/stop.sh @@ -1,11 +1,28 @@ +wait (){ + counter=0 + while [ "$(pgrep capidaemon)" != "" ]; do + counter=$((counter+1)) + if [[ "$counter" -gt 60 ]]; then + break + fi + sleep 1 + done +} + pkill -2 capidaemon -processid=$(pgrep capidaemon) -if [ "$processid" != "" ]; then - echo Trying pkill -9... 
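# A minimal sketch of the stop sequence this hunk introduces, assuming the wait()
# helper defined above (it polls "pgrep capidaemon" for up to ~60 seconds):
# send SIGINT first, give the daemon the grace period, and escalate to SIGKILL
# only if it is still running.
#   pkill -2 capidaemon; wait
#   if [ "$(pgrep capidaemon)" != "" ]; then pkill -9 capidaemon; wait; fi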
- pkill -9 capidaemon 2> /dev/null - processid=$(pgrep capidaemon) - if [ "$processid" != "" ]; then - echo pkill -9 did not kill +wait + +if [ "$(pgrep capidaemon)" == "" ]; then + echo pkill -2 succeeded +else + echo pkill -2 failed + pkill -9 capidaemon 2>&1 + wait + if [ "$(pgrep capidaemon)" == "" ]; then + echo pkill -9 succeeded + else + echo pkill -9 failed exit 9 - fi -fi \ No newline at end of file + fi +fi + diff --git a/pkg/rexec/scripts/rabbitmq/install.sh b/pkg/rexec/scripts/rabbitmq/install.sh index a4b6ded..063efe8 100755 --- a/pkg/rexec/scripts/rabbitmq/install.sh +++ b/pkg/rexec/scripts/rabbitmq/install.sh @@ -2,7 +2,7 @@ sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:rabbitmq/rabbitmq- sudo DEBIAN_FRONTEND=noninteractive apt-get update -y # Erlang from https://launchpad.net/~rabbitmq/+archive/ubuntu/rabbitmq-erlang -ERLANG_VER=1:26.2.5-1rmq1ppa1~ubuntu24.04.1 +ERLANG_VER=1:26.2.5.2-1rmq1ppa1~ubuntu24.04.1 sudo DEBIAN_FRONTEND=noninteractive apt-get -y install erlang-base=$ERLANG_VER \ erlang-asn1=$ERLANG_VER erlang-crypto=$ERLANG_VER erlang-eldap=$ERLANG_VER erlang-ftp=$ERLANG_VER erlang-inets=$ERLANG_VER \ erlang-mnesia=$ERLANG_VER erlang-os-mon=$ERLANG_VER erlang-parsetools=$ERLANG_VER erlang-public-key=$ERLANG_VER \ diff --git a/pkg/rexec/ssh.go b/pkg/rexec/ssh.go index 232341a..e0b40bc 100644 --- a/pkg/rexec/ssh.go +++ b/pkg/rexec/ssh.go @@ -8,6 +8,7 @@ import ( "net" "os" "path/filepath" + "regexp" "strings" "time" @@ -62,20 +63,34 @@ func parsePemBlock(block *pem.Block) (any, error) { } } -func NewSshClientConfig(user string, privateKeyPath string) (*ssh.ClientConfig, error) { - keyPath := privateKeyPath - if strings.HasPrefix(keyPath, "~/") { - homeDir, _ := os.UserHomeDir() - keyPath = filepath.Join(homeDir, keyPath[2:]) - } - pemBytes, err := os.ReadFile(keyPath) - if err != nil { - return nil, fmt.Errorf("cannot read private key file %s: %s", keyPath, err.Error()) - } - - signer, err := signerFromPem(pemBytes) - if err != nil { - return nil, err +func NewSshClientConfig(user string, privateKeyOrPath string) (*ssh.ClientConfig, error) { + reBegin := regexp.MustCompile(`-----BEGIN [ a-zA-Z0-9]+ KEY-----`) + reEnd := regexp.MustCompile(`-----END [ a-zA-Z0-9]+ KEY-----`) + strBegin := reBegin.FindString(privateKeyOrPath) + strEnd := reEnd.FindString(privateKeyOrPath) + var signer ssh.Signer + if strBegin != "" && strEnd != "" { + pemWithoutCrlf := strings.NewReplacer("\n", "", "\r", "").Replace(privateKeyOrPath) + pemWithTwoCrlfs := strings.ReplaceAll(strings.ReplaceAll(pemWithoutCrlf, strBegin, strBegin+"\n"), strEnd, "\n"+strEnd) + var err error + signer, err = signerFromPem([]byte(pemWithTwoCrlfs)) + if err != nil { + return nil, fmt.Errorf("cannot use private key starting with %s: %s", strBegin, err.Error()) + } + } else { + keyPath := privateKeyOrPath + if strings.HasPrefix(keyPath, "~/") { + homeDir, _ := os.UserHomeDir() + keyPath = filepath.Join(homeDir, keyPath[2:]) + } + pemBytes, err := os.ReadFile(keyPath) + if err != nil { + return nil, fmt.Errorf("cannot read private key file %s: %s", keyPath, err.Error()) + } + signer, err = signerFromPem(pemBytes) + if err != nil { + return nil, fmt.Errorf("cannot use private key from file %s(%s): %s", privateKeyOrPath, keyPath, err.Error()) + } } return &ssh.ClientConfig{ diff --git a/sample.jsonnet b/sample.jsonnet index ed78c1a..df988db 100644 --- a/sample.jsonnet +++ b/sample.jsonnet @@ -1,22 +1,25 @@ { // Variables to play with - local dep_name = 'sampleaws001', // Can be any combination of 
alphanumeric characters. Make it unique. - local subnet_availability_zone = 'us-east-1a', // AWS-specific - local deployment_flavor_power = 'aws.arm64.c7g.8', // 1. aws or azure, 2. amd64 or arm64, 3. Flavor family, 4. Number of cores in Cassandra nodes. Daemon cores are 4 times less. - - // Cassandra cluster size - 4,8,16 - local cassandra_total_nodes = 4, + local dep_name = '{CAPIDEPLOY_DEPLOYMENT_NAME}', // Can be any combination of alphanumeric characters. Make it unique. + local subnet_availability_zone = '{CAPIDEPLOY_SUBNET_AVAILABILITY_ZONE}', // AWS-specific + local deployment_flavor_power = '{CAPIDEPLOY_DEPLOYMENT_FLAVOR_POWER}', // 1. aws or azure, 2. amd64 or arm64, 3. Flavor family, 4. Number of cores in Cassandra nodes. Daemon cores are 4 times less. + local cassandra_total_nodes = std.parseInt('{CAPIDEPLOY_CASSANDRA_CLUSTER_SIZE}'), // Cassandra cluster size - 4,8,16 // You probably will not change anything below this line // max: daemon_cores*1.5 (which is the same as cassandra cores / 4 * 1.5) local DEFAULT_DAEMON_THREAD_POOL_SIZE = std.toString(std.parseInt(std.split(deployment_flavor_power,".")[3]) / 4 * 1.5), - // Depends on cassandra latency, reasonable values are 5-20. Let it be: - // - max perf (->100% CPU): cassandra cores / 2: 8->4 16->8 32->16 64->32 - // - cpnservative: cassandra cores / 4: 8->2 16->4 32->8 64->16 - local DEFAULT_DAEMON_DB_WRITERS = std.toString(std.parseInt(std.split(deployment_flavor_power,".")[3]) / 2), + // Writer threads. Depends on cassandra latency. + // Writers Writers Writers + // Cassandra cores Daemon Cores Conservative,multiplier=0.5 Average,multiplier=1.0 Aggressive,multiplier=1.5 + // 8 2 2 4 6 + // 16 4 4 8 12 + // 32 8 8 16 24 + // 64 16 16 32 48 + local multiplier = 1.0, + local DEFAULT_DAEMON_DB_WRITERS = std.toString(std.round(std.parseInt(std.split(deployment_flavor_power,".")[3]) / 2 * multiplier)), // If tasks are CPU-intensive (Python calc), make it equal to cassandra_total_nodes, otherwise cassandra_total_nodes/2 may be enough local daemon_total_instances = cassandra_total_nodes, @@ -40,11 +43,13 @@ else if daemon_total_instances == 4 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104'] else if daemon_total_instances == 8 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104', '10.5.0.105', '10.5.0.106', '10.5.0.107', '10.5.0.108'] else if daemon_total_instances == 16 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104', '10.5.0.105', '10.5.0.106', '10.5.0.107', '10.5.0.108', '10.5.0.109', '10.5.0.110', '10.5.0.111', '10.5.0.112', '10.5.0.113', '10.5.0.114', '10.5.0.115', '10.5.0.116'] + else if daemon_total_instances == 32 then ['10.5.0.101', '10.5.0.102', '10.5.0.103', '10.5.0.104', '10.5.0.105', '10.5.0.106', '10.5.0.107', '10.5.0.108', '10.5.0.109', '10.5.0.110', '10.5.0.111', '10.5.0.112', '10.5.0.113', '10.5.0.114', '10.5.0.115', '10.5.0.116', '10.5.0.117', '10.5.0.118', '10.5.0.119', '10.5.0.120', '10.5.0.121', '10.5.0.122', '10.5.0.123', '10.5.0.124', '10.5.0.125', '10.5.0.126', '10.5.0.127', '10.5.0.128', '10.5.0.129', '10.5.0.130', '10.5.0.131', '10.5.0.132'] else [], local cassandra_ips = if cassandra_total_nodes == 4 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14'] else if cassandra_total_nodes == 8 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14', '10.5.0.15', '10.5.0.16', '10.5.0.17', '10.5.0.18'] else if cassandra_total_nodes == 16 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14', '10.5.0.15', '10.5.0.16', '10.5.0.17', '10.5.0.18', '10.5.0.19', '10.5.0.20', 
'10.5.0.21', '10.5.0.22', '10.5.0.23', '10.5.0.24', '10.5.0.25', '10.5.0.26'] + else if cassandra_total_nodes == 32 then ['10.5.0.11', '10.5.0.12', '10.5.0.13', '10.5.0.14', '10.5.0.15', '10.5.0.16', '10.5.0.17', '10.5.0.18', '10.5.0.19', '10.5.0.20', '10.5.0.21', '10.5.0.22', '10.5.0.23', '10.5.0.24', '10.5.0.25', '10.5.0.26', '10.5.0.27', '10.5.0.28', '10.5.0.29', '10.5.0.30', '10.5.0.31', '10.5.0.32', '10.5.0.33', '10.5.0.34', '10.5.0.35', '10.5.0.36', '10.5.0.37', '10.5.0.38', '10.5.0.39', '10.5.0.40', '10.5.0.41', '10.5.0.42'] else [], // Cassandra-specific @@ -52,6 +57,7 @@ if cassandra_total_nodes == 4 then ['-9223372036854775808', '-4611686018427387904', '0', '4611686018427387904'] else if cassandra_total_nodes == 8 then ['-9223372036854775808', '-6917529027641081856', '-4611686018427387904', '-2305843009213693952', '0', '2305843009213693952', '4611686018427387904', '6917529027641081856'] else if cassandra_total_nodes == 16 then ['-9223372036854775808','-8070450532247928832','-6917529027641081856','-5764607523034234880','-4611686018427387904','-3458764513820540928','-2305843009213693952','-1152921504606846976','0','1152921504606846976','2305843009213693952','3458764513820540928','4611686018427387904','5764607523034234880','6917529027641081856','8070450532247928832'] + else if cassandra_total_nodes == 32 then ['-9223372036854775808','-8646911284551352320','-8070450532247928832','-7493989779944505344','-6917529027641081856','-6341068275337658368','-5764607523034234880','-5188146770730811392','-4611686018427387904','-4035225266123964416','-3458764513820540928','-2882303761517117440','-2305843009213693952','-1729382256910270464','-1152921504606846976','-576460752303423488','0','576460752303423488','1152921504606846976','1729382256910270464','2305843009213693952','2882303761517117440','3458764513820540928','4035225266123964416','4611686018427387904','5188146770730811392','5764607523034234880','6341068275337658368','6917529027641081856','7493989779944505344','8070450532247928832','8646911284551352320'] else [], local cassandra_seeds = std.join(',', cassandra_ips), // Used by cassandra nodes, all are seeds to avoid bootstrapping local cassandra_hosts = "'[\"" + std.join('","', cassandra_ips) + "\"]'", // Used by daemons "'[\"10.5.0.11\",\"10.5.0.12\",\"10.5.0.13\",\"10.5.0.14\",\"10.5.0.15\",\"10.5.0.16\",\"10.5.0.17\",\"10.5.0.18\"]'", @@ -62,13 +68,9 @@ else if architecture == 'amd64' then 'ami-02f9afd340e6c0065' // ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-20240606 else 'unknown-architecture-unknown-image', - // local instance_image_id = - // if architecture == 'arm64' then 'ami-09b2701695676705d'// ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-arm64-server-20240117 // 'ami-064b469793e32e5d2' ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-arm64-server-20230904 - // else if architecture == 'amd64' then 'ami-0d8583a0d8d6dd14f' //ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-amd64-server-20230714 - // else 'unknown-architecture-unknown-image', - + local instance_flavor = getFromMap({ - 'aws.amd64.c5a.4': {cassandra:'c5ad.xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.medium', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, + 'aws.amd64.c5a.4': {cassandra:'c5ad.xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.large', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, 'aws.amd64.c5a.8': {cassandra:'c5ad.2xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.large', rabbitmq: 't2.micro', 
prometheus: 't2.micro', bastion: 't2.micro' }, 'aws.amd64.c5a.16': {cassandra:'c5ad.4xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] [0-9]+.[0-9]G', daemon: 'c6a.xlarge', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, 'aws.amd64.c5a.32': {cassandra:'c5ad.8xlarge', cass_nvme_regex:'nvme[0-9]n[0-9] 558.8G', daemon: 'c6a.2xlarge', rabbitmq: 't2.micro', prometheus: 't2.micro', bastion: 't2.micro' }, @@ -102,7 +104,7 @@ // external_ip_address: '', port: 22, user: '{CAPIDEPLOY_SSH_USER}', - private_key_path: '{CAPIDEPLOY_SSH_PRIVATE_KEY_PATH}', + private_key_or_path: '{CAPIDEPLOY_AWS_SSH_ROOT_KEYPAIR_PRIVATE_KEY_OR_PATH}', }, timeouts: { }, @@ -281,14 +283,14 @@ image_id: instance_image_id, security_group_name: $.security_groups.bastion.name, subnet_name: $.network.public_subnet.name, - associated_instance_profile: '{CAPIDEPLOY_INSTANCE_PROFILE_WITH_S3_ACCESS}', + associated_instance_profile: '{CAPIDEPLOY_AWS_INSTANCE_PROFILE_WITH_S3_ACCESS}', volumes: { 'log': { name: dep_name + '_log', availability_zone: volume_availability_zone, mount_point: '/mnt/capi_log', size: 10, - type: 'gp2', // No need for a top-spedd drive + type: 'gp2', // No need for a top-speed drive permissions: 777, owner: $.ssh_config.user, }, @@ -307,7 +309,7 @@ NETWORK_CIDR: $.network.cidr, BASTION_ALLOWED_IPS: '{CAPIDEPLOY_BASTION_ALLOWED_IPS}', EXTERNAL_IP_ADDRESS: '{CAPIDEPLOY.INTERNAL.BASTION_EXTERNAL_IP_ADDRESS}', // internal: capideploy populates it from ssh_config.external_ip_address after loading project file; used by webui and webapi config.sh - EXTERNAL_WEBAPI_PORT: '{CAPIDEPLOY_EXTERNAL_WEBAPI_PORT}', + EXTERNAL_WEBAPI_PORT: '6544', INTERNAL_WEBAPI_PORT: '6543', }, cmd: { @@ -489,7 +491,7 @@ image_id: instance_image_id, security_group_name: $.security_groups.internal.name, subnet_name: $.network.private_subnet.name, - associated_instance_profile: '{CAPIDEPLOY_INSTANCE_PROFILE_WITH_S3_ACCESS}', + associated_instance_profile: '{CAPIDEPLOY_AWS_INSTANCE_PROFILE_WITH_S3_ACCESS}', service: { env: { INTERNAL_BASTION_IP: internal_bastion_ip,
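// The {CAPIDEPLOY_*} placeholders in this file (e.g. {CAPIDEPLOY_AWS_INSTANCE_PROFILE_WITH_S3_ACCESS})
// are substituted from the same-named environment variables before the jsonnet is evaluated,
// so a deployment is parameterized from the shell; illustrative values only:
//   export CAPIDEPLOY_DEPLOYMENT_NAME=sampleaws001
//   export CAPIDEPLOY_CASSANDRA_CLUSTER_SIZE=4
// {CAPIDEPLOY.INTERNAL.*} placeholders, by contrast, are populated by capideploy itself
// after the project file is loaded.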