diff --git a/.secrets.baseline b/.secrets.baseline
index 9b73b7f..5c8a408 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -125,7 +125,7 @@
     {
       "path": "detect_secrets.filters.regex.should_exclude_file",
       "pattern": [
-        "\\.secrets..*",
+        "\\.secrets\\..*",
         "\\.git.*",
         "\\.pre-commit-config\\.yaml",
         "\\.mypy_cache",
@@ -135,7 +135,9 @@
         "venv",
         "dist",
         "build",
-        ".*\\.egg-info"
+        ".*\\.egg-info",
+        ".*\\.tfstate",
+        ".*\\.tfvars"
       ]
     }
   ],
@@ -156,7 +158,7 @@
         "filename": "terraform/README.md",
         "hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
         "is_verified": false,
-        "line_number": 109,
+        "line_number": 116,
         "is_secret": false
       },
       {
@@ -164,7 +166,7 @@
         "filename": "terraform/README.md",
         "hashed_secret": "659a4d010b74afeddbcb9c4e8eae01f4390eeacc",
         "is_verified": false,
-        "line_number": 110,
+        "line_number": 117,
         "is_secret": false
       },
       {
@@ -172,7 +174,7 @@
         "filename": "terraform/README.md",
         "hashed_secret": "bd3b85b91cb8cf6cfc6a4adc7a2505714939505b",
         "is_verified": false,
-        "line_number": 110,
+        "line_number": 117,
         "is_secret": false
       },
       {
@@ -180,7 +182,7 @@
         "filename": "terraform/README.md",
         "hashed_secret": "a356cb3f3d1c9797cf59daf5b22fc0c7434d8dc7",
         "is_verified": false,
-        "line_number": 114,
+        "line_number": 121,
         "is_secret": false
       }
     ],
@@ -229,75 +231,7 @@
         "line_number": 295,
         "is_secret": false
       }
-    ],
-    "terraform/variables/terraform.tfvars.dev": [
-      {
-        "type": "AWS Sensitive Information (Experimental Plugin)",
-        "filename": "terraform/variables/terraform.tfvars.dev",
-        "hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
-        "is_verified": false,
-        "line_number": 3,
-        "is_secret": false
-      },
-      {
-        "type": "AWS Sensitive Information (Experimental Plugin)",
-        "filename": "terraform/variables/terraform.tfvars.dev",
-        "hashed_secret": "226201cd08f00a589068e569d01716d0ad488ae4",
-        "is_verified": false,
-        "line_number": 4,
-        "is_secret": false
-      },
-      {
-        "type": "AWS Sensitive Information (Experimental Plugin)",
-        "filename": "terraform/variables/terraform.tfvars.dev",
-        "hashed_secret": "4592cff3a9944664c9c182333782a5d551ec2516",
-        "is_verified": false,
-        "line_number": 4,
-        "is_secret": false
-      },
-      {
-        "type": "Secret Keyword",
-        "filename": "terraform/variables/terraform.tfvars.dev",
-        "hashed_secret": "b293afb11f1f9b32461ab510aacb65a27ccb6111",
-        "is_verified": false,
-        "line_number": 9,
-        "is_secret": false
-      }
-    ],
-    "terraform/variables/terraform.tfvars.test": [
-      {
-        "type": "AWS Sensitive Information (Experimental Plugin)",
-        "filename": "terraform/variables/terraform.tfvars.test",
-        "hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
-        "is_verified": false,
-        "line_number": 3,
-        "is_secret": false
-      },
-      {
-        "type": "AWS Sensitive Information (Experimental Plugin)",
-        "filename": "terraform/variables/terraform.tfvars.test",
-        "hashed_secret": "226201cd08f00a589068e569d01716d0ad488ae4",
-        "is_verified": false,
-        "line_number": 4,
-        "is_secret": false
-      },
-      {
-        "type": "AWS Sensitive Information (Experimental Plugin)",
-        "filename": "terraform/variables/terraform.tfvars.test",
-        "hashed_secret": "4592cff3a9944664c9c182333782a5d551ec2516",
-        "is_verified": false,
-        "line_number": 4,
-        "is_secret": false
-      },
-      {
-        "type": "Secret Keyword",
-        "filename": "terraform/variables/terraform.tfvars.test",
-        "hashed_secret": "b293afb11f1f9b32461ab510aacb65a27ccb6111",
-        "is_verified": false,
-        "line_number": 9,
-        "is_secret": false
-      }
     ]
   },
-  "generated_at": "2025-02-04T19:12:34Z"
+  "generated_at": "2025-02-13T09:10:40Z"
 }
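Note on the first hunk above: in the old pattern `\.secrets..*` the second dot is an unescaped regex wildcard, so the filter excludes more files than intended. A standalone illustration with Python's `re` module (the file names are made up; this does not use detect-secrets itself):

```python
import re

unescaped = re.compile(r"\.secrets..*")   # second "." matches ANY character
escaped = re.compile(r"\.secrets\..*")    # "\." matches a literal dot only

# Hypothetical file names, chosen to show the difference.
for path in [".secrets.baseline", ".secretsignore", "src/.secrets.yaml"]:
    print(f"{path:20} unescaped={bool(unescaped.search(path))} escaped={bool(escaped.search(path))}")

# .secrets.baseline    unescaped=True escaped=True
# .secretsignore       unescaped=True escaped=False  <- over-exclusion fixed by the escape
# src/.secrets.yaml    unescaped=True escaped=True
```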
diff --git a/README.md b/README.md
index 21d0169..81908d3 100644
--- a/README.md
+++ b/README.md
@@ -48,3 +48,45 @@ User documentation is managed with Sphinx, which is also installed in your Pytho
     sphinx-build -b html docs/source docs/build/html
 
 Publish the pages on gh-pages branch
+
+
+## Secret Detection
+
+The following commands can be used to detect secrets in the code.
+
+1) Set up a Python virtual environment.
+
+```shell
+mkdir ~/Tools
+python3 -m venv ~/Tools/detect-secrets
+source ~/Tools/detect-secrets/bin/activate
+pip install git+https://github.com/NASA-AMMOS/slim-detect-secrets.git@exp
+```
+
+2) Execute the following command in the Nucleus root directory to scan the code for secrets.
+
+```shell
+detect-secrets scan --disable-plugin AbsolutePathDetectorExperimental \
+   --exclude-files '\.secrets\..*' \
+   --exclude-files '\.git.*' \
+   --exclude-files '\.pre-commit-config\.yaml' \
+   --exclude-files '\.mypy_cache' \
+   --exclude-files '\.pytest_cache' \
+   --exclude-files '\.tox' \
+   --exclude-files '\.venv' \
+   --exclude-files 'venv' \
+   --exclude-files 'dist' \
+   --exclude-files 'build' \
+   --exclude-files '.*\.egg-info' \
+   --exclude-files '.*\.tfstate' \
+   --exclude-files '.*\.tfvars' \
+   > .secrets.baseline
+```
+
+3) Execute the following command in the Nucleus root directory to audit the possible secrets detected.
+
+```shell
+detect-secrets audit .secrets.baseline
+```
+
+The scan in step 2) creates a `.secrets.baseline` file in the Nucleus root directory. Commit and push that file to pass the secret-detection checks in GitHub during a pull request.
\ No newline at end of file
diff --git a/terraform/README.md b/terraform/README.md
index a2b597c..d54bca6 100644
--- a/terraform/README.md
+++ b/terraform/README.md
@@ -18,7 +18,10 @@ Therefore, as a result of the Terraform scripts in this directory following thin
 
 ## Prerequisites to Deploy Nucleus Baseline System
 
-1. An AWS Account with permissions to deploy following AWS services
+1. Some of the libraries used in the ECS containers of PDS Nucleus are platform specific. Therefore, please execute the deployment
+from an Amazon Linux EC2 instance with 64-bit (x86) architecture.
+
+2. An AWS Account with permissions to deploy following AWS services
     - Amazon Managed Workflows for Apache Airflow (MWAA)
     - AWS Security Groups
     - AWS S3 Bucket with relevant bucket policies
     - ECS
     - EFS File System
     - ECR
 
@@ -26,21 +29,21 @@ Therefore, as a result of the Terraform scripts in this directory following thin
-2. Ability to get AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN for the AWS account
+3. Ability to get AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN for the AWS account
 
-3. Terraform is installed in local environment (This was tested with Terraform v1.5.7. Any higher version should also work)
+4. Terraform is installed in local environment (This was tested with Terraform v1.5.7. Any higher version should also work)
     - Instructions to install Terraform is available at https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli
 
-4. A VPC and one or more subnets should be available on AWS (obtain the VPC ID and subnet IDs from AWS console or from the AWS
+5. A VPC and one or more subnets should be available on AWS (obtain the VPC ID and subnet IDs from AWS console or from the AWS
 system admin team of your AWS account)
 
-5. Docker service is installed and running (Instructions to install Docker: https://docs.docker.com/engine/install/)
+6. Docker service is installed and running (Instructions to install Docker: https://docs.docker.com/engine/install/)
 
-6. PDS Registry (OpenSearch) is accessible from the AWS account which is used to deploy PDS Nucleus)
+7. PDS Registry (OpenSearch) is accessible from the AWS account which is used to deploy PDS Nucleus
 
-7. A Cognito User Pool to manage Nucleus users
+8. A Cognito User Pool to manage Nucleus users
 
-8. A certificate to be used for the ALB Listener facing Airflow UI
+9. A certificate to be used for the ALB Listener facing Airflow UI
 
 ## Steps to Deploy the PDS Nucleus Baseline System
 
@@ -81,6 +84,9 @@ Note: Examples of `terraform.tfvars` files are available at `terraform/variable
    - pds_node_names = List of PDS Node names to be supported (E.g.: ["PDS_SBN", "PDS_IMG", "PDS_EN"]).The following node name format should be used.
      - (PDS_ATM, PDS_ENG, PDS_GEO, PDS_IMG, PDS_NAIF, PDS_RMS, PDS_SBN, PSA, JAXA, ROSCOSMOS)
      - Please check https://nasa-pds.github.io/registry/user/harvest_job_configuration.html for PDS Node name descriptions.
+
+   - pds_nucleus_opensearch_url : OpenSearch URL to be used with Harvest tool
+   - pds_nucleus_opensearch_registry_names : List of Node specific OpenSearch registry names (E.g.: ["pds-nucleus-sbn-registry", "pds-nucleus-img-registry"])
    - pds_nucleus_opensearch_urls : List of Node specific OpenSearch URLs (E.g.: ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://opqrst.us-west-2.aoss.amazonaws.com"])
    - pds_nucleus_opensearch_credential_relative_url : Opensearch Credential URL (E.g.: "http:///AWS_CONTAINER_CREDENTIALS_RELATIVE_URI")
    - pds_nucleus_harvest_replace_prefix_with_list : List of harvest replace with strings (E.g.: ["s3://pds-sbn-nucleus-staging","s3://pds-img-nucleus-staging"])
@@ -121,7 +127,8 @@ aws_secretmanager_key_arn = "arn:aws:kms:us-west-2:12345678:key/12345-12
 # Please check https://nasa-pds.github.io/registry/user/harvest_job_configuration.html for PDS Node name descriptions.
 pds_node_names = ["PDS_SBN", "PDS_IMG"]
 
-pds_nucleus_opensearch_urls                    = ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://opqrst.us-west-2.aoss.amazonaws.com"]
+pds_nucleus_opensearch_url                     = "https://abcdef.us-west-2.aoss.amazonaws.com"
+pds_nucleus_opensearch_registry_names          = ["pds-nucleus-sbn-registry", "pds-nucleus-img-registry"]
 pds_nucleus_opensearch_credential_relative_url = "http:///AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"
 pds_nucleus_harvest_replace_prefix_with_list   = ["s3://pds-sbn-nucleus-staging", "s3://pds-img-nucleus-staging"]
 
@@ -171,33 +178,27 @@ terraform apply
 
 8. Wait for `terraform apply` command to be completed. If it fails due to expiration of AWS credentials, please provide a new set of AWS credentials and execute `terraform apply` again.
 
-9. Login to the AWS Console with your AWS Account.
+9. Note the `pds_nucleus_airflow_ui_url` printed as an output at the end of the `terraform apply` command results.
 
-10. Make sure that the correct AWS Region is selected and search for "Managed Apache Airflow".
+Example:
 
-11. Visit the "Managed Apache Airflow" (Amazon MWAA) page and check the list of environments.
+```shell
+Outputs:
 
-12. Find the relevant Amazon MWAA environment (Default name: PDS-Nucleus-Airflow-Env) and click on
-    Open Airflow UI link to open the Airflow UI.
+pds_nucleus_airflow_ui_url = "https://pds-nucleus-12345678.us-west-2.elb.amazonaws.com:4443/aws_mwaa/aws-console-sso"
+```
 
-13. The DAGs can be added to the Airflow by uploading Airflow DAG files to the DAG folder of S3 bucket
-configured as `mwaa_dag_s3_bucket_name` in the `terraform.tfvars` file.
+10. Login to the AWS Console with your AWS Account.
-14. Go to the AWS Secret manager (https://us-west-2.console.aws.amazon.com/secretsmanager/listsecrets?region=us-west-2) and locate the secrets in the following format.
-    - pds/nucleus/opensearch/creds/<pds_node_name>/user
-    - pds/nucleus/opensearch/creds/<pds_node_name>/password
-    - E.g.:
-      - pds/nucleus/opensearch/creds/PDS_IMG/user
-      - pds/nucleus/opensearch/creds/PDS_SBN/user
-      - pds/nucleus/opensearch/creds/PDS_IMG/password
-      - pds/nucleus/opensearch/creds/PDS_SBN/password
+11. Make sure that the correct AWS Region is selected and search for "Managed Apache Airflow".
 
-15. Obtain the Opensearch username and password for each PDS Node and update the above secrets with relevant usernames and passwords.
-    - To update a secret, click on a secret -> Retrieve secret value -> Edit -> Save
+12. Visit the "Managed Apache Airflow" (Amazon MWAA) page and check the list of environments.
 
+13. Find the relevant Amazon MWAA environment (Default name: PDS-Nucleus-Airflow-Env) and click on
+    Open Airflow UI link to open the Airflow UI.
 
-15. Use the PDS Data Upload Manager (DUM) tool to upload files to pds_nucleus_staging_bucket.
+14. The DAGs can be added to the Airflow by uploading Airflow DAG files to the DAG folder of S3 bucket
+configured as `mwaa_dag_s3_bucket_name` in the `terraform.tfvars` file.
 
 ## Steps to Access Nucleus Airflow UI With Cognito Credentials
 
@@ -205,6 +206,26 @@
 Only some users have direct access to AWS and those users can access Airflow UI as explained in the step 9 to 12 in the above section.
 However, there is another way to access Airflow UI using a Cognito account as follows.
 
+### Approach 1: Using the Web Based Login
+
+1. Make sure you have a Cognito user created in the Cognito user pool with required role (Cognito group). The PDS engineering node team can
+   help with this.
+
+2. Access the pds_nucleus_airflow_ui_url obtained in step 9 of the section above.
+
+Example:
+
+```shell
+Outputs:
+
+pds_nucleus_airflow_ui_url = "https://pds-nucleus-12345678.us-west-2.elb.amazonaws.com:4443/aws_mwaa/aws-console-sso"
+```
+
+3. Use the Cognito username and password to login.
+
+
+### Approach 2: Using a Web Token
+
 1. Make sure you have a Cognito user created in the Cognito user pool with required role (Cognito group). The PDS engineering node team can
    help with this.
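As a companion to "Approach 2: Using a Web Token" above, the token exchange can also be scripted. A minimal sketch using boto3's MWAA client, assuming the caller holds AWS credentials for a role that is allowed to call `mwaa:CreateWebLoginToken`; the environment name is the default from this README:

```python
import boto3

def airflow_sso_url(env_name: str = "PDS-Nucleus-Airflow-Env", region: str = "us-west-2") -> str:
    """Exchange IAM credentials for a short-lived Airflow UI login URL."""
    mwaa = boto3.client("mwaa", region_name=region)
    token = mwaa.create_web_login_token(Name=env_name)
    # The web token is single-use and expires after a short time.
    return (f"https://{token['WebServerHostname']}/aws_mwaa/aws-console-sso"
            f"?login=true#{token['WebToken']}")

if __name__ == "__main__":
    print(airflow_sso_url())
```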
diff --git a/terraform/main.tf b/terraform/main.tf
index 7ea683c..7e89aac 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -102,7 +102,8 @@ module "product-copy-completion-checker" {
   pds_nucleus_cold_archive_bucket_name_postfix = var.pds_nucleus_cold_archive_bucket_name_postfix
 
   pds_node_names                                 = var.pds_node_names
-  pds_nucleus_opensearch_urls                    = var.pds_nucleus_opensearch_urls
+  pds_nucleus_opensearch_url                     = var.pds_nucleus_opensearch_url
+  pds_nucleus_opensearch_registry_names          = var.pds_nucleus_opensearch_registry_names
   pds_nucleus_opensearch_credential_relative_url = var.pds_nucleus_opensearch_credential_relative_url
   pds_nucleus_harvest_replace_prefix_with_list   = var.pds_nucleus_harvest_replace_prefix_with_list
 
@@ -142,3 +143,7 @@ module "cognito-auth" {
   aws_elb_account_id_for_the_region = var.aws_elb_account_id_for_the_region
 }
 
+# Output the ALB URL for Airflow UI
+output "pds_nucleus_airflow_ui_url" {
+  value = nonsensitive(module.cognito-auth.pds_nucleus_airflow_ui_url)
+}
diff --git a/terraform/terraform-modules/cognito-auth/cognito-auth.tf b/terraform/terraform-modules/cognito-auth/cognito-auth.tf
index 93453d6..7ee0197 100644
--- a/terraform/terraform-modules/cognito-auth/cognito-auth.tf
+++ b/terraform/terraform-modules/cognito-auth/cognito-auth.tf
@@ -443,3 +443,7 @@ resource "aws_cognito_user_group" "pds_nucleus_viewer_cognito_user_group" {
   precedence = 65
   role_arn   = aws_iam_role.pds_nucleus_viewer_role.arn
 }
+
+output "pds_nucleus_airflow_ui_url" {
+  value = "https://${aws_lb.pds_nucleus_auth_alb.dns_name}:${var.auth_alb_listener_port}/aws_mwaa/aws-console-sso"
+}
diff --git a/terraform/terraform-modules/cognito-auth/lambda/package/pds_nucleus_alb_auth.py b/terraform/terraform-modules/cognito-auth/lambda/package/pds_nucleus_alb_auth.py
index c3961c1..9f5dbb6 100644
--- a/terraform/terraform-modules/cognito-auth/lambda/package/pds_nucleus_alb_auth.py
+++ b/terraform/terraform-modules/cognito-auth/lambda/package/pds_nucleus_alb_auth.py
@@ -136,6 +136,9 @@ def login(headers, query_params=None, user_claims=None,iam_role_arn=None):
     except Exception as error:
         logger.error(str(error))
 
+    if not redirect:
+        redirect = close(headers, "Login Failed. Please check your Cognito user groups", status_code=401)
+
     return redirect
 
 def get_mwaa_client(role_arn, user):
diff --git a/terraform/terraform-modules/cognito-auth/variables.tf b/terraform/terraform-modules/cognito-auth/variables.tf
index 1858d53..54093b1 100644
--- a/terraform/terraform-modules/cognito-auth/variables.tf
+++ b/terraform/terraform-modules/cognito-auth/variables.tf
@@ -43,7 +43,6 @@ variable "auth_alb_name" {
 
 variable "auth_alb_listener_port" {
   description = "Auth ALB Listener Port"
   type        = string
-  sensitive   = true
 }
 
 variable "auth_alb_listener_certificate_arn" {
diff --git a/terraform/terraform-modules/ecs-ecr/docker/template-deploy-ecr-images.sh b/terraform/terraform-modules/ecs-ecr/docker/template-deploy-ecr-images.sh
index ebf3226..823a5fd 100755
--- a/terraform/terraform-modules/ecs-ecr/docker/template-deploy-ecr-images.sh
+++ b/terraform/terraform-modules/ecs-ecr/docker/template-deploy-ecr-images.sh
@@ -5,13 +5,13 @@ aws ecr get-login-password --region us-west-2 | docker login --username AWS --pa
 
 # Deploy pds-nucleus-config-init ECR image
 cd ./terraform-modules/ecs-ecr/docker/config-init
-docker build -t pds-nucleus-config-init .
+docker build --platform linux/amd64 -t pds-nucleus-config-init .
 docker tag pds-nucleus-config-init:latest "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-config-init:latest
 docker push "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-config-init:latest
 
 # Deploy pds-nucleus-s3-to-efs-copy ECR image
 cd ../s3-to-efs-copy
-docker build -t pds-nucleus-s3-to-efs-copy .
+docker build --platform linux/amd64 -t pds-nucleus-s3-to-efs-copy .
 docker tag pds-nucleus-s3-to-efs-copy:latest "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-s3-to-efs-copy:latest
 docker push "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-s3-to-efs-copy:latest
diff --git a/terraform/terraform-modules/product-copy-completion-checker/lambda/pds-nucleus-product-completion-checker.py b/terraform/terraform-modules/product-copy-completion-checker/lambda/pds-nucleus-product-completion-checker.py
index 5606551..027431f 100644
--- a/terraform/terraform-modules/product-copy-completion-checker/lambda/pds-nucleus-product-completion-checker.py
+++ b/terraform/terraform-modules/product-copy-completion-checker/lambda/pds-nucleus-product-completion-checker.py
@@ -33,6 +33,7 @@ dag_name = os.environ.get('AIRFLOW_DAG_NAME')
 pds_node_name = os.environ.get('PDS_NODE_NAME')
 
 opensearch_endpoint = os.environ.get('OPENSEARCH_ENDPOINT')
+opensearch_registry_name = os.environ.get('OPENSEARCH_REGISTRY_NAME')
 pds_nucleus_opensearch_credential_relative_url = os.environ.get('OPENSEARCH_CREDENTIAL_RELATIVE_URL')
 replace_prefix_with = os.environ.get('REPLACE_PREFIX_WITH')
 efs_mount_path = os.environ.get('EFS_MOUNT_PATH')
@@ -45,6 +46,7 @@ pds_hot_archive_bucket_name = os.environ.get('PDS_HOT_ARCHIVE_S3_BUCKET_NAME')
 pds_cold_archive_bucket_name = os.environ.get('PDS_COLD_ARCHIVE_S3_BUCKET_NAME')
 pds_staging_bucket_name = os.environ.get('PDS_STAGING_S3_BUCKET_NAME')
 
+product_batch_size = int(os.environ.get('PRODUCT_BATCH_SIZE', '10'))
 replace_prefix = efs_mount_path
 
@@ -98,7 +100,7 @@ def process_completed_products():
     logger.debug(f"Number of completed product labels : {str(response['records'])}")
     logger.debug(f"Number of completed product labels : {str(len(response['records']))}")
 
-    n = 10
+    n = product_batch_size
     count = 0
     list_of_product_labels_to_process = []
 
@@ -222,7 +224,7 @@ def create_harvest_configs_and_trigger_nucleus(list_of_product_labels_to_process
     logger.info(f"Created harvest config XML file: {harvest_config_file_path}")
 
     connection_xml_content = f"""
-    
+    
         {pds_nucleus_opensearch_credential_relative_url}
     """
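One detail worth calling out in the Lambda hunk above: environment variables always arrive as strings, so `PRODUCT_BATCH_SIZE` must be converted to an integer before it can drive the batching loop (hence the `int(...)` at the read site). A standalone sketch of the pattern; only the variable names mirror the Lambda, the labels are made up:

```python
import os

# Environment variables are strings; convert once at startup and fall back
# to the Terraform default of 10 when the variable is unset.
product_batch_size = int(os.environ.get('PRODUCT_BATCH_SIZE', '10'))

def batches(labels, n):
    """Yield successive batches of at most n product labels."""
    for i in range(0, len(labels), n):
        yield labels[i:i + n]

if __name__ == '__main__':
    labels = [f's3://staging/product-{i}.xml' for i in range(25)]  # illustrative labels
    for batch in batches(labels, product_batch_size):
        print(f'{len(batch)} labels in this batch')
```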
diff --git a/terraform/terraform-modules/product-copy-completion-checker/product-copy-completion-checker.tf b/terraform/terraform-modules/product-copy-completion-checker/product-copy-completion-checker.tf
index e3a94b6..a65060e 100644
--- a/terraform/terraform-modules/product-copy-completion-checker/product-copy-completion-checker.tf
+++ b/terraform/terraform-modules/product-copy-completion-checker/product-copy-completion-checker.tf
@@ -51,7 +51,7 @@ resource "aws_rds_cluster" "default" {
   }
 
   lifecycle {
-    ignore_changes = ["availability_zones"]
+    ignore_changes = [availability_zones]
   }
 }
 
@@ -235,13 +235,36 @@ resource "aws_s3_bucket" "pds_nucleus_s3_config_bucket" {
   force_destroy = true
 }
 
-# Create a staging S3 Bucket for each PDS Node
-resource "aws_s3_bucket" "pds_nucleus_s3_staging_bucket" {
-  count = length(var.pds_node_names)
-  # convert PDS node name to S3 bucket name compatible format
+# This data source is added to access existing S3 buckets, because an S3 staging bucket is already available in the MCP Prod environment.
+data "aws_s3_bucket" "pds_nucleus_s3_staging_bucket" {
+  count  = length(var.pds_node_names)
   bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
 }
 
+# Commented out the following S3 bucket resources, because an S3 staging bucket is already available in the MCP Prod environment.
+# However, these resources are useful when deploying in a fresh environment.
+
+# # Create a staging S3 Bucket for each PDS Node
+# resource "aws_s3_bucket" "pds_nucleus_s3_staging_bucket" {
+#   count = length(var.pds_node_names)
+#   # convert PDS node name to S3 bucket name compatible format
+#   bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
+# }
+
+# # Create an aws_s3_bucket_notification for each s3 bucket of each Node
+# resource "aws_s3_bucket_notification" "pds_nucleus_s3_staging_bucket_notification" {
+#
+#   count = length(var.pds_node_names)
+#   # convert PDS node name to S3 bucket name compatible format
+#   bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
+#
+#   queue {
+#     events    = ["s3:ObjectCreated:*"]
+#     queue_arn = aws_sqs_queue.pds_nucleus_files_to_save_in_database_sqs_queue[count.index].arn
+#   }
+# }
+
+
 # Create pds_nucleus_s3_file_file_event_processor_function for each PDS Node
 resource "aws_lambda_function" "pds_nucleus_s3_file_file_event_processor_function" {
   count = length(var.pds_node_names)
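The data source above locates each existing staging bucket by rebuilding its name from the PDS node name. The Terraform expression `lower(replace(name, "_", "-"))` is equivalent to this Python (the postfix value is illustrative):

```python
def staging_bucket_name(pds_node_name: str, postfix: str) -> str:
    """Mirror Terraform's "${lower(replace(name, "_", "-"))}-${postfix}" naming."""
    return f"{pds_node_name.replace('_', '-').lower()}-{postfix}"

for node in ["PDS_SBN", "PDS_IMG"]:
    print(staging_bucket_name(node, "nucleus-staging"))
# pds-sbn-nucleus-staging
# pds-img-nucleus-staging
```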
@@ -292,7 +315,8 @@ resource "aws_lambda_function" "pds_nucleus_product_completion_checker_function"
       DB_SECRET_ARN                      = aws_secretsmanager_secret.pds_nucleus_rds_credentials.arn
       EFS_MOUNT_PATH                     = "/mnt/data"
       ES_AUTH_CONFIG_FILE_PATH           = "/etc/es-auth.cfg"
-      OPENSEARCH_ENDPOINT                = var.pds_nucleus_opensearch_urls[count.index]
+      OPENSEARCH_ENDPOINT                = var.pds_nucleus_opensearch_url
+      OPENSEARCH_REGISTRY_NAME           = var.pds_nucleus_opensearch_registry_names[count.index]
       OPENSEARCH_CREDENTIAL_RELATIVE_URL = var.pds_nucleus_opensearch_credential_relative_url
       PDS_NODE_NAME                      = var.pds_node_names[count.index]
       PDS_NUCLEUS_CONFIG_BUCKET_NAME     = var.pds_nucleus_config_bucket_name
@@ -300,7 +324,8 @@ resource "aws_lambda_function" "pds_nucleus_product_completion_checker_function"
       PDS_MWAA_ENV_NAME                  = var.airflow_env_name
       PDS_HOT_ARCHIVE_S3_BUCKET_NAME     = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_hot_archive_bucket_name_postfix}"
       PDS_COLD_ARCHIVE_S3_BUCKET_NAME    = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_cold_archive_bucket_name_postfix}"
-      PDS_STAGING_S3_BUCKET_NAME         = aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].id
+      PDS_STAGING_S3_BUCKET_NAME         = data.aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].id
+      PRODUCT_BATCH_SIZE                 = var.product_batch_size
     }
   }
 }
@@ -342,7 +367,7 @@ resource "aws_lambda_permission" "s3-lambda-permission" {
   action        = "lambda:InvokeFunction"
   function_name = aws_lambda_function.pds_nucleus_s3_file_file_event_processor_function[count.index].function_name
   principal     = "s3.amazonaws.com"
-  source_arn    = aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn
+  source_arn    = data.aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn
 }
 
 # Create an SQS queue to receive S3 bucket notifications for each s3 bucket of each Node
@@ -374,7 +399,7 @@ data "aws_iam_policy_document" "pds_nucleus_files_to_save_in_database_sqs_queue_
     condition {
       test     = "StringEquals"
       variable = "aws:SourceArn"
-      values   = [aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn]
+      values   = [data.aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn]
     }
   }
 }
@@ -386,20 +411,6 @@ resource "aws_sqs_queue_policy" "pds_nucleus_files_to_save_in_database_sqs_queue
   policy    = data.aws_iam_policy_document.pds_nucleus_files_to_save_in_database_sqs_queue_policy_document[count.index].json
 }
 
-# Create an aws_s3_bucket_notification for each s3 bucket of each Node
-resource "aws_s3_bucket_notification" "pds_nucleus_s3_staging_bucket_notification" {
-
-  count = length(var.pds_node_names)
-  # convert PDS node name to S3 bucket name compatible format
-  bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
-
-  queue {
-    events    = ["s3:ObjectCreated:*"]
-    queue_arn = aws_sqs_queue.pds_nucleus_files_to_save_in_database_sqs_queue[count.index].arn
-  }
-}
-
-
 resource "time_sleep" "wait_for_database" {
   create_duration = "2m"
 
diff --git a/terraform/terraform-modules/product-copy-completion-checker/variables.tf b/terraform/terraform-modules/product-copy-completion-checker/variables.tf
index d181364..b13d173 100644
--- a/terraform/terraform-modules/product-copy-completion-checker/variables.tf
+++ b/terraform/terraform-modules/product-copy-completion-checker/variables.tf
@@ -75,9 +75,15 @@ variable "pds_node_names" {
   sensitive = true
 }
 
-variable "pds_nucleus_opensearch_urls" {
-  description = "List of PDS Nucleus OpenSearch Config file paths"
-  type        = list(string)
+variable "pds_nucleus_opensearch_url" {
+  description = "PDS Nucleus OpenSearch URL"
+  type        = string
+  sensitive   = true
+}
+
+variable "pds_nucleus_opensearch_registry_names" {
+  description = "List of PDS Nucleus OpenSearch Registry Names"
+  type        = list(string)
   sensitive   = true
 }
 
@@ -119,6 +125,12 @@ variable "airflow_env_name" {
   type = string
 }
 
+variable "product_batch_size" {
+  description = "Size of the product batch to send to the Nucleus DAG to process per given DAG invocation"
+  default     = 10
+  type        = number
+}
+
 variable "region" {
   description = "AWS Region"
   type = string
diff --git a/terraform/variables.tf b/terraform/variables.tf
index bf0741e..ae5b51c 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -101,15 +101,22 @@ variable "pds_nucleus_default_airflow_dag_id" {
 variable "pds_node_names" {
   description = "List of PDS Node Names"
   type        = list(string)
-  default     = ["pds-sbn", "pds-img"]
+  sensitive   = true
+}
+
+variable "pds_nucleus_opensearch_url" {
+  description = "PDS Nucleus OpenSearch URL"
+  type        = string
+  sensitive   = true
 }
 
-variable "pds_nucleus_opensearch_urls" {
-  description = "List of PDS Nucleus OpenSearch Config file paths"
+variable "pds_nucleus_opensearch_registry_names" {
+  description = "List of PDS Nucleus OpenSearch Registry Names"
   type        = list(string)
   sensitive   = true
 }
 
+
 variable "pds_nucleus_opensearch_credential_relative_url" {
   description = "List of PDS Nucleus OpenSearch Credential Relative URL"
   type        = string
"https://pqrst.us-west-2.aoss.amazonaws.com"] +pds_nucleus_opensearch_url = "https://abcdef.us-west-2.aoss.amazonaws.com" +pds_nucleus_opensearch_registry_names = ["pds-nucleus-sbn-registry"", "pds-nucleus-img-registry"] pds_nucleus_opensearch_credential_relative_url = "http:///AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" pds_nucleus_harvest_replace_prefix_with_list = ["s3://pds-sbn-nucleus-staging", "s3://pds-img-nucleus-staging"] diff --git a/terraform/variables/terraform.tfvars.test b/terraform/variables/terraform.tfvars.test index 44ef7d3..724e62a 100644 --- a/terraform/variables/terraform.tfvars.test +++ b/terraform/variables/terraform.tfvars.test @@ -13,7 +13,8 @@ aws_secretmanager_key_arn = "arn:aws:kms:us-west-2:12345678:key/abcdef-a # (PDS_ATM, PDS_ENG, PDS_GEO, PDS_IMG, PDS_NAIF, PDS_RMS, PDS_SBN, PSA, JAXA, ROSCOSMOS) pds_node_names = ["PDS_SBN", "PDS_IMG"] -pds_nucleus_opensearch_urls = ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://pqrst.us-west-2.aoss.amazonaws.com"] +pds_nucleus_opensearch_url = "https://abcdef.us-west-2.aoss.amazonaws.com" +pds_nucleus_opensearch_registry_names = ["pds-nucleus-sbn-registry"", "pds-nucleus-img-registry"] pds_nucleus_opensearch_credential_relative_url = "http:///AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" pds_nucleus_harvest_replace_prefix_with_list = ["s3://pds-sbn-nucleus-staging", "s3://pds-img-nucleus-staging"]