Updates related to the MCP Prod deployment and code / document improvements #139

Open
wants to merge 7 commits into base: main
Changes from 6 commits
23 changes: 7 additions & 16 deletions .secrets.baseline
@@ -135,7 +135,8 @@
"venv",
"dist",
"build",
".*\\.egg-info"
".*\\.egg-info",
"\u2018*.tfstate \\\n --exclude-files \u2018\\*.tfvars"
@nutjob4life (Member), Feb 12, 2025:
I'm seeing these \u2018 and --exclude-files appear in your commits again.

Is there a chance there are some copy/paste problems going on? Somehow, the command-line argument name itself, --exclude-files, is ending up as a pattern.

I believe what you want is:

    {
      "path": "detect_secrets.filters.regex.should_exclude_file",
      "pattern": [
        "\\.secrets..*",
        "\\.git.*",
        "\\.pre-commit-config\\.yaml",
        "\\.mypy_cache",
        "\\.pytest_cache",
        "\\.tox",
        "\\.venv",
        "venv",
        "dist",
        "build",
        ".*\\.egg-info",
        ".*\\.tfstate",
        ".*\\.tfvars"
      ]
    }
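If the baseline is edited by hand like this, it's worth confirming afterwards that the file is still valid JSON (a minimal check using standard Python tooling):

```shell
# Exits non-zero and prints a parse error if .secrets.baseline is not valid JSON
python3 -m json.tool .secrets.baseline > /dev/null
```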

Contributor (Author):
@nutjob4life, what I used was:

$ detect-secrets scan --disable-plugin AbsolutePathDetectorExperimental
--exclude-files '.secrets..*'
--exclude-files '.git.*'
--exclude-files '.pre-commit-config.yaml'
--exclude-files '.mypy_cache'
--exclude-files '.pytest_cache'
--exclude-files '.tox'
--exclude-files '.venv'
--exclude-files 'venv'
--exclude-files 'dist'
--exclude-files 'build'
--exclude-files '.*.egg-info'
--exclude-files ‘*.tfstate'
--exclude-files ‘*.tfvars'
> .secrets.baseline

@nutjob4life (Member):

Ah, I can see some problems all right.

First, the patterns are regexps, not glob expressions, so

--exclude-files '*.tfstate'
--exclude-files '*.tfvars'

should be

--exclude-files '.*\.tfstate'  # Exclude files that start with zero-or-more characters and end with dot tfstate
--exclude-files '.*\.tfvars'  # Exclude files that start with zero-or-more characters and end with dot tfvars

The same applies to all the other cases where . appears; a . in regexp, as you know, matches a single character, so you actually want

--exclude-files '\.pre-commit-config\.yaml'
--exclude-files '\.tox'
etc.

Second, there are no continuation characters on your command line; there should be a \ ending each line except the last, right?

But I found where the weird Unicode escape characters are coming from!

Look closely at this screenshot from your comment above:

[Screenshot 2025-02-12 at 5:38:01 PM]

The characters I circled in green are correct; those are APOSTROPHEs U+0027. But the characters in red are LEFT SINGLE QUOTATION MARKs, U+2018, which are incorrect. That explains why the literal text --exclude-files is getting saved into the .secrets.baseline.

Here is a detect-secrets scan command you should be able to use:

detect-secrets scan --disable-plugin AbsolutePathDetectorExperimental \
    --exclude-files '\.secrets\..*' \
    --exclude-files '\.git.*' \
    --exclude-files '\.pre-commit-config\.yaml' \
    --exclude-files '\.mypy_cache' \
    --exclude-files '\.pytest_cache' \
    --exclude-files '\.tox' \
    --exclude-files '\.venv' \
    --exclude-files 'venv' \
    --exclude-files 'dist' \
    --exclude-files 'build' \
    --exclude-files '.*\.egg-info' \
    --exclude-files '.*\.tfstate' \
    --exclude-files '.*\.tfvars' \
    > .secrets.baseline
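As a quick sanity check before regenerating the baseline, you can also scan a file or a pasted command for stray non-ASCII characters such as U+2018 (a minimal sketch, assuming GNU grep with PCRE support via -P):

```shell
# Print any line (with its line number) that contains a non-ASCII character,
# which is how smart quotes from copy/paste usually reveal themselves
grep -nP '[^\x00-\x7F]' .secrets.baseline
```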

Contributor (Author):

Thanks @nutjob4life! This is useful!

Contributor (Author):

@nutjob4life, I updated the secrets baseline. I also added a section to the README about secret detection, with the proper commands to use.
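For reference, the findings recorded in the regenerated baseline can also be reviewed interactively (assuming the standard detect-secrets CLI is installed):

```shell
# Steps through each finding in the baseline and records whether it is a real secret or a false positive
detect-secrets audit .secrets.baseline
```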

]
}
],
@@ -156,41 +157,31 @@
"filename": "terraform/README.md",
"hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
"is_verified": false,
"line_number": 109,
"line_number": 113,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/README.md",
"hashed_secret": "659a4d010b74afeddbcb9c4e8eae01f4390eeacc",
"is_verified": false,
"line_number": 110,
"line_number": 114,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/README.md",
"hashed_secret": "bd3b85b91cb8cf6cfc6a4adc7a2505714939505b",
"is_verified": false,
"line_number": 110,
"line_number": 114,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "terraform/README.md",
"hashed_secret": "a356cb3f3d1c9797cf59daf5b22fc0c7434d8dc7",
"is_verified": false,
"line_number": 114,
"is_secret": false
}
],
"terraform/terraform-modules/ecs-ecr/docker/deploy-ecr-images.sh": [
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/terraform-modules/ecs-ecr/docker/deploy-ecr-images.sh",
"hashed_secret": "9ad897024d8c36c541d7fe84084c4e9f4df00b2a",
"is_verified": false,
"line_number": 4,
"line_number": 118,
"is_secret": false
}
],
@@ -299,5 +290,5 @@
}
]
},
"generated_at": "2025-02-04T19:12:34Z"
"generated_at": "2025-02-12T20:02:39Z"
}
58 changes: 38 additions & 20 deletions terraform/README.md
@@ -81,6 +81,9 @@ Note: Examples of `terraform.tfvars` files are available at `terraform/variable
- pds_node_names = List of PDS Node names to be supported (E.g.: ["PDS_SBN", "PDS_IMG", "PDS_EN"]). The following node name format should be used.
- (PDS_ATM, PDS_ENG, PDS_GEO, PDS_IMG, PDS_NAIF, PDS_RMS, PDS_SBN, PSA, JAXA, ROSCOSMOS)
- Please check https://nasa-pds.github.io/registry/user/harvest_job_configuration.html for PDS Node name descriptions.

- pds_nucleus_opensearch_url : OpenSearch URL to be used with the Harvest tool
- pds_nucleus_opensearch_registry_names : List of Node specific OpenSearch registry names (E.g.: ["pds-nucleus-sbn-registry", "pds-nucleus-img-registry"])
- pds_nucleus_opensearch_urls : List of Node specific OpenSearch URLs (E.g.: ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://opqrst.us-west-2.aoss.amazonaws.com"])
- pds_nucleus_opensearch_credential_relative_url : Opensearch Credential URL (E.g.: "http://<IP ADDRESS>/AWS_CONTAINER_CREDENTIALS_RELATIVE_URI")
- pds_nucleus_harvest_replace_prefix_with_list : List of harvest replace with strings (E.g.: ["s3://pds-sbn-nucleus-staging","s3://pds-img-nucleus-staging"])
@@ -121,7 +124,8 @@ aws_secretmanager_key_arn = "arn:aws:kms:us-west-2:12345678:key/12345-12
# Please check https://nasa-pds.github.io/registry/user/harvest_job_configuration.html for PDS Node name descriptions.

pds_node_names = ["PDS_SBN", "PDS_IMG"]
pds_nucleus_opensearch_urls = ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://opqrst.us-west-2.aoss.amazonaws.com"]
pds_nucleus_opensearch_url = "https://abcdef.us-west-2.aoss.amazonaws.com"
pds_nucleus_opensearch_registry_names = ["pds-nucleus-sbn-registry", "pds-nucleus-img-registry"]
pds_nucleus_opensearch_credential_relative_url = "http://<IP ADDRESS>/AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"
pds_nucleus_harvest_replace_prefix_with_list = ["s3://pds-sbn-nucleus-staging", "s3://pds-img-nucleus-staging"]

@@ -171,40 +175,54 @@ terraform apply

8. Wait for the `terraform apply` command to complete. If it fails due to expired AWS credentials, provide a new set of AWS credentials and execute `terraform apply` again.

9. Login to the AWS Console with your AWS Account.
9. Note the `pds_nucleus_airflow_ui_url` printed as an output at the end of the `terraform apply` command results.

10. Make sure that the correct AWS Region is selected and search for "Managed Apache Airflow".
Example:

11. Visit the "Managed Apache Airflow" (Amazon MWAA) page and check the list of environments.
```shell
Outputs:

12. Find the relevant Amazon MWAA environment (Default name: PDS-Nucleus-Airflow-Env) and click on
Open Airflow UI link to open the Airflow UI.
pds_nucleus_airflow_ui_url = "https://pds-nucleus-12345678.us-west-2.elb.amazonaws.com:4443/aws_mwaa/aws-console-sso"
```

13. The DAGs can be added to the Airflow by uploading Airflow DAG files to the DAG folder of S3 bucket
configured as `mwaa_dag_s3_bucket_name` in the `terraform.tfvars` file.
10. Login to the AWS Console with your AWS Account.

14. Go to the AWS Secret manager (https://us-west-2.console.aws.amazon.com/secretsmanager/listsecrets?region=us-west-2) and locate the secrets in the following format.
- pds/nucleus/opensearch/creds/<PDS NODE NAME>/user
- pds/nucleus/opensearch/creds/<PDS NODE NAME>/password

E.g.:
- pds/nucleus/opensearch/creds/PDS_IMG/user
- pds/nucleus/opensearch/creds/PDS_SBN/user
- pds/nucleus/opensearch/creds/PDS_IMG/password
- pds/nucleus/opensearch/creds/PDS_SBN/password
11. Make sure that the correct AWS Region is selected and search for "Managed Apache Airflow".

15. Obtain the Opensearch username and password for each PDS Node and update the above secrets with relevant usernames and passwords.
- To update a secret, click on a secret -> Retrieve secret value -> Edit -> Save
12. Visit the "Managed Apache Airflow" (Amazon MWAA) page and check the list of environments.

13. Find the relevant Amazon MWAA environment (Default name: PDS-Nucleus-Airflow-Env) and click on
Open Airflow UI link to open the Airflow UI.

15. Use the PDS Data Upload Manager (DUM) tool to upload files to pds_nucleus_staging_bucket.
14. The DAGs can be added to the Airflow by uploading Airflow DAG files to the DAG folder of S3 bucket
configured as `mwaa_dag_s3_bucket_name` in the `terraform.tfvars` file.


## Steps to Access Nucleus Airflow UI With Cognito Credentials

Only some users have direct access to AWS, and those users can access the Airflow UI as explained in steps 9 to 12
of the section above. However, there is another way to access the Airflow UI, using a Cognito account, as follows.

### Approach 1: Using the Web Based Login

1. Make sure you have a Cognito user created in the Cognito user pool with the required role (Cognito group). The PDS Engineering Node team can
help with this.

2. Access the pds_nucleus_airflow_ui_url obtained in step 9 of the section above.

Example:

```shell
Outputs:

pds_nucleus_airflow_ui_url = "https://pds-nucleus-12345678.us-west-2.elb.amazonaws.com:4443/aws_mwaa/aws-console-sso"
```

3. Use the Cognito username and password to log in.


### Approach 2: Using a Web Token

1. Make sure you have a Cognito user created in the Cognito user pool with the required role (Cognito group). The PDS Engineering Node team can
help with this.

7 changes: 6 additions & 1 deletion terraform/main.tf
@@ -102,7 +102,8 @@ module "product-copy-completion-checker" {
pds_nucleus_cold_archive_bucket_name_postfix = var.pds_nucleus_cold_archive_bucket_name_postfix

pds_node_names = var.pds_node_names
pds_nucleus_opensearch_urls = var.pds_nucleus_opensearch_urls
pds_nucleus_opensearch_url = var.pds_nucleus_opensearch_url
pds_nucleus_opensearch_registry_names = var.pds_nucleus_opensearch_registry_names
pds_nucleus_opensearch_credential_relative_url = var.pds_nucleus_opensearch_credential_relative_url
pds_nucleus_harvest_replace_prefix_with_list = var.pds_nucleus_harvest_replace_prefix_with_list

@@ -142,3 +143,7 @@ module "cognito-auth" {
aws_elb_account_id_for_the_region = var.aws_elb_account_id_for_the_region
}

# Output the ALB URL for Airflow UI
output "pds_nucleus_airflow_ui_url" {
value = nonsensitive(module.cognito-auth.pds_nucleus_airflow_ui_url)
}
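Once `terraform apply` has run, the value exposed by this output block can be read back at any time (a minimal example, assuming the standard Terraform CLI):

```shell
# Prints the Airflow UI URL recorded in the Terraform state
terraform output pds_nucleus_airflow_ui_url
```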
4 changes: 4 additions & 0 deletions terraform/terraform-modules/cognito-auth/cognito-auth.tf
@@ -443,3 +443,7 @@ resource "aws_cognito_user_group" "pds_nucleus_viewer_cognito_user_group" {
precedence = 65
role_arn = aws_iam_role.pds_nucleus_viewer_role.arn
}

output "pds_nucleus_airflow_ui_url" {
value = "https://${aws_lb.pds_nucleus_auth_alb.dns_name}:${var.auth_alb_listener_port}/aws_mwaa/aws-console-sso"
}
@@ -136,6 +136,9 @@ def login(headers, query_params=None, user_claims=None,iam_role_arn=None):
except Exception as error:
logger.error(str(error))

if not redirect:
redirect = close(headers, f"Login Failed. Please check your Cognito user groups", status_code=401)

return redirect

def get_mwaa_client(role_arn, user):
1 change: 0 additions & 1 deletion terraform/terraform-modules/cognito-auth/variables.tf
@@ -43,7 +43,6 @@ variable "auth_alb_name" {
variable "auth_alb_listener_port" {
description = "Auth ALB Listener Port"
type = string
sensitive = true
}

variable "auth_alb_listener_certificate_arn" {
@@ -33,6 +33,7 @@
dag_name = os.environ.get('AIRFLOW_DAG_NAME')
pds_node_name = os.environ.get('PDS_NODE_NAME')
opensearch_endpoint = os.environ.get('OPENSEARCH_ENDPOINT')
opensearch_registry_name = os.environ.get('OPENSEARCH_REGISTRY_NAME')
pds_nucleus_opensearch_credential_relative_url = os.environ.get('OPENSEARCH_CREDENTIAL_RELATIVE_URL')
replace_prefix_with = os.environ.get('REPLACE_PREFIX_WITH')
efs_mount_path = os.environ.get('EFS_MOUNT_PATH')
@@ -45,6 +46,7 @@
pds_hot_archive_bucket_name = os.environ.get('PDS_HOT_ARCHIVE_S3_BUCKET_NAME')
pds_cold_archive_bucket_name = os.environ.get('PDS_COLD_ARCHIVE_S3_BUCKET_NAME')
pds_staging_bucket_name = os.environ.get('PDS_STAGING_S3_BUCKET_NAME')
product_batch_size = os.environ.get('PRODUCT_BATCH_SIZE')

replace_prefix = efs_mount_path

@@ -98,7 +100,7 @@ def process_completed_products():
logger.debug(f"Number of completed product labels : {str(response['records'])}")
logger.debug(f"Number of completed product labels : {str(len(response['records']))}")

n = 10
n = int(product_batch_size)  # PRODUCT_BATCH_SIZE arrives from the environment as a string
count = 0
list_of_product_labels_to_process = []

@@ -222,7 +224,7 @@ def create_harvest_configs_and_trigger_nucleus(list_of_product_labels_to_process
logger.info(f"Created harvest config XML file: {harvest_config_file_path}")

connection_xml_content = f"""<?xml version="1.0" encoding="UTF-8"?>
<registry_connection index="en-registry">
<registry_connection index="{opensearch_registry_name}">
<ec2_credential_url endpoint="{opensearch_endpoint}">{pds_nucleus_opensearch_credential_relative_url}</ec2_credential_url>
</registry_connection>
"""
@@ -51,7 +51,7 @@ resource "aws_rds_cluster" "default" {
}

lifecycle {
ignore_changes = ["availability_zones"]
ignore_changes = [availability_zones]
}
}

Expand Down Expand Up @@ -235,13 +235,36 @@ resource "aws_s3_bucket" "pds_nucleus_s3_config_bucket" {
force_destroy = true
}

# Create a staging S3 Bucket for each PDS Node
resource "aws_s3_bucket" "pds_nucleus_s3_staging_bucket" {
count = length(var.pds_node_names)
# convert PDS node name to S3 bucket name compatible format
# This data source is added to access existing S3 buckets, because an S3 staging bucket is already available in the MCP Prod environment.
data "aws_s3_bucket" "pds_nucleus_s3_staging_bucket" {
count = length(var.pds_node_names)
bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
}

# The following S3 bucket resources are commented out, because an S3 staging bucket is already available in the MCP Prod environment.
# However, these resources are useful when deploying in a fresh environment.

# # Create a staging S3 Bucket for each PDS Node
# resource "aws_s3_bucket" "pds_nucleus_s3_staging_bucket" {
# count = length(var.pds_node_names)
# # convert PDS node name to S3 bucket name compatible format
# bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
# }

# # Create an aws_s3_bucket_notification for each s3 bucket of each Node
# resource "aws_s3_bucket_notification" "pds_nucleus_s3_staging_bucket_notification" {
#
# count = length(var.pds_node_names)
# # convert PDS node name to S3 bucket name compatible format
# bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"
#
# queue {
# events = ["s3:ObjectCreated:*"]
# queue_arn = aws_sqs_queue.pds_nucleus_files_to_save_in_database_sqs_queue[count.index].arn
# }
# }


# Create pds_nucleus_s3_file_file_event_processor_function for each PDS Node
resource "aws_lambda_function" "pds_nucleus_s3_file_file_event_processor_function" {
count = length(var.pds_node_names)
@@ -292,15 +315,17 @@ resource "aws_lambda_function" "pds_nucleus_product_completion_checker_function"
DB_SECRET_ARN = aws_secretsmanager_secret.pds_nucleus_rds_credentials.arn
EFS_MOUNT_PATH = "/mnt/data"
ES_AUTH_CONFIG_FILE_PATH = "/etc/es-auth.cfg"
OPENSEARCH_ENDPOINT = var.pds_nucleus_opensearch_urls[count.index]
OPENSEARCH_ENDPOINT = var.pds_nucleus_opensearch_url
OPENSEARCH_REGISTRY_NAME = var.pds_nucleus_opensearch_registry_names[count.index]
OPENSEARCH_CREDENTIAL_RELATIVE_URL = var.pds_nucleus_opensearch_credential_relative_url
PDS_NODE_NAME = var.pds_node_names[count.index]
PDS_NUCLEUS_CONFIG_BUCKET_NAME = var.pds_nucleus_config_bucket_name
REPLACE_PREFIX_WITH = var.pds_nucleus_harvest_replace_prefix_with_list[count.index]
PDS_MWAA_ENV_NAME = var.airflow_env_name
PDS_HOT_ARCHIVE_S3_BUCKET_NAME = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_hot_archive_bucket_name_postfix}"
PDS_COLD_ARCHIVE_S3_BUCKET_NAME = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_cold_archive_bucket_name_postfix}"
PDS_STAGING_S3_BUCKET_NAME = aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].id
PDS_STAGING_S3_BUCKET_NAME = data.aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].id
PRODUCT_BATCH_SIZE = var.product_batch_size
}
}
}
@@ -342,7 +367,7 @@ resource "aws_lambda_permission" "s3-lambda-permission" {
action = "lambda:InvokeFunction"
function_name = aws_lambda_function.pds_nucleus_s3_file_file_event_processor_function[count.index].function_name
principal = "s3.amazonaws.com"
source_arn = aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn
source_arn = data.aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn
}

# Create an SQS queue to receive S3 bucket notifications for each s3 bucket of each Node
@@ -374,7 +399,7 @@ data "aws_iam_policy_document" "pds_nucleus_files_to_save_in_database_sqs_queue_
condition {
test = "StringEquals"
variable = "aws:SourceArn"
values = [aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn]
values = [data.aws_s3_bucket.pds_nucleus_s3_staging_bucket[count.index].arn]
}
}
}
@@ -386,20 +411,6 @@ resource "aws_sqs_queue_policy" "pds_nucleus_files_to_save_in_database_sqs_queue
policy = data.aws_iam_policy_document.pds_nucleus_files_to_save_in_database_sqs_queue_policy_document[count.index].json
}

# Create an aws_s3_bucket_notification for each s3 bucket of each Node
resource "aws_s3_bucket_notification" "pds_nucleus_s3_staging_bucket_notification" {

count = length(var.pds_node_names)
# convert PDS node name to S3 bucket name compatible format
bucket = "${lower(replace(var.pds_node_names[count.index], "_", "-"))}-${var.pds_nucleus_staging_bucket_name_postfix}"

queue {
events = ["s3:ObjectCreated:*"]
queue_arn = aws_sqs_queue.pds_nucleus_files_to_save_in_database_sqs_queue[count.index].arn
}
}


resource "time_sleep" "wait_for_database" {
create_duration = "2m"

@@ -75,9 +75,15 @@ variable "pds_node_names" {
sensitive = true
}

variable "pds_nucleus_opensearch_urls" {
description = "List of PDS Nucleus OpenSearch Config file paths"
type = list(string)
variable "pds_nucleus_opensearch_url" {
description = "List of PDS Nucleus OpenSearch URL"
type = string
sensitive = true
}

variable "pds_nucleus_opensearch_registry_names" {
description = "List of PDS Nucleus OpenSearch Registry Names"
type = list(string)
sensitive = true
}

@@ -119,6 +125,12 @@ variable "airflow_env_name" {
type = string
}

variable "product_batch_size" {
description = "Size of the product batch to send to Nuclees DAG top process per given DAG invocation"
default = 10
type = number
}

variable "region" {
description = "AWS Region"
type = string