Skip to content

Commit

Permalink
Merge pull request #10 from moengage/sqs_check_fix
Browse files Browse the repository at this point in the history
SRE-6953 - have updated the script to add all the different state int…
  • Loading branch information
priya-sharmaa committed Jun 12, 2024
2 parents bb5bedd + be96f2a commit 463dbf1
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 24 deletions.
8 changes: 8 additions & 0 deletions inputs/aws/aws_input.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ spec:
pd_apikey_secret_region: ''
pd_integration_key_check: True

dc_region_map:
# us-east-2: 'dc02'
region1: ''
region2: ''
region3: ''
region4: ''
region5: ''

awsAccessSecrets:
useAwsSecretManager: True
#useAwsSecretManager: False
Expand Down
21 changes: 11 additions & 10 deletions src/cloud/aws/aws_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
resource_classes = [TargetGroupAWSResource, SQSQueueAWSResourceGroup, LoadBalancerAWSResource, ElasticacheRedisAWSResource]

def generate_pretty_table( resource_class, active_resources, region_unmonitored_resources_map,
regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs):
regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs,sns_boto_clients, dcs):

'''
This will create the table and write the header row in the format of
Expand Down Expand Up @@ -51,7 +51,7 @@ def generate_pretty_table( resource_class, active_resources, region_unmonitored_
# Creating more rows based on the details obtained from the map, like region name, resource_name etc.
first_rows = get_rows_from_region_unmonitored_resources_map( region_unmonitored_resources_map, resource_class, business_team_map)

second_rows= get_rows_from_alarm_action_map( resource_class, active_resources, regional_resource_type_alarm_action_map,region_unmonitored_resources_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs)
second_rows= get_rows_from_alarm_action_map( resource_class, active_resources, regional_resource_type_alarm_action_map,region_unmonitored_resources_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs, sns_boto_clients, dcs)

# below we are formatting the column of the sheet based on the individual result we got from above line 52 and 54
# If the resource name and dc of a row in first_rows match the resource name and dc of a row in second_rows, update the 'Missing alarm metrics with reason' data using the details from both rows.
Expand Down Expand Up @@ -85,7 +85,7 @@ def generate_pretty_table( resource_class, active_resources, region_unmonitored_

return len(rows)

def get_unmonitored_metric_for_resources( env, dc, resource_class, alarm_reader, boto_client,integration_id_list, yaml_inputs):
def get_unmonitored_metric_for_resources( env, dc, resource_class, alarm_reader, boto_client,integration_id_list, yaml_inputs, sns_boto_clients):

'''
This will find all those resources which either dont contains the alarms,
Expand Down Expand Up @@ -133,20 +133,20 @@ def get_unmonitored_metric_for_resources( env, dc, resource_class, alarm_reader,

return aws_resource, actice_resources, unmonitored_resource_metric_map

def get_unmonitored_resources_for_region( env, dc, resource_class, alarm_reader, boto_client,integration_id_list, yaml_inputs):
def get_unmonitored_resources_for_region( env, dc, resource_class, alarm_reader, boto_client,integration_id_list, yaml_inputs, sns_boto_clients):

'''
This will find all the resources in a specific region which either do not have any alarms,
or have alarms without any SNS topic or endpoint attached to them.
'''

aws_resource, active_resources, unmonitored_resources = get_unmonitored_metric_for_resources( env, dc, resource_class, alarm_reader, boto_client,integration_id_list, yaml_inputs)
aws_resource, active_resources, unmonitored_resources = get_unmonitored_metric_for_resources( env, dc, resource_class, alarm_reader, boto_client,integration_id_list, yaml_inputs, sns_boto_clients)

return dc, active_resources, unmonitored_resources, aws_resource


def run_checker_for_resource(resource_class, env, dcs, boto_clients, spreadsheet, business_team_map,
regional_alarm_readers, sns_topic_subscription_map,integration_id_list, yaml_inputs):
regional_alarm_readers, sns_topic_subscription_map,integration_id_list, yaml_inputs, sns_boto_clients):

'''
This will fetch all the unmonitored resources and alarm details for each region in a thread.
Expand Down Expand Up @@ -176,15 +176,15 @@ def run_checker_for_resource(resource_class, env, dcs, boto_clients, spreadsheet

# Fetch all the resources per region which needs to be monitored.
region_futures.append(region_pool.submit(
get_unmonitored_resources_for_region, env, dc, resource_class, alarm_reader, boto_clients[dc],integration_id_list, yaml_inputs))
get_unmonitored_resources_for_region, env, dc, resource_class, alarm_reader, boto_clients[dc],integration_id_list, yaml_inputs, sns_boto_clients))

for future in as_completed(region_futures):
dc, active_resources, unmonitored_resources, aws_resource = future.result()
region_unmonitored_resources_map[dc] = ( aws_resource, unmonitored_resources)

# unique_resource_group = group_alarms_by_resource( regional_resource_type_alarm_action_map)

generate_pretty_table(resource_class, active_resources, region_unmonitored_resources_map,regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs)
generate_pretty_table(resource_class, active_resources, region_unmonitored_resources_map,regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs,sns_boto_clients, dcs )

print(spreadsheet, resource_class, active_resources,
region_unmonitored_resources_map,
Expand All @@ -193,7 +193,7 @@ def run_checker_for_resource(resource_class, env, dcs, boto_clients, spreadsheet
n_rows = write_to_spreadsheet(
spreadsheet, resource_class, active_resources,
region_unmonitored_resources_map,
regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs)
regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs, sns_boto_clients,dcs)

return bool(n_rows)

Expand All @@ -202,6 +202,7 @@ def aws_alarm_checker(env, yaml_inputs, business_team_map, dcs, spreadsheet_writ
# Getting the AWS access boto client and cloud watch boto client for different regions
resource_boto_clients = get_boto_clients(env, resource_classes, dcs, yaml_inputs)
cloudwatch_boto_clients = get_cloudwatch_boto_clients(env, dcs, yaml_inputs)
sns_boto_clients = get_sns_boto_clients(env, dcs, yaml_inputs)

# Getting the boolean value from the input file to check whether the user wants the PD integration key validation included or not

Expand Down Expand Up @@ -236,7 +237,7 @@ def aws_alarm_checker(env, yaml_inputs, business_team_map, dcs, spreadsheet_writ
resource_class_futures.append(
resource_class_pool.submit(
run_checker_for_resource, resource_class, env, dcs, resource_boto_clients[resource_class],
spreadsheet_writer, business_team_map, regional_alarm_readers, regional_sns_topic_subscription_map,integration_id_list, yaml_inputs))
spreadsheet_writer, business_team_map, regional_alarm_readers, regional_sns_topic_subscription_map,integration_id_list, yaml_inputs, sns_boto_clients))

has_unmonitored_resources = False
for future in as_completed(resource_class_futures):
Expand Down
15 changes: 15 additions & 0 deletions src/cloud/aws/utils/boto_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,19 @@ def get_cloudwatch_boto_clients(env, regions, yaml_inputs):
region = yaml_inputs['env_region_map'][dc]['region']
boto_clients[dc] = get_boto_client( env, 'cloudwatch', region, dc)

return boto_clients

def get_sns_boto_clients(env, regions, yaml_inputs):
    '''
    Build one SNS boto client per entry of `regions`.

    Each entry is used as a key into yaml_inputs['env_region_map'] to look up
    its 'region' value, which is passed to get_boto_client together with the
    env, the 'sns' service name and the entry itself.

    Returns a dict mapping every entry of `regions` to its SNS boto client.
    '''

    # The region lookup stays inside the comprehension so that nothing is read
    # from yaml_inputs when `regions` is empty.
    return {
        dc: get_boto_client(env, 'sns', yaml_inputs['env_region_map'][dc]['region'], dc)
        for dc in regions
    }
11 changes: 4 additions & 7 deletions src/cloud/aws/utils/integrationkeys.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,17 @@ def get_service_integration(service_id,pd_secretname,pd_secretregion):
service = pypd.Service.fetch(service_id)

# Iterate over each integration of the service and collect its integration key
if service['status'] == 'active':
if service['status'] != 'disabled':
integration_keys = []
for integration in iter(service.integrations()):
integration_id = integration["id"]
integration_key = get_integration_details(service_id, integration_id, api_key)
if integration_key is not None:
integration_keys.append(integration_key)
return integration_keys

elif service['status'] == 'disabled':
print(service_id, "is disabled")
return []

else:
print(f'{service_id} is neither active nor disabled')
print(f'{service_id} is disabled')
return []

except Exception as e:
Expand Down Expand Up @@ -162,5 +159,5 @@ def run_integration(yaml_inputs):
integration_key_list.extend(team_integration_keys)

print(f'Total integration keys: {len(integration_key_list)}')

print(integration_key_list)
return integration_key_list
6 changes: 3 additions & 3 deletions src/cloud/aws/utils/sns_validity.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
def return_boto_client_sns(region):
    '''
    Return a boto3 SNS client created for the given region name.
    '''
    sns_client = boto3.client("sns", region_name=region)
    return sns_client

def check_sns_validity(integration_id_list,region,sns_arn):
def check_sns_validity(integration_id_list,dcname,sns_arn, sns_boto_clients):

boto_client = return_boto_client_sns(region)
boto_client = sns_boto_clients[dcname]
result = ""
try:
response = boto_client.list_subscriptions_by_topic(
Expand All @@ -23,6 +23,6 @@ def check_sns_validity(integration_id_list,region,sns_arn):
else:
result="Not Valid Alarm - No PDEndpoint"
except:
result = "Not Valid SNS"
result = "Not Valid SNS - Exception Occured"

return result
9 changes: 5 additions & 4 deletions src/outputs/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def write_to_spreadsheet(spreadsheet_writer, resource_class, active_resources, region_unmonitored_resources_map,
regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs):
regional_resource_type_alarm_action_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs,sns_boto_clients, dcs):

'''
This will create the spread sheet and write the header row in the format of
Expand All @@ -30,7 +30,7 @@ def write_to_spreadsheet(spreadsheet_writer, resource_class, active_resources, r
# Creating more rows based on the details obtained from the map, like region name, resource_name etc.
first_rows = get_rows_from_region_unmonitored_resources_map( region_unmonitored_resources_map, resource_class, business_team_map)

second_rows= get_rows_from_alarm_action_map( resource_class, active_resources, regional_resource_type_alarm_action_map,region_unmonitored_resources_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs)
second_rows= get_rows_from_alarm_action_map(resource_class, active_resources, regional_resource_type_alarm_action_map,region_unmonitored_resources_map, business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs, sns_boto_clients, dcs)
rows=[]

# below we are formatting the column of the sheet based on the individual result we got from above line 31 and 33
Expand Down Expand Up @@ -94,7 +94,7 @@ def get_rows_from_region_unmonitored_resources_map( region_unmonitored_resources
return rows

def get_rows_from_alarm_action_map(resource_class, active_resources, regional_resource_type_alarm_action_map, region_unmonitored_resources_map,
business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs,text_format=False ):
business_team_map, sns_topic_subscription_map,integration_id_list, yaml_inputs,sns_boto_clients,dcs,text_format=False):

'''
This will fill remaining information in the sheet, like reason for unmonitoring and alarm etc.
Expand Down Expand Up @@ -159,12 +159,13 @@ def get_rows_from_alarm_action_map(resource_class, active_resources, regional_re

region_name=yaml_inputs['env_region_map'][region]['region']
pd_integration_check=yaml_inputs['pagerduty']['pd_integration_key_check']
dcname=yaml_inputs['dc_region_map'][region_name]

# Read the boolean value from the input file: if it is set, validate the SNS topic; otherwise the SNS validity check is skipped.

if pd_integration_check == True:
# This will check the validity of the sns topic by checking the pagerduty integration key
subscription_info = check_sns_validity(integration_id_list,region_name,subscription_arn)
subscription_info = check_sns_validity(integration_id_list,dcname,subscription_arn, sns_boto_clients)
else:
subscription_info = "sns Subscription validity skipped"

Expand Down

0 comments on commit 463dbf1

Please sign in to comment.