From bdbbfee2d60afa9a82ab751e7f286401dfae9384 Mon Sep 17 00:00:00 2001 From: David Nakabaale Date: Wed, 3 Jul 2024 10:26:08 -0400 Subject: [PATCH] [COST-5228] log outside for loop (#5202) * [COST-5228] log outside for loop * additional log clean up * add context to logs in _remove_expired_data func --- koku/masu/processor/_tasks/remove_expired.py | 15 ++-- .../processor/aws/aws_report_db_cleaner.py | 40 +++++------ .../azure/azure_report_db_cleaner.py | 28 ++++---- koku/masu/processor/expired_data_remover.py | 17 ++--- .../processor/gcp/gcp_report_db_cleaner.py | 28 ++++---- .../processor/oci/oci_report_db_cleaner.py | 30 ++++---- .../processor/ocp/ocp_report_db_cleaner.py | 31 +++++---- koku/masu/test/processor/test_tasks.py | 68 +++++++++++++++++-- 8 files changed, 160 insertions(+), 97 deletions(-) diff --git a/koku/masu/processor/_tasks/remove_expired.py b/koku/masu/processor/_tasks/remove_expired.py index b1160ae34e..da52687ac2 100644 --- a/koku/masu/processor/_tasks/remove_expired.py +++ b/koku/masu/processor/_tasks/remove_expired.py @@ -5,6 +5,7 @@ """Remove expired data asynchronous tasks.""" import logging +from api.common import log_json from masu.processor.expired_data_remover import ExpiredDataRemover LOG = logging.getLogger(__name__) @@ -23,17 +24,13 @@ def _remove_expired_data(schema_name, provider, simulate, provider_uuid=None): None """ - log_statement = ( - f"Remove expired data:\n" - f" schema_name: {schema_name}\n" - f" provider: {provider}\n" - f" simulate: {simulate}\n" - ) - LOG.info(log_statement) + + context = {"schema": schema_name, "provider_type": provider, "provider_uuid": provider_uuid, "simulate": simulate} + + LOG.info(log_json(msg="Remove expired data", context=context)) remover = ExpiredDataRemover(schema_name, provider) removed_data = remover.remove(simulate=simulate, provider_uuid=provider_uuid) if removed_data: status_msg = "Expired Data" if simulate else "Removed Data" - result_msg = f"{status_msg}:\n {str(removed_data)}" - LOG.info(result_msg) + LOG.info(log_json(msg=status_msg, removed_data=removed_data, context=context)) diff --git a/koku/masu/processor/aws/aws_report_db_cleaner.py b/koku/masu/processor/aws/aws_report_db_cleaner.py index d63808c9ef..2a2ad68e44 100644 --- a/koku/masu/processor/aws/aws_report_db_cleaner.py +++ b/koku/masu/processor/aws/aws_report_db_cleaner.py @@ -58,7 +58,7 @@ def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simul removed_items = [] all_account_ids = set() - all_period_start = set() + all_period_starts = set() with AWSReportDBAccessor(self._schema) as accessor: if (expired_date is None and provider_uuid is None) or ( # noqa: W504 @@ -85,16 +85,16 @@ def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simul } ) all_account_ids.add(bill.payer_account_id) - all_period_start.add(str(bill.billing_period_start)) - - LOG.info( - log_json( - msg="deleting provider billing data for AWS", - schema=self._schema, - provider_uuid=bill.payer_account_id, - start_date=bill.billing_period_start, - ) + all_period_starts.add(str(bill.billing_period_start)) + + LOG.info( + log_json( + msg="deleting provider billing data for AWS", + schema=self._schema, + accounts=all_account_ids, + period_starts=all_period_starts, ) + ) if not simulate: cascade_delete(bill_objects.query.model, bill_objects) @@ -105,7 +105,7 @@ def purge_expired_report_data_by_date(self, expired_date, simulate=False): partition_from = str(date(expired_date.year, expired_date.month, 1)) removed_items = [] all_account_ids = set() - all_period_start = set() + all_period_starts = set() with AWSReportDBAccessor(self._schema) as accessor: all_bill_objects = accessor.get_bill_query_before_date(expired_date).all() @@ -114,16 +114,16 @@ def purge_expired_report_data_by_date(self, expired_date, simulate=False): {"account_payer_id": bill.payer_account_id, "billing_period_start": str(bill.billing_period_start)} ) all_account_ids.add(bill.payer_account_id) - all_period_start.add(str(bill.billing_period_start)) - - LOG.info( - log_json( - msg="deleting provider billing data for AWS", - schema=self._schema, - provider_uuid=bill.payer_account_id, - start_date=bill.billing_period_start, - ) + all_period_starts.add(str(bill.billing_period_start)) + + LOG.info( + log_json( + msg="deleting provider billing data for AWS", + schema=self._schema, + accounts=all_account_ids, + period_starts=all_period_starts, ) + ) table_names = [ accessor._table_map["ocp_on_aws_daily_summary"], diff --git a/koku/masu/processor/azure/azure_report_db_cleaner.py b/koku/masu/processor/azure/azure_report_db_cleaner.py index 69db2d87e9..8c940ffe1c 100644 --- a/koku/masu/processor/azure/azure_report_db_cleaner.py +++ b/koku/masu/processor/azure/azure_report_db_cleaner.py @@ -77,14 +77,14 @@ def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simul all_providers.add(bill.provider_id) all_period_starts.add(str(bill.billing_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data", - schema=self._schema, - provider_uuid=bill.provider_id, - start_date=bill.billing_period_start, - ) + LOG.info( + log_json( + msg="deleting provider billing data", + schema=self._schema, + providers=all_providers, + period_starts=all_period_starts, ) + ) if not simulate: cascade_delete(bill_objects.query.model, bill_objects) @@ -117,14 +117,14 @@ def purge_expired_report_data_by_date(self, expired_date, simulate=False): all_providers.add(bill.provider_id) all_period_starts.add(str(bill.billing_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data", - schema=self._schema, - provider_uuid=bill.provider_id, - start_date=bill.billing_period_start, - ) + LOG.info( + log_json( + msg="deleting provider billing data", + schema=self._schema, + providers=all_providers, + period_starts=all_period_starts, ) + ) if not simulate: # Will call trigger to detach, truncate, and drop partitions diff --git a/koku/masu/processor/expired_data_remover.py b/koku/masu/processor/expired_data_remover.py index b96d9e66b4..3dfefb69ac 100644 --- a/koku/masu/processor/expired_data_remover.py +++ b/koku/masu/processor/expired_data_remover.py @@ -9,6 +9,7 @@ from django.conf import settings +from api.common import log_json from api.models import Provider from api.utils import DateHelper from masu.config import Config @@ -138,10 +139,11 @@ def remove(self, simulate=False, provider_uuid=None): if not simulate: manifest_accessor.purge_expired_report_manifest_provider_uuid(provider_uuid, expiration_date) LOG.info( - """Removed CostUsageReportManifest for - provider uuid: %s before billing period: %s""", - provider_uuid, - expiration_date, + log_json( + msg="Removed CostUsageReportManifest", + provider_uuid=provider_uuid, + expiration_date=expiration_date, + ) ) else: expiration_date = self._calculate_expiration_date() @@ -151,10 +153,9 @@ def remove(self, simulate=False, provider_uuid=None): if not simulate: manifest_accessor.purge_expired_report_manifest(self._provider, expiration_date) LOG.info( - """Removed CostUsageReportManifest for - provider type: %s before billing period: %s""", - self._provider, - expiration_date, + log_json( + msg="Removed CostUsageReportManifest", provider=self._provider, expiration_date=expiration_date + ) ) return removed_data diff --git a/koku/masu/processor/gcp/gcp_report_db_cleaner.py b/koku/masu/processor/gcp/gcp_report_db_cleaner.py index 5ef5df28bc..6287805d4b 100644 --- a/koku/masu/processor/gcp/gcp_report_db_cleaner.py +++ b/koku/masu/processor/gcp/gcp_report_db_cleaner.py @@ -74,14 +74,14 @@ def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simul all_providers.add(bill.provider_id) all_period_starts.add(str(bill.billing_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data for GCP", - schema=self._schema, - provider_uuid=bill.provider_id, - start_date=bill.billing_period_start, - ) + LOG.info( + log_json( + msg="deleting provider billing data for GCP", + schema=self._schema, + providers=all_providers, + period_starts=all_period_starts, ) + ) if not simulate: cascade_delete(bill_objects.query.model, bill_objects) @@ -132,13 +132,13 @@ def purge_expired_report_data_by_date(self, expired_date, simulate=False): all_providers.add(bill.provider_id) all_period_starts.add(str(bill.billing_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data for GCP", - schema=self._schema, - provider_uuid=bill.provider_id, - start_date=bill.billing_period_start, - ) + LOG.info( + log_json( + msg="deleting provider billing data for GCP", + schema=self._schema, + providers=all_providers, + period_starts=all_period_starts, ) + ) return removed_items diff --git a/koku/masu/processor/oci/oci_report_db_cleaner.py b/koku/masu/processor/oci/oci_report_db_cleaner.py index d14ee1a0dc..03d059358f 100644 --- a/koku/masu/processor/oci/oci_report_db_cleaner.py +++ b/koku/masu/processor/oci/oci_report_db_cleaner.py @@ -73,14 +73,15 @@ def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simul ) all_providers.add(bill.provider_id) all_period_starts.add(str(bill.billing_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data", - schema=self._schema, - provider_uuid=bill.provider_id, - start_date=bill.billing_period_start, - ) + + LOG.info( + log_json( + msg="deleting provider billing data", + schema=self._schema, + providers=all_providers, + period_starts=all_period_starts, ) + ) if not simulate: cascade_delete(bill_objects.query.model, bill_objects) @@ -128,13 +129,14 @@ def purge_expired_report_data_by_date(self, expired_date, simulate=False): ) all_providers.add(bill.provider_id) all_period_starts.add(str(bill.billing_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data", - schema=self._schema, - provider_uuid=bill.provider_id, - start_date=bill.billing_period_start, - ) + + LOG.info( + log_json( + msg="deleting provider billing data", + schema=self._schema, + providers=all_providers, + period_starts=all_period_starts, ) + ) return removed_items diff --git a/koku/masu/processor/ocp/ocp_report_db_cleaner.py b/koku/masu/processor/ocp/ocp_report_db_cleaner.py index 61d1a3f204..b905247cb6 100644 --- a/koku/masu/processor/ocp/ocp_report_db_cleaner.py +++ b/koku/masu/processor/ocp/ocp_report_db_cleaner.py @@ -73,14 +73,16 @@ def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simul all_cluster_ids.add(usage_period.cluster_id) all_period_starts.add(str(usage_period.report_period_start)) - LOG.info( - log_json( - msg="deleting provider billing data", - schema=self._schema, - provider_uuid=provider_uuid, - start_date=usage_period.report_period_start, - ) + LOG.info( + log_json( + msg="deleting provider billing data", + schema=self._schema, + provider_uuid=provider_uuid, + report_periods=all_report_periods, + cluster_ids=all_cluster_ids, + period_starts=all_period_starts, ) + ) if not simulate: cascade_delete(usage_period_objs.query.model, usage_period_objs) @@ -115,14 +117,15 @@ def purge_expired_report_data_by_date(self, expired_date, simulate=False): all_cluster_ids.add(usage_period.cluster_id) all_period_starts.add(str(usage_period.report_period_start)) - LOG.info( - log_json( - msg="removing all data related to cluster_ids", - cluster_ids=all_cluster_ids, - period_starts=all_period_starts, - schema=self._schema, - ) + LOG.info( + log_json( + msg="removing all data related to cluster_ids", + report_periods=all_report_periods, + cluster_ids=all_cluster_ids, + period_starts=all_period_starts, + schema=self._schema, ) + ) if not simulate: # Will call trigger to detach, truncate, and drop partitions diff --git a/koku/masu/test/processor/test_tasks.py b/koku/masu/test/processor/test_tasks.py index 275acc9706..e54d273140 100644 --- a/koku/masu/test/processor/test_tasks.py +++ b/koku/masu/test/processor/test_tasks.py @@ -645,18 +645,78 @@ class TestRemoveExpiredDataTasks(MasuTestCase): """Test cases for Processor Celery tasks.""" @patch.object(ExpiredDataRemover, "remove") - def test_remove_expired_data(self, fake_remover): + def test_remove_expired_data_simulate(self, fake_remover): """Test task.""" expected_results = [{"account_payer_id": "999999999", "billing_period_start": "2018-06-24 15:47:33.052509"}] fake_remover.return_value = expected_results - expected = "INFO:masu.processor._tasks.remove_expired:Expired Data:\n {}" + schema = self.schema + provider = Provider.PROVIDER_AWS + simulate = True + + expected_initial_remove_log = ( + "INFO:masu.processor._tasks.remove_expired:" + "{'message': 'Remove expired data', 'tracing_id': '', " + "'schema': '" + schema + "', " + "'provider_type': '" + provider + "', " + "'provider_uuid': " + str(None) + ", " + "'simulate': " + str(simulate) + "}" + ) + + expected_expired_data_log = ( + "INFO:masu.processor._tasks.remove_expired:" + "{'message': 'Expired Data', 'tracing_id': '', " + "'schema': '" + schema + "', " + "'provider_type': '" + provider + "', " + "'provider_uuid': " + str(None) + ", " + "'simulate': " + str(simulate) + ", " + "'removed_data': " + str(expected_results) + "}" + ) + + # disable logging override set in masu/__init__.py + logging.disable(logging.NOTSET) + with self.assertLogs("masu.processor._tasks.remove_expired") as logger: + remove_expired_data(schema_name=schema, provider=provider, simulate=simulate) + + self.assertIn(expected_initial_remove_log, logger.output) + self.assertIn(expected_expired_data_log, logger.output) + + @patch.object(ExpiredDataRemover, "remove") + def test_remove_expired_data_no_simulate(self, fake_remover): + """Test task.""" + expected_results = [{"account_payer_id": "999999999", "billing_period_start": "2018-06-24 15:47:33.052509"}] + fake_remover.return_value = expected_results + + schema = self.schema + provider = Provider.PROVIDER_AWS + simulate = False + + expected_initial_remove_log = ( + "INFO:masu.processor._tasks.remove_expired:" + "{'message': 'Remove expired data', 'tracing_id': '', " + "'schema': '" + schema + "', " + "'provider_type': '" + provider + "', " + "'provider_uuid': " + str(None) + ", " + "'simulate': " + str(simulate) + "}" + ) + + expected_expired_data_log = ( + "INFO:masu.processor._tasks.remove_expired:" + "{'message': 'Expired Data', 'tracing_id': '', " + "'schema': '" + schema + "', " + "'provider_type': '" + provider + "', " + "'provider_uuid': " + str(None) + ", " + "'simulate': " + str(simulate) + ", " + "'removed_data': " + str(expected_results) + "}" + ) # disable logging override set in masu/__init__.py logging.disable(logging.NOTSET) with self.assertLogs("masu.processor._tasks.remove_expired") as logger: - remove_expired_data(schema_name=self.schema, provider=Provider.PROVIDER_AWS, simulate=True) - self.assertIn(expected.format(str(expected_results)), logger.output) + remove_expired_data(schema_name=schema, provider=provider, simulate=simulate) + + self.assertIn(expected_initial_remove_log, logger.output) + self.assertNotIn(expected_expired_data_log, logger.output) class TestUpdateSummaryTablesTask(MasuTestCase):