Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COST-5195] - Prevent duplicate OCP cost model tasks #5190

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions koku/masu/processor/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,8 @@ def update_summary_tables( # noqa: C901
if fallback_update_summary_tables_queue != SummaryQueue.DEFAULT:
timeout = settings.WORKER_CACHE_LARGE_CUSTOMER_TIMEOUT

worker_cache = WorkerCache()
if not synchronous:
worker_cache = WorkerCache()
rate_limited = False
if is_large_customer_rate_limited:
rate_limited = rate_limit_tasks(task_name, schema)
Expand Down Expand Up @@ -617,6 +617,22 @@ def update_summary_tables( # noqa: C901
# OCP cost distribution of unattributed costs occurs within the `update_cost_model_costs` method.
# This method should always be called for OCP providers even when it does not have a cost model
if cost_model is not None or provider_type == Provider.PROVIDER_OCP:
# Prevent running duplicate tasks for the same source date range.
cache_key = f"cost-model:{provider_uuid}:{start_date}:{end_date}"
queued_task = worker_cache.get_task_from_cache_key(cache_key)
if queued_task:
LOG.info(
log_json(
tracing_id, msg="cost model update for range already queued, skipping new task.", context=context
)
)
# Attempt to link manifest task to currently queued cost model update.
queued_task.link(
mark_manifest_complete.si(
schema, provider_type, provider_uuid, manifest_list=manifest_list, tracing_id=tracing_id
).set(queue=mark_manifest_complete_queue)
)
return
LOG.info(log_json(tracing_id, msg="updating cost model costs", context=context))
linked_tasks = update_cost_model_costs.s(
schema, provider_uuid, start_date, end_date, tracing_id=tracing_id
Expand All @@ -636,7 +652,11 @@ def update_summary_tables( # noqa: C901
tracing_id=tracing_id,
).set(queue=mark_manifest_complete_queue)

chain(linked_tasks).apply_async()
result = chain(linked_tasks).apply_async()
if not queued_task:
# If we can assign the task id to the cache_key we can potentially
# look up the queued task and link the new manifest update task to it.
worker_cache.set_cache_task_id(cache_key, result.id)

if not synchronous:
worker_cache.release_single_task(task_name, cache_args)
Expand Down Expand Up @@ -837,10 +857,17 @@ def update_cost_model_costs(
None

"""
LOG.info(f"\n\n PAUSE COST MODEL \n\n")
import time

time.sleep(100)
task_name = "masu.processor.tasks.update_cost_model_costs"
# Remove queued task from cache
worker_cache = WorkerCache()
cache_key = f"cost-model:{provider_uuid}:{start_date}:{end_date}"
worker_cache.remove_task_from_cache(cache_key)
cache_args = [schema_name, provider_uuid, start_date, end_date]
if not synchronous:
worker_cache = WorkerCache()
fallback_queue = get_customer_queue(schema_name, CostModelQueue)
if worker_cache.single_task_is_running(task_name, cache_args):
msg = f"Task {task_name} already running for {cache_args}. Requeuing."
Expand Down
14 changes: 14 additions & 0 deletions koku/masu/processor/worker_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""Cache of worker tasks currently running."""
import logging

from celery.result import AsyncResult
from django.conf import settings
from django.core.cache import caches
from django.db import connection
Expand Down Expand Up @@ -153,6 +154,19 @@ def task_is_running(self, task_key):
task_list = self.get_all_running_tasks()
return task_key in task_list

def get_task_from_cache_key(self, cache_key):
    """Return the celery AsyncResult for a queued task matching ``cache_key``.

    Scans the worker cache's running-task entries for one that contains
    ``cache_key`` as a substring and returns an ``AsyncResult`` built from
    the task id embedded in that entry.

    Args:
        cache_key (str): key fragment to look for, e.g.
            ``cost-model:{provider_uuid}:{start_date}:{end_date}``.

    Returns:
        celery.result.AsyncResult | None: the first matching queued task,
        or ``None`` when no entry matches.
    """
    for entry in self.get_all_running_tasks():
        if cache_key in entry:
            # Entry is assumed to end with ":<task_id>" — the task id is the
            # last colon-separated segment. TODO(review): confirm this matches
            # how set_cache_task_id stores entries once it is implemented.
            task_id = entry.split(":")[-1]
            # Stop at the first match; the original kept scanning and would
            # silently return the *last* matching task instead.
            return AsyncResult(task_id)
    return None

def set_cache_task_id(self, cache_key, task_id):
    """Set task id for cached key"""
    # NOTE(review): unimplemented stub — this method is a no-op despite the
    # docstring. The caller in update_summary_tables invokes
    # worker_cache.set_cache_task_id(cache_key, result.id) expecting the
    # task id to be recorded under cache_key so that
    # get_task_from_cache_key() can later find and link to the queued task.
    # As written, nothing is ever stored, so the duplicate-task detection
    # path can never trigger. TODO: implement (store an entry containing
    # cache_key with the task id as the trailing ":"-separated segment, to
    # match what get_task_from_cache_key parses) or remove before merge.
    return

def single_task_is_running(self, task_name, task_args=None):
"""Check for a single task key in the cache."""
cache_str = create_single_task_cache_key(task_name, task_args)
Expand Down
Loading