Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COST-5195] - Prevent duplicate OCP cost model tasks #5190

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions koku/masu/processor/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,8 @@ def update_summary_tables( # noqa: C901
if fallback_update_summary_tables_queue != SummaryQueue.DEFAULT:
timeout = settings.WORKER_CACHE_LARGE_CUSTOMER_TIMEOUT

worker_cache = WorkerCache()
if not synchronous:
worker_cache = WorkerCache()
rate_limited = False
if is_large_customer_rate_limited:
rate_limited = rate_limit_tasks(task_name, schema)
Expand Down Expand Up @@ -617,6 +617,22 @@ def update_summary_tables( # noqa: C901
# OCP cost distribution of unattributed costs occurs within the `update_cost_model_costs` method.
# This method should always be called for OCP providers even when it does not have a cost model
if cost_model is not None or provider_type == Provider.PROVIDER_OCP:
# Prevent running duplicate tasks for the same source date range.
cache_key = f"cost-model:{provider_uuid}:{start_date}:{end_date}"
queued_task = worker_cache.get_task_from_cache_key(cache_key)
if queued_task:
LOG.info(
log_json(
tracing_id, msg="cost model update for range already queued, skipping new task.", context=context
)
)
# Attempt to link manifest task to currently queued cost model update.
queued_task.link(
mark_manifest_complete.si(
schema, provider_type, provider_uuid, manifest_list=manifest_list, tracing_id=tracing_id
).set(queue=mark_manifest_complete_queue)
)
return
LOG.info(log_json(tracing_id, msg="updating cost model costs", context=context))
linked_tasks = update_cost_model_costs.s(
schema, provider_uuid, start_date, end_date, tracing_id=tracing_id
Expand All @@ -636,7 +652,11 @@ def update_summary_tables( # noqa: C901
tracing_id=tracing_id,
).set(queue=mark_manifest_complete_queue)

chain(linked_tasks).apply_async()
result = chain(linked_tasks).apply_async()
if not queued_task:
# If we can assign the task id to the cache_key we can potentially
# look up the queued task and link the new manifest update task to it.
worker_cache.set_cache_task_id(cache_key, result.id)

if not synchronous:
worker_cache.release_single_task(task_name, cache_args)
Expand Down Expand Up @@ -837,10 +857,17 @@ def update_cost_model_costs(
None

"""
LOG.info(f"\n\n PAUSE COST MODEL \n\n")
import time

time.sleep(100)
task_name = "masu.processor.tasks.update_cost_model_costs"
# Remove queued task from cache
worker_cache = WorkerCache()
cache_key = f"cost-model:{provider_uuid}:{start_date}:{end_date}"
worker_cache.remove_task_from_cache(cache_key)
cache_args = [schema_name, provider_uuid, start_date, end_date]
if not synchronous:
worker_cache = WorkerCache()
fallback_queue = get_customer_queue(schema_name, CostModelQueue)
if worker_cache.single_task_is_running(task_name, cache_args):
msg = f"Task {task_name} already running for {cache_args}. Requeuing."
Expand Down
14 changes: 14 additions & 0 deletions koku/masu/processor/worker_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""Cache of worker tasks currently running."""
import logging

from celery.result import AsyncResult
from django.conf import settings
from django.core.cache import caches
from django.db import connection
Expand Down Expand Up @@ -153,6 +154,19 @@ def task_is_running(self, task_key):
task_list = self.get_all_running_tasks()
return task_key in task_list

def get_task_from_cache_key(self, cache_key):
    """Return the celery AsyncResult for a queued task matching ``cache_key``.

    Scans the worker cache's running-task entries for one that contains
    ``cache_key`` as a substring and returns an ``AsyncResult`` built from
    the task id embedded in that entry.

    Args:
        cache_key (str): key fragment to look for, e.g.
            ``cost-model:{provider_uuid}:{start_date}:{end_date}``.

    Returns:
        celery.result.AsyncResult | None: the first matching queued task,
        or ``None`` when no entry matches.
    """
    for entry in self.get_all_running_tasks():
        if cache_key in entry:
            # Entry is assumed to end with ":<task_id>" — the task id is the
            # last colon-separated segment. TODO(review): confirm this matches
            # how set_cache_task_id stores entries once it is implemented.
            task_id = entry.split(":")[-1]
            # Stop at the first match; the original kept scanning and would
            # silently return the *last* matching task instead.
            return AsyncResult(task_id)
    return None

def set_cache_task_id(self, cache_key, task_id):
    """Set task id for cached key"""
    # NOTE(review): unimplemented stub — this method is a no-op despite the
    # docstring. The caller in update_summary_tables invokes
    # worker_cache.set_cache_task_id(cache_key, result.id) expecting the
    # task id to be recorded under cache_key so that
    # get_task_from_cache_key() can later find and link to the queued task.
    # As written, nothing is ever stored, so the duplicate-task detection
    # path can never trigger. TODO: implement (store an entry containing
    # cache_key with the task id as the trailing ":"-separated segment, to
    # match what get_task_from_cache_key parses) or remove before merge.
    return

def single_task_is_running(self, task_name, task_args=None):
"""Check for a single task key in the cache."""
cache_str = create_single_task_cache_key(task_name, task_args)
Expand Down
Loading