Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COST-5213] - fix S3 prepare #5194

Merged
merged 6 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def get_processing_date(
if (
data_frame[invoice_bill].any() and start_date.month != DateHelper().now_utc.month or ingress_reports
) or not check_provider_setup_complete(provider_uuid):
ReportManifestDBAccessor().mark_s3_parquet_to_be_cleared(manifest_id)
process_date = ReportManifestDBAccessor().set_manifest_daily_start_date(manifest_id, start_date)
else:
process_date = utils.get_or_clear_daily_s3_by_date(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def get_processing_date(
or ingress_reports
):
process_date = start_date
ReportManifestDBAccessor().mark_s3_parquet_to_be_cleared(manifest_id)
process_date = ReportManifestDBAccessor().set_manifest_daily_start_date(manifest_id, process_date)
else:
process_date = get_or_clear_daily_s3_by_date(
Expand Down
7 changes: 4 additions & 3 deletions koku/masu/util/aws/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,6 @@ def get_or_clear_daily_s3_by_date(csv_s3_path, provider_uuid, start_date, end_da
delete_s3_objects(request_id, to_delete, context)
manifest = ReportManifestDBAccessor().get_manifest_by_id(manifest_id)
ReportManifestDBAccessor().mark_s3_csv_cleared(manifest)
ReportManifestDBAccessor().mark_s3_parquet_to_be_cleared(manifest_id)
LOG.info(
log_json(msg="removed csv files, marked manifest csv cleared and parquet not cleared", context=context)
)
Expand Down Expand Up @@ -823,7 +822,7 @@ def delete_s3_objects(request_id, keys_to_delete, context) -> list[str]:


def clear_s3_files(
csv_s3_path, provider_uuid, start_date, metadata_key, metadata_value_check, context, request_id, invoice_month=None
csv_s3_path, provider_uuid, start_date, metadata_key, manifest_id, context, request_id, invoice_month=None
):
"""Clear s3 files for daily archive processing"""
account = context.get("account")
Expand Down Expand Up @@ -858,7 +857,7 @@ def clear_s3_files(
try:
existing_object = obj_summary.Object()
metadata_value = existing_object.metadata.get(metadata_key)
if str(metadata_value) != str(metadata_value_check):
if str(metadata_value) != str(manifest_id):
to_delete.append(existing_object.key)
except (ClientError) as err:
LOG.warning(
Expand All @@ -871,6 +870,8 @@ def clear_s3_files(
exc_info=err,
)
delete_s3_objects(request_id, to_delete, context)
manifest = ReportManifestDBAccessor().get_manifest_by_id(manifest_id)
ReportManifestDBAccessor().mark_s3_parquet_cleared(manifest)
lcouzens marked this conversation as resolved.
Show resolved Hide resolved


def remove_files_not_in_set_from_s3_bucket(request_id, s3_path, manifest_id, context=None):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.11 on 2024-06-28 12:00
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    # Schema migration for the "costusagereportmanifest" model: re-declares the
    # "s3_parquet_cleared" column as a nullable boolean whose default is False.
    # NOTE(review): this appears to pair with the model change in
    # koku/reporting_common/models.py, where the same field's default is
    # switched from True to False — confirm the two stay in sync.

    # Must be applied after migration 0041_diskcapacity of the
    # "reporting_common" app.
    dependencies = [
        ("reporting_common", "0041_diskcapacity"),
    ]

    # Single operation: alter the existing field in place (no data migration is
    # performed here, so existing rows keep whatever value they already have).
    operations = [
        migrations.AlterField(
            model_name="costusagereportmanifest",
            name="s3_parquet_cleared",
            field=models.BooleanField(default=False, null=True),
        ),
    ]
2 changes: 1 addition & 1 deletion koku/reporting_common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Meta:
# s3_csv_cleared used in AWS/Azure to indicate csv's have been cleared for daily archive processing
s3_csv_cleared = models.BooleanField(default=False, null=True)
# s3_parquet_cleared used to indicate parquet files have been cleared prior to csv to parquet conversion
s3_parquet_cleared = models.BooleanField(default=True, null=True)
s3_parquet_cleared = models.BooleanField(default=False, null=True)
# Indicates what initial date to start at for daily processing
daily_archive_start_date = models.DateTimeField(null=True)
operator_version = models.TextField(null=True)
Expand Down
Loading