Skip to content

Commit baf48e4

Browse files
committed
Rework migration 19
1 parent 3bf4498 commit baf48e4

File tree

1 file changed

+82
-49
lines changed

1 file changed

+82
-49
lines changed

pulp_python/app/migrations/0019_create_missing_metadata_artifacts.py

Lines changed: 82 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22

33
from django.db import migrations
4+
from itertools import groupby
45

5-
BATCH_SIZE = 1000
6+
BATCH_SIZE = 200
67

78

89
def pulp_hashlib_new(name, *args, **kwargs):
@@ -118,6 +119,7 @@ def create_missing_metadata_artifacts(apps, schema_editor):
118119
import tempfile
119120
from django.conf import settings
120121
from django.db import models
122+
from django.db.utils import IntegrityError
121123

122124
PythonPackageContent = apps.get_model("python", "PythonPackageContent")
123125
ContentArtifact = apps.get_model("core", "ContentArtifact")
@@ -132,62 +134,93 @@ def create_missing_metadata_artifacts(apps, schema_editor):
132134
)
133135
.exclude(metadata_sha256="")
134136
.prefetch_related("_artifacts")
135-
.only("filename", "metadata_sha256")
137+
.only("filename", "metadata_sha256", "pulp_domain_id")
138+
.order_by("pulp_domain_id")
136139
)
137-
artifact_batch = []
140+
artifact_batch = {}
138141
contentartifact_batch = []
139142
packages_batch = []
140143

141-
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
142-
for package in packages:
143-
# Get the main artifact for package
144-
main_artifact = package._artifacts.get()
145-
146-
filename = package.filename
147-
metadata_digests = {"sha256": package.metadata_sha256}
148-
result = artifact_to_metadata_artifact(
149-
filename, main_artifact, metadata_digests, temp_dir, Artifact
150-
)
151-
if result is None:
152-
# Unset metadata_sha256 when extraction or validation fails
153-
package.metadata_sha256 = None
154-
packages_batch.append(package)
155-
continue
156-
metadata_artifact, mismatched_sha256 = result
157-
if mismatched_sha256:
158-
# Fix the package if its metadata_sha256 differs from the actual value
159-
package.metadata_sha256 = mismatched_sha256
160-
packages_batch.append(package)
161-
162-
# Set the domain on the metadata artifact to match the package's domain
163-
metadata_artifact.pulp_domain = package._pulp_domain
164-
165-
contentartifact = ContentArtifact(
166-
artifact=metadata_artifact,
167-
content=package,
168-
relative_path=f"{filename}.metadata",
144+
def batch_save_artifacts(domain_id):
145+
from django.db import transaction
146+
147+
sid = transaction.savepoint() # Start a savepoint
148+
try:
149+
Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
150+
except IntegrityError:
151+
transaction.savepoint_rollback(sid) # Only rollback this batch
152+
# Find the existing artifacts and update the contentartifacts to point to the existing artifacts
153+
digest_cas = {}
154+
for ca in contentartifact_batch:
155+
digest_cas.setdefault(ca.artifact.sha256, []).append(ca)
156+
artifacts = Artifact.objects.filter(
157+
sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
169158
)
170-
artifact_batch.append(metadata_artifact)
171-
contentartifact_batch.append(contentartifact)
172-
173-
if len(artifact_batch) == BATCH_SIZE:
174-
Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
175-
ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
176-
artifact_batch.clear()
177-
contentartifact_batch.clear()
178-
if len(packages_batch) == BATCH_SIZE:
159+
for artifact in artifacts:
160+
for ca in digest_cas[artifact.sha256]:
161+
ca.artifact = artifact
162+
artifact_batch.pop(artifact.sha256)
163+
Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
164+
165+
ContentArtifact.objects.bulk_create(
166+
contentartifact_batch,
167+
batch_size=BATCH_SIZE,
168+
update_conflicts=True,
169+
update_fields=["artifact"],
170+
unique_fields=["content", "relative_path"],
171+
)
172+
artifact_batch.clear()
173+
contentartifact_batch.clear()
174+
175+
for domain_id, domain_packages in groupby(
176+
packages.iterator(chunk_size=BATCH_SIZE), key=lambda x: x.pulp_domain_id
177+
):
178+
for package in domain_packages:
179+
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
180+
# Get the main artifact for package
181+
main_artifact = package._artifacts.get()
182+
183+
filename = package.filename
184+
metadata_digests = {"sha256": package.metadata_sha256}
185+
result = artifact_to_metadata_artifact(
186+
filename, main_artifact, metadata_digests, temp_dir, Artifact
187+
)
188+
if result is None:
189+
# Unset metadata_sha256 when extraction or validation fails
190+
package.metadata_sha256 = None
191+
packages_batch.append(package)
192+
continue
193+
metadata_artifact, mismatched_sha256 = result
194+
if mismatched_sha256:
195+
# Fix the package if its metadata_sha256 differs from the actual value
196+
package.metadata_sha256 = mismatched_sha256
197+
packages_batch.append(package)
198+
199+
# Set the domain on the metadata artifact to match the package's domain
200+
metadata_artifact.pulp_domain_id = domain_id
201+
202+
art = artifact_batch.setdefault(metadata_artifact.sha256, metadata_artifact)
203+
contentartifact = ContentArtifact(
204+
artifact=art,
205+
content=package,
206+
relative_path=f"{filename}.metadata",
207+
)
208+
contentartifact_batch.append(contentartifact)
209+
210+
if len(contentartifact_batch) == BATCH_SIZE:
211+
batch_save_artifacts(domain_id)
212+
if len(packages_batch) == BATCH_SIZE:
213+
PythonPackageContent.objects.bulk_update(
214+
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
215+
)
216+
packages_batch.clear()
217+
218+
if artifact_batch:
219+
batch_save_artifacts(domain_id)
220+
if packages_batch:
179221
PythonPackageContent.objects.bulk_update(
180222
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
181223
)
182-
packages_batch.clear()
183-
184-
if artifact_batch:
185-
Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
186-
ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
187-
if packages_batch:
188-
PythonPackageContent.objects.bulk_update(
189-
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
190-
)
191224

192225

193226
class Migration(migrations.Migration):

0 commit comments

Comments (0)