11# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22
33from django .db import migrations
4+ from itertools import groupby
45
5- BATCH_SIZE = 1000
6+ BATCH_SIZE = 200
67
78
89def pulp_hashlib_new (name , * args , ** kwargs ):
@@ -118,6 +119,7 @@ def create_missing_metadata_artifacts(apps, schema_editor):
118119 import tempfile
119120 from django .conf import settings
120121 from django .db import models
122+ from django .db .utils import IntegrityError
121123
122124 PythonPackageContent = apps .get_model ("python" , "PythonPackageContent" )
123125 ContentArtifact = apps .get_model ("core" , "ContentArtifact" )
@@ -132,62 +134,93 @@ def create_missing_metadata_artifacts(apps, schema_editor):
132134 )
133135 .exclude (metadata_sha256 = "" )
134136 .prefetch_related ("_artifacts" )
135- .only ("filename" , "metadata_sha256" )
137+ .only ("filename" , "metadata_sha256" , "pulp_domain_id" )
138+ .order_by ("pulp_domain_id" )
136139 )
137- artifact_batch = []
140+ artifact_batch = {}
138141 contentartifact_batch = []
139142 packages_batch = []
140143
141- with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
142- for package in packages :
143- # Get the main artifact for package
144- main_artifact = package ._artifacts .get ()
145-
146- filename = package .filename
147- metadata_digests = {"sha256" : package .metadata_sha256 }
148- result = artifact_to_metadata_artifact (
149- filename , main_artifact , metadata_digests , temp_dir , Artifact
150- )
151- if result is None :
152- # Unset metadata_sha256 when extraction or validation fails
153- package .metadata_sha256 = None
154- packages_batch .append (package )
155- continue
156- metadata_artifact , mismatched_sha256 = result
157- if mismatched_sha256 :
158- # Fix the package if its metadata_sha256 differs from the actual value
159- package .metadata_sha256 = mismatched_sha256
160- packages_batch .append (package )
161-
162- # Set the domain on the metadata artifact to match the package's domain
163- metadata_artifact .pulp_domain = package ._pulp_domain
164-
165- contentartifact = ContentArtifact (
166- artifact = metadata_artifact ,
167- content = package ,
168- relative_path = f"{ filename } .metadata" ,
def batch_save_artifacts(domain_id):
    """Persist the pending metadata artifacts and their ContentArtifact links.

    Operates on the enclosing function's ``artifact_batch`` (a dict mapping
    sha256 -> unsaved Artifact) and ``contentartifact_batch`` (a list of
    unsaved ContentArtifact objects); both are emptied before returning.

    Args:
        domain_id: ``pulp_domain_id`` shared by every artifact in the current
            batch; used to look up artifacts that already exist in the
            database when the first insert conflicts.

    Raises:
        IntegrityError: if inserting the remaining artifacts still conflicts
            after the already-existing ones were removed from the batch.
    """
    from django.db import transaction

    try:
        # atomic() creates a savepoint when nested inside the migration's
        # transaction, rolls back to it if the block raises, and releases it
        # on success. The manual savepoint()/savepoint_rollback() pair never
        # released the savepoint on the success path, leaving one open
        # savepoint per batch until the whole migration committed.
        with transaction.atomic():
            Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
    except IntegrityError:
        # Only this batch's insert was rolled back. Some of the artifacts
        # already exist: find them and repoint the pending ContentArtifacts
        # at the stored rows instead of the unsaved duplicates.
        digest_cas = {}
        for ca in contentartifact_batch:
            digest_cas.setdefault(ca.artifact.sha256, []).append(ca)
        # Snapshot the digests: artifact_batch is mutated while the results
        # of this query are being processed.
        existing_artifacts = Artifact.objects.filter(
            sha256__in=list(artifact_batch), pulp_domain_id=domain_id
        )
        for artifact in existing_artifacts:
            for ca in digest_cas[artifact.sha256]:
                ca.artifact = artifact
            artifact_batch.pop(artifact.sha256)
        # Whatever is left in the batch is genuinely new; insert it.
        Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)

    # Upsert the links: if a (content, relative_path) row already exists,
    # only its artifact reference is updated.
    ContentArtifact.objects.bulk_create(
        contentartifact_batch,
        batch_size=BATCH_SIZE,
        update_conflicts=True,
        update_fields=["artifact"],
        unique_fields=["content", "relative_path"],
    )
    artifact_batch.clear()
    contentartifact_batch.clear()
174+
175+ for domain_id , domain_packages in groupby (
176+ packages .iterator (chunk_size = BATCH_SIZE ), key = lambda x : x .pulp_domain_id
177+ ):
178+ for package in domain_packages :
179+ with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
180+ # Get the main artifact for package
181+ main_artifact = package ._artifacts .get ()
182+
183+ filename = package .filename
184+ metadata_digests = {"sha256" : package .metadata_sha256 }
185+ result = artifact_to_metadata_artifact (
186+ filename , main_artifact , metadata_digests , temp_dir , Artifact
187+ )
188+ if result is None :
189+ # Unset metadata_sha256 when extraction or validation fails
190+ package .metadata_sha256 = None
191+ packages_batch .append (package )
192+ continue
193+ metadata_artifact , mismatched_sha256 = result
194+ if mismatched_sha256 :
195+ # Fix the package if its metadata_sha256 differs from the actual value
196+ package .metadata_sha256 = mismatched_sha256
197+ packages_batch .append (package )
198+
199+ # Set the domain on the metadata artifact to match the package's domain
200+ metadata_artifact .pulp_domain_id = domain_id
201+
202+ art = artifact_batch .setdefault (metadata_artifact .sha256 , metadata_artifact )
203+ contentartifact = ContentArtifact (
204+ artifact = art ,
205+ content = package ,
206+ relative_path = f"{ filename } .metadata" ,
207+ )
208+ contentartifact_batch .append (contentartifact )
209+
210+ if len (contentartifact_batch ) == BATCH_SIZE :
211+ batch_save_artifacts (domain_id )
212+ if len (packages_batch ) == BATCH_SIZE :
213+ PythonPackageContent .objects .bulk_update (
214+ packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
215+ )
216+ packages_batch .clear ()
217+
218+ if artifact_batch :
219+ batch_save_artifacts (domain_id )
220+ if packages_batch :
179221 PythonPackageContent .objects .bulk_update (
180222 packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
181223 )
182- packages_batch .clear ()
183-
184- if artifact_batch :
185- Artifact .objects .bulk_create (artifact_batch , batch_size = BATCH_SIZE )
186- ContentArtifact .objects .bulk_create (contentartifact_batch , batch_size = BATCH_SIZE )
187- if packages_batch :
188- PythonPackageContent .objects .bulk_update (
189- packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
190- )
191224
192225
193226class Migration (migrations .Migration ):
0 commit comments