88from django .db .models .query import QuerySet
99from pulp_python .app .models import PythonPackageContent , PythonRepository
1010from pulp_python .app .utils import (
11+ artifact_to_metadata_artifact ,
1112 artifact_to_python_content_data ,
1213 fetch_json_release_metadata ,
1314 parse_metadata ,
1415)
15- from pulpcore .plugin .models import ContentArtifact , ProgressReport
16+ from pulpcore .plugin .models import Artifact , ContentArtifact , Domain , ProgressReport
1617from pulpcore .plugin .util import get_domain
1718
1819log = logging .getLogger (__name__ )
@@ -41,16 +42,25 @@ def repair(repository_pk: UUID) -> None:
4142 content_set = repository .latest_version ().content .values_list ("pk" , flat = True )
4243 content = PythonPackageContent .objects .filter (pk__in = content_set )
4344
44- num_repaired , pkgs_not_repaired = repair_metadata (content )
45+ num_repaired , pkgs_not_repaired , num_metadata_repaired , pkgs_metadata_not_repaired = (
46+ repair_metadata (content )
47+ )
48+ # Convert set() to 0
49+ if not pkgs_not_repaired :
50+ pkgs_not_repaired = 0
51+ if not pkgs_metadata_not_repaired :
52+ pkgs_metadata_not_repaired = 0
53+
4554 log .info (
4655 _ (
4756 "{} packages' metadata repaired. Not repaired packages due to either "
48- "inaccessible URL or mismatched sha256: {}."
49- ).format (num_repaired , pkgs_not_repaired )
57+ "inaccessible URL or mismatched sha256: {}. "
58+ "{} metadata files repaired. Packages whose metadata files could not be repaired: {}."
59+ ).format (num_repaired , pkgs_not_repaired , num_metadata_repaired , pkgs_metadata_not_repaired )
5060 )
5161
5262
53- def repair_metadata (content : QuerySet [PythonPackageContent ]) -> tuple [int , set [str ]]:
63+ def repair_metadata (content : QuerySet [PythonPackageContent ]) -> tuple [int , set [str ], int , set [ str ] ]:
5464 """
5565 Repairs metadata for a queryset of PythonPackageContent objects
5666 and updates the progress report.
@@ -59,9 +69,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
5969 content (QuerySet[PythonPackageContent]): The queryset of items to repair.
6070
6171 Returns:
62- tuple[int, set[str]]: A tuple containing:
72+ tuple[int, set[str], int, set[str] ]: A tuple containing:
6373 - The number of packages that were repaired.
6474 - A set of packages' PKs that were not repaired.
75+ - The number of metadata files that were repaired.
76+ - A set of packages' PKs without repaired metadata artifacts.
6577 """
6678 immediate_content = (
6779 content .filter (contentartifact__artifact__isnull = False )
@@ -87,6 +99,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
8799 # Keep track of on-demand packages that were not repaired
88100 pkgs_not_repaired = set ()
89101
102+ # Metadata artifacts and content artifacts
103+ metadata_batch = []
104+ total_metadata_repaired = 0
105+ pkgs_metadata_not_repaired = set ()
106+
90107 progress_report = ProgressReport (
91108 message = "Repairing packages' metadata" ,
92109 code = "repair.metadata" ,
@@ -102,6 +119,14 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
102119 .artifact
103120 )
104121 new_data = artifact_to_python_content_data (package .filename , main_artifact , domain )
122+ total_metadata_repaired += update_metadata_artifact_if_needed (
123+ package ,
124+ new_data .get ("metadata_sha256" ),
125+ main_artifact ,
126+ domain ,
127+ metadata_batch ,
128+ pkgs_metadata_not_repaired ,
129+ )
105130 total_repaired += update_package_if_needed (
106131 package , new_data , batch , set_of_update_fields
107132 )
@@ -163,7 +188,12 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
163188 total_repaired += len (batch )
164189 PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
165190
166- return total_repaired , pkgs_not_repaired
191+ if metadata_batch :
192+ not_repaired = _process_metadata_batch (metadata_batch )
193+ pkgs_metadata_not_repaired .update (not_repaired )
194+ total_metadata_repaired += len (metadata_batch ) - len (not_repaired )
195+
196+ return total_repaired , pkgs_not_repaired , total_metadata_repaired , pkgs_metadata_not_repaired
167197
168198
169199def update_package_if_needed (
@@ -202,3 +232,95 @@ def update_package_if_needed(
202232 set_of_update_fields .clear ()
203233
204234 return total_repaired
235+
236+
def update_metadata_artifact_if_needed(
    package: PythonPackageContent,
    new_metadata_sha256: str | None,
    main_artifact: Artifact,
    domain: Domain,
    metadata_batch: list[tuple],
    pkgs_metadata_not_repaired: set[str],
) -> int:
    """
    Queue a metadata-artifact repair for a wheel package when one is needed.

    A "create" operation is queued when the package has no ContentArtifact whose
    relative path ends with ".metadata". An "update" operation is queued when the
    package's stored metadata_sha256 differs from new_metadata_sha256 and the existing
    metadata ContentArtifact either has no artifact or has an artifact with a different
    sha256. Packages that are not wheels, or for which no new_metadata_sha256 is
    available, are skipped. Queued operations are processed once the batch reaches
    BULK_SIZE.

    Args:
        package: Package to check for metadata changes.
        new_metadata_sha256: The correct metadata_sha256 extracted from the main artifact, or None.
        main_artifact: The main package artifact used to generate metadata.
        domain: The domain in which the metadata artifact will be created.
        metadata_batch: List of (action, package, main_artifact, content_artifact, domain)
            tuples awaiting batch processing (updated in-place).
        pkgs_metadata_not_repaired: Set of package PKs that failed repair (updated in-place).

    Returns:
        Number of metadata artifacts repaired by this call. This is non-zero only when
        the call flushed the batch; otherwise 0.
    """
    if not package.filename.endswith(".whl") or not new_metadata_sha256:
        return 0

    # A single query both tests for existence and fetches the row to update.
    ca = package.contentartifact_set.filter(relative_path__endswith=".metadata").first()

    if ca is None:
        # Create missing
        metadata_batch.append(("create", package, main_artifact, None, domain))
    elif new_metadata_sha256 != package.metadata_sha256:
        # Fix existing
        metadata_artifact = ca.artifact
        if metadata_artifact is None or metadata_artifact.sha256 != new_metadata_sha256:
            metadata_batch.append(("update", package, main_artifact, ca, domain))

    # ">=" rather than "==" so that a batch handed in already over-full still flushes.
    if len(metadata_batch) < BULK_SIZE:
        return 0

    # Count against the real batch length (not BULK_SIZE) so the tally stays correct
    # whatever the batch size was when the flush triggered.
    queued = len(metadata_batch)
    not_repaired = _process_metadata_batch(metadata_batch)
    pkgs_metadata_not_repaired.update(not_repaired)
    metadata_batch.clear()

    return queued - len(not_repaired)
286+
287+
def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
    """
    Processes a batch of metadata repair operations.

    For every entry a metadata artifact is generated from the main artifact, assigned
    to the entry's domain and saved. A "create" entry then gets a new ContentArtifact
    whose relative path is "<package filename>.metadata"; an "update" entry has its
    existing ContentArtifact re-pointed at the new artifact. ContentArtifact rows are
    written in bulk after the loop.

    Args:
        metadata_batch: List of (action, package, main_artifact, content_artifact, domain) tuples.
            ``action`` is "create" or "update"; ``content_artifact`` is only used for "update".

    Returns:
        Set of package PKs for which the metadata artifact could not be repaired, either
        because no metadata artifact could be generated or because the action was unknown.
    """
    not_repaired = set()
    content_artifacts_to_create = []
    content_artifacts_to_update = []

    for action, package, main_artifact, content_artifact, domain in metadata_batch:
        # Reject unknown actions before any artifact is generated and saved; otherwise
        # an unlinked artifact would be stored and the entry counted as repaired.
        if action not in ("create", "update"):
            not_repaired.add(package.pk)
            continue

        metadata_artifact = artifact_to_metadata_artifact(package.filename, main_artifact)
        if not metadata_artifact:
            not_repaired.add(package.pk)
            continue

        metadata_artifact.pulp_domain = domain
        metadata_artifact.save()

        if action == "create":
            content_artifacts_to_create.append(
                ContentArtifact(
                    artifact=metadata_artifact,
                    content=package,
                    relative_path=f"{package.filename}.metadata",
                )
            )
        else:
            content_artifact.artifact = metadata_artifact
            content_artifacts_to_update.append(content_artifact)

    if content_artifacts_to_create:
        ContentArtifact.objects.bulk_create(content_artifacts_to_create)
    if content_artifacts_to_update:
        ContentArtifact.objects.bulk_update(content_artifacts_to_update, ["artifact"])

    return not_repaired
0 commit comments