From baaff5f8fc2cfb9b3a43ee09c53303a27b41f8c6 Mon Sep 17 00:00:00 2001
From: Behnaz Hassanshahi <behnaz.hassanshahi@oracle.com>
Date: Fri, 22 Nov 2024 15:27:40 +1000
Subject: [PATCH] test(integration): handle missing steps info in GitHub API
 response (#923)

For some reason, the GitHub API no longer returns the steps information of the job that published pkg:maven/io.micronaut.test/micronaut-test-junit5@4.5.0, even though the artifact was published in Aug 2024, which is well within the 400-day retention policy. This PR raises a new exception to handle this case and updates the corresponding integration test to expect the check to fail.

Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
---
 .../checks/infer_artifact_pipeline_check.py   | 29 +++++++++++--------
 .../github_actions/github_actions_ci.py       | 14 ++++++++-
 .../micronaut-test.dl                         | 18 ++++++++++--
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/src/macaron/slsa_analyzer/checks/infer_artifact_pipeline_check.py b/src/macaron/slsa_analyzer/checks/infer_artifact_pipeline_check.py
index 594c5c467..8902d6ef2 100644
--- a/src/macaron/slsa_analyzer/checks/infer_artifact_pipeline_check.py
+++ b/src/macaron/slsa_analyzer/checks/infer_artifact_pipeline_check.py
@@ -12,7 +12,7 @@
 
 from macaron.config.defaults import defaults
 from macaron.database.table_definitions import CheckFacts
-from macaron.errors import InvalidHTTPResponseError, ProvenanceError
+from macaron.errors import GitHubActionsValueError, InvalidHTTPResponseError, ProvenanceError
 from macaron.json_tools import json_extract
 from macaron.repo_finder.provenance_extractor import ProvenancePredicate
 from macaron.slsa_analyzer.analyze_context import AnalyzeContext
@@ -219,17 +219,22 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
                 return CheckResultData(result_tables=[], result_type=CheckResultType.FAILED)
 
             # Find the workflow runs that have potentially triggered the artifact publishing.
-            html_urls = ci_service.workflow_run_in_date_time_range(
-                repo_full_name=ctx.component.repository.full_name,
-                workflow=build_entry_point,
-                publish_date_time=artifact_published_date,
-                commit_date_time=commit_date,
-                job_id=job_id,
-                step_name=step_name,
-                step_id=step_id,
-                time_range=publish_time_range,
-                callee_node_type=callee_node_type,
-            )
+            html_urls = set()
+            try:
+                html_urls = ci_service.workflow_run_in_date_time_range(
+                    repo_full_name=ctx.component.repository.full_name,
+                    workflow=build_entry_point,
+                    publish_date_time=artifact_published_date,
+                    commit_date_time=commit_date,
+                    job_id=job_id,
+                    step_name=step_name,
+                    step_id=step_id,
+                    time_range=publish_time_range,
+                    callee_node_type=callee_node_type,
+                )
+            except GitHubActionsValueError as error:
+                logger.debug(error)
+                ci_run_deleted = True
 
             # If provenance exists, we expect the timestamp of the reported triggered run
             # to be within an acceptable range, have succeeded, and called the deploy command.
diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py
index d3f820ade..43c4e3f0e 100644
--- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py
+++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py
@@ -13,7 +13,7 @@
 from macaron.code_analyzer.call_graph import BaseNode, CallGraph
 from macaron.config.defaults import defaults
 from macaron.config.global_config import global_config
-from macaron.errors import CallGraphError, ParseError
+from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError
 from macaron.parsers.bashparser import BashNode, BashScriptType
 from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, BuildToolCommand
 from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService
@@ -333,6 +333,11 @@ def workflow_run_in_date_time_range(
         -------
         set[str]
             The set of URLs found for the workflow within the time range.
+
+        Raises
+        ------
+        GitHubActionsValueError
+            Raised when the GitHub Actions workflow run is missing expected values, such as jobs or steps information.
         """
         logger.debug(
             "Getting the latest workflow run of %s at publishing time %s and source commit date %s within time range %s.",
@@ -377,6 +382,8 @@ def workflow_run_in_date_time_range(
 
                     # Find the matching step and check its `conclusion` and `started_at` attributes.
                     html_url = None
+                    if not run_jobs["jobs"]:
+                        raise GitHubActionsValueError("GitHub Actions workflow run misses jobs information.")
                     for job in run_jobs["jobs"]:
                         # If the deploy step is a Reusable Workflow, there won't be any steps in the caller job.
                         if callee_node_type == GitHubWorkflowType.REUSABLE.value:
@@ -393,6 +400,11 @@ def workflow_run_in_date_time_range(
                                 html_url = item["html_url"]
                                 break
 
+                        if not job["steps"]:
+                            raise GitHubActionsValueError(
+                                f"GitHub Actions workflow run misses steps information for the {job_id} job ID."
+                            )
+
                         for step in job["steps"]:
                             if step["name"] not in [step_name, step_id] or step["conclusion"] != "success":
                                 continue
diff --git a/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl b/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl
index 2e6da73d8..048942d06 100644
--- a/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl
+++ b/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl
@@ -14,11 +14,25 @@ Policy("test_policy", component_id, "") :-
     build_tool_check(gradle_id, "gradle", "java"),
     check_facts(gradle_id, _, component_id,_,_),
     check_passed(component_id, "mcn_provenance_level_three_1"),
-    check_passed(component_id, "mcn_find_artifact_pipeline_1"),
     check_failed(component_id, "mcn_provenance_derived_commit_1"),
     check_failed(component_id, "mcn_provenance_witness_level_one_1"),
     check_failed(component_id, "mcn_trusted_builder_level_three_1"),
-    is_repo_url(component_id, "https://github.com/micronaut-projects/micronaut-test").
+    is_repo_url(component_id, "https://github.com/micronaut-projects/micronaut-test"),
+    // For some reason, the GitHub API no longer returns the steps information.
+    // Note that mcn_find_artifact_pipeline_1 fails because it returns UNKNOWN, in this case with low confidence.
+    check_failed_with_confidence(component_id, "mcn_find_artifact_pipeline_1", confidence),
+    confidence = 0.4,
+    artifact_pipeline_check(
+        apc_check_id,
+        "https://github.com/micronaut-projects/micronaut-test/blob/0ffa4e86ee4311f744f1a2b8ccd740a15af3a52b/.github/workflows/release.yml",
+        "release",
+        "publish",
+        _,
+        1,  // From provenance.
+        1,  // Run deleted.
+        0   // Published before the code was committed.
+    ),
+    check_facts(apc_check_id, confidence, component_id,_,_).
 
 apply_policy_to("test_policy", component_id) :-
     is_component(component_id, purl),