Skip to content

Commit 9968c46

Browse files
committed
feat: introduce confidence scores for check facts
Signed-off-by: behnazh-w <[email protected]>
1 parent 064ce8f commit 9968c46

27 files changed

+527
-432
lines changed

src/macaron/database/table_definitions.py

Lines changed: 45 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""
@@ -10,7 +10,6 @@
1010
1111
For table associated with a check see the check module.
1212
"""
13-
import hashlib
1413
import logging
1514
import os
1615
import string
@@ -19,14 +18,23 @@
1918
from typing import Any, Self
2019

2120
from packageurl import PackageURL
22-
from sqlalchemy import Boolean, Column, Enum, ForeignKey, Integer, String, Table, UniqueConstraint
21+
from sqlalchemy import (
22+
Boolean,
23+
CheckConstraint,
24+
Column,
25+
Enum,
26+
Float,
27+
ForeignKey,
28+
Integer,
29+
String,
30+
Table,
31+
UniqueConstraint,
32+
)
2333
from sqlalchemy.orm import Mapped, mapped_column, relationship
2434

2535
from macaron.database.database_manager import ORMBase
2636
from macaron.database.rfc3339_datetime import RFC3339DateTime
27-
from macaron.errors import CUEExpectationError, CUERuntimeError, InvalidPURLError
28-
from macaron.slsa_analyzer.provenance.expectations.cue import cue_validator
29-
from macaron.slsa_analyzer.provenance.expectations.expectation import Expectation
37+
from macaron.errors import InvalidPURLError
3038
from macaron.slsa_analyzer.slsa_req import ReqName
3139

3240
logger: logging.Logger = logging.getLogger(__name__)
@@ -415,6 +423,16 @@ class CheckFacts(ORMBase):
415423
#: The primary key.
416424
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
417425

426+
#: The confidence score to estimate the accuracy of the check fact. This value should be in [0.0, 1.0] with
427+
#: a lower value indicating a lower confidence. Because some analyses used in checks may use
428+
#: heuristics, the results can be inaccurate in certain cases.
429+
#: We use the confidence score to enable the check designer to assign a confidence estimate.
430+
#: This confidence is stored in the database to be used by the policy. This confidence score is
431+
#: also used to decide which evidence should be shown to the user in the HTML/JSON report.
432+
confidence: Mapped[float] = mapped_column(
433+
Float, CheckConstraint("confidence>=0.0 AND confidence<=1.0"), nullable=False
434+
)
435+
418436
#: The foreign key to the software component.
419437
component_id: Mapped[int] = mapped_column(Integer, ForeignKey("_component.id"), nullable=False)
420438

@@ -430,68 +448,36 @@ class CheckFacts(ORMBase):
430448
#: A many-to-one relationship with check results.
431449
checkresult: Mapped["MappedCheckResult"] = relationship(back_populates="checkfacts")
432450

433-
#: The polymorphic inheritance configuration.
434-
__mapper_args__ = {
435-
"polymorphic_identity": "CheckFacts",
436-
"polymorphic_on": "check_type",
437-
}
438-
451+
def __lt__(self, other: Self) -> bool:
452+
"""Compare two check facts using their confidence values.
439453
440-
class CUEExpectation(Expectation, CheckFacts):
441-
"""ORM Class for an expectation."""
454+
This comparison function is intended to be used by a heapq, which is a Min-Heap data structure.
455+
The root element in a heapq is the minimum element in the queue and each `confidence` value is in [0, 1].
456+
Therefore, we need to reverse the comparison function to make sure the fact with the highest confidence is stored
457+
in the root element. This implementation compares `1 - confidence` to return True if the confidence of
458+
`self` is greater than the confidence of `other`.
442459
443-
# TODO: provenance content check should store the expectation, its evaluation result,
444-
# and which PROVENANCE it was applied to rather than only linking to the repository.
460+
.. code-block:: pycon
445461
446-
__tablename__ = "_expectation"
462+
>>> fact_a = CheckFacts()
463+
>>> fact_b = CheckFacts()
464+
>>> fact_a.confidence = 0.2
465+
>>> fact_b.confidence = 0.7
466+
>>> fact_b < fact_a
467+
True
447468
448-
#: The primary key, which is also a foreign key to the base check table.
449-
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003
469+
Returns
470+
-------
471+
bool
472+
"""
473+
return (1 - self.confidence) < (1 - other.confidence)
450474

451475
#: The polymorphic inheritance configuration.
452476
__mapper_args__ = {
453-
"polymorphic_identity": "_expectation",
477+
"polymorphic_identity": "CheckFacts",
478+
"polymorphic_on": "check_type",
454479
}
455480

456-
@classmethod
457-
def make_expectation(cls, expectation_path: str) -> Self | None:
458-
"""Construct a CUE expectation from a CUE file.
459-
460-
Note: we require the CUE expectation file to have a "target" field.
461-
462-
Parameters
463-
----------
464-
expectation_path: str
465-
The path to the expectation file.
466-
467-
Returns
468-
-------
469-
Self
470-
The instantiated expectation object.
471-
"""
472-
logger.info("Generating an expectation from file %s", expectation_path)
473-
expectation: CUEExpectation = CUEExpectation(
474-
description="CUE expectation",
475-
path=expectation_path,
476-
target="",
477-
expectation_type="CUE",
478-
)
479-
480-
try:
481-
with open(expectation_path, encoding="utf-8") as expectation_file:
482-
expectation.text = expectation_file.read()
483-
expectation.sha = str(hashlib.sha256(expectation.text.encode("utf-8")).hexdigest())
484-
expectation.target = cue_validator.get_target(expectation.text)
485-
expectation._validator = ( # pylint: disable=protected-access
486-
lambda provenance: cue_validator.validate_expectation(expectation.text, provenance)
487-
)
488-
except (OSError, CUERuntimeError, CUEExpectationError) as error:
489-
logger.error("CUE expectation error: %s", error)
490-
return None
491-
492-
# TODO remove type ignore once mypy adds support for Self.
493-
return expectation # type: ignore
494-
495481

496482
class Provenance(ORMBase):
497483
"""ORM class for a provenance document."""

src/macaron/slsa_analyzer/analyze_context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module contains the Analyze Context class.
@@ -38,8 +38,6 @@ class ChecksOutputs(TypedDict):
3838
"""The CI services information for this repository."""
3939
is_inferred_prov: bool
4040
"""True if we cannot find the provenance and Macaron needs to infer the provenance."""
41-
# We need to use typing.Protocol for multiple inheritance, however, the Expectation
42-
# class uses inlined functions, which is not supported by Protocol.
4341
expectation: Expectation | None
4442
"""The expectation to verify the provenance for this repository."""
4543
package_registries: list[PackageRegistryInfo]
@@ -109,7 +107,9 @@ def provenances(self) -> dict[str, list[InTotoV01Statement | InTotoV1Statement]]
109107
# By default, initialize every key with an empty list.
110108
result: dict[str, list[InTotoV01Statement | InTotoV1Statement]] = defaultdict(list)
111109
for ci_info in ci_services:
112-
result[ci_info["service"].name].extend(payload.statement for payload in ci_info["provenances"])
110+
result[ci_info["service"].name].extend(
111+
prov_asset.payload.statement for prov_asset in ci_info["provenances"]
112+
)
113113
package_registry_entries = self.dynamic_data["package_registries"]
114114
for package_registry_entry in package_registry_entries:
115115
result[package_registry_entry.package_registry.name].extend(

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from macaron.repo_finder.commit_finder import find_commit
2929
from macaron.slsa_analyzer import git_url
3030
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
31+
from macaron.slsa_analyzer.asset import VirtualReleaseAsset
3132
from macaron.slsa_analyzer.build_tool import BUILD_TOOLS
3233

3334
# To load all checks into the registry
@@ -40,6 +41,7 @@
4041
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES
4142
from macaron.slsa_analyzer.provenance.expectations.expectation_registry import ExpectationRegistry
4243
from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload
44+
from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData
4345
from macaron.slsa_analyzer.registry import registry
4446
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
4547
from macaron.slsa_analyzer.specs.inferred_provenance import Provenance
@@ -857,7 +859,12 @@ def perform_checks(self, analyze_ctx: AnalyzeContext) -> dict[str, CheckResult]:
857859
callgraph=callgraph,
858860
provenance_assets=[],
859861
latest_release={},
860-
provenances=[InTotoV01Payload(statement=Provenance().payload)],
862+
provenances=[
863+
SLSAProvenanceData(
864+
payload=InTotoV01Payload(statement=Provenance().payload),
865+
asset=VirtualReleaseAsset(name="No_ASSET", url="NO_URL", size_in_bytes=0),
866+
)
867+
],
861868
)
862869
)
863870

src/macaron/slsa_analyzer/asset/__init__.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module defines classes and interfaces related to assets.
55
66
Assets are files published from some build.
77
"""
88

9-
from typing import Protocol
9+
from typing import NamedTuple, Protocol
1010

1111

1212
class AssetLocator(Protocol):
@@ -38,3 +38,30 @@ def download(self, dest: str) -> bool:
3838
bool
3939
``True`` if the asset is downloaded successfully; ``False`` if not.
4040
"""
41+
42+
43+
class VirtualReleaseAsset(NamedTuple):
44+
"""A dummy asset used when an asset doesn't actually exist."""
45+
46+
#: The asset name.
47+
name: str
48+
#: The URL to the asset.
49+
url: str
50+
#: The size of the asset, in bytes.
51+
size_in_bytes: int
52+
53+
def download(self, dest: str) -> bool: # pylint: disable=unused-argument
54+
"""Download the asset.
55+
56+
Parameters
57+
----------
58+
dest : str
59+
The local destination where the asset is downloaded to.
60+
Note that this must include the file name.
61+
62+
Returns
63+
-------
64+
bool
65+
``True`` if the asset is downloaded successfully; ``False`` if not.
66+
"""
67+
return False

src/macaron/slsa_analyzer/checks/base_check.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module contains the BaseCheck class to be inherited by other concrete Checks."""
@@ -99,9 +99,7 @@ def run(self, target: AnalyzeContext, skipped_info: SkippedInfo | None = None) -
9999
check_result_data: CheckResultData
100100

101101
if skipped_info:
102-
check_result_data = CheckResultData(
103-
justification=[skipped_info["suppress_comment"]], result_tables=[], result_type=self.result_on_skip
104-
)
102+
check_result_data = CheckResultData(result_tables=[], result_type=self.result_on_skip)
105103
logger.info(
106104
"Check %s is skipped on target %s, comment: %s",
107105
self.check_info.check_id,
@@ -115,14 +113,14 @@ def run(self, target: AnalyzeContext, skipped_info: SkippedInfo | None = None) -
115113
self.check_info.check_id,
116114
check_result_data.result_type.value,
117115
target.component.purl,
118-
check_result_data.justification,
116+
check_result_data.justification_report,
119117
)
120118

119+
# This justification string will be stored in the feedback column of `SLSARequirement` table.
120+
# TODO: Storing the justification as feedback in the `SLSARequirement` table seems redundant and might need
121+
# refactoring.
121122
justification_str = ""
122-
for ele in check_result_data.justification:
123-
if isinstance(ele, dict):
124-
for key, val in ele.items():
125-
justification_str += f"{key}: {val}. "
123+
for _, ele in check_result_data.justification_report:
126124
justification_str += f"{str(ele)}. "
127125

128126
target.bulk_update_req_status(

0 commit comments

Comments
 (0)