diff --git a/src/macaron/artifact/maven.py b/src/macaron/artifact/maven.py index 6130aed83..711758c8c 100644 --- a/src/macaron/artifact/maven.py +++ b/src/macaron/artifact/maven.py @@ -2,7 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module declares types and utilities for Maven artifacts.""" -import re + from collections.abc import Sequence from packageurl import PackageURL @@ -140,21 +140,3 @@ def create_maven_purl_from_artifact_filename( ) return None - - -def is_valid_maven_group_id(group_id: str) -> bool: - """Check if the provided string is a valid maven group id. - - Parameters - ---------- - group_id : str - The group id to check. - - Returns - ------- - bool - True if the group id is valid, False otherwise - """ - # Should match strings like org.example.foo, org.example-2.foo.bar_1. - pattern = r"^[a-zA-Z][a-zA-Z0-9-]*\.([a-zA-Z][a-zA-Z0-9-]*\.)*[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$" - return re.match(pattern, group_id) is not None diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py index 468bf472e..7f2266051 100644 --- a/src/macaron/repo_finder/repo_finder_deps_dev.py +++ b/src/macaron/repo_finder/repo_finder_deps_dev.py @@ -5,7 +5,6 @@ import json import logging from enum import StrEnum -from typing import Any from urllib.parse import quote as encode from packageurl import PackageURL @@ -13,7 +12,6 @@ from macaron.json_tools import json_extract from macaron.repo_finder.repo_finder_base import BaseRepoFinder from macaron.repo_finder.repo_validator import find_valid_repository_url -from macaron.slsa_analyzer.git_url import clean_url from macaron.util import send_get_http_raw logger: logging.Logger = logging.getLogger(__name__) @@ -73,41 +71,6 @@ def find_repo(self, purl: PackageURL) -> str: return "" - @staticmethod - def get_project_info(project_url: str) -> dict[str, Any] | None: - """Retrieve project information from deps.dev. - - Parameters - ---------- - project_url : str - The URL of the project. - - Returns - ------- - dict[str, Any] | None - The project information or None if the information could not be retrieved. - """ - clean_repo_url = clean_url(project_url) - if clean_repo_url is None or clean_repo_url.hostname is None: - logger.debug("Invalid project url format: %s", project_url) - return None - - project_key = clean_repo_url.hostname + clean_repo_url.path - - request_url = f"https://api.deps.dev/v3alpha/projects/{encode(project_key, safe='')}" - response = send_get_http_raw(request_url) - if not (response and response.text): - logger.debug("Failed to retrieve additional repo info for: %s", project_url) - return None - - try: - response_json: dict = json.loads(response.text) - except ValueError as error: - logger.debug("Failed to parse response from deps.dev: %s", error) - return None - - return response_json - def _create_urls(self, purl: PackageURL) -> list[str]: """ Create the urls to search for the metadata relating to the passed artifact. diff --git a/src/macaron/repo_verifier/__init__.py b/src/macaron/repo_verifier/__init__.py deleted file mode 100644 index 727c3c37d..000000000 --- a/src/macaron/repo_verifier/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This package contains classes for repository verification.""" diff --git a/src/macaron/repo_verifier/repo_verifier.py b/src/macaron/repo_verifier/repo_verifier.py deleted file mode 100644 index 534bae57d..000000000 --- a/src/macaron/repo_verifier/repo_verifier.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains code to verify whether a reported repository can be linked back to the artifact.""" -import logging - -from macaron.repo_verifier.repo_verifier_base import ( - RepositoryVerificationResult, - RepositoryVerificationStatus, - RepoVerifierBase, -) -from macaron.repo_verifier.repo_verifier_gradle import RepoVerifierGradle -from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven -from macaron.slsa_analyzer.build_tool import BaseBuildTool, Gradle, Maven - -logger = logging.getLogger(__name__) - - -def verify_repo( - namespace: str | None, - name: str, - version: str, - reported_repo_url: str, - reported_repo_fs: str, - build_tool: BaseBuildTool, -) -> RepositoryVerificationResult: - """Verify whether the repository links back to the artifact. - - Parameters - ---------- - namespace : str | None - The namespace of the artifact. - name : str - The name of the artifact. - version : str - The version of the artifact. - reported_repo_url : str - The reported repository URL. - reported_repo_fs : str - The reported repository filesystem path. - build_tool : BaseBuildTool - The build tool used to build the package. - - Returns - ------- - RepositoryVerificationResult - The result of the repository verification - """ - # TODO: Add support for other build tools. - verifier_map: dict[type[BaseBuildTool], type[RepoVerifierBase]] = { - Maven: RepoVerifierMaven, - Gradle: RepoVerifierGradle, - # Poetry(): RepoVerifierPoetry, - # Pip(): RepoVerifierPip, - # Docker(): RepoVerifierDocker, - # NPM(): RepoVerifierNPM, - # Yarn(): RepoVerifierYarn, - # Go(): RepoVerifierGo, - } - - verifier_cls = verifier_map.get(type(build_tool)) - if not verifier_cls: - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="unsupported_type", build_tool=build_tool - ) - - verifier = verifier_cls( - namespace=namespace, - name=name, - version=version, - reported_repo_url=reported_repo_url, - reported_repo_fs=reported_repo_fs, - ) - - return verifier.verify_repo() diff --git a/src/macaron/repo_verifier/repo_verifier_base.py b/src/macaron/repo_verifier/repo_verifier_base.py deleted file mode 100644 index 1fee6a31c..000000000 --- a/src/macaron/repo_verifier/repo_verifier_base.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains the base class and core data models for repository verification.""" -import abc -import logging -import os -from collections import deque -from dataclasses import dataclass -from enum import Enum -from pathlib import Path - -from macaron.slsa_analyzer.build_tool import BaseBuildTool - -logger = logging.getLogger(__name__) - - -def find_file_in_repo(root_dir: Path, filename: str) -> Path | None: - """Find the highest level file with a given name in a local repository. - - This function ignores certain paths that are not under the main source code directories. - - Parameters - ---------- - root_dir : Path - The root directory of the repository. - filename : str - The name of the file to search for. - - Returns - ------- - Path | None - The path to the file if it exists, otherwise - """ - # TODO: Consider using BaseBuildTool.get_build_dirs. - # + Refactor 'get_build_dirs' to skip certain directories - # that are most likely not part of the main codebase (e.g., sample). - # + Need to find a way to look for other - # files (e.g., gradle.properties) for the purpose of repo verification - # without breaking the current logic of finding build directories. - # + Add the capability to return the content/path of the file. - if not os.path.isdir(root_dir): - return None - - queue: deque[Path] = deque() - queue.append(Path(root_dir)) - while queue: - current_dir = queue.popleft() - - # Don't look through non-main directories. - if any( - keyword in current_dir.name.lower() - for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"] - ): - continue - - if Path(current_dir, filename).exists(): - return Path(current_dir, filename) - - # Ignore symlinks to prevent potential infinite loop. - sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()] - queue.extend(sub_dirs) - - return None - - -class RepositoryVerificationStatus(str, Enum): - """A class to store the status of the repo verification.""" - - #: We found evidence to prove that the repository can be linked back to the publisher of the artifact. - PASSED = "passed" - - #: We found evidence showing that the repository is not the publisher of the artifact. - FAILED = "failed" - - #: We could not find any evidence to prove or disprove that the repository can be linked back to the artifact. - UNKNOWN = "unknown" - - -@dataclass(frozen=True) -class RepositoryVerificationResult: - """A class to store the information about repository verification.""" - - #: The status of the repository verification. - status: RepositoryVerificationStatus - - #: The reason for the verification result. - reason: str - - #: The build tool used to build the package. - build_tool: BaseBuildTool - - -class RepoVerifierBase(abc.ABC): - """The base class to verify whether a reported repository links back to the artifact.""" - - @property - @abc.abstractmethod - def build_tool(self) -> BaseBuildTool: - """Define the build tool used to build the package.""" - - def __init__( - self, - namespace: str | None, - name: str, - version: str, - reported_repo_url: str, - reported_repo_fs: str, - ): - """Instantiate the class. - - Parameters - ---------- - namespace : str - The namespace of the artifact. - name : str - The name of the artifact. - version : str - The version of the artifact. - reported_repo_url : str - The URL of the repository reported by the publisher. - reported_repo_fs : str - The file system path of the reported repository. - """ - self.namespace = namespace - self.name = name - self.version = version - self.reported_repo_url = reported_repo_url - self.reported_repo_fs = reported_repo_fs - - @abc.abstractmethod - def verify_repo(self) -> RepositoryVerificationResult: - """Verify whether the repository links back to the artifact. - - Returns - ------- - RepositoryVerificationResult - The result of the repository verification - """ diff --git a/src/macaron/repo_verifier/repo_verifier_gradle.py b/src/macaron/repo_verifier/repo_verifier_gradle.py deleted file mode 100644 index b31601ebf..000000000 --- a/src/macaron/repo_verifier/repo_verifier_gradle.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains code to verify whether a repository with Gradle build system can be linked back to the artifact.""" -import logging -from pathlib import Path - -from macaron.artifact.maven import is_valid_maven_group_id -from macaron.repo_verifier.repo_verifier_base import ( - RepositoryVerificationResult, - RepositoryVerificationStatus, - RepoVerifierBase, - find_file_in_repo, -) -from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven -from macaron.slsa_analyzer.build_tool import Gradle -from macaron.slsa_analyzer.package_registry.maven_central_registry import same_organization - -logger = logging.getLogger(__name__) - - -class RepoVerifierGradle(RepoVerifierBase): - """A class to verify whether a repository with Gradle build tool links back to the artifact.""" - - build_tool = Gradle() - - def __init__( - self, - namespace: str, - name: str, - version: str, - reported_repo_url: str, - reported_repo_fs: str, - ): - """Initialize a RepoVerifierGradle instance. - - Parameters - ---------- - namespace : str - The namespace of the artifact. - name : str - The name of the artifact. - version : str - The version of the artifact. - reported_repo_url : str - The URL of the repository reported by the publisher. - reported_repo_fs : str - The file system path of the reported repository. - """ - super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs) - - self.maven_verifier = RepoVerifierMaven( - namespace=namespace, - name=name, - version=version, - reported_repo_url=reported_repo_url, - reported_repo_fs=reported_repo_fs, - ) - - def verify_repo(self) -> RepositoryVerificationResult: - """Verify whether the reported repository links back to the artifact. - - Returns - ------- - RepositoryVerificationResult - The result of the repository verification - """ - if not self.namespace: - logger.debug("No namespace provided for Gradle verification.") - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_namespace", build_tool=self.build_tool - ) - - recognized_services_verification_result = ( - self.maven_verifier.verify_domains_from_recognized_code_hosting_services() - ) - if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED: - return recognized_services_verification_result - - gradle_group_id = self._extract_group_id_from_properties() - if not gradle_group_id: - gradle_group_id = self._extract_group_id_from_build_groovy() - if not gradle_group_id: - gradle_group_id = self._extract_group_id_from_build_kotlin() - if not gradle_group_id: - logger.debug("Could not find group from gradle manifests for %s", self.reported_repo_url) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, - reason="no_group_in_gradle_manifest", - build_tool=self.build_tool, - ) - - if not same_organization(gradle_group_id, self.namespace): - logger.debug("Group in gradle manifest does not match the provided group id: %s", self.reported_repo_url) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool - ) - - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool - ) - - def _extract_group_id_from_gradle_manifest( - self, file_path: Path | None, quote_chars: set[str] | None = None, delimiter: str = "=" - ) -> str | None: - """Extract the group id from a gradle build or config file. - - Parameters - ---------- - file_path : Path | None - The path to the file. - quote_chars : set[str] | None - The characters used to quote the group id. - delimiter : str - The delimiter used in the file. - - Returns - ------- - str | None - The extracted group id. None if not found. - """ - if not file_path: - logger.debug("Could not find the file %s in the repository: %s", file_path, self.reported_repo_url) - return None - - file_content = file_path.read_text().splitlines() - for line in file_content: - line_parts = list(filter(None, map(str.strip, line.strip().lower().split(delimiter)))) - if len(line_parts) != 2: - continue - - if line_parts[0] != "group": - continue - - group_id = line_parts[1] - - # Check if the value for group_id is a string literal. - if quote_chars: - if group_id[0] not in quote_chars or group_id[-1] not in quote_chars or group_id[0] != group_id[-1]: - continue - group_id = group_id[1:-1] - - if is_valid_maven_group_id(group_id): - return group_id - - return None - - def _extract_group_id_from_properties(self) -> str | None: - """Extract the group id from the gradle.properties file.""" - gradle_properties = find_file_in_repo(Path(self.reported_repo_fs), "gradle.properties") - return self._extract_group_id_from_gradle_manifest(gradle_properties) - - def _extract_group_id_from_build_groovy(self) -> str | None: - """Extract the group id from the build.gradle file.""" - build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle") - return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter=" ") - - def _extract_group_id_from_build_kotlin(self) -> str | None: - """Extract the group id from the build.gradle.kts file.""" - build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle.kts") - return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={'"'}, delimiter="=") diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py deleted file mode 100644 index 22c9e42b3..000000000 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains code to verify whether a reported repository with Maven build system can be linked back to the artifact.""" -import logging -from pathlib import Path -from urllib.parse import urlparse - -from macaron.parsers.pomparser import parse_pom_string -from macaron.repo_verifier.repo_verifier_base import ( - RepositoryVerificationResult, - RepositoryVerificationStatus, - RepoVerifierBase, - find_file_in_repo, -) -from macaron.slsa_analyzer.build_tool import Maven -from macaron.slsa_analyzer.package_registry.maven_central_registry import ( - RECOGNIZED_CODE_HOSTING_SERVICES, - same_organization, -) - -logger = logging.getLogger(__name__) - - -class RepoVerifierMaven(RepoVerifierBase): - """A class to verify whether a repository with Maven build tool links back to the artifact.""" - - build_tool = Maven() - - def verify_repo(self) -> RepositoryVerificationResult: - """Verify whether the reported repository links back to the Maven artifact. - - Returns - ------- - RepositoryVerificationResult - The result of the repository verification - """ - if not self.namespace: - logger.debug("No namespace provided for Maven verification.") - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_namespace", build_tool=self.build_tool - ) - - recognized_services_verification_result = self.verify_domains_from_recognized_code_hosting_services() - if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED: - return recognized_services_verification_result - - # TODO: check other pom files. Think about how to decide in case of contradicting evidence. - # Check if repo contains pom.xml. - pom_file = find_file_in_repo(Path(self.reported_repo_fs), "pom.xml") - if not pom_file: - logger.debug("Could not find any pom.xml in the repository: %s", self.reported_repo_url) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_pom", build_tool=self.build_tool - ) - - pom_content = pom_file.read_text(encoding="utf-8") - pom_root = parse_pom_string(pom_content) - - if not pom_root: - logger.debug("Could not parse pom.xml: %s", pom_file.as_posix()) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="not_parsed_pom", build_tool=self.build_tool - ) - - # Find the group id in the pom (project/groupId). - # The closing curly brace represents the end of the XML namespace. - pom_group_id_elem = next((ch for ch in pom_root if ch.tag.endswith("}groupId")), None) - if pom_group_id_elem is None or not pom_group_id_elem.text: - logger.debug("Could not find groupId in pom.xml: %s", pom_file) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool - ) - - pom_group_id = pom_group_id_elem.text.strip() - if not same_organization(pom_group_id, self.namespace): - logger.debug("Group id in pom.xml does not match the provided group id: %s", pom_file) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool - ) - - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool - ) - - def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVerificationResult: - """Verify repository link by comparing the maven domain name and the account on code hosting services. - - This verification relies on the fact that Sonatype recognizes - certain code hosting platforms for namespace verification on maven central. - - Returns - ------- - RepositoryVerificationResult - The result of the repository verification - """ - if not self.namespace: - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="no_namespace", build_tool=self.build_tool - ) - - parsed_url = urlparse(self.reported_repo_url) - if parsed_url is None or not parsed_url.hostname: - logger.debug("Could not parse the claimed repository URL: %s", self.reported_repo_url) - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, reason="url_parse_error", build_tool=self.build_tool - ) - - reported_hostname = parsed_url.hostname.split(".")[0] - reported_account = parsed_url.path.strip("/").split("/")[0] - - group_parts = self.namespace.split(".") - for platform in RECOGNIZED_CODE_HOSTING_SERVICES: - # For artifacts from recognized code hosting services, check if the - # organization name is the same in maven and the source repository. - # For example, com.github.foo matches github.com/foo, - # but it doesn't match gitlab.com/foo or gitlab.com/bar. - if ( - group_parts[0].lower() in {"io", "com"} - and group_parts[1].lower() == platform.lower() # e.g., github - and group_parts[1].lower() == reported_hostname.lower() # e.g., github - and group_parts[2].lower() == reported_account.lower() # e.g., foo in github.com/foo - ): - return RepositoryVerificationResult( - status=RepositoryVerificationStatus.PASSED, reason="git_ns_match", build_tool=self.build_tool - ) - - return RepositoryVerificationResult( - # Not necessarily a fail, because many projects use maven group ids other than their repo domain. - status=RepositoryVerificationStatus.UNKNOWN, - reason="git_ns_mismatch", - build_tool=self.build_tool, - ) diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index 1f00df010..e54363f98 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -12,7 +12,6 @@ from typing import Any, TypedDict from macaron.database.table_definitions import Component, SLSALevel -from macaron.repo_verifier.repo_verifier import RepositoryVerificationResult from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService from macaron.slsa_analyzer.git_service import BaseGitService @@ -35,8 +34,6 @@ class ChecksOutputs(TypedDict): git_service: BaseGitService """The git service information for the target software component.""" - repo_verification: list[RepositoryVerificationResult] - """The repository verification info.""" build_spec: BuildSpec """The build spec inferred for the target software component.""" ci_services: list[CIInfo] @@ -100,7 +97,6 @@ def __init__( # This attribute should be accessed via the `dynamic_data` property. self._dynamic_data: ChecksOutputs = ChecksOutputs( git_service=NoneGitService(), - repo_verification=[], build_spec=BuildSpec(tools=[], purl_tools=[]), ci_services=[], package_registries=[], diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 5c2b29368..fa6730a36 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -42,7 +42,6 @@ extract_repo_and_commit_from_provenance, ) from macaron.repo_finder.provenance_finder import ProvenanceFinder, find_provenance_from_ci -from macaron.repo_verifier.repo_verifier import verify_repo from macaron.slsa_analyzer import git_url from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.asset import VirtualReleaseAsset @@ -449,8 +448,6 @@ def run_single( git_service = self._determine_git_service(analyze_ctx) self._determine_ci_services(analyze_ctx, git_service) self._determine_build_tools(analyze_ctx, git_service) - if parsed_purl is not None: - self._verify_repository_link(parsed_purl, analyze_ctx) self._determine_package_registries(analyze_ctx) if not provenance_payload: @@ -1137,33 +1134,6 @@ def _determine_package_registries(self, analyze_ctx: AnalyzeContext) -> None: ) ) - def _verify_repository_link(self, parsed_purl: PackageURL, analyze_ctx: AnalyzeContext) -> None: - """Verify whether the claimed repository links back to the artifact.""" - if not analyze_ctx.component.repository: - logger.debug("The repository is not available. Skipping the repository verification.") - return - - if parsed_purl.namespace is None or parsed_purl.version is None: - logger.debug("The PURL is not complete. Skipping the repository verification.") - return - - build_tools = ( - analyze_ctx.dynamic_data["build_spec"]["tools"] or analyze_ctx.dynamic_data["build_spec"]["purl_tools"] - ) - - analyze_ctx.dynamic_data["repo_verification"] = [] - - for build_tool in build_tools: - verification_result = verify_repo( - namespace=parsed_purl.namespace, - name=parsed_purl.name, - version=parsed_purl.version, - reported_repo_url=analyze_ctx.component.repository.remote_path, - reported_repo_fs=analyze_ctx.component.repository.fs_path, - build_tool=build_tool, - ) - analyze_ctx.dynamic_data["repo_verification"].append(verification_result) - class DuplicateCmpError(DuplicateError): """This class is used for duplicated software component errors.""" diff --git a/src/macaron/slsa_analyzer/checks/maven_repo_verification_check.py b/src/macaron/slsa_analyzer/checks/maven_repo_verification_check.py deleted file mode 100644 index 006ca3e69..000000000 --- a/src/macaron/slsa_analyzer/checks/maven_repo_verification_check.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""A check to determine whether the source repository of a maven package can be independently verified.""" - -import logging - -from packageurl import PackageURL -from sqlalchemy import ForeignKey, Integer, String -from sqlalchemy.orm import Mapped, mapped_column - -from macaron.database.table_definitions import CheckFacts -from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder -from macaron.repo_verifier.repo_verifier_base import RepositoryVerificationStatus -from macaron.slsa_analyzer.analyze_context import AnalyzeContext -from macaron.slsa_analyzer.checks.base_check import BaseCheck -from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence -from macaron.slsa_analyzer.registry import registry - -logger: logging.Logger = logging.getLogger(__name__) - - -class MavenRepoVerificationFacts(CheckFacts): - """The ORM mapping for justifications in maven source repo check.""" - - __tablename__ = "_maven_repo_verification_check" - - #: The primary key. - id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 - - group: Mapped[str] = mapped_column(String, nullable=False) - artifact: Mapped[str] = mapped_column(String, nullable=False) - version: Mapped[str] = mapped_column(String, nullable=False) - - # Repository link identified by Macaron's repo finder. - repo_link: Mapped[str] = mapped_column(String, nullable=True) - - # Repository link identified by deps.dev. - deps_dev_repo_link: Mapped[str | None] = mapped_column(String, nullable=True) - - # Number of stars on the repository identified by deps.dev. - deps_dev_stars_count: Mapped[int | None] = mapped_column(Integer, nullable=True) - - # Number of forks on the repository identified by deps.dev. - deps_dev_fork_count: Mapped[int | None] = mapped_column(Integer, nullable=True) - - # The status of the check: passed, failed, or unknown. - status: Mapped[str] = mapped_column(String, nullable=False) - - # The reason for the status. - reason: Mapped[str] = mapped_column(String, nullable=False) - - # The build tool used to build the package. - build_tool: Mapped[str] = mapped_column(String, nullable=False) - - __mapper_args__ = { - "polymorphic_identity": "_maven_repo_verification_check", - } - - -class MavenRepoVerificationCheck(BaseCheck): - """Check whether the claims of a source repository provenance made by a maven package can be independently verified.""" - - def __init__(self) -> None: - """Initialize a check instance.""" - check_id = "mcn_maven_repo_verification_1" - description = ( - "Check whether the claims of a source repository provenance" - " made by a maven package can be independently verified." - ) - - super().__init__( - check_id=check_id, - description=description, - ) - - def run_check(self, ctx: AnalyzeContext) -> CheckResultData: - """Implement the check in this method. - - Parameters - ---------- - ctx : AnalyzeContext - The object containing processed data for the target repo. - - Returns - ------- - CheckResultData - The result of the check. - """ - if ctx.component.type != "maven": - return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN) - - deps_dev_repo_finder = DepsDevRepoFinder() - deps_dev_repo_link = deps_dev_repo_finder.find_repo(PackageURL.from_string(ctx.component.purl)) - deps_dev_repo_info = deps_dev_repo_finder.get_project_info(deps_dev_repo_link) - - stars_count: int | None = None - fork_count: int | None = None - - if deps_dev_repo_info: - stars_count = deps_dev_repo_info.get("starsCount") - fork_count = deps_dev_repo_info.get("forksCount") - - result_type = CheckResultType.UNKNOWN - result_tables: list[CheckFacts] = [] - for verification_result in ctx.dynamic_data.get("repo_verification", []): - result_tables.append( - MavenRepoVerificationFacts( - group=ctx.component.namespace, - artifact=ctx.component.name, - version=ctx.component.version, - repo_link=ctx.component.repository.remote_path if ctx.component.repository else None, - reason=verification_result.reason, - status=verification_result.status.value, - build_tool=verification_result.build_tool.name, - confidence=Confidence.MEDIUM, - deps_dev_repo_link=deps_dev_repo_link, - deps_dev_stars_count=stars_count, - deps_dev_fork_count=fork_count, - ) - ) - - match (result_type, verification_result.status): - case (_, RepositoryVerificationStatus.PASSED): - result_type = CheckResultType.PASSED - case (CheckResultType.UNKNOWN, RepositoryVerificationStatus.FAILED): - result_type = CheckResultType.FAILED - - return CheckResultData(result_tables=result_tables, result_type=result_type) - - -registry.register(MavenRepoVerificationCheck()) diff --git a/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py b/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py index 67a2b100b..d9ef77d1a 100644 --- a/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/maven_central_registry.py @@ -19,56 +19,6 @@ logger: logging.Logger = logging.getLogger(__name__) -# These are the code hosting platforms that are recognized by Sonatype for namespace verification in maven central. -RECOGNIZED_CODE_HOSTING_SERVICES = [ - "github", - "gitlab", - "bitbucket", - "gitee", -] - - -def same_organization(group_id_1: str, group_id_2: str) -> bool: - """Check if two maven group ids are from the same organization. - - Note: It is assumed that for recognized source platforms, the top level domain doesn't change the organization. - I.e., io.github.foo and com.github.foo are assumed to be from the same organization. - - Parameters - ---------- - group_id_1 : str - The first group id. - group_id_2 : str - The second group id. - - Returns - ------- - bool - ``True`` if the two group ids are from the same organization, ``False`` otherwise. - """ - if group_id_1 == group_id_2: - return True - - group_id_1_parts = group_id_1.split(".") - group_id_2_parts = group_id_2.split(".") - if min(len(group_id_1_parts), len(group_id_2_parts)) < 2: - return False - - # For groups ids that are under recognized maven namespaces, we only compare the first 3 parts. - # For example, io.github.foo.bar and io.github.foo are from the same organization (foo). - # Also, io.github.foo and com.github.foo are from the same organization. - if ( - group_id_1_parts[0] in {"io", "com"} - and group_id_1_parts[1] in RECOGNIZED_CODE_HOSTING_SERVICES - and group_id_2_parts[0] in {"io", "com"} - and group_id_2_parts[1] in RECOGNIZED_CODE_HOSTING_SERVICES - ): - if len(group_id_1_parts) >= 3 and len(group_id_2_parts) >= 3: - return group_id_1_parts[2] == group_id_2_parts[2] - return False - - return all(group_id_1_parts[index] == group_id_2_parts[index] for index in range(2)) - class MavenCentralRegistry(PackageRegistry): """This class implements a Maven Central package registry.""" diff --git a/tests/integration/cases/maven_repo_verification/config.ini b/tests/integration/cases/maven_repo_verification/config.ini deleted file mode 100644 index 90d59ea35..000000000 --- a/tests/integration/cases/maven_repo_verification/config.ini +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -[analysis.checks] -exclude = -include = mcn_maven_repo_verification_1 diff --git a/tests/integration/cases/maven_repo_verification/policy_fail_1.dl b/tests/integration/cases/maven_repo_verification/policy_fail_1.dl deleted file mode 100644 index 0e36f0005..000000000 --- a/tests/integration/cases/maven_repo_verification/policy_fail_1.dl +++ /dev/null @@ -1,10 +0,0 @@ -/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ -/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ - -#include "prelude.dl" - -Policy("test_policy", component_id, "") :- - check_failed(component_id, "mcn_maven_repo_verification_1"). - -apply_policy_to("test_policy", component_id) :- - is_component(component_id, "pkg:maven/com.alibaba.ververica/flink-cep@1.17-vvr-8.0.8"). diff --git a/tests/integration/cases/maven_repo_verification/policy_pass_1.dl b/tests/integration/cases/maven_repo_verification/policy_pass_1.dl deleted file mode 100644 index d43fd1f0f..000000000 --- a/tests/integration/cases/maven_repo_verification/policy_pass_1.dl +++ /dev/null @@ -1,10 +0,0 @@ -/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ -/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ - -#include "prelude.dl" - -Policy("test_policy", component_id, "") :- - check_passed(component_id, "mcn_maven_repo_verification_1"). - -apply_policy_to("test_policy", component_id) :- - is_component(component_id, "pkg:maven/org.antlr/antlr4-maven-plugin@4.13.2"). diff --git a/tests/integration/cases/maven_repo_verification/policy_pass_2.dl b/tests/integration/cases/maven_repo_verification/policy_pass_2.dl deleted file mode 100644 index 49cd44d1e..000000000 --- a/tests/integration/cases/maven_repo_verification/policy_pass_2.dl +++ /dev/null @@ -1,10 +0,0 @@ -/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ -/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ - -#include "prelude.dl" - -Policy("test_policy", component_id, "") :- - check_passed(component_id, "mcn_maven_repo_verification_1"). - -apply_policy_to("test_policy", component_id) :- - is_component(component_id, "pkg:maven/org.neo4j/cypher-parser-common@5.21.2"). diff --git a/tests/integration/cases/maven_repo_verification/test.yaml b/tests/integration/cases/maven_repo_verification/test.yaml deleted file mode 100644 index 2d084c954..000000000 --- a/tests/integration/cases/maven_repo_verification/test.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -description: | - Integration tests for mcn_maven_repo_verification_1 check. - -tags: -- macaron-python-package -- macaron-docker-image - -steps: -- name: Run macaron analyze on passing case 1 - kind: analyze - options: - ini: config.ini - command_args: - - -purl - - pkg:maven/org.antlr/antlr4-maven-plugin@4.13.2 -- name: Verify that the check passsed - kind: verify - options: - policy: policy_pass_1.dl -- name: Run macaron analyze on passing case 2 - kind: analyze - options: - ini: config.ini - command_args: - - -purl - - pkg:maven/org.neo4j/cypher-parser-common@5.21.2 -- name: Verify that the check passsed - kind: verify - options: - policy: policy_pass_2.dl -- name: Run macaron analyze on failing case 1 - kind: analyze - options: - ini: config.ini - command_args: - - -purl - - pkg:maven/com.alibaba.ververica/flink-cep@1.17-vvr-8.0.8 -- name: Verify that the check passsed - kind: verify - options: - policy: policy_fail_1.dl diff --git a/tests/slsa_analyzer/checks/test_maven_repo_verification_check.py b/tests/slsa_analyzer/checks/test_maven_repo_verification_check.py deleted file mode 100644 index 9d12c751c..000000000 --- a/tests/slsa_analyzer/checks/test_maven_repo_verification_check.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""Module to test the maven repository verification check.""" - -from pathlib import Path - -from macaron.repo_verifier.repo_verifier_base import RepositoryVerificationResult, RepositoryVerificationStatus -from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool -from macaron.slsa_analyzer.checks.check_result import CheckResultType -from macaron.slsa_analyzer.checks.maven_repo_verification_check import MavenRepoVerificationCheck -from macaron.slsa_analyzer.package_registry import PyPIRegistry -from macaron.slsa_analyzer.package_registry.maven_central_registry import MavenCentralRegistry -from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo -from tests.conftest import MockAnalyzeContext - -RESOURCE_PATH = Path(__file__).parent.joinpath("resources") - - -def test_repo_verification_pass(maven_tool: BaseBuildTool, macaron_path: Path) -> None: - """Test that the check passes when the repository is verified.""" - check = MavenRepoVerificationCheck() - - ctx = MockAnalyzeContext(macaron_path=macaron_path, output_dir="", purl="pkg:maven/test/test") - maven_registry = MavenCentralRegistry() - ctx.dynamic_data["package_registries"] = [PackageRegistryInfo(maven_tool, maven_registry)] - ctx.dynamic_data["repo_verification"] = [ - RepositoryVerificationResult( - status=RepositoryVerificationStatus.PASSED, - reason="", - build_tool=maven_tool, - ) - ] - - assert check.run_check(ctx).result_type == CheckResultType.PASSED - - -def test_repo_verification_fail(maven_tool: BaseBuildTool, macaron_path: Path) -> None: - """Test that the check fails when the repository verification is failed.""" - check = MavenRepoVerificationCheck() - - ctx = MockAnalyzeContext(macaron_path=macaron_path, output_dir="", purl="pkg:maven/test/test") - maven_registry = MavenCentralRegistry() - ctx.dynamic_data["package_registries"] = [PackageRegistryInfo(maven_tool, maven_registry)] - ctx.dynamic_data["repo_verification"] = [ - RepositoryVerificationResult( - status=RepositoryVerificationStatus.FAILED, - reason="", - build_tool=maven_tool, - ) - ] - - assert check.run_check(ctx).result_type == CheckResultType.FAILED - - -def test_repo_verification_unknown_for_unknown_repo_verification(maven_tool: BaseBuildTool, macaron_path: Path) -> None: - """Test that the check returns unknown when the repository verification is unknown.""" - check = MavenRepoVerificationCheck() - - ctx = MockAnalyzeContext(macaron_path=macaron_path, output_dir="", purl="pkg:maven/test/test") - maven_registry = MavenCentralRegistry() - ctx.dynamic_data["package_registries"] = [PackageRegistryInfo(maven_tool, maven_registry)] - ctx.dynamic_data["repo_verification"] = [ - RepositoryVerificationResult( - status=RepositoryVerificationStatus.UNKNOWN, - reason="", - build_tool=maven_tool, - ) - ] - - assert check.run_check(ctx).result_type == CheckResultType.UNKNOWN - - -def test_repo_verification_unknown_for_unsupported_build_tools(pip_tool: BaseBuildTool, macaron_path: Path) -> None: - """Test that the check returns unknown for unsupported build tools.""" - check = MavenRepoVerificationCheck() - - ctx = MockAnalyzeContext(macaron_path=macaron_path, output_dir="", purl="pkg:pypi/test/test") - pypi_registry = PyPIRegistry() - ctx.dynamic_data["package_registries"] = [PackageRegistryInfo(pip_tool, pypi_registry)] - - assert check.run_check(ctx).result_type == CheckResultType.UNKNOWN