Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: verify whether the reported repository can be linked back to the artifact #873

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/macaron/artifact/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module declares types and utilities for Maven artifacts."""

import re
from collections.abc import Sequence

from packageurl import PackageURL
Expand Down Expand Up @@ -140,3 +140,21 @@ def create_maven_purl_from_artifact_filename(
)

return None


def is_valid_maven_group_id(group_id: str) -> bool:
    """Check if the provided string is a valid maven group id.

    A group id is accepted when it has at least two dot-separated segments. Every
    segment must start with an ASCII letter and may continue with ASCII letters,
    digits, hyphens or underscores. The last segment must additionally end with a
    letter or a digit. The whole string must match, so surrounding whitespace or a
    trailing newline makes the group id invalid.

    Parameters
    ----------
    group_id : str
        The group id to check.

    Returns
    -------
    bool
        True if the group id is valid, False otherwise
    """
    # Should match strings like org.example.foo, org.example-2.foo.bar_1.
    segment = r"[a-zA-Z][a-zA-Z0-9_-]*"
    # The last segment may be a single letter, but must not end with '-' or '_'.
    last_segment = r"[a-zA-Z](?:[a-zA-Z0-9_-]*[a-zA-Z0-9])?"
    pattern = rf"(?:{segment}\.)+{last_segment}"
    # fullmatch (rather than match + '$') also rejects a trailing newline.
    return re.fullmatch(pattern, group_id) is not None
37 changes: 37 additions & 0 deletions src/macaron/repo_finder/repo_finder_deps_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
import json
import logging
from enum import StrEnum
from typing import Any
from urllib.parse import quote as encode

from packageurl import PackageURL

from macaron.json_tools import json_extract
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_validator import find_valid_repository_url
from macaron.slsa_analyzer.git_url import clean_url
from macaron.util import send_get_http_raw

logger: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -71,6 +73,41 @@ def find_repo(self, purl: PackageURL) -> str:

return ""

@staticmethod
def get_project_info(project_url: str) -> dict[str, Any] | None:
    """Retrieve project information from deps.dev.

    Parameters
    ----------
    project_url : str
        The URL of the project.

    Returns
    -------
    dict[str, Any] | None
        The project information or None if the information could not be retrieved.
        None is also returned when the URL cannot be cleaned, when the response is
        empty or is not valid JSON, and when the decoded JSON is not an object.
    """
    clean_repo_url = clean_url(project_url)
    if clean_repo_url is None or clean_repo_url.hostname is None:
        logger.debug("Invalid project url format: %s", project_url)
        return None

    # The project key is "<hostname><path>"; it is percent-encoded with no safe
    # characters so that the slashes of the path are encoded as well.
    project_key = clean_repo_url.hostname + clean_repo_url.path

    request_url = f"https://api.deps.dev/v3alpha/projects/{encode(project_key, safe='')}"
    response = send_get_http_raw(request_url)
    if not (response and response.text):
        logger.debug("Failed to retrieve additional repo info for: %s", project_url)
        return None

    try:
        response_json = json.loads(response.text)
    except ValueError as error:
        logger.debug("Failed to parse response from deps.dev: %s", error)
        return None

    # json.loads may legitimately produce a list, string or number; only a JSON
    # object satisfies the declared return type.
    if not isinstance(response_json, dict):
        logger.debug("Unexpected response format from deps.dev for: %s", project_url)
        return None

    return response_json

def _create_urls(self, purl: PackageURL) -> list[str]:
"""
Create the urls to search for the metadata relating to the passed artifact.
Expand Down
4 changes: 4 additions & 0 deletions src/macaron/repo_verifier/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This package contains classes for repository verification."""
75 changes: 75 additions & 0 deletions src/macaron/repo_verifier/repo_verifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains code to verify whether a reported repository can be linked back to the artifact."""
import logging

from macaron.repo_verifier.repo_verifier_base import (
RepositoryVerificationResult,
RepositoryVerificationStatus,
RepoVerifierBase,
)
from macaron.repo_verifier.repo_verifier_gradle import RepoVerifierGradle
from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
from macaron.slsa_analyzer.build_tool import BaseBuildTool, Gradle, Maven

logger = logging.getLogger(__name__)


def verify_repo(
    namespace: str | None,
    name: str,
    version: str,
    reported_repo_url: str,
    reported_repo_fs: str,
    build_tool: BaseBuildTool,
) -> RepositoryVerificationResult:
    """Verify whether the repository links back to the artifact.

    The verifier is selected from the exact type of ``build_tool``. When no verifier
    is registered for that type, the result has status ``UNKNOWN`` and the reason
    ``unsupported_type``.

    Parameters
    ----------
    namespace : str | None
        The namespace of the artifact.
    name : str
        The name of the artifact.
    version : str
        The version of the artifact.
    reported_repo_url : str
        The reported repository URL.
    reported_repo_fs : str
        The reported repository filesystem path.
    build_tool : BaseBuildTool
        The build tool used to build the package.

    Returns
    -------
    RepositoryVerificationResult
        The result of the repository verification
    """
    # TODO: Add support for other build tools (e.g., Poetry, Pip, Docker, NPM, Yarn, Go).
    supported_verifiers: dict[type[BaseBuildTool], type[RepoVerifierBase]] = {
        Maven: RepoVerifierMaven,
        Gradle: RepoVerifierGradle,
    }

    selected_verifier = supported_verifiers.get(type(build_tool))
    if selected_verifier:
        return selected_verifier(
            namespace=namespace,
            name=name,
            version=version,
            reported_repo_url=reported_repo_url,
            reported_repo_fs=reported_repo_fs,
        ).verify_repo()

    # No verifier is registered for this build tool type.
    return RepositoryVerificationResult(
        status=RepositoryVerificationStatus.UNKNOWN,
        reason="unsupported_type",
        build_tool=build_tool,
    )
139 changes: 139 additions & 0 deletions src/macaron/repo_verifier/repo_verifier_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the base class and core data models for repository verification."""
import abc
import logging
import os
from collections import deque
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

from macaron.slsa_analyzer.build_tool import BaseBuildTool

logger = logging.getLogger(__name__)


def find_file_in_repo(root_dir: Path, filename: str) -> Path | None:
"""Find the highest level file with a given name in a local repository.

This function ignores certain paths that are not under the main source code directories.

Parameters
----------
root_dir : Path
The root directory of the repository.
filename : str
The name of the file to search for.

Returns
-------
Path | None
The path to the file if it exists, otherwise
"""
# TODO: Consider using BaseBuildTool.get_build_dirs.
# + Refactor 'get_build_dirs' to skip certain directories
# that are most likely not part of the main codebase (e.g., sample).
# + Need to find a way to look for other
# files (e.g., gradle.properties) for the purpose of repo verification
# without breaking the current logic of finding build directories.
# + Add the capability to return the content/path of the file.
if not os.path.isdir(root_dir):
return None

queue: deque[Path] = deque()
queue.append(Path(root_dir))
while queue:
current_dir = queue.popleft()

# Don't look through non-main directories.
if any(
keyword in current_dir.name.lower()
for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"]
):
continue

if Path(current_dir, filename).exists():
return Path(current_dir, filename)

# Ignore symlinks to prevent potential infinite loop.
sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()]
queue.extend(sub_dirs)

return None


class RepositoryVerificationStatus(str, Enum):
    """The possible outcomes of a repository verification.

    Each member is also a ``str`` whose value is the lower-case name of the outcome.
    """

    #: Evidence was found that links the repository back to the publisher of the artifact.
    PASSED = "passed"

    #: Evidence was found showing that the repository is not the publisher of the artifact.
    FAILED = "failed"

    #: No evidence was found that either proves or disproves the link between the repository and the artifact.
    UNKNOWN = "unknown"


@dataclass(frozen=True)
class RepositoryVerificationResult:
    """An immutable record describing the outcome of a repository verification."""

    #: The outcome of the verification.
    status: RepositoryVerificationStatus

    #: The reason that explains the outcome.
    reason: str

    #: The build tool used to build the package.
    build_tool: BaseBuildTool


class RepoVerifierBase(abc.ABC):
    """The abstract base of all verifiers that check whether a reported repository links back to an artifact.

    Concrete verifiers must provide the ``build_tool`` property and the ``verify_repo`` method.
    """

    def __init__(
        self,
        namespace: str | None,
        name: str,
        version: str,
        reported_repo_url: str,
        reported_repo_fs: str,
    ) -> None:
        """Store the artifact coordinates and the location of the reported repository.

        Parameters
        ----------
        namespace : str | None
            The namespace of the artifact.
        name : str
            The name of the artifact.
        version : str
            The version of the artifact.
        reported_repo_url : str
            The URL of the repository reported by the publisher.
        reported_repo_fs : str
            The file system path of the reported repository.
        """
        # Artifact coordinates.
        self.namespace = namespace
        self.name = name
        self.version = version

        # Location of the reported repository: remote URL and local path.
        self.reported_repo_url = reported_repo_url
        self.reported_repo_fs = reported_repo_fs

    @property
    @abc.abstractmethod
    def build_tool(self) -> BaseBuildTool:
        """Define the build tool used to build the package."""

    @abc.abstractmethod
    def verify_repo(self) -> RepositoryVerificationResult:
        """Verify whether the repository links back to the artifact.

        Returns
        -------
        RepositoryVerificationResult
            The result of the repository verification
        """
Loading
Loading