diff --git a/docs/source/pages/cli_usage/command_analyze.rst b/docs/source/pages/cli_usage/command_analyze.rst index 2bb7f3f6a..0d43d791d 100644 --- a/docs/source/pages/cli_usage/command_analyze.rst +++ b/docs/source/pages/cli_usage/command_analyze.rst @@ -24,6 +24,7 @@ Usage [-d DIGEST] [-pe PROVENANCE_EXPECTATION] [--skip-deps] [--deps-depth DEPS_DEPTH] [-g TEMPLATE_PATH] [--python-venv PYTHON_VENV] + [--local-maven-repo LOCAL_MAVEN_REPO] ------- Options @@ -79,6 +80,10 @@ Options The path to the Python virtual environment of the target software component. +.. option:: --local-maven-repo LOCAL_MAVEN_REPO + + The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2 + ----------- Environment ----------- diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 5201cedb5..54ca13fd1 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -328,6 +328,10 @@ if [[ $command == "analyze" ]]; then python_venv_path="$2" shift ;; + --local-maven-repo) + local_maven_repo="$2" + shift + ;; *) rest_command+=("$1") ;; @@ -335,7 +339,7 @@ if [[ $command == "analyze" ]]; then shift done elif [[ $command == "verify-policy" ]]; then - while [[ $# -gt 0 ]]; do + while [[ $# -gt 0 ]]; do case $1 in -d|--database) arg_database="$2" @@ -351,6 +355,15 @@ elif [[ $command == "verify-policy" ]]; then esac shift done +elif [[ $command == "dump-defaults" ]]; then + while [[ $# -gt 0 ]]; do + case $1 in + *) + rest_command+=("$1") + ;; + esac + shift + done fi # MACARON entrypoint - Main argvs @@ -455,6 +468,47 @@ if [[ -n "${python_venv_path:-}" ]]; then mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container" fi +# Mount the local Maven repo from the +# host file system into the container's +# ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. 
+if [[ -n "${local_maven_repo:-}" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container" +else + # Mounting default local maven repo only + # when the user doesn't provide --local-maven-repo AND `analyze` command is used. + if [[ "$command" == "analyze" ]]; then + # We mount the host's $HOME/.m2 into the container's + # ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. + if [[ -d "$HOME/.m2" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" + # If the host's $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly + # into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. And then provide + # ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly into the --local-maven-repo + # flag. + # This is because: + # - By default if --local-maven-repo is not used, Macaron uses $HOME/.m2 of the current + # environment as the local maven repo. + # - If --local-maven-repo is not set when Macaron is running in the Docker container, it will try to + # use $HOME/.m2 WITHIN the container. This is not desirable as this $HOME/.m2 is being used + # by the cyclonedx plugins for dependency resolution, which requires read write. We treat the local + # maven repo as a read only directory, hence they cannot share. 
+ else + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" + mkdir -p "$output_local_maven_repo" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + fi + fi +fi + # MACARON entrypoint - verify-policy command argvs # This is for macaron verify-policy command. # Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/macaron.db diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index 3e541d633..a3d1dcbc5 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -74,6 +74,29 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None sys.exit(os.EX_OSFILE) global_config.load_python_venv(analyzer_single_args.python_venv) + # Set local maven repo path. + if analyzer_single_args.local_maven_repo is None: + # Load the default user local .m2 directory. + # Exit on error if $HOME is not set or empty. + home_dir = os.getenv("HOME") + if not home_dir: + logger.critical("Environment variable HOME is not set.") + sys.exit(os.EX_USAGE) + + local_maven_repo = os.path.join(home_dir, ".m2") + if not os.path.isdir(local_maven_repo): + logger.debug("The default local Maven repo at %s does not exist. Ignore ...", local_maven_repo) + global_config.local_maven_repo = None + else: + global_config.local_maven_repo = local_maven_repo + else: + user_provided_local_maven_repo = analyzer_single_args.local_maven_repo + if not os.path.isdir(user_provided_local_maven_repo): + logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo) + sys.exit(os.EX_USAGE) + + global_config.local_maven_repo = user_provided_local_maven_repo + analyzer = Analyzer(global_config.output_path, global_config.build_log_path) # Initiate reporters. 
@@ -453,6 +476,14 @@ def main(argv: list[str] | None = None) -> None: ), ) + single_analyze_parser.add_argument( + "--local-maven-repo", + required=False, + help=( + "The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2" + ), + ) + # Dump the default values. sub_parser.add_parser(name="dump-defaults", description="Dumps the defaults.ini file to the output directory.") diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py new file mode 100644 index 000000000..ed37c335a --- /dev/null +++ b/src/macaron/artifact/local_artifact.py @@ -0,0 +1,249 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module declares types and utilities for handling local artifacts.""" + +import fnmatch +import glob +import os + +from packageurl import PackageURL + +from macaron.artifact.maven import construct_maven_repository_path +from macaron.errors import LocalArtifactFinderError + + +def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) representing the directory that contains the local maven artifacts for ``maven_purl``. + + The glob pattern(s) can be used to search in `<...>/.m2/repository` directory. + + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance. + + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. 
+ + Examples + -------- + >>> from packageurl import PackageURL + >>> purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") + >>> construct_local_artifact_dirs_glob_pattern_maven_purl(purl) + ['com/oracle/macaron/macaron/0.13.0'] + """ + if maven_purl.type != "maven": + return None + + group = maven_purl.namespace + artifact = maven_purl.name + version = maven_purl.version + + if group is None or version is None: + return None + + return [construct_maven_repository_path(group, artifact, version)] + + +def construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) representing directories that contains the artifacts in a Python virtual environment. + + The glob pattern(s) can be used to search in `<...>//lib/python3.x/site-packages` + directory. + + Parameters + ---------- + pypi_purl : PackageURL + A pypi type PackageURL instance. + + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + + Examples + -------- + >>> from packageurl import PackageURL + >>> purl = PackageURL.from_string("pkg:pypi/django@1.11.1") + >>> construct_local_artifact_dirs_glob_pattern_pypi_purl(purl) + ['django', 'django-1.11.1.dist-info', 'django-1.11.1.data'] + """ + if pypi_purl.type != "pypi": + return None + + name = pypi_purl.name + version = pypi_purl.version + + if version is None: + return None + + # These patterns are from the content of a wheel file, which are extracted into the site-packages + # directory. 
References: + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents + glob_patterns = [] + glob_patterns.append(name) + glob_patterns.append(f"{name}-{version}.dist-info") + glob_patterns.append(f"{name}-{version}.data") + + return glob_patterns + + +def find_artifact_dirs_from_local_maven_repo( + local_maven_repo: str, + glob_patterns: list[str], +) -> list[str]: + """Find directories that contain maven artifacts within a maven local repository. + + ``local_maven_repo`` should be in format `<...>/.m2/repository`. + + Parameters + ---------- + local_maven_repo: str + The path to the directory to find artifacts. + glob_patterns: list[str] + The list of glob patterns that match artifact directory names. + + Returns + ------- + list[str] + The list of paths to artifact directories in the form of ``local_maven_repo``/path/to/artifact_dir + If no artifact directory is found, this list will be empty. + + Raises + ------ + LocalArtifactFinderError + If ``local_maven_repo`` doesn't exist. + """ + if not os.path.isdir(local_maven_repo): + raise LocalArtifactFinderError(f"{local_maven_repo} doesn't exist.") + + artifact_paths = [] + for pattern in glob_patterns: + found_paths = glob.glob( + root_dir=local_maven_repo, + pathname=pattern, + ) + + for found_path in found_paths: + full_path = os.path.join(local_maven_repo, found_path) + if os.path.isdir(full_path): + artifact_paths.append(full_path) + + return artifact_paths + + +def find_artifact_dirs_from_python_venv( + venv_site_package_path: str, + glob_patterns: list[str], +) -> list[str]: + """Find directories within a python virtual environment. + + For packages in the virtual environment, we will treat their name case-insensitively. + https://packaging.python.org/en/latest/specifications/name-normalization/ + + ``venv_site_package_path`` should be in format `<...>/lib/python3.*/site-packages/`. 
+ + Parameters + ---------- + venv_site_package_path: str + The path to the site-packages directory in which to find artifacts. + glob_patterns: list[str] + The list of glob patterns that match artifact directory names. + + Returns + ------- + list[str] + The list of paths to artifact directories in the form of ``venv_site_package_path``/path/to/artifact_dir + If no artifact directory is found, this list will be empty. + + Raises + ------ + LocalArtifactFinderError + If ``venv_site_package_path`` doesn't exist or if we cannot view the sub-directory of it. + """ + if not os.path.isdir(venv_site_package_path): + raise LocalArtifactFinderError(f"{venv_site_package_path} doesn't exist.") + + artifact_paths = [] + + try: + venv_path_entries = os.listdir(venv_site_package_path) + except (NotADirectoryError, PermissionError, FileNotFoundError) as error: + error_msg = f"Cannot view the sub-directory of venv {venv_site_package_path}" + raise LocalArtifactFinderError(error_msg) from error + + all_package_dirs: list[str] = [] + for entry in venv_path_entries: + entry_path = os.path.join(venv_site_package_path, entry) + if os.path.isdir(entry_path): + all_package_dirs.append(entry) + + for package_dir in all_package_dirs: + for pattern in glob_patterns: + if fnmatch.fnmatch(package_dir.lower(), pattern.lower()): + full_path = os.path.join(venv_site_package_path, package_dir) + artifact_paths.append(full_path) + + return artifact_paths + + +def get_local_artifact_dirs( + purl: PackageURL, + local_artifact_repo_path: str, +) -> list[str]: + """Return the paths to directories that store local artifacts for a PackageURL. + + We look for local artifacts of ``purl`` in ``local_artifact_repo_path``. + + This function returns a list of paths (as strings), each of which has the format + ``local_artifact_repo_path``/path/to/artifact_dir + + This means that no path to an individual artifact file is returned. 
Therefore, it's the responsibility + of this function caller to inspect the artifact directory to obtain the required + artifact. + + We assume that ``local_artifact_repo_path`` exists. + + Parameters + ---------- + purl : PackageURL + The purl we want to find local artifacts + local_artifact_repo_path : str + The local artifact repo directory. + + Returns + ------- + list[str] + The list contains the artifact directory paths. It will be empty if no artifact can be found. + + Raises + ------ + LocalArtifactFinderError + If an error happens when looking for local artifacts. + """ + purl_type = purl.type + + if purl_type == "maven": + maven_artifact_patterns = construct_local_artifact_dirs_glob_pattern_maven_purl(purl) + if not maven_artifact_patterns: + raise LocalArtifactFinderError(f"Cannot generate maven artifact patterns for {purl}") + + return find_artifact_dirs_from_local_maven_repo( + local_maven_repo=local_artifact_repo_path, + glob_patterns=maven_artifact_patterns, + ) + + if purl_type == "pypi": + pypi_artifact_patterns = construct_local_artifact_dirs_glob_pattern_pypi_purl(purl) + if not pypi_artifact_patterns: + raise LocalArtifactFinderError(f"Cannot generate Python package patterns for {purl}") + + return find_artifact_dirs_from_python_venv( + venv_site_package_path=local_artifact_repo_path, + glob_patterns=pypi_artifact_patterns, + ) + + raise LocalArtifactFinderError(f"Unsupported PURL type {purl_type}") diff --git a/src/macaron/artifact/maven.py b/src/macaron/artifact/maven.py index 6130aed83..dd97431f7 100644 --- a/src/macaron/artifact/maven.py +++ b/src/macaron/artifact/maven.py @@ -158,3 +158,41 @@ def is_valid_maven_group_id(group_id: str) -> bool: # Should match strings like org.example.foo, org.example-2.foo.bar_1. 
pattern = r"^[a-zA-Z][a-zA-Z0-9-]*\.([a-zA-Z][a-zA-Z0-9-]*\.)*[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$" return re.match(pattern, group_id) is not None + + +def construct_maven_repository_path( + group_id: str, + artifact_id: str | None = None, + version: str | None = None, + asset_name: str | None = None, +) -> str: + """Construct a path to a folder or file on the registry, assuming Maven repository layout. + + For more details regarding Maven repository layout, see the following: + - https://maven.apache.org/repository/layout.html + - https://maven.apache.org/guides/mini/guide-naming-conventions.html + + Parameters + ---------- + group_id : str + The group id of a Maven package. + artifact_id : str + The artifact id of a Maven package. + version : str + The version of a Maven package. + asset_name : str + The asset name. + + Returns + ------- + str + The path to a folder or file on the registry. + """ + path = group_id.replace(".", "/") + if artifact_id: + path = "/".join([path, artifact_id]) + if version: + path = "/".join([path, version]) + if asset_name: + path = "/".join([path, asset_name]) + return path diff --git a/src/macaron/config/global_config.py b/src/macaron/config/global_config.py index bfee23d32..d6d113a3a 100644 --- a/src/macaron/config/global_config.py +++ b/src/macaron/config/global_config.py @@ -46,6 +46,9 @@ class GlobalConfig: #: The path to Python virtual environment. python_venv_path: str = "" + #: The path to the local .m2 Maven repository. This attribute is None if there is no available .m2 directory. 
+ local_maven_repo: str | None = None + def load( self, macaron_path: str, diff --git a/src/macaron/errors.py b/src/macaron/errors.py index b4e8b813f..5ae90295c 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -90,3 +90,7 @@ class DependencyAnalyzerError(MacaronError): class HeuristicAnalyzerValueError(MacaronError): """Error class for BaseHeuristicAnalyzer errors when parsing data.""" + + +class LocalArtifactFinderError(MacaronError): + """Happens when there is an error looking for local artifacts.""" diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index f6c8fd22a..31da3d54c 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -55,6 +55,8 @@ class ChecksOutputs(TypedDict): """The commit digest extracted from provenance, if applicable.""" provenance_verified: bool """True if the provenance exists and has been verified against a signed companion provenance.""" + local_artifact_paths: list[str] + """The local artifact absolute paths.""" class AnalyzeContext: @@ -110,6 +112,7 @@ def __init__( provenance_repo_url=None, provenance_commit_digest=None, provenance_verified=False, + local_artifact_paths=[], ) @property diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 8190f87fd..a5fd67f22 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -2,10 +2,13 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module handles the cloning and analyzing a Git repo.""" + +import glob import logging import os import re import sys +from collections.abc import Mapping from datetime import datetime, timezone from pathlib import Path from typing import Any, NamedTuple @@ -16,6 +19,7 @@ from sqlalchemy.orm import Session from macaron import __version__ +from macaron.artifact.local_artifact import get_local_artifact_dirs from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -26,6 +30,7 @@ DuplicateError, InvalidAnalysisTargetError, InvalidPURLError, + LocalArtifactFinderError, ProvenanceError, PURLNotFoundError, ) @@ -111,6 +116,8 @@ def __init__(self, output_path: str, build_log_path: str) -> None: # Create database tables: all checks have been registered so all tables should be mapped now self.db_man.create_tables() + self.local_artifact_repo_mapper = Analyzer._get_local_artifact_repo_mapper() + def run( self, user_config: dict, @@ -473,6 +480,17 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest + if parsed_purl and parsed_purl.type in self.local_artifact_repo_mapper: + local_artifact_repo_path = self.local_artifact_repo_mapper[parsed_purl.type] + try: + local_artifact_dirs = get_local_artifact_dirs( + purl=parsed_purl, + local_artifact_repo_path=local_artifact_repo_path, + ) + analyze_ctx.dynamic_data["local_artifact_paths"].extend(local_artifact_dirs) + except LocalArtifactFinderError as error: + logger.debug(error) + analyze_ctx.check_results = registry.scan(analyze_ctx) return Record( @@ -973,6 +991,35 @@ def _verify_repository_link(self, parsed_purl: PackageURL, analyze_ctx: AnalyzeC ) analyze_ctx.dynamic_data["repo_verification"].append(verification_result) + @staticmethod + def _get_local_artifact_repo_mapper() -> Mapping[str, str]: + """Return 
the mapping between purl type and its local artifact repo path if that path exists.""" + local_artifact_mapper: dict[str, str] = {} + + if global_config.local_maven_repo: + m2_repository_dir = os.path.join(global_config.local_maven_repo, "repository") + if os.path.isdir(m2_repository_dir): + local_artifact_mapper["maven"] = m2_repository_dir + + if global_config.python_venv_path: + site_packages_dir_pattern = os.path.join( + global_config.python_venv_path, + "lib", + "python3.*", + "site-packages", + ) + site_packages_dirs = glob.glob(site_packages_dir_pattern) + + if len(site_packages_dirs) == 1: + local_artifact_mapper["pypi"] = site_packages_dirs.pop() + else: + logger.info( + "Expected exactly one python3.* site-packages directory in the input Python venv. " + + "This venv will NOT be used for local artifact findings." + ) + + return local_artifact_mapper + class DuplicateCmpError(DuplicateError): """This class is used for duplicated software component errors.""" diff --git a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py index 65987d1e2..bfdd9aa4b 100644 --- a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py @@ -13,6 +13,7 @@ import requests +from macaron.artifact.maven import construct_maven_repository_path from macaron.config.defaults import defaults from macaron.errors import ConfigurationError from macaron.json_tools import JsonType @@ -197,44 +198,6 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool: compatible_build_tool_classes = [Maven, Gradle] return any(isinstance(build_tool, build_tool_class) for build_tool_class in compatible_build_tool_classes) - def construct_maven_repository_path( - self, - group_id: str, - artifact_id: str | None = None, - version: str | None = None, - asset_name: str | None = None, - ) -> str: - """Construct a path to a folder or file on the registry, 
assuming Maven repository layout. - - For more details regarding Maven repository layout, see the following: - - https://maven.apache.org/repository/layout.html - - https://maven.apache.org/guides/mini/guide-naming-conventions.html - - Parameters - ---------- - group_id : str - The group id of a Maven package. - artifact_id : str - The artifact id of a Maven package. - version : str - The version of a Maven package. - asset_name : str - The asset name. - - Returns - ------- - str - The path to a folder or file on the registry. - """ - path = group_id.replace(".", "/") - if artifact_id: - path = "/".join([path, artifact_id]) - if version: - path = "/".join([path, version]) - if asset_name: - path = "/".join([path, asset_name]) - return path - def fetch_artifact_ids(self, group_id: str) -> list[str]: """Get all artifact ids under a group id. @@ -251,7 +214,7 @@ def fetch_artifact_ids(self, group_id: str) -> list[str]: The artifacts ids under the group. """ folder_info_url = self.construct_folder_info_url( - folder_path=self.construct_maven_repository_path(group_id), + folder_path=construct_maven_repository_path(group_id), ) try: @@ -440,7 +403,7 @@ def fetch_asset_names( list[str] The list of asset names. """ - folder_path = self.construct_maven_repository_path( + folder_path = construct_maven_repository_path( group_id=group_id, artifact_id=artifact_id, version=version, @@ -615,7 +578,7 @@ def fetch_asset_metadata( JFrogMavenAssetMetadata | None The asset's metadata, or ``None`` if the metadata cannot be retrieved. """ - file_path = self.construct_maven_repository_path( + file_path = construct_maven_repository_path( group_id=group_id, artifact_id=artifact_id, version=version, @@ -798,7 +761,7 @@ def construct_asset_url( str The URL to the asset, which can be use for downloading the asset. 
""" - group_path = self.construct_maven_repository_path(group_id) + group_path = construct_maven_repository_path(group_id) return urlunsplit( SplitResult( scheme="https", diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py new file mode 100644 index 000000000..0f0bbd270 --- /dev/null +++ b/tests/artifact/test_local_artifact.py @@ -0,0 +1,226 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Test the local artifact utilities.""" + +import os +from pathlib import Path + +import pytest +from packageurl import PackageURL + +from macaron.artifact.local_artifact import ( + construct_local_artifact_dirs_glob_pattern_maven_purl, + construct_local_artifact_dirs_glob_pattern_pypi_purl, + find_artifact_dirs_from_python_venv, + get_local_artifact_dirs, +) +from macaron.errors import LocalArtifactFinderError + + +@pytest.mark.parametrize( + ("purl_str", "expectation"), + [ + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group, artifact and version", + ), + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre?type=jar", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group artifact, version and type qualifier", + ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given maven purl.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param("pkg:pypi/django@5.0.6", id="The purl type is not supported."), + 
pytest.param("pkg:maven/guava@33.2.1-jre", id="Missing group id in the PURL"), + pytest.param("pkg:maven/guava", id="Missing version"), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given maven purl with error.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is None + + +@pytest.mark.parametrize( + ("purl_str", "expectation"), + [ + pytest.param( + "pkg:pypi/django@5.0.6", + ["django", "django-5.0.6.dist-info", "django-5.0.6.data"], + id="A valid pypi PURL with version", + ) + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given pypi purl.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param( + "pkg:pypi/django", + id="A pypi PURL without version", + ), + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + id="The purl type is not supported.", + ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given pypi purl with error.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is None + + +def test_find_artifact_paths_from_invalid_python_venv() -> None: + """Test find_artifact_paths_from_python_venv method with invalid venv path""" + with pytest.raises(LocalArtifactFinderError): + find_artifact_dirs_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) + + 
+@pytest.mark.parametrize( + ("purl_str", "expectation"), + [ + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + id="A maven type PURL", + ), + pytest.param( + "pkg:pypi/django@5.0.3", + [], + id="A pypi type PURL", + ), + ], +) +def test_get_local_artifact_paths_not_available( + purl_str: str, + expectation: list[str], + tmp_path: Path, +) -> None: + """Test getting local artifact paths where we cannot find local artifacts for the PURL.""" + purl = PackageURL.from_string(purl_str) + + assert ( + get_local_artifact_dirs( + purl=purl, + local_artifact_repo_path=str(tmp_path), + ) + == expectation + ) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param( + "pkg:maven/com.google.guava/guava", + id="A maven type PURL with no version", + ), + pytest.param( + "pkg:maven/guava@33.2.1-jre", + id="A maven type PURL with no group", + ), + pytest.param( + "pkg:maven/guava", + id="A maven type PURL with no group and no version", + ), + pytest.param( + "pkg:pypi/django", + id="A pypi type PURL without version", + ), + pytest.param( + "pkg:github/oracle/macaron", + id="A github type PURL (unsupported)", + ), + ], +) +def test_get_local_artifact_paths_invalid_purl( + purl_str: str, + tmp_path: Path, +) -> None: + """Test getting local artifact paths where the input PURL is invalid.""" + purl = PackageURL.from_string(purl_str) + + with pytest.raises(LocalArtifactFinderError): + get_local_artifact_dirs( + purl=purl, + local_artifact_repo_path=str(tmp_path), + ) + + +def test_get_local_artifact_paths_succeeded_maven(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with maven purl.""" + purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") + + tmp_path_str = str(tmp_path) + + maven_local_repo_path = f"{tmp_path_str}/.m2/repository" + target_artifact_path = f"{maven_local_repo_path}/com/oracle/macaron/macaron/0.13.0" + os.makedirs(maven_local_repo_path) + os.makedirs(target_artifact_path) + + result = 
get_local_artifact_dirs( + purl=purl, + local_artifact_repo_path=maven_local_repo_path, + ) + + assert result == [target_artifact_path] + + +def test_get_local_artifact_paths_succeeded_pypi(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with pypi purl.""" + purl = PackageURL.from_string("pkg:pypi/macaron@0.13.0") + + tmp_path_str = str(tmp_path) + + python_venv_path = f"{tmp_path_str}/.venv/lib/python3.11/site-packages" + + # We are also testing if the patterns match case-insensitively. + pypi_artifact_paths = [ + f"{python_venv_path}/macaron", + f"{python_venv_path}/macaron-0.13.0.dist-info", + f"{python_venv_path}/Macaron-0.13.0.dist-info", + f"{python_venv_path}/macaron-0.13.0.data", + f"{python_venv_path}/Macaron-0.13.0.data", + ] + + os.makedirs(python_venv_path) + + for artifact_path in pypi_artifact_paths: + os.makedirs(artifact_path) + + result = get_local_artifact_dirs( + purl=purl, + local_artifact_repo_path=python_venv_path, + ) + + assert sorted(result) == sorted(pypi_artifact_paths) diff --git a/tests/artifact/test_maven.py b/tests/artifact/test_maven.py index 31e95ba53..6014c20ad 100644 --- a/tests/artifact/test_maven.py +++ b/tests/artifact/test_maven.py @@ -6,7 +6,7 @@ import pytest from packageurl import PackageURL -from macaron.artifact.maven import MavenSubjectPURLMatcher +from macaron.artifact.maven import MavenSubjectPURLMatcher, construct_maven_repository_path from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload @@ -86,3 +86,78 @@ def test_to_maven_artifact_subject( ) == provenance_payload.statement["subject"][subject_index] ) + + +@pytest.mark.parametrize( + ("args", "expected_path"), + [ + pytest.param( + { + "group_id": "io.micronaut", + }, + "io/micronaut", + id="Only group_id 1", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + }, + "com/fasterxml/jackson/core", + id="Only group_id 2", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", 
+ "artifact_id": "jackson-annotations", + }, + "com/fasterxml/jackson/core/jackson-annotations", + id="group_id and artifact_id", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + "artifact_id": "jackson-annotations", + "version": "2.9.9", + }, + "com/fasterxml/jackson/core/jackson-annotations/2.9.9", + id="group_id and artifact_id and version", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + "artifact_id": "jackson-annotations", + "version": "2.9.9", + "asset_name": "jackson-annotations-2.9.9.jar", + }, + "com/fasterxml/jackson/core/jackson-annotations/2.9.9/jackson-annotations-2.9.9.jar", + id="group_id and artifact_id and version and asset_name,", + ), + ], +) +def test_construct_maven_repository_path( + args: dict, + expected_path: str, +) -> None: + """Test the ``construct_maven_repository_path`` method.""" + assert construct_maven_repository_path(**args) == expected_path + + +@pytest.mark.parametrize( + ("group_id", "expected_group_path"), + [ + ( + "io.micronaut", + "io/micronaut", + ), + ( + "com.fasterxml.jackson.core", + "com/fasterxml/jackson/core", + ), + ], +) +def test_to_group_folder_path( + group_id: str, + expected_group_path: str, +) -> None: + """Test that ``construct_maven_repository_path`` returns the group folder path when given only a group id.""" + assert construct_maven_repository_path(group_id) == expected_group_path diff --git a/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml new file mode 100644 index 000000000..1fcd585c3 --- /dev/null +++ b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml @@ -0,0 +1,46 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Test invalid usecases for --local-maven-repo. 
+ +tags: +- macaron-docker-image +- macaron-python-package + +steps: +- name: HOME environment variable is not set and --local-maven-repo is not used. + kind: analyze + env: + HOME: + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + expect_fail: true +- name: Providing a directory that doesn't exist to --local-maven-repo. + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + - --local-maven-repo + - invalid_dir + expect_fail: true +- name: Create a test file to mimic user input. Therefore, this test file will be outside of the output dir. + kind: shell + options: + cmd: touch test.txt +- name: Providing a file path to --local-maven-repo. + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + - --local-maven-repo + - ./test.txt + expect_fail: true +- name: Clean up the test file because it's not automatically cleaned up by the test script as it's outside of the output directory. 
+ kind: shell + options: + cmd: rm test.txt diff --git a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py index 985ecaeed..d72be69d1 100755 --- a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py +++ b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py @@ -6,13 +6,54 @@ import os import subprocess # nosec B404 import sys +import tempfile from collections import namedtuple +TestCase = namedtuple("TestCase", ["name", "script_args", "expected_macaron_args"]) -def test_macaron_command() -> int: - """Test if the ``macaron`` command in the container receives the correct arguments.""" - TestCase = namedtuple("TestCase", ["name", "script_args", "expected_macaron_args"]) +def run_test_case( + test_case: TestCase, + env: dict[str, str], +) -> int: + """Run a test case in an environment with variables defined by `env` and return the exit code.""" + exit_code = 0 + + name, script_args, expected_macaron_args = test_case + print(f"test_macaron_command[{name}]:", end=" ") + + result = subprocess.run( + [ # nosec B603 + "./output/run_macaron.sh", + *script_args, + ], + capture_output=True, + env=env, + check=False, + ) + if result.returncode != 0: + exit_code = 1 + print(f"FAILED with exit code {exit_code}") + print("stderr:") + print(result.stderr.decode("utf-8")) + return exit_code + + resulting_macaron_args = list(result.stderr.decode("utf-8").split()) + + if resulting_macaron_args != expected_macaron_args: + print("FAILED") + print(" script args : %s", str(script_args)) + print(" expected macaron args : %s", str(expected_macaron_args)) + print(" resulting macaron args: %s", str(resulting_macaron_args)) + exit_code = 1 + else: + print("PASSED") + + return exit_code + + +def test_macaron_command_help() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" test_cases = [ 
TestCase( name="'-h' as main argument", @@ -20,9 +61,9 @@ def test_macaron_command() -> int: expected_macaron_args=["-h"], ), TestCase( - name="'-h' as action argument for 'analyze'", - script_args=["analyze", "-h"], - expected_macaron_args=["analyze", "-h"], + name="'-h' as action argument for 'dump-defaults'", + script_args=["dump-defaults", "-h"], + expected_macaron_args=["dump-defaults", "-h"], ), TestCase( name="'-h' as action argument for 'verify-policy'", @@ -31,47 +72,97 @@ def test_macaron_command() -> int: ), ] + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_command_no_home_m2_on_host() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + test_cases = [ + TestCase( + name="no --local-maven-repo and host $HOME/.m2 is not available", + script_args=["analyze"], + expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], + ), + ] + + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + # We mimick the behavior of $HOME/.m2 not available by making $HOME pointing to a directory that doesn't exist. + env["HOME"] = "./non_exist_dir" + exit_code = 0 + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_command_host_home_m2_available() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + test_cases = [ + TestCase( + name="no --local-maven-repo provided by the user and host $HOME/.m2 is available", + script_args=["analyze"], + expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], + ), + ] + env = dict(os.environ) env["MCN_DEBUG_ARGS"] = "1" + exit_code = 0 + + with tempfile.TemporaryDirectory() as temp_dir: + # We create a temp dir with a .m2 directory and point $HOME to it. 
+ # This .m2 directory contains an empty `repository` directory. + os.mkdir(os.path.join(temp_dir, ".m2")) + os.mkdir(os.path.join(temp_dir, ".m2/repository")) + env["HOME"] = temp_dir + + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_user_provide_valid_local_maven_repo() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + with tempfile.TemporaryDirectory() as temp_dir: + test_cases = [ + TestCase( + name="with --local-maven-repo pointing to an existing directory", + script_args=["analyze", "--local-maven-repo", f"{temp_dir}"], + expected_macaron_args=[ + "analyze", + "--local-maven-repo", + "/home/macaron/analyze_local_maven_repo_readonly", + ], + ), + ] + + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + exit_code = 0 - for test_case in test_cases: - name, script_args, expected_macaron_args = test_case - print(f"test_macaron_command[{name}]:", end=" ") - - result = subprocess.run( - [ # nosec B603 - "./output/run_macaron.sh", - *script_args, - ], - capture_output=True, - env=env, - check=False, - ) - if result.returncode != 0: - exit_code = 1 - print(f"FAILED with exit code {exit_code}") - print("stderr:") - print(result.stderr.decode("utf-8")) - continue - - resulting_macaron_args = list(result.stderr.decode("utf-8").split()) - - if resulting_macaron_args != expected_macaron_args: - print("FAILED") - print(" script args : %s", str(script_args)) - print(" expected macaron args : %s", str(expected_macaron_args)) - print(" resulting macaron args: %s", str(resulting_macaron_args)) - exit_code = 1 - else: - print("PASSED") + for case in test_cases: + exit_code = run_test_case(case, env) return exit_code def main() -> int: """Run all tests.""" - return test_macaron_command() + return ( + test_macaron_command_help() + | test_macaron_command_no_home_m2_on_host() + | test_macaron_command_host_home_m2_available() + | 
test_macaron_user_provide_valid_local_maven_repo() + ) if __name__ == "__main__": diff --git a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py index eca170319..ebb960366 100644 --- a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py +++ b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for the ``JFrogMavenRegistry`` class.""" @@ -137,83 +137,6 @@ def test_is_detected( assert jfrog_maven.is_detected(build_tool) is False -@pytest.mark.parametrize( - ("args", "expected_path"), - [ - pytest.param( - { - "group_id": "io.micronaut", - }, - "io/micronaut", - id="Only group_id 1", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - }, - "com/fasterxml/jackson/core", - id="Only group_id 2", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - }, - "com/fasterxml/jackson/core/jackson-annotations", - id="group_id and artifact_id", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - "version": "2.9.9", - }, - "com/fasterxml/jackson/core/jackson-annotations/2.9.9", - id="group_id and artifact_id and version", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - "version": "2.9.9", - "asset_name": "jackson-annotations-2.9.9.jar", - }, - "com/fasterxml/jackson/core/jackson-annotations/2.9.9/jackson-annotations-2.9.9.jar", - id="group_id and artifact_id and version and asset_name,", - ), - ], -) -def test_construct_maven_repository_path( - jfrog_maven: JFrogMavenRegistry, - args: dict, 
- expected_path: str, -) -> None: - """Test the ``construct_maven_repository_path`` method.""" - assert jfrog_maven.construct_maven_repository_path(**args) == expected_path - - -@pytest.mark.parametrize( - ("group_id", "expected_group_path"), - [ - ( - "io.micronaut", - "io/micronaut", - ), - ( - "com.fasterxml.jackson.core", - "com/fasterxml/jackson/core", - ), - ], -) -def test_to_group_folder_path( - jfrog_maven: JFrogMavenRegistry, - group_id: str, - expected_group_path: str, -) -> None: - """Test the ``to_gorup_folder_path`` method.""" - assert jfrog_maven.construct_maven_repository_path(group_id) == expected_group_path - - @pytest.mark.parametrize( ("folder_path", "expected_url"), [