Skip to content

Commit

Permalink
feat: obtain Java and Python artifacts from .m2 or Python virtual env…
Browse files Browse the repository at this point in the history
…ironment from input (#864)

Signed-off-by: Trong Nhan Mai <[email protected]>
  • Loading branch information
tromai authored Dec 18, 2024
1 parent f1f281f commit 12d8593
Show file tree
Hide file tree
Showing 15 changed files with 917 additions and 159 deletions.
5 changes: 5 additions & 0 deletions docs/source/pages/cli_usage/command_analyze.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Usage
[-d DIGEST] [-pe PROVENANCE_EXPECTATION]
[--skip-deps] [--deps-depth DEPS_DEPTH] [-g TEMPLATE_PATH]
[--python-venv PYTHON_VENV]
[--local-maven-repo LOCAL_MAVEN_REPO]
-------
Options
Expand Down Expand Up @@ -79,6 +80,10 @@ Options

The path to the Python virtual environment of the target software component.

.. option:: --local-maven-repo LOCAL_MAVEN_REPO

The path to the local ``.m2`` directory. If this option is not used, Macaron will use the default location at ``$HOME/.m2``.

-----------
Environment
-----------
Expand Down
56 changes: 55 additions & 1 deletion scripts/release_scripts/run_macaron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -328,14 +328,18 @@ if [[ $command == "analyze" ]]; then
python_venv_path="$2"
shift
;;
--local-maven-repo)
local_maven_repo="$2"
shift
;;
*)
rest_command+=("$1")
;;
esac
shift
done
elif [[ $command == "verify-policy" ]]; then
while [[ $# -gt 0 ]]; do
while [[ $# -gt 0 ]]; do
case $1 in
-d|--database)
arg_database="$2"
Expand All @@ -351,6 +355,15 @@ elif [[ $command == "verify-policy" ]]; then
esac
shift
done
elif [[ $command == "dump-defaults" ]]; then
while [[ $# -gt 0 ]]; do
case $1 in
*)
rest_command+=("$1")
;;
esac
shift
done
fi

# MACARON entrypoint - Main argvs
Expand Down Expand Up @@ -455,6 +468,47 @@ if [[ -n "${python_venv_path:-}" ]]; then
mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container"
fi

# Expose a local Maven repo to the container as a read-only mount at
# ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly and pass that path
# to Macaron through --local-maven-repo.
#
# The host directory that gets mounted is picked in this order:
#   1. The directory provided by the user through --local-maven-repo.
#   2. For the `analyze` command only: the host's $HOME/.m2, if it exists.
#   3. For the `analyze` command only: an empty directory created at
#      ${output}/analyze_local_maven_repo_readonly.
#
# Case 3 is needed because:
# - By default, if --local-maven-repo is not used, Macaron uses $HOME/.m2 of the
#   current environment as the local maven repo.
# - If --local-maven-repo is not set when Macaron is running in the Docker container,
#   it will try to use $HOME/.m2 WITHIN the container. This is not desirable as that
#   $HOME/.m2 is being used by the cyclonedx plugins for dependency resolution, which
#   requires read write. We treat the local maven repo as a read only directory,
#   hence they cannot share.
if [[ -n "${local_maven_repo:-}" ]]; then
    # Case 1: user-provided local Maven repo.
    local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly"
    argv_command+=("--local-maven-repo" "$local_maven_repo_in_container")

    mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container"
elif [[ "$command" == "analyze" ]]; then
    local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly"
    argv_command+=("--local-maven-repo" "$local_maven_repo_in_container")

    if [[ -d "$HOME/.m2" ]]; then
        # Case 2: default local Maven repo of the host.
        mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container"
    else
        # Case 3: no local Maven repo on the host, mount an empty placeholder.
        output_local_maven_repo="${output}/analyze_local_maven_repo_readonly"
        mkdir -p "$output_local_maven_repo"

        mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container"
    fi
fi

# MACARON entrypoint - verify-policy command argvs
# This is for macaron verify-policy command.
# Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/macaron.db
Expand Down
31 changes: 31 additions & 0 deletions src/macaron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,29 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None
sys.exit(os.EX_OSFILE)
global_config.load_python_venv(analyzer_single_args.python_venv)

# Set local maven repo path.
if analyzer_single_args.local_maven_repo is None:
    # No --local-maven-repo was given: fall back to the default user local .m2 directory.
    # Exit on error if $HOME is not set or empty.
    home_dir = os.getenv("HOME")
    if not home_dir:
        logger.critical("Environment variable HOME is not set.")
        sys.exit(os.EX_USAGE)

    local_maven_repo = os.path.join(home_dir, ".m2")
    if os.path.isdir(local_maven_repo):
        global_config.local_maven_repo = local_maven_repo
    else:
        # A missing default repo is not an error; record that there is none instead of
        # storing a path that does not exist.
        logger.debug("The default local Maven repo at %s does not exist. Ignore ...", local_maven_repo)
        global_config.local_maven_repo = None
else:
    # A path given explicitly by the user must point to an existing directory.
    user_provided_local_maven_repo = analyzer_single_args.local_maven_repo
    if not os.path.isdir(user_provided_local_maven_repo):
        logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo)
        sys.exit(os.EX_USAGE)

    global_config.local_maven_repo = user_provided_local_maven_repo

analyzer = Analyzer(global_config.output_path, global_config.build_log_path)

# Initiate reporters.
Expand Down Expand Up @@ -453,6 +476,14 @@ def main(argv: list[str] | None = None) -> None:
),
)

single_analyze_parser.add_argument(
"--local-maven-repo",
required=False,
help=(
"The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2"
),
)

# Dump the default values.
sub_parser.add_parser(name="dump-defaults", description="Dumps the defaults.ini file to the output directory.")

Expand Down
249 changes: 249 additions & 0 deletions src/macaron/artifact/local_artifact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module declares types and utilities for handling local artifacts."""

import fnmatch
import glob
import os

from packageurl import PackageURL

from macaron.artifact.maven import construct_maven_repository_path
from macaron.errors import LocalArtifactFinderError


def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
    """Build the glob pattern(s) for the directory holding the local maven artifacts of ``maven_purl``.

    The resulting pattern(s) are meant to be searched for inside a `<...>/.m2/repository` directory.

    Parameters
    ----------
    maven_purl : PackageURL
        A maven type PackageURL instance.

    Returns
    -------
    list[str] | None
        A list of glob patterns, or None if the PURL is not of type ``maven`` or if it
        has no namespace or no version.

    Examples
    --------
    >>> from packageurl import PackageURL
    >>> purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0")
    >>> construct_local_artifact_dirs_glob_pattern_maven_purl(purl)
    ['com/oracle/macaron/macaron/0.13.0']
    """
    if maven_purl.type != "maven":
        return None

    group_id = maven_purl.namespace
    artifact_version = maven_purl.version

    # Both the group and the version are needed to locate the artifact directory.
    if group_id is not None and artifact_version is not None:
        return [construct_maven_repository_path(group_id, maven_purl.name, artifact_version)]

    return None


def construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None:
    """Return a list of glob pattern(s) representing directories that contain the artifacts in a Python virtual environment.

    The glob pattern(s) can be used to search in `<...>/<python_venv>/lib/python3.x/site-packages`
    directory.

    Parameters
    ----------
    pypi_purl : PackageURL
        A pypi type PackageURL instance.

    Returns
    -------
    list[str] | None
        A list of glob patterns, or None if the PURL is not of type ``pypi`` or has no version.

    Notes
    -----
    When the package name contains dashes, a second set of patterns is appended in which every
    dash is replaced by an underscore. The binary distribution format escapes dashes in
    distribution names to underscores, so a PURL name such as ``typing-extensions`` would
    otherwise never match the ``typing_extensions-<version>.dist-info`` directory.

    Examples
    --------
    >>> from packageurl import PackageURL
    >>> purl = PackageURL.from_string("pkg:pypi/django@1.11.1")
    >>> construct_local_artifact_dirs_glob_pattern_pypi_purl(purl)
    ['django', 'django-1.11.1.dist-info', 'django-1.11.1.data']
    """
    if pypi_purl.type != "pypi":
        return None

    name = pypi_purl.name
    version = pypi_purl.version

    if version is None:
        return None

    # The name as it appears in the PURL always comes first, so the output for names
    # without dashes is exactly what it used to be.
    candidate_names = [name]
    escaped_name = name.replace("-", "_")
    if escaped_name != name:
        candidate_names.append(escaped_name)

    # These patterns are from the content of a wheel file, which are extracted into the site-packages
    # directory. References:
    # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents
    glob_patterns = []
    for candidate in candidate_names:
        glob_patterns.extend(
            (
                candidate,
                f"{candidate}-{version}.dist-info",
                f"{candidate}-{version}.data",
            )
        )

    return glob_patterns


def find_artifact_dirs_from_local_maven_repo(
local_maven_repo: str,
glob_patterns: list[str],
) -> list[str]:
"""Find directories that contains maven artifacts within a maven local repository.
``local_maven_repo`` should be in format `<...>/.m2/repository`.
Parameters
----------
local_maven_repo: str
The path to the directory to find artifacts.
glob_patterns: list[str]
The list of glob patterns that matches to artifact directory names.
Returns
-------
list[str]
The list of paths to artifact directories in the form of ``venv_site_package_path``/path/to/artifact_dir
If no artifact directory is found, this list will be empty.
Raises
------
LocalArtifactFinderError
If ``local_maven_repo`` doesn't exist.
"""
if not os.path.isdir(local_maven_repo):
raise LocalArtifactFinderError(f"{local_maven_repo} doesn't exist.")

artifact_paths = []
for pattern in glob_patterns:
found_paths = glob.glob(
root_dir=local_maven_repo,
pathname=pattern,
)

for found_path in found_paths:
full_path = os.path.join(local_maven_repo, found_path)
if os.path.isdir(full_path):
artifact_paths.append(full_path)

return artifact_paths


def find_artifact_dirs_from_python_venv(
    venv_site_package_path: str,
    glob_patterns: list[str],
) -> list[str]:
    """Find directories within a python virtual environment.

    For packages in the virtual environment, we will treat their name case-insensitively.
    https://packaging.python.org/en/latest/specifications/name-normalization/

    ``venv_site_package_path`` should be in format `<...>/lib/python3.*/site-packages/`.

    Parameters
    ----------
    venv_site_package_path: str
        The path to the local directory to find artifacts.
    glob_patterns: list[str]
        The list of glob patterns that matches to artifact directory names.

    Returns
    -------
    list[str]
        The list of paths to artifact directories in the form of ``venv_site_package_path``/path/to/artifact_dir
        Each matching directory appears once, even if it matches several patterns.
        If no artifact directory is found, this list will be empty.

    Raises
    ------
    LocalArtifactFinderError
        If ``venv_site_package_path`` doesn't exist or if we cannot view the sub-directory of it.
    """
    if not os.path.isdir(venv_site_package_path):
        raise LocalArtifactFinderError(f"{venv_site_package_path} doesn't exist.")

    try:
        venv_path_entries = os.listdir(venv_site_package_path)
    except (NotADirectoryError, PermissionError, FileNotFoundError) as error:
        error_msg = f"Cannot view the sub-directory of venv {venv_site_package_path}"
        raise LocalArtifactFinderError(error_msg) from error

    # Lower-case the patterns once, up front, instead of once per directory entry.
    lowered_patterns = [pattern.lower() for pattern in glob_patterns]

    artifact_paths: list[str] = []
    for entry in venv_path_entries:
        entry_path = os.path.join(venv_site_package_path, entry)
        if not os.path.isdir(entry_path):
            continue

        # ``any`` stops at the first matching pattern, so a directory that matches
        # more than one pattern is reported a single time.
        lowered_entry = entry.lower()
        if any(fnmatch.fnmatch(lowered_entry, pattern) for pattern in lowered_patterns):
            artifact_paths.append(entry_path)

    return artifact_paths


def get_local_artifact_dirs(
purl: PackageURL,
local_artifact_repo_path: str,
) -> list[str]:
"""Return the paths to directories that store local artifacts for a PackageURL.
We look for local artifacts of ``purl`` in ``local_artifact_repo_path``.
This function returns a list of paths (as strings), each has the format
``local_artifact_repo_path``/path/to/artifact_dir``
This will mean that no path to an artifact is returned. Therefore, it's the responsibility
of this function caller to inspect the artifact directory to obtain the required
artifact.
We assume that ``local_artifact_repo_path`` exists.
Parameters
----------
purl : PackageURL
The purl we want to find local artifacts
local_artifact_repo_path : str
The local artifact repo directory.
Returns
-------
list[str]
The list contains the artifact directory paths. It will be empty if no artifact can be found.
Raises
------
LocalArtifactFinderError
If an error happens when looking for local artifacts.
"""
purl_type = purl.type

if purl_type == "maven":
maven_artifact_patterns = construct_local_artifact_dirs_glob_pattern_maven_purl(purl)
if not maven_artifact_patterns:
raise LocalArtifactFinderError(f"Cannot generate maven artifact patterns for {purl}")

return find_artifact_dirs_from_local_maven_repo(
local_maven_repo=local_artifact_repo_path,
glob_patterns=maven_artifact_patterns,
)

if purl_type == "pypi":
pypi_artifact_patterns = construct_local_artifact_dirs_glob_pattern_pypi_purl(purl)
if not pypi_artifact_patterns:
raise LocalArtifactFinderError(f"Cannot generate Python package patterns for {purl}")

return find_artifact_dirs_from_python_venv(
venv_site_package_path=local_artifact_repo_path,
glob_patterns=pypi_artifact_patterns,
)

raise LocalArtifactFinderError(f"Unsupported PURL type {purl_type}")
Loading

0 comments on commit 12d8593

Please sign in to comment.