Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add a new setup.py related heuristic in the pypi malware analyzer #932

Merged
merged 1 commit into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class PackageURLMixin:
name: Mapped[str] = mapped_column(String(100), nullable=False, comment="Name of the package.")

#: Version of the package.
version: Mapped[str] = mapped_column(String(100), nullable=True, comment="Version of the package.")
version: Mapped[str | None] = mapped_column(String(100), nullable=True, comment="Version of the package.")

#: Extra qualifying data for a package such as the name of an OS.
qualifiers: Mapped[str] = mapped_column(
Expand Down
4 changes: 4 additions & 0 deletions src/macaron/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,7 @@ class CycloneDXParserError(MacaronError):

class DependencyAnalyzerError(MacaronError):
"""The DependencyAnalyzer error class."""


class HeuristicAnalyzerValueError(MacaronError):
    """Error raised by a heuristic analyzer when the package data it parses is missing or malformed."""
3 changes: 3 additions & 0 deletions src/macaron/malware_analyzer/pypi_heuristics/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class Heuristics(str, Enum):
#: Indicates that the setup.py file contains suspicious imports, such as base64 and requests.
SUSPICIOUS_SETUP = "suspicious_setup"

#: Indicates that the package does not include a .whl file
WHEEL_ABSENCE = "wheel_absence"


class HeuristicResult(str, Enum):
"""Result type indicating the outcome of a heuristic."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""The heuristic analyzer to check .whl file absence."""

import logging

from macaron.errors import HeuristicAnalyzerValueError
from macaron.json_tools import JsonType
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset

logger: logging.Logger = logging.getLogger(__name__)


class WheelAbsenceAnalyzer(BaseHeuristicAnalyzer):
    """
    Analyze to see if a .whl file is available for the package.

    If the analyzed release is distributed with at least one .whl file, this heuristic
    passes. Otherwise, the heuristic fails.
    """

    #: The ``packagetype`` value that PyPI release metadata uses for wheel distributions.
    WHEEL: str = "bdist_wheel"

    def __init__(self) -> None:
        super().__init__(
            name="wheel_absence_analyzer",
            heuristic=Heuristics.WHEEL_ABSENCE,
            depends_on=None,
        )

    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
        """Analyze the package.

        Parameters
        ----------
        pypi_package_json: PyPIPackageJsonAsset
            The PyPI package JSON asset object.

        Returns
        -------
        tuple[HeuristicResult, dict[str, JsonType]]:
            The result, and a mapping from the analyzed version to the file names
            distributed for that version.

        Raises
        ------
        HeuristicAnalyzerValueError
            If there is no release information, if no version is given and no latest
            version can be determined, if the version is not among the releases, or if
            a release file entry lacks an expected metadata field.
        """
        releases = pypi_package_json.get_releases()
        if releases is None:  # no release information
            error_msg = "There is no information for any release of this package."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg)

        # Prefer the version requested for the component; otherwise fall back to the latest release.
        version = pypi_package_json.component.version
        if version is None:
            version = pypi_package_json.get_latest_version()

        if version is None:
            error_msg = "There is no latest version of this package."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg)

        # Keep this lookup in its own try block so that a KeyError here can only mean
        # "unknown version", and is never confused with a malformed file entry below.
        try:
            release_distributions = releases[version]
        except KeyError as error:
            error_msg = f"The version {version} is not available as a release."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg) from error

        release_files: list[JsonType] = []
        wheel_present: bool = False

        try:
            # Do not stop at the first wheel: every file name is reported in the detail info.
            for release_metadata in release_distributions:
                if release_metadata["packagetype"] == self.WHEEL:
                    wheel_present = True

                release_files.append(release_metadata["filename"])
        except KeyError as error:
            error_msg = f"The release metadata for version {version} is missing the field {error}."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg) from error

        result = HeuristicResult.PASS if wheel_present else HeuristicResult.FAIL
        return result, {version: release_files}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.errors import HeuristicAnalyzerValueError
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
Expand All @@ -20,6 +21,7 @@
from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links import UnreachableProjectLinksAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer
from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.build_tool.pip import Pip
Expand Down Expand Up @@ -70,6 +72,7 @@ class MaliciousMetadataFacts(CheckFacts):
UnchangedReleaseAnalyzer,
CloserReleaseJoinDateAnalyzer,
SuspiciousSetupAnalyzer,
WheelAbsenceAnalyzer,
]

# The HeuristicResult sequence is aligned with the sequence of ANALYZERS list
Expand All @@ -82,6 +85,7 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult,
HeuristicResult,
HeuristicResult,
HeuristicResult,
],
float,
] = {
Expand All @@ -93,9 +97,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.SKIP, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, only one release, and the maintainer released it shortly
# after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -105,9 +110,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -117,9 +123,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -129,8 +136,23 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.PASS, # Suspicious Setup
HeuristicResult.PASS, # Wheel Absence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration.
# and the maintainer released it shortly after account registration. Presence/Absence of
# .whl file has no effect
): Confidence.MEDIUM,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.PASS, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration. Presence/Absence of
# .whl file has no effect
): Confidence.MEDIUM,
(
HeuristicResult.PASS, # Empty Project
Expand All @@ -140,9 +162,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# All project links are unreachable, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
}

Expand Down Expand Up @@ -197,6 +220,11 @@ def run_heuristics(
-------
tuple[dict[Heuristics, HeuristicResult], dict[str, JsonType]]
Containing the analysis results and relevant metadata.

Raises
------
HeuristicAnalyzerValueError
If a heuristic analysis fails due to malformed package information.
"""
results: dict[Heuristics, HeuristicResult] = {}
detail_info: dict[str, JsonType] = {}
Expand Down Expand Up @@ -277,7 +305,11 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:

# Download the PyPI package JSON, but no need to persist it to the filesystem.
if pypi_package_json.download(dest=""):
result, detail_info = self.run_heuristics(pypi_package_json)
try:
result, detail_info = self.run_heuristics(pypi_package_json)
except HeuristicAnalyzerValueError:
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)

result_combo: tuple = tuple(result.values())
confidence: float | None = SUSPICIOUS_COMBO.get(result_combo, None)
result_type = CheckResultType.FAILED
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

// The policy holds for a component when the check with ID
// "mcn_detect_malicious_metadata_1" passed for that component.
Policy("check-ajax-requester", component_id, "Check ajax-requester artifacts") :-
    check_passed(component_id, "mcn_detect_malicious_metadata_1").

// Apply the policy only to components whose PURL matches the ajax-requester PyPI package.
// NOTE(review): the first argument of match is presumably a pattern that must match the
// whole PURL, so a versioned PURL may not be selected - confirm against the prelude.
apply_policy_to("check-ajax-requester", component_id) :-
    is_component(component_id, purl),
    match("pkg:pypi/ajax-requester", purl).
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing the metadata of a package that has unreachable project links, frequent releases, a
maintainer who released it shortly after account registration, and a suspicious setup.py file,
but for which a wheel file is present. Macaron should report a pass for such a package.

tags:
- macaron-python-package

steps:
- name: Run macaron analyze against ajax-requester
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/ajax-requester
- name: Run macaron verify-policy to check the results
kind: verify
options:
policy: policy.dl
Loading
Loading