Skip to content

Commit

Permalink
feat: added in code and tests for a new wheel file presence heuristic…
Browse files Browse the repository at this point in the history
… in the PyPI malware analyzer, which checks whether a wheel file is available for the package.
  • Loading branch information
art1f1c3R committed Nov 27, 2024
1 parent 7cfc839 commit a5dfaf1
Show file tree
Hide file tree
Showing 6 changed files with 305 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class PackageURLMixin:
name: Mapped[str] = mapped_column(String(100), nullable=False, comment="Name of the package.")

#: Version of the package.
version: Mapped[str] = mapped_column(String(100), nullable=True, comment="Version of the package.")
version: Mapped[str] | None = mapped_column(String(100), nullable=True, comment="Version of the package.")

#: Extra qualifying data for a package such as the name of an OS.
qualifiers: Mapped[str] = mapped_column(
Expand Down
4 changes: 4 additions & 0 deletions src/macaron/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,7 @@ class CycloneDXParserError(MacaronError):

class DependencyAnalyzerError(MacaronError):
"""The DependencyAnalyzer error class."""


class HeuristicAnalyzerValueError(MacaronError):
    """Error class for BaseHeuristicAnalyzer errors when parsing data.

    Raised by a heuristic analyzer when the package information it needs is
    missing or malformed, e.g. when no release information or no latest
    version is available for the package.
    """
3 changes: 3 additions & 0 deletions src/macaron/malware_analyzer/pypi_heuristics/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class Heuristics(str, Enum):
#: Indicates that the setup.py file contains suspicious imports, such as base64 and requests.
SUSPICIOUS_SETUP = "suspicious_setup"

#: Indicates that the package does not include a .whl file
WHEEL_PRESENCE = "wheel_presence"


class HeuristicResult(str, Enum):
"""Result type indicating the outcome of a heuristic."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""The heuristic analyzer to check .whl file presence."""

import logging

from macaron.errors import HeuristicAnalyzerValueError
from macaron.json_tools import JsonType
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset

logger: logging.Logger = logging.getLogger(__name__)


class WheelPresenceAnalyzer(BaseHeuristicAnalyzer):
    """Check whether a wheel (.whl) distribution is published for the analyzed release.

    The heuristic passes when at least one file of the release has the package type
    ``bdist_wheel``. Otherwise, the heuristic fails.
    """

    #: The ``packagetype`` value that marks a release file as a wheel.
    WHEEL: str = "bdist_wheel"

    def __init__(self) -> None:
        super().__init__(
            name="download_file_presence_analyzer",
            heuristic=Heuristics.WHEEL_PRESENCE,
            depends_on=None,
        )

    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
        """Analyze the package.

        Parameters
        ----------
        pypi_package_json: PyPIPackageJsonAsset
            The PyPI package JSON asset object.

        Returns
        -------
        tuple[HeuristicResult, dict[str, JsonType]]:
            The result and related information collected during the analysis. The
            information maps the analyzed version to the list of its release file names.

        Raises
        ------
        HeuristicAnalyzerValueError
            If there is no release information, if no version is specified and there is no
            latest version, if the analyzed version has no entry in the release information,
            or if an entry for that version is malformed.
        """
        releases = pypi_package_json.get_releases()
        if releases is None:  # no release information
            raise HeuristicAnalyzerValueError("There is no information for any release of this package.")

        version = pypi_package_json.component.version
        if version is None:  # fall back to the latest release version
            version = pypi_package_json.get_latest_version()
            if version is None:
                raise HeuristicAnalyzerValueError("There is no latest version of this package.")

        # A version that is absent from the release map must surface as the analyzer's own
        # error type, so that callers handling HeuristicAnalyzerValueError do not crash on
        # a bare KeyError.
        try:
            distributions = releases[version]
        except KeyError as error:
            raise HeuristicAnalyzerValueError(
                f"There is no release information for version {version} of this package."
            ) from error

        if not isinstance(distributions, list):
            raise HeuristicAnalyzerValueError(
                f"The release information for version {version} of this package is malformed."
            )

        release_files: list[JsonType] = []
        wheel_present: bool = False

        for release_metadata in distributions:
            try:
                package_type = release_metadata["packagetype"]
                filename = release_metadata["filename"]
            except (KeyError, TypeError, IndexError) as error:
                raise HeuristicAnalyzerValueError(
                    f"A release file entry for version {version} of this package is malformed."
                ) from error

            if package_type == self.WHEEL:
                wheel_present = True

            release_files.append(filename)

        result = HeuristicResult.PASS if wheel_present else HeuristicResult.FAIL
        return result, {version: release_files}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.errors import HeuristicAnalyzerValueError
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
Expand All @@ -20,6 +21,7 @@
from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links import UnreachableProjectLinksAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_presence import WheelPresenceAnalyzer
from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.build_tool.pip import Pip
Expand Down Expand Up @@ -70,6 +72,7 @@ class MaliciousMetadataFacts(CheckFacts):
UnchangedReleaseAnalyzer,
CloserReleaseJoinDateAnalyzer,
SuspiciousSetupAnalyzer,
WheelPresenceAnalyzer,
]

# The HeuristicResult sequence is aligned with the sequence of ANALYZERS list
Expand All @@ -82,6 +85,7 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult,
HeuristicResult,
HeuristicResult,
HeuristicResult,
],
float,
] = {
Expand All @@ -93,9 +97,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.SKIP, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Presence
# No project link, only one release, and the maintainer released it shortly
# after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -105,9 +110,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Presence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -117,9 +123,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Presence
# No project link, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -129,8 +136,23 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.PASS, # Suspicious Setup
HeuristicResult.PASS, # Wheel Presence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration.
# and the maintainer released it shortly after account registration. Presence of .whl file
# has no effect
): Confidence.MEDIUM,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.PASS, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Presence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration. Presence of .whl file
# has no effect
): Confidence.MEDIUM,
(
HeuristicResult.PASS, # Empty Project
Expand All @@ -140,9 +162,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Presence
# All project links are unreachable, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
}

Expand Down Expand Up @@ -197,6 +220,11 @@ def run_heuristics(
-------
tuple[dict[Heuristics, HeuristicResult], dict[str, JsonType]]
Containing the analysis results and relevant metadata.
Raises
------
HeuristicAnalyzerValueError
If a heuristic analysis fails due to malformed package information.
"""
results: dict[Heuristics, HeuristicResult] = {}
detail_info: dict[str, JsonType] = {}
Expand Down Expand Up @@ -277,7 +305,11 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:

# Download the PyPI package JSON, but no need to persist it to the filesystem.
if pypi_package_json.download(dest=""):
result, detail_info = self.run_heuristics(pypi_package_json)
try:
result, detail_info = self.run_heuristics(pypi_package_json)
except HeuristicAnalyzerValueError:
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)

result_combo: tuple = tuple(result.values())
confidence: float | None = SUSPICIOUS_COMBO.get(result_combo, None)
result_type = CheckResultType.FAILED
Expand Down
Loading

0 comments on commit a5dfaf1

Please sign in to comment.