From b5afe0d2032b47efa533020b0d5c33062f241688 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Fri, 22 Nov 2024 16:40:17 +1000 Subject: [PATCH] feat: report known malware for all ecosystems (#922) If a package is already known to be malicious, this PR reports it as part of the mcn_detect_malicious_metadata_1 check. Additionally, two new integration tests for known Python and npm malware have been added. Signed-off-by: behnazh-w --- docs/source/index.rst | 2 +- .../tutorials/detect_malicious_package.rst | 4 +- .../checks/detect_malicious_metadata_check.py | 44 ++++++++++- src/macaron/util.py | 74 +++++++++++++++++++ .../cases/tautoak4-hello-world/policy.dl | 10 +++ .../cases/tautoak4-hello-world/test.yaml | 21 ++++++ .../cases/type-extension/policy.dl | 10 +++ .../cases/type-extension/test.yaml | 21 ++++++ 8 files changed, 180 insertions(+), 6 deletions(-) create mode 100644 tests/integration/cases/tautoak4-hello-world/policy.dl create mode 100644 tests/integration/cases/tautoak4-hello-world/test.yaml create mode 100644 tests/integration/cases/type-extension/policy.dl create mode 100644 tests/integration/cases/type-extension/test.yaml diff --git a/docs/source/index.rst b/docs/source/index.rst index 94caf4ff0..016f6f544 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -107,7 +107,7 @@ Macaron checks that report integrity issues but do not map to SLSA requirements * - Check ID - Description * - ``mcn_detect_malicious_metadata_1`` - - This check analyzes the metadata of a package and reports malicious behavior. This check currently supports PyPI packages. + - This check performs analysis on PyPI package metadata to detect malicious behavior. It also reports known malware from other ecosystems, but the analysis is currently limited to PyPI packages. ---------------------- How does Macaron work? 
diff --git a/docs/source/pages/tutorials/detect_malicious_package.rst b/docs/source/pages/tutorials/detect_malicious_package.rst index 898c51416..22c236700 100644 --- a/docs/source/pages/tutorials/detect_malicious_package.rst +++ b/docs/source/pages/tutorials/detect_malicious_package.rst @@ -13,9 +13,11 @@ In this tutorial we show how to use Macaron to find malicious packages. Imagine :widths: 25 :header-rows: 1 - * - Supported packages + * - Supported packages for analysis * - Python packages (PyPI) +Note that known malware is reported for packages across all ecosystems. + .. contents:: :local: diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 7e387b52d..13522e38f 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -5,12 +5,13 @@ import logging -from sqlalchemy import ForeignKey +import requests +from sqlalchemy import ForeignKey, String from sqlalchemy.orm import Mapped, mapped_column from macaron.database.db_custom_types import DBJsonDict from macaron.database.table_definitions import CheckFacts -from macaron.json_tools import JsonType +from macaron.json_tools import JsonType, json_extract from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics from macaron.malware_analyzer.pypi_heuristics.metadata.closer_release_join_date import CloserReleaseJoinDateAnalyzer @@ -28,6 +29,7 @@ from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo +from macaron.util import send_post_http_raw logger: logging.Logger = logging.getLogger(__name__) @@ -40,10 +42,15 @@ class 
MaliciousMetadataFacts(CheckFacts): #: The primary key. id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 + #: Known malware. + known_malware: Mapped[str | None] = mapped_column( + String, nullable=True, info={"justification": JustificationType.HREF} + ) + #: Detailed information about the analysis. detail_information: Mapped[dict[str, JsonType]] = mapped_column(DBJsonDict, nullable=False) - #: The result of analysis, which is of dict[Heuristics, HeuristicResult] type. + #: The result of analysis, which can be an empty dictionary. result: Mapped[dict[Heuristics, HeuristicResult]] = mapped_column( DBJsonDict, nullable=False, info={"justification": JustificationType.TEXT} ) @@ -223,6 +230,36 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: CheckResultData The result of the check. """ + result_tables: list[CheckFacts] = [] + # First check if this package is known malware + + url = "https://api.osv.dev/v1/query" + data = {"package": {"purl": ctx.component.purl}} + response = send_post_http_raw(url, json_data=data, headers=None) + res_obj = None + if response: + try: + res_obj = response.json() + except requests.exceptions.JSONDecodeError as error: + logger.debug("Unable to get a valid response from %s: %s", url, error) + if res_obj: + for vuln in res_obj.get("vulns", {}): + v_id = json_extract(vuln, ["id"], str) + if v_id and v_id.startswith("MAL-"): + result_tables.append( + MaliciousMetadataFacts( + known_malware=f"https://osv.dev/vulnerability/{v_id}", + result={}, + detail_information=vuln, + confidence=Confidence.HIGH, + ) + ) + if result_tables: + return CheckResultData( + result_tables=result_tables, + result_type=CheckResultType.FAILED, + ) + package_registry_info_entries = ctx.dynamic_data["package_registries"] for package_registry_info_entry in package_registry_info_entries: match package_registry_info_entry: @@ -230,7 +267,6 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
build_tool=Pip() | Poetry(), package_registry=PyPIRegistry() as pypi_registry, ) as pypi_registry_info: - result_tables: list[CheckFacts] = [] # Create an AssetLocator object for the PyPI package JSON object. pypi_package_json = PyPIPackageJsonAsset( diff --git a/src/macaron/util.py b/src/macaron/util.py index 6a531084e..8fdc41f3e 100644 --- a/src/macaron/util.py +++ b/src/macaron/util.py @@ -125,6 +125,80 @@ def send_get_http_raw( return response +def send_post_http_raw( + url: str, + json_data: dict | None = None, + headers: dict | None = None, + timeout: int | None = None, + allow_redirects: bool = True, +) -> Response | None: + """Send a POST HTTP request with the given url, data, and headers. + + This method also handles logging when the API server returns an error status code. + + Parameters + ---------- + url : str + The url of the request. + json_data: dict | None + The request payload. + headers : dict | None + The dict that describes the headers of the request. + timeout: int | None + The request timeout (optional). + allow_redirects: bool + Whether to allow redirects. Default: True. + + Returns + ------- + Response | None + If a Response object is returned and ``allow_redirects`` is ``True`` (the default) it will have a status code of + 200 (OK). If ``allow_redirects`` is ``False`` the response can instead have a status code of 302. Otherwise, the + request has failed and ``None`` will be returned.
+ """ + logger.debug("POST - %s", url) + if not timeout: + timeout = defaults.getint("requests", "timeout", fallback=10) + error_retries = defaults.getint("requests", "error_retries", fallback=5) + retry_counter = error_retries + try: + response = requests.post( + url=url, + json=json_data, + headers=headers, + timeout=timeout, + allow_redirects=allow_redirects, + ) + except requests.exceptions.RequestException as error: + logger.debug(error) + return None + if not allow_redirects and response.status_code == 302: + # Found, most likely because a redirect is about to happen. + return response + while response.status_code != 200: + logger.debug( + "Receiving error code %s from server.", + response.status_code, + ) + if retry_counter <= 0: + logger.debug("Maximum retries reached: %s", error_retries) + return None + if response.status_code == 403: + check_rate_limit(response) + else: + return None + retry_counter = retry_counter - 1 + response = requests.post( + url=url, + json=json_data, + headers=headers, + timeout=timeout, + allow_redirects=allow_redirects, + ) + + return response + + def check_rate_limit(response: Response) -> None: """Check the remaining calls limit to GitHub API and wait accordingly. diff --git a/tests/integration/cases/tautoak4-hello-world/policy.dl b/tests/integration/cases/tautoak4-hello-world/policy.dl new file mode 100644 index 000000000..86543750b --- /dev/null +++ b/tests/integration/cases/tautoak4-hello-world/policy.dl @@ -0,0 +1,10 @@ +/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-malicious-package", component_id, "Check the malicious package.") :- + check_failed(component_id, "mcn_detect_malicious_metadata_1"). + +apply_policy_to("check-malicious-package", component_id) :- + is_component(component_id, "pkg:npm/tautoak4-hello-world"). 
diff --git a/tests/integration/cases/tautoak4-hello-world/test.yaml b/tests/integration/cases/tautoak4-hello-world/test.yaml new file mode 100644 index 000000000..636e8637c --- /dev/null +++ b/tests/integration/cases/tautoak4-hello-world/test.yaml @@ -0,0 +1,21 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing a known malicious package. + +tags: +- macaron-python-package +- macaron-docker-image + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:npm/tautoak4-hello-world +- name: Run macaron verify-policy to verify that the malicious metadata check fails. + kind: verify + options: + policy: policy.dl diff --git a/tests/integration/cases/type-extension/policy.dl b/tests/integration/cases/type-extension/policy.dl new file mode 100644 index 000000000..8699cf3b8 --- /dev/null +++ b/tests/integration/cases/type-extension/policy.dl @@ -0,0 +1,10 @@ +/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-malicious-package", component_id, "Check the malicious package.") :- + check_failed(component_id, "mcn_detect_malicious_metadata_1"). + +apply_policy_to("check-malicious-package", component_id) :- + is_component(component_id, "pkg:pypi/type-extension"). diff --git a/tests/integration/cases/type-extension/test.yaml b/tests/integration/cases/type-extension/test.yaml new file mode 100644 index 000000000..cfdcb2308 --- /dev/null +++ b/tests/integration/cases/type-extension/test.yaml @@ -0,0 +1,21 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +description: | + Analyzing a known malicious package. + +tags: +- macaron-python-package +- macaron-docker-image + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/type-extension +- name: Run macaron verify-policy to verify that the malicious metadata check fails. + kind: verify + options: + policy: policy.dl