Skip to content

Commit 688af68

Browse files
committed
feat: report known malware for all ecosystems
Signed-off-by: behnazh-w <[email protected]>
1 parent b65f0db commit 688af68

File tree

8 files changed

+180
-9
lines changed

8 files changed

+180
-9
lines changed

docs/source/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ Macaron checks that report integrity issues but do not map to SLSA requirements
107107
* - Check ID
108108
- Description
109109
* - ``mcn_detect_malicious_metadata_1``
110-
- This check analyzes the metadata of a package and reports malicious behavior. This check currently supports PyPI packages.
110+
- This check performs analysis on PyPI package metadata to detect malicious behavior. It also reports known malware from other ecosystems, but the analysis is currently limited to PyPI packages.
111111

112112
----------------------
113113
How does Macaron work?

docs/source/pages/tutorials/detect_malicious_package.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@ In this tutorial we show how to use Macaron to find malicious packages. Imagine
1313
:widths: 25
1414
:header-rows: 1
1515

16-
* - Supported packages
16+
* - Supported packages for analysis
1717
* - Python packages (PyPI)
1818

19+
Note that known malware is reported for packages across all ecosystems.
20+
1921
.. contents:: :local:
2022

2123

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55

66
import logging
77

8-
from sqlalchemy import ForeignKey
8+
import requests
9+
from sqlalchemy import ForeignKey, String
910
from sqlalchemy.orm import Mapped, mapped_column
1011

1112
from macaron.database.db_custom_types import DBJsonDict
1213
from macaron.database.table_definitions import CheckFacts
13-
from macaron.json_tools import JsonType
14+
from macaron.json_tools import JsonType, json_extract
1415
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
1516
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
1617
from macaron.malware_analyzer.pypi_heuristics.metadata.closer_release_join_date import CloserReleaseJoinDateAnalyzer
@@ -28,6 +29,7 @@
2829
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
2930
from macaron.slsa_analyzer.registry import registry
3031
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
32+
from macaron.util import send_post_http_raw
3133

3234
logger: logging.Logger = logging.getLogger(__name__)
3335

@@ -40,13 +42,16 @@ class MaliciousMetadataFacts(CheckFacts):
4042
#: The primary key.
4143
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003
4244

45+
#: Known malware.
46+
known_malware: Mapped[str | None] = mapped_column(
47+
String, nullable=False, info={"justification": JustificationType.HREF}
48+
)
49+
4350
#: Detailed information about the analysis.
4451
detail_information: Mapped[dict[str, JsonType]] = mapped_column(DBJsonDict, nullable=False)
4552

46-
#: The result of analysis, which is of dict[Heuristics, HeuristicResult] type.
47-
result: Mapped[dict[Heuristics, HeuristicResult]] = mapped_column(
48-
DBJsonDict, nullable=False, info={"justification": JustificationType.TEXT}
49-
)
53+
#: The result of analysis, which can be an empty dictionary.
54+
result: Mapped[dict] = mapped_column(DBJsonDict, nullable=False, info={"justification": JustificationType.TEXT})
5055

5156
__mapper_args__ = {
5257
"polymorphic_identity": "_detect_malicious_metadata_check",
@@ -223,14 +228,43 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
223228
CheckResultData
224229
The result of the check.
225230
"""
231+
result_tables: list[CheckFacts] = []
232+
# First check whether this package is known malware.
233+
234+
url = "https://api.osv.dev/v1/query"
235+
data = {"package": {"purl": ctx.component.purl}}
236+
response = send_post_http_raw(url, json_data=data, headers=None)
237+
res_obj = None
238+
if response:
239+
try:
240+
res_obj = response.json()
241+
except requests.exceptions.JSONDecodeError as error:
242+
logger.debug("Unable to get a valid response from %s: %s", url, error)
243+
if res_obj:
244+
for vuln in res_obj.get("vulns", {}):
245+
v_id = json_extract(vuln, ["id"], str)
246+
if v_id and v_id.startswith("MAL-"):
247+
result_tables.append(
248+
MaliciousMetadataFacts(
249+
known_malware=f"https://osv.dev/vulnerability/{v_id}",
250+
result={},
251+
detail_information=vuln,
252+
confidence=Confidence.HIGH,
253+
)
254+
)
255+
if result_tables:
256+
return CheckResultData(
257+
result_tables=result_tables,
258+
result_type=CheckResultType.FAILED,
259+
)
260+
226261
package_registry_info_entries = ctx.dynamic_data["package_registries"]
227262
for package_registry_info_entry in package_registry_info_entries:
228263
match package_registry_info_entry:
229264
case PackageRegistryInfo(
230265
build_tool=Pip() | Poetry(),
231266
package_registry=PyPIRegistry() as pypi_registry,
232267
) as pypi_registry_info:
233-
result_tables: list[CheckFacts] = []
234268

235269
# Create an AssetLocator object for the PyPI package JSON object.
236270
pypi_package_json = PyPIPackageJsonAsset(

src/macaron/util.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,79 @@ def send_get_http_raw(
125125
return response
126126

127127

128+
def send_post_http_raw(
    url: str,
    json_data: dict | None = None,
    headers: dict | None = None,
    timeout: int | None = None,
    allow_redirects: bool = True,
) -> Response | None:
    """Send a POST HTTP request with the given url, data, and headers.

    This method also handles logging when the API server returns an error status code.

    Parameters
    ----------
    url : str
        The url of the request.
    json_data: dict | None
        The request payload, sent as the JSON body of the request.
    headers : dict | None
        The dict that describes the headers of the request.
    timeout: int | None
        The request timeout (optional). When not set, the ``requests.timeout`` default is used.
    allow_redirects: bool
        Whether to allow redirects. Default: True.

    Returns
    -------
    Response | None
        If a Response object is returned and ``allow_redirects`` is ``True`` (the default) it will have a status code of
        200 (OK). If ``allow_redirects`` is ``False`` the response can instead have a status code of 302. Otherwise, the
        request has failed and ``None`` will be returned.
    """
    logger.debug("POST - %s", url)
    if not timeout:
        timeout = defaults.getint("requests", "timeout", fallback=10)
    error_retries = defaults.getint("requests", "error_retries", fallback=5)
    retry_counter = error_retries

    while True:
        # Every attempt, including retries, must be a POST carrying the same payload.
        # Each attempt is guarded so that a connection failure on a retry results in
        # ``None`` being returned rather than an exception escaping to the caller.
        try:
            response = requests.post(
                url=url,
                json=json_data,
                headers=headers,
                timeout=timeout,
                allow_redirects=allow_redirects,
            )
        except requests.exceptions.RequestException as error:
            logger.debug(error)
            return None

        if response.status_code == 200:
            return response

        if not allow_redirects and response.status_code == 302:
            # Found, most likely because a redirect is about to happen.
            return response

        logger.debug(
            "Receiving error code %s from server.",
            response.status_code,
        )
        if retry_counter <= 0:
            logger.debug("Maximum retries reached: %s", error_retries)
            return None

        # Only a 403 is retried: check_rate_limit inspects the response and waits
        # as needed before the next attempt. Any other error status is final.
        if response.status_code != 403:
            return None
        check_rate_limit(response)
        retry_counter -= 1
199+
200+
128201
def check_rate_limit(response: Response) -> None:
129202
"""Check the remaining calls limit to GitHub API and wait accordingly.
130203
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
#include "prelude.dl"
5+
6+
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
7+
check_failed(component_id, "mcn_detect_malicious_metadata_1").
8+
9+
apply_policy_to("check-malicious-package", component_id) :-
10+
is_component(component_id, "pkg:npm/tautoak4-hello-world").
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
description: |
5+
Analyzing a known malicious package.
6+
7+
tags:
8+
- macaron-python-package
9+
- macaron-docker-image
10+
11+
steps:
12+
- name: Run macaron analyze
13+
kind: analyze
14+
options:
15+
command_args:
16+
- -purl
17+
- pkg:npm/tautoak4-hello-world
18+
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
19+
kind: verify
20+
options:
21+
policy: policy.dl
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
#include "prelude.dl"
5+
6+
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
7+
check_failed(component_id, "mcn_detect_malicious_metadata_1").
8+
9+
apply_policy_to("check-malicious-package", component_id) :-
10+
is_component(component_id, "pkg:pypi/type-extension").
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
description: |
5+
Analyzing a known malicious package.
6+
7+
tags:
8+
- macaron-python-package
9+
- macaron-docker-image
10+
11+
steps:
12+
- name: Run macaron analyze
13+
kind: analyze
14+
options:
15+
command_args:
16+
- -purl
17+
- pkg:pypi/type-extension
18+
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
19+
kind: verify
20+
options:
21+
policy: policy.dl

0 commit comments

Comments
 (0)