From 65f932578133738ab7500ae99dd9af437da563b7 Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Tue, 14 Jan 2025 10:02:21 +1000 Subject: [PATCH] feat: add in new metadata-based heuristic to pypi malware analyzer (#944) new heuristic with a dependency on a single-release to investigate the version number and determine if it is anomalous, defined as the major and/or epoch being above a threshold value. --- src/macaron/config/defaults.ini | 10 +- .../pypi_heuristics/heuristics.py | 7 +- .../metadata/anomalous_version.py | 257 +++++++++++++++ .../checks/detect_malicious_metadata_check.py | 70 ++++ .../pypi/test_anomalous_version.py | 299 ++++++++++++++++++ 5 files changed, 640 insertions(+), 3 deletions(-) create mode 100644 src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py create mode 100644 tests/malware_analyzer/pypi/test_anomalous_version.py diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index a6b13a80c..f895c20aa 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -584,5 +584,13 @@ include = * [heuristic.pypi] releases_frequency_threshold = 2 # The gap threshold. -# The timedelta indicate the gap between the date maintainer registers their pypi's account and the date of latest release. +# The timedelta represents the gap between the date the maintainer registers their pypi account and the +# date of the latest release. timedelta_threshold_of_join_release = 5 + +# Any major version above this value is detected as anomalous and marked as suspicious. +major_threshold = 20 +# Any epoch number above this value is detected as anomalous and marked as suspicious. +epoch_threshold = 3 +# The number of days +/- the day of publish the calendar versioning day may be. 
+day_publish_error = 4 diff --git a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py index d3e574027..1bd724fad 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Define the heuristic enum.""" @@ -31,9 +31,12 @@ class Heuristics(str, Enum): #: Indicates that the setup.py file contains suspicious imports, such as base64 and requests. SUSPICIOUS_SETUP = "suspicious_setup" - #: Indicates that the package does not include a .whl file + #: Indicates that the package does not include a .whl file. WHEEL_ABSENCE = "wheel_absence" + #: Indicates that the package has an unusually large version number for a single release. + ANOMALOUS_VERSION = "anomalous_version" + class HeuristicResult(str, Enum): """Result type indicating the outcome of a heuristic.""" diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py new file mode 100644 index 000000000..f02c4f595 --- /dev/null +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py @@ -0,0 +1,257 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""The heuristic analyzer to check for an anomalous package version.""" + +import logging +from enum import Enum + +from packaging.version import InvalidVersion, parse + +from macaron.config.defaults import defaults +from macaron.errors import HeuristicAnalyzerValueError +from macaron.json_tools import JsonType, json_extract +from macaron.malware_analyzer.datetime_parser import parse_datetime +from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics +from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset + +logger: logging.Logger = logging.getLogger(__name__) + + +class AnomalousVersionAnalyzer(BaseHeuristicAnalyzer): + """ + Analyze the version number (if there is only a single release) to detect if it is anomalous. + + A version number is anomalous if any of its values are greater than the epoch, major, or minor threshold values. + If the version does not adhere to PyPI standards (PEP 440, as per the 'packaging' module), this heuristic + cannot analyze it. + + Calendar versioning is detected as version numbers with the year, month and day present in the following combinations: + (using the example 11th October 2016) + - YYYY.MM.DD, e.g. 2016.10.11 + - YYYY.DD.MM, e.g. 2016.11.10 + - YY.DD.MM, e.g. 16.11.10 + - YY.MM.DD, e.g. 16.10.11 + - MM.DD.YYYY, e.g. 10.11.2016 + - DD.MM.YYYY, e.g. 11.10.2016 + - DD.MM.YY, e.g. 11.10.16 + - MM.DD.YY, e.g. 10.11.16 + - YYYYMMDD, e.g. 20161011 + - YYYYDDMM, e.g. 20161110 + - YYDDMM, e.g. 161110 + - YYMMDD, e.g. 161011 + - MMDDYYYY, e.g. 10112016 + - DDMMYYYY, e.g. 11102016 + - DDMMYY, e.g. 111016 + - MMDDYY, e.g. 101116 + This may be followed by further versioning (e.g. 2016.10.11.5.6.2). This type of versioning is detected based on the + date of the upload time for the release within a threshold of a number of days (in the defaults file). 
+ + Calendar-semantic versioning is detected as version numbers with the major value as the year (either yyyy or yy), + and any other series of numbers following it: + - 2016.7.1 would be version 7.1 of 2016 + - 16.1.4 would be version 1.4 of 2016 + This type of versioning is detected based on the exact year of the upload time for the release. + + All other versionings are detected as semantic versioning. + """ + + DETAIL_INFO_KEY: str = "versioning" + DIGIT_DATE_FORMATS: list[str] = ["%Y%m%d", "%Y%d%m", "%d%m%Y", "%m%d%Y", "%y%m%d", "%y%d%m", "%d%m%y", "%m%d%y"] + + def __init__(self) -> None: + super().__init__( + name="anomalous_version_analyzer", + heuristic=Heuristics.ANOMALOUS_VERSION, + depends_on=[(Heuristics.ONE_RELEASE, HeuristicResult.FAIL)], + ) + self.major_threshold, self.epoch_threshold, self.day_publish_error = self._load_defaults() + + def _load_defaults(self) -> tuple[int, int, int]: + """Load default settings from defaults.ini. + + Returns + ------- + tuple[int, int, int]: + The Major threshold, Epoch threshold, and Day published error. + """ + section_name = "heuristic.pypi" + if defaults.has_section(section_name): + section = defaults[section_name] + return ( + section.getint("major_threshold"), + section.getint("epoch_threshold"), + section.getint("day_publish_error"), + ) + return 20, 3, 4 + + def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: + """Analyze the package. + + Parameters + ---------- + pypi_package_json: PyPIPackageJsonAsset + The PyPI package JSON asset object. + + Returns + ------- + tuple[HeuristicResult, dict[str, JsonType]]: + The result and related information collected during the analysis. + + Raises + ------ + HeuristicAnalyzerValueError + if there is no release information available. + """ + releases = pypi_package_json.get_releases() + if releases is None: # no release information + error_msg = "There is no information for any release of this package." 
+ logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + if len(releases) != 1: + error_msg = ( + "This heuristic depends on a single release, but somehow there are multiple when the one release" + + " heuristic failed." + ) + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + # Since there is only one release, the latest version should be that release + release = pypi_package_json.get_latest_version() + if release is None: + error_msg = "No latest version information available" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + try: + release_metadata = releases[release] + except KeyError as release_error: + error_msg = "The latest release is not available in the list of releases" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) from release_error + + try: + version = parse(release) + except InvalidVersion: + return HeuristicResult.SKIP, {self.DETAIL_INFO_KEY: Versioning.INVALID.value} + + years = [] + months = [] + publish_days = [] + + for distribution in release_metadata: + upload_time = json_extract(distribution, ["upload_time"], str) + if upload_time is None: + error_msg = "Missing upload time from release information" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + parsed_time = parse_datetime(upload_time) + if parsed_time is None: + error_msg = "Upload time is not of the expected PyPI format" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + years.append(parsed_time.year) + years.append(parsed_time.year % 100) # last 2 digits + months.append(parsed_time.month) + publish_days.append(parsed_time.day) + + days = list(range(min(publish_days) - self.day_publish_error, max(publish_days) + self.day_publish_error + 1)) + + calendar = False + calendar_semantic = False + + # check for year YY[YY]... + if version.major in years: + # calendar versioning: YY[YY].(M[M].D[D])(D[D].M[M])... 
+ if (version.minor in months and version.micro in days) or ( + version.minor in days and version.micro in months + ): + calendar = True + else: + calendar_semantic = True + # check for calendar versioning: M[M].D[D].YY[YY]... or D[D].M[M].YY[YY]... or the whole digit representing a datetime + elif ( + ((version.major in months and version.minor in days) or (version.major in days and version.minor in months)) + and version.micro in years + ) or self._integer_date(version.major, years, months, days): + # must include day and year for this to be calendar + calendar = True + + if calendar: # just check epoch + detail_info: dict[str, JsonType] = {self.DETAIL_INFO_KEY: Versioning.CALENDAR.value} + if version.epoch > self.epoch_threshold: + return HeuristicResult.FAIL, detail_info + + return HeuristicResult.PASS, detail_info + + if calendar_semantic: # check minor (as major) and epoch + detail_info = {self.DETAIL_INFO_KEY: Versioning.CALENDAR_SEMANTIC.value} + + if version.epoch > self.epoch_threshold: + return HeuristicResult.FAIL, detail_info + if version.minor > self.major_threshold: + return HeuristicResult.FAIL, detail_info + + return HeuristicResult.PASS, detail_info + + # semantic versioning + detail_info = {self.DETAIL_INFO_KEY: Versioning.SEMANTIC.value} + + if version.epoch > self.epoch_threshold: + return HeuristicResult.FAIL, detail_info + if version.major > self.major_threshold: + return HeuristicResult.FAIL, detail_info + + return HeuristicResult.PASS, detail_info + + def _integer_date(self, value: int, years: list[int], months: list[int], days: list[int]) -> bool: + """Check whether the provided integer represents a date. + + Valid representations are: + - YYYYMMDD + - YYYYDDMM + - YYDDMM + - YYMMDD + - MMDDYYYY + - DDMMYYYY + - DDMMYY + - MMDDYY + + Parameters + ---------- + value: int + The integer to check. + years: list[int] + A list of integers representing valid years for components of value to represent. 
+ months: list[int] + A list of integers representing valid months for components of value to represent. + days: list[int] + A list of integers representing valid days for components of value to represent. + + Returns + ------- + bool: + True if the integer may represent a date present in the list of valid years, months and days. + False otherwise. + """ + for date_format in self.DIGIT_DATE_FORMATS: + if (date := parse_datetime(str(value), date_format)) is None: + continue + + if date.year in years and date.month in months and date.day in days: + return True + + return False + + +class Versioning(Enum): + """Enum used to assign different versioning methods.""" + + INVALID = "invalid" + CALENDAR = "calendar" + CALENDAR_SEMANTIC = "calendar_semantic" + SEMANTIC = "semantic" diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index f9c75d64f..0e2fe0039 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -15,6 +15,7 @@ from macaron.json_tools import JsonType, json_extract from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics +from macaron.malware_analyzer.pypi_heuristics.metadata.anomalous_version import AnomalousVersionAnalyzer from macaron.malware_analyzer.pypi_heuristics.metadata.closer_release_join_date import CloserReleaseJoinDateAnalyzer from macaron.malware_analyzer.pypi_heuristics.metadata.empty_project_link import EmptyProjectLinkAnalyzer from macaron.malware_analyzer.pypi_heuristics.metadata.high_release_frequency import HighReleaseFrequencyAnalyzer @@ -75,6 +76,7 @@ class MaliciousMetadataFacts(CheckFacts): CloserReleaseJoinDateAnalyzer, SuspiciousSetupAnalyzer, WheelAbsenceAnalyzer, + AnomalousVersionAnalyzer, ] @@ -89,6 +91,7 @@ 
class MaliciousMetadataFacts(CheckFacts): HeuristicResult, HeuristicResult, HeuristicResult, + HeuristicResult, ], float, ] = { @@ -101,9 +104,26 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup HeuristicResult.FAIL, # Wheel Absence + HeuristicResult.FAIL, # Anomalous Version # No project link, only one release, and the maintainer released it shortly # after account registration. # The setup.py file contains suspicious imports and .whl file isn't present. + # Anomalous version has no effect. + ): Confidence.HIGH, + ( + HeuristicResult.FAIL, # Empty Project + HeuristicResult.SKIP, # Unreachable Project Links + HeuristicResult.FAIL, # One Release + HeuristicResult.SKIP, # High Release Frequency + HeuristicResult.SKIP, # Unchanged Release + HeuristicResult.FAIL, # Closer Release Join Date + HeuristicResult.FAIL, # Suspicious Setup + HeuristicResult.FAIL, # Wheel Absence + HeuristicResult.PASS, # Anomalous Version + # No project link, only one release, and the maintainer released it shortly + # after account registration. + # The setup.py file contains suspicious imports and .whl file isn't present. + # Anomalous version has no effect. ): Confidence.HIGH, ( HeuristicResult.FAIL, # Empty Project @@ -114,6 +134,7 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup HeuristicResult.FAIL, # Wheel Absence + HeuristicResult.SKIP, # Anomalous Version # No project link, frequent releases of multiple versions without modifying the content, # and the maintainer released it shortly after account registration. # The setup.py file contains suspicious imports and .whl file isn't present. 
@@ -127,6 +148,7 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup HeuristicResult.FAIL, # Wheel Absence + HeuristicResult.SKIP, # Anomalous Version # No project link, frequent releases of multiple versions, # and the maintainer released it shortly after account registration. # The setup.py file contains suspicious imports and .whl file isn't present. @@ -140,6 +162,7 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.PASS, # Suspicious Setup HeuristicResult.PASS, # Wheel Absence + HeuristicResult.SKIP, # Anomalous Version # No project link, frequent releases of multiple versions without modifying the content, # and the maintainer released it shortly after account registration. Presence/Absence of # .whl file has no effect @@ -153,6 +176,7 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.PASS, # Suspicious Setup HeuristicResult.FAIL, # Wheel Absence + HeuristicResult.SKIP, # Anomalous Version # No project link, frequent releases of multiple versions without modifying the content, # and the maintainer released it shortly after account registration. Presence/Absence of # .whl file has no effect @@ -166,10 +190,56 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup HeuristicResult.FAIL, # Wheel Absence + HeuristicResult.SKIP, # Anomalous Version # All project links are unreachable, frequent releases of multiple versions, # and the maintainer released it shortly after account registration. # The setup.py file contains suspicious imports and .whl file isn't present. 
): Confidence.HIGH, + ( + HeuristicResult.FAIL, # Empty Project + HeuristicResult.SKIP, # Unreachable Project Links + HeuristicResult.FAIL, # One Release + HeuristicResult.SKIP, # High Release Frequency + HeuristicResult.SKIP, # Unchanged Release + HeuristicResult.FAIL, # Closer Release Join Date + HeuristicResult.PASS, # Suspicious Setup + HeuristicResult.PASS, # Wheel Absence + HeuristicResult.FAIL, # Anomalous Version + # No project link, only one release, and the maintainer released it shortly + # after account registration. + # The setup.py file has no effect and .whl file is present. + # The version number is anomalous. + ): Confidence.MEDIUM, + ( + HeuristicResult.FAIL, # Empty Project + HeuristicResult.SKIP, # Unreachable Project Links + HeuristicResult.FAIL, # One Release + HeuristicResult.SKIP, # High Release Frequency + HeuristicResult.SKIP, # Unchanged Release + HeuristicResult.FAIL, # Closer Release Join Date + HeuristicResult.FAIL, # Suspicious Setup + HeuristicResult.PASS, # Wheel Absence + HeuristicResult.FAIL, # Anomalous Version + # No project link, only one release, and the maintainer released it shortly + # after account registration. + # The setup.py file has no effect and .whl file is present. + # The version number is anomalous. + ): Confidence.MEDIUM, + ( + HeuristicResult.FAIL, # Empty Project + HeuristicResult.SKIP, # Unreachable Project Links + HeuristicResult.FAIL, # One Release + HeuristicResult.SKIP, # High Release Frequency + HeuristicResult.SKIP, # Unchanged Release + HeuristicResult.FAIL, # Closer Release Join Date + HeuristicResult.SKIP, # Suspicious Setup + HeuristicResult.PASS, # Wheel Absence + HeuristicResult.FAIL, # Anomalous Version + # No project link, only one release, and the maintainer released it shortly + # after account registration. + # The setup.py file has no effect and .whl file is present. + # The version number is anomalous. 
+ ): Confidence.MEDIUM, } diff --git a/tests/malware_analyzer/pypi/test_anomalous_version.py b/tests/malware_analyzer/pypi/test_anomalous_version.py new file mode 100644 index 000000000..3edd73d0c --- /dev/null +++ b/tests/malware_analyzer/pypi/test_anomalous_version.py @@ -0,0 +1,299 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for heuristic detecting anomalous version numbers""" +from unittest.mock import MagicMock + +import pytest + +from macaron.errors import HeuristicAnalyzerValueError +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult +from macaron.malware_analyzer.pypi_heuristics.metadata.anomalous_version import AnomalousVersionAnalyzer, Versioning + + +def test_analyze_no_information(pypi_package_json: MagicMock) -> None: + """Test for when there is no release information, so error""" + analyzer = AnomalousVersionAnalyzer() + + pypi_package_json.get_releases.return_value = None + + with pytest.raises(HeuristicAnalyzerValueError): + analyzer.analyze(pypi_package_json) + + +def test_analyze_invalid_time(pypi_package_json: MagicMock) -> None: + """Test for when the supplied upload time does not conform with PEP 440, so error.""" + analyzer = AnomalousVersionAnalyzer() + version = "1.1" + release = { + version: [ + { + "comment_text": "", + "digests": { + "blake2b_256": "defa2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3", + "md5": "9203bbb130f8ddb38269f4861c170d04", + "sha256": "168bcccbf5106132e90b85659297700194369b8f6b3e5a03769614f0d200e370", + }, + "downloads": -1, + "filename": "ttttttttest_nester.py-0.1.0.tar.gz", + "has_sig": False, + "md5_digest": "9203bbb130f8ddb38269f4861c170d04", + "packagetype": "sdist", + "python_version": "source", + "requires_python": None, + "size": 546, + "upload_time": "September 2 2022 5:42pm 27s", + "upload_time_iso_8601": 
"2022-09-02T05:42:27.073842Z", + "url": "https://files.pythonhosted.org/packages/de/fa/" + + "2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3/ttttttttest_nester.py-0.1.0.tar.gz", + "yanked": False, + "yanked_reason": None, + } + ] + } + + pypi_package_json.get_releases.return_value = release + pypi_package_json.get_latest_version.return_value = version + + with pytest.raises(HeuristicAnalyzerValueError): + analyzer.analyze(pypi_package_json) + + +def test_analyze_no_time(pypi_package_json: MagicMock) -> None: + """Test for when there is no supplied upload time, so error.""" + analyzer = AnomalousVersionAnalyzer() + version = "1.1" + release = { + version: [ + { + "comment_text": "", + "digests": { + "blake2b_256": "defa2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3", + "md5": "9203bbb130f8ddb38269f4861c170d04", + "sha256": "168bcccbf5106132e90b85659297700194369b8f6b3e5a03769614f0d200e370", + }, + "downloads": -1, + "filename": "ttttttttest_nester.py-0.1.0.tar.gz", + "has_sig": False, + "md5_digest": "9203bbb130f8ddb38269f4861c170d04", + "packagetype": "sdist", + "python_version": "source", + "requires_python": None, + "size": 546, + "url": "https://files.pythonhosted.org/packages/de/fa/" + + "2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3/ttttttttest_nester.py-0.1.0.tar.gz", + "yanked": False, + "yanked_reason": None, + } + ] + } + + pypi_package_json.get_releases.return_value = release + pypi_package_json.get_latest_version.return_value = version + + with pytest.raises(HeuristicAnalyzerValueError): + analyzer.analyze(pypi_package_json) + + +@pytest.mark.parametrize( + ("version", "upload_date", "result", "versioning"), + [ + pytest.param( + "2016-10-13", "2016-10-13", HeuristicResult.SKIP, Versioning.INVALID.value, id="test_invalid_version" + ), + pytest.param( + "2016.10.11", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_YYYY.MM.DD_pass", + ), + pytest.param( + "2016.12.10", + 
"2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_YYYY.DD.MM_pass", + ), + pytest.param( + "16.10.13", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_YY.DD.MM_pass" + ), + pytest.param( + "16.14.10", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_YY.MM.DD_pass" + ), + pytest.param( + "10.10.2016", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_MM.DD.YYYY_pass", + ), + pytest.param( + "9.10.2016", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_DD.MM.YYYY_pass", + ), + pytest.param( + "10.15.16", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_DD.MM.YY_pass" + ), + pytest.param( + "16.10.16", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_MM.DD.YY_pass" + ), + pytest.param( + "20161011.0", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_YYYYMMDD_pass", + ), + pytest.param( + "20161210.6.1", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_YYYYDDMM_pass", + ), + pytest.param( + "161013.9.0.5", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_YYDDMM_pass", + ), + pytest.param( + "161410.2.5.7", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_YYMMDD_pass", + ), + pytest.param( + "10102016.0", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_MMDDYYYY_pass", + ), + pytest.param( + "09102016", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_DDMMYYYY_pass", + ), + pytest.param( + "101516.5.7", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_DDMMYY_pass" + ), + pytest.param( + "161016.0.0.0.0", + "2016-10-13", + HeuristicResult.PASS, + 
Versioning.CALENDAR.value, + id="test_calendar_MMDDYY_pass", + ), + pytest.param( + "2!16.10.17.2.5.3", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR.value, + id="test_calendar_epoch_pass", + ), + pytest.param( + "100!2016.10.14", + "2016-10-13", + HeuristicResult.FAIL, + Versioning.CALENDAR.value, + id="test_calendar_epoch_fail", + ), + pytest.param( + "2016.7.2", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR_SEMANTIC.value, + id="test_calendar_semantic_pass", + ), + pytest.param( + "2016.100.0", + "2016-10-13", + HeuristicResult.FAIL, + Versioning.CALENDAR_SEMANTIC.value, + id="test_calendar_semantic_fail", + ), + pytest.param( + "2!2016.1.5.6", + "2016-10-13", + HeuristicResult.PASS, + Versioning.CALENDAR_SEMANTIC.value, + id="test_calendar_semantic_epoch_pass", + ), + pytest.param( + "100!2016.1", + "2016-10-13", + HeuristicResult.FAIL, + Versioning.CALENDAR_SEMANTIC.value, + id="test_calendar_semantic_epoch_fail", + ), + pytest.param("3.1", "2016-10-13", HeuristicResult.PASS, Versioning.SEMANTIC.value, id="test_semantic_pass"), + pytest.param("999", "2016-10-13", HeuristicResult.FAIL, Versioning.SEMANTIC.value, id="test_semantic_fail"), + pytest.param( + "3!0.1.9999", "2016-10-13", HeuristicResult.PASS, Versioning.SEMANTIC.value, id="test_semantic_epoch_pass" + ), + pytest.param( + "999!0.0.0", "2016-10-13", HeuristicResult.FAIL, Versioning.SEMANTIC.value, id="test_semantic_epoch_fail" + ), + ], +) +def test_analyze( + pypi_package_json: MagicMock, version: str, upload_date: str, result: HeuristicResult, versioning: str +) -> None: + """ + Generic test for the expected return value of the anomalous version heuristic. + + Parameters + ---------- + version : str + the version number for the test package. + upload_date : str + the date of when the test package was uploaded. + result : HeuristicResult + the expected result the heuristic should arrive at. 
+ versioning : str + which versioning system the heuristic should have identified. + """ + analyzer = AnomalousVersionAnalyzer() + release = { + version: [ + { + "comment_text": "", + "digests": { + "blake2b_256": "defa2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3", + "md5": "9203bbb130f8ddb38269f4861c170d04", + "sha256": "168bcccbf5106132e90b85659297700194369b8f6b3e5a03769614f0d200e370", + }, + "downloads": -1, + "filename": "ttttttttest_nester.py-0.1.0.tar.gz", + "has_sig": False, + "md5_digest": "9203bbb130f8ddb38269f4861c170d04", + "packagetype": "sdist", + "python_version": "source", + "requires_python": None, + "size": 546, + "upload_time": f"{upload_date}T05:42:27", + "upload_time_iso_8601": f"{upload_date}T05:42:27.073842Z", + "url": "https://files.pythonhosted.org/packages/de/fa/" + + "2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3/ttttttttest_nester.py-0.1.0.tar.gz", + "yanked": False, + "yanked_reason": None, + } + ] + } + + pypi_package_json.get_releases.return_value = release + pypi_package_json.get_latest_version.return_value = version + expected_result: tuple[HeuristicResult, dict] = (result, {AnomalousVersionAnalyzer.DETAIL_INFO_KEY: versioning}) + + actual_result = analyzer.analyze(pypi_package_json) + + assert actual_result == expected_result