Skip to content

Commit

Permalink
refactor parse_purl and update integration test
Browse files Browse the repository at this point in the history
  • Loading branch information
hogo6002 committed Nov 27, 2024
1 parent 686fa73 commit 04662aa
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 180 deletions.
7 changes: 5 additions & 2 deletions gcp/api/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,10 @@ def test_query_unknown_purl_invalid_semver(self):
}),
timeout=_TIMEOUT)

self.assert_results_equal({}, response.json())
self.assert_results_equal({
'code': 3,
'message': 'Invalid PURL.'
}, response.json())

def test_query_semver_no_vulns(self):
"""Test queries by SemVer with no vulnerabilities."""
Expand Down Expand Up @@ -911,7 +914,7 @@ def test_all_possible_queries(self):
}
}, {}]

pkg_version = [{'package': {'version': '0.8.5'}}, {}]
pkg_version = [{'version': '0.8.5'}, {}]

commit = [{'commit': 'd374094d8c49b6b7d288f307e11217ec5a502391'}, {}]

Expand Down
152 changes: 74 additions & 78 deletions gcp/api/poetry.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion gcp/api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ python = "^3.11"

google-cloud-ndb = "==2.3.2"
google-cloud-logging = "==3.11.3"
packageurl-python = "==0.16.0"
packaging = "==20.9"
requests = "==2.32.3"
grpcio = "==1.64.1"
Expand Down
59 changes: 31 additions & 28 deletions gcp/api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from grpc_health.v1 import health_pb2
from grpc_health.v1 import health_pb2_grpc
from grpc_reflection.v1alpha import reflection
from packageurl import PackageURL
from packaging.utils import canonicalize_version

import osv
Expand Down Expand Up @@ -382,14 +381,16 @@ def query_info(query) -> tuple[str, str | None, str | None]:
version = query.version
if query.package.purl:
try:
purl = PackageURL.from_string(query.package.purl) # can raise ValueError
purl = purl_helpers.parse_purl(query.package.purl) # can raise ValueError
if not purl:
raise ValueError('purl is invalid.')
if query.package.ecosystem or query.package.name:
raise ValueError('purl and name/ecosystem cannot both be specified')
if purl.version and query.version:
if purl[2] and query.version:
raise ValueError('purl version and version cannot both be specified')
qtype = 'purl'
ecosystem = purl_helpers.parse_purl_ecosystem(purl)
version = purl.version or version
ecosystem = purl[0]
version = purl[2] or version
except ValueError:
return 'invalid', None, None

Expand Down Expand Up @@ -724,34 +725,36 @@ def do_query(query: osv_service_v1_pb2.Query,
version = query.version

# convert purl to package names
purl = PackageURL.from_string(purl_str)
purl = purl_helpers.parse_purl(purl_str)

if purl and package_name: # Purls already include the package name
if purl_str and not purl:
context.service_context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
'name specified in a purl query',
)
if purl and ecosystem:
# Purls already include the ecosystem inside
context.service_context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
'ecosystem specified in a purl query',
)
if purl.version and version:
# version included both in purl and query
context.service_context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
'version specified in params and purl query',
'Invalid PURL.',
)

if purl:
try:
package_name = purl.name
ecosystem = purl_helpers.parse_purl_ecosystem(purl)
version = purl.version
except ValueError:
context.service_context.abort(grpc.StatusCode.INVALID_ARGUMENT,
'Invalid Package URL.')
if package_name: # Purls already include the package name
context.service_context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
'name specified in a purl query',
)
if ecosystem:
# Purls already include the ecosystem inside
context.service_context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
'ecosystem specified in a purl query',
)
if purl[2] and version:
# version included both in purl and query
context.service_context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
'version specified in params and purl query',
)

ecosystem = purl[0]
package_name = purl[1]
version = purl[2]

if ecosystem and not ecosystems.get(ecosystem):
context.service_context.abort(grpc.StatusCode.INVALID_ARGUMENT,
Expand Down Expand Up @@ -783,7 +786,7 @@ def to_response(b: osv.Bug):
elif package_name and version:
bugs = yield query_by_version(
context, package_name, ecosystem, version, to_response=to_response)
elif package_name:
elif package_name and ecosystem:
# Package specified without version.
bugs = yield query_by_package(
context, package_name, ecosystem, to_response=to_response)
Expand Down
25 changes: 0 additions & 25 deletions gcp/api/server_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,37 +15,12 @@

import unittest

from server import _match_purl
from server import should_skip_bucket
from packageurl import PackageURL


class ServerTest(unittest.TestCase):
"""Server tests."""

def test_match_purl(self):
"""Test PURL generation for PyPI."""

test_cases = [
# Version diffs are ignored
('pkg:pypi/django', 'pkg:pypi/[email protected]', True),
('pkg:deb/debian/[email protected]+deb10u3',
'pkg:deb/debian/[email protected]+deb10u4', True),
# Different packages do not match
('pkg:deb/debian/[email protected]+deb10u3',
'pkg:deb/debian/[email protected]+deb10u3', False),
('pkg:deb/debian/[email protected]+deb10u3?arch=amd64',
'pkg:deb/debian/[email protected]?arch=source', True),
('pkg:deb/debian/[email protected]+deb10u3?distro=debian-10',
'pkg:deb/debian/[email protected]?arch=source', True),
]

for a, b, expected in test_cases:
self.assertEqual(
expected,
_match_purl(PackageURL.from_string(a), PackageURL.from_string(b)),
a + ' == ' + b)

def test_should_skip_bucket(self):
"""Test should_skip_bucket."""
test_cases = [
Expand Down
122 changes: 76 additions & 46 deletions osv/purl_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,47 +36,6 @@
'SwiftURL': 'swift',
}

# PURL spec: scheme:type/namespace/name@version?qualifiers#subpath
# project ecosystems use purl.type to represent.
PURL_TYPE_ECOSYSTEMS = {
# Android
'bitnami': 'Bitnami',
'cargo': 'crates.io',
# CRAN
'golang': 'Go',
'hackage': 'Hackage',
'hex': 'Hex',
'maven': 'Maven',
'npm': 'npm',
'nuget': 'NuGet',
'generic': 'OSS-Fuzz',
'composer': 'Packagist',
'pub': 'Pub',
'pypi': 'PyPI',
'gem': 'RubyGems',
'swift': 'SwiftURL',
}

# PURL spec: scheme:type/namespace/name@version?qualifiers#subpath
# For Linux distributions, the namespace helps determine the ecosystem.
# This is because different distributions (like Red Hat and openSUSE)
# might use the same package manager (like RPM).
# Example:
# - pkg:rpm/redhat/curl -> Ecosystem: redhat
# - pkg:rpm/opensuse/curl -> Ecosystem: opensuse
PURL_NAMESPACE_ECOSYSTEMS = {
# AlmaLinux
'alpine': 'Alpine',
'chainguard': 'Chainguard',
'debian': 'Debian',
'opensuse': 'openSUSE',
'redhat': 'Red Hat',
'rocky-linux': 'Rocky Linux',
'suse': 'SUSE',
'ubuntu': 'Ubuntu',
'wolfi': 'Wolfi',
}


def _url_encode(package_name):
"""URL encode a PURL `namespace/name` or `name`."""
Expand Down Expand Up @@ -107,8 +66,79 @@ def package_to_purl(ecosystem: str, package_name: str) -> str | None:
return f'pkg:{purl_type}/{_url_encode(package_name)}{suffix}'


def parse_purl_ecosystem(purl: PackageURL) -> str | None:
"""Extracts the ecosystem name from a PackageURL by checking
its `type` and `namespace`."""
return PURL_TYPE_ECOSYSTEMS.get(
purl.type, PURL_NAMESPACE_ECOSYSTEMS.get(purl.namespace, None))
def parse_purl(purl_str: str) -> tuple[str, str, str] | None:
"""Parses a PURL string and extracts
ecosystem, package, and version information.
Args:
purl_str: The Package URL string to parse.
Returns:
A tuple containing the ecosystem, package,
and version, or None if parsing fails.
"""

try:
purl = PackageURL.from_string(purl_str)
except ValueError: # Catch potential parsing errors
return None

match purl:
# Cases based on PURL_TYPE_ECOSYSTEMS
case PackageURL(type='bitnami'):
ecosystem = 'Bitnami'
case PackageURL(type='cargo'):
ecosystem = 'crates.io'
case PackageURL(
type='golang', namespace=namespace, name=name, version=version):
return 'Go', namespace + '/' + name, version
case PackageURL(type='hackage'):
ecosystem = 'Hackage'
case PackageURL(type='hex'):
ecosystem = 'Hex'
case PackageURL(type='maven'):
ecosystem = 'Maven'
case PackageURL(type='npm'):
ecosystem = 'npm'
case PackageURL(type='nuget'):
ecosystem = 'NuGet'
case PackageURL(type='generic'):
ecosystem = 'OSS-Fuzz'
case PackageURL(type='composer'):
ecosystem = 'Packagist'
case PackageURL(type='pub'):
ecosystem = 'Pub'
case PackageURL(type='pypi'):
ecosystem = 'PyPI'
case PackageURL(type='gem'):
ecosystem = 'RubyGems'
case PackageURL(type='swift'):
ecosystem = 'SwiftURL'

# For Linux distributions
case PackageURL(type='apk', namespace='alpine'):
ecosystem = 'Alpine'
case PackageURL(type='apk', namespace='chainguard'):
ecosystem = 'Chainguard'
case PackageURL(type='deb', namespace='debian'):
ecosystem = 'Debian'
case PackageURL(type='rpm', namespace='opensuse'):
ecosystem = 'openSUSE'
case PackageURL(type='rpm', namespace='redhat'):
ecosystem = 'Red Hat'
case PackageURL(type='rpm', namespace='rocky-linux'):
ecosystem = 'Rocky Linux'
case PackageURL(type='rpm', namespace='suse'):
ecosystem = 'SUSE'
case PackageURL(type='deb', namespace='ubuntu'):
ecosystem = 'Ubuntu'
case PackageURL(type='apk', namespace='wolfi'):
ecosystem = 'Wolfi'

case _:
return None

package = purl.name
version = purl.version

return ecosystem, package, version
85 changes: 85 additions & 0 deletions osv/purl_helpers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,91 @@ def test_swift(self):
purl_helpers.package_to_purl('SwiftURL',
'github.com/Alamofire/Alamofire'))

def test_parse_purl(self):
"""Test parse purl"""
self.assertEqual(
('Alpine', 'postgresql14', None),
purl_helpers.parse_purl('pkg:apk/alpine/postgresql14?arch=source'))

self.assertEqual(('Bitnami', 'moodl', None),
purl_helpers.parse_purl('pkg:bitnami/moodl'))

self.assertEqual(('Chainguard', 'solr', None),
purl_helpers.parse_purl('pkg:apk/chainguard/solr'))

self.assertEqual(('crates.io', 'surrealdb', '2.1.0'),
purl_helpers.parse_purl('pkg:cargo/[email protected]'))

self.assertEqual(('Debian', 'mpg123', '1.26.4-1+deb11u1'),
purl_helpers.parse_purl(
'pkg:deb/debian/[email protected]+deb11u1?arch=source'))

self.assertEqual(('Go', 'github.com/treeverse/lakefs', '1.33.0'),
purl_helpers.parse_purl(
'pkg:golang/github.com/treeverse/[email protected]'))

self.assertEqual(('Hackage', 'process', None),
purl_helpers.parse_purl('pkg:hackage/process'))

self.assertEqual(('Hex', 'test-package', None),
purl_helpers.parse_purl('pkg:hex/test-package'))

self.assertEqual(
('Maven', 'test-package', '1.0.0'),
purl_helpers.parse_purl('pkg:maven/com.example/[email protected]'))

self.assertEqual(('npm', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:npm/[email protected]'))

self.assertEqual(('NuGet', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:nuget/[email protected]'))

self.assertEqual(
('openSUSE', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:rpm/opensuse/[email protected]'))

self.assertEqual(('OSS-Fuzz', 'test-package', None),
purl_helpers.parse_purl('pkg:generic/test-package'))

self.assertEqual(('Packagist', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:composer/[email protected]'))

self.assertEqual(('Pub', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:pub/[email protected]'))

self.assertEqual(('PyPI', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:pypi/[email protected]'))

self.assertEqual(
('Red Hat', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:rpm/redhat/[email protected]'))

self.assertEqual(
('Rocky Linux', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:rpm/rocky-linux/[email protected]'))

self.assertEqual(('RubyGems', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:gem/[email protected]'))

self.assertEqual(('SUSE', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:rpm/suse/[email protected]'))

self.assertEqual(('SwiftURL', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:swift/[email protected]'))

self.assertEqual(('Ubuntu', 'pygments', '2.11.2+dfsg-2ubuntu0.1'),
purl_helpers.parse_purl(
'pkg:deb/ubuntu/[email protected]+dfsg-2ubuntu0.1'))

self.assertEqual(
('Wolfi', 'test-package', '1.2.3'),
purl_helpers.parse_purl('pkg:apk/wolfi/[email protected]'))

self.assertEqual(None, purl_helpers.parse_purl('pkg:bad/ubuntu/pygments'))

self.assertEqual(
None, purl_helpers.parse_purl('purl:apk/wolfi/[email protected]'))


if __name__ == '__main__':
unittest.main()

0 comments on commit 04662aa

Please sign in to comment.