Skip to content

Commit

Permalink
Add support for skipping license paths
Browse files Browse the repository at this point in the history
Add support for a new config file with glob like handling of paths as
some projects contain license files that are not the license of the
project.

scan_for_licenses now looks at the configuration and does glob like
matching for all the potential license files it tries to add.

This change required a few structural adjustments to the code and a
superficial style change: the toplevel util module is now imported as
a whole instead of importing individual names from it.

Signed-off-by: William Douglas <[email protected]>
  • Loading branch information
bryteise committed Sep 18, 2024
1 parent 6a4b23b commit 53fe209
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 39 deletions.
8 changes: 8 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ release
``.spec``. This is also bumped and generated on existing and new packages,
respectively. This results in less manual work via automatic management.
license_skips
Each line in the file should be a full path. The paths being checked are
prefixed with a tempfile directory plus the package tarfile prefix, so a line
must use '*' to be effective (e.g. /tmp/*/pkgname-*/path/to/license).
File paths can contain a single '*' per directory component, such that
a line of ``/foo*/bar*`` is allowed but ``/foo*bar*`` is not.
$package.license
In certain cases, the package license may not be automatically discovered. In
this instance, ``autospec`` will exit with an error. Update this file to
Expand Down
5 changes: 5 additions & 0 deletions autospec/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def __init__(self, download_path):
self.custom_summ = ""
self.license_fetch = None
self.license_show = None
self.license_skips = []
self.git_uri = None
self.os_packages = set()
self.config_file = None
Expand Down Expand Up @@ -996,6 +997,10 @@ def parse_config_files(self, bump, filemanager, version, requirements):
if not license.add_license(word, self.license_translations, self.license_blacklist):
print_warning("{}: blacklisted license {} ignored.".format(self.content.name + ".license", word))

content = self.read_conf_file(os.path.join(self.download_path, "license_skips"))
if content:
self.license_skips = self.validate_extras_content(content, "license_skips")

# cargo_vendors is the output of 'cargo vendor' and should be read as is
content = self.read_file(os.path.join(self.download_path, "cargo_vendors"), track=True)
if content:
Expand Down
29 changes: 1 addition & 28 deletions autospec/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,33 +197,6 @@ def clean_directories(self, root):

return removed

def globlike_match(self, filename, match_name):
    """Compare the filename to the match_name in a way that simulates the shell glob '*'.

    filename is a '/'-separated path string. match_name is a sequence of
    path components, each of which is either a literal or contains a
    single '*' that matches any (possibly empty) run of characters within
    that one component.

    Returns True only when both have the same number of components and
    every component matches. A pattern component containing more than one
    '*' is not supported and never matches (previously it raised an
    uncaught ValueError from the two-way unpacking of the split).
    """
    fsplit = filename.split('/')
    if len(fsplit) != len(match_name):
        return False
    for fpart, mpart in zip(fsplit, match_name):
        if fpart == mpart:
            continue
        # Zero globs means a plain literal mismatch; two or more globs
        # in one component is an unsupported pattern.
        if mpart.count('*') != 1:
            return False
        prefix, suffix = mpart.split('*')
        # The literal parts around the glob may not overlap inside fpart
        # (e.g. 'ab*ba' must not match 'aba').
        if len(prefix) + len(suffix) > len(fpart):
            return False
        if not fpart.startswith(prefix) or not fpart.endswith(suffix):
            return False
    return True

def push_file(self, filename, pkg_name):
"""Perform a number of checks against the filename and push the filename if appropriate."""
if filename in self.files or filename in self.files_blacklist:
Expand All @@ -245,7 +218,7 @@ def push_file(self, filename, pkg_name):
elif len('/'.join(match_name)) <= (len(norm_filename) + 1):
# the match name may be 1 longer due to a glob
# being able to match an empty string
if self.globlike_match(norm_filename, match_name):
if util.globlike_match(norm_filename, match_name):
path_prefix = '/' if not match else match.group()
self.push_package_file(os.path.join(path_prefix, *match_name), k)
return
Expand Down
33 changes: 23 additions & 10 deletions autospec/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

import chardet
import download
from util import get_contents, get_sha1sum, print_fatal, print_warning
import util

default_license = "TO BE DETERMINED"

Expand Down Expand Up @@ -96,7 +96,7 @@ def try_with_charset(license, charset):
def license_from_copying_hash(copying, srcdir, config, name):
"""Add licenses based on the hash of the copying file."""
try:
data = get_contents(copying)
data = util.get_contents(copying)
except FileNotFoundError:
# LICENSE file is a bad symlink (qemu-4.2.0!)
return
Expand All @@ -109,7 +109,7 @@ def license_from_copying_hash(copying, srcdir, config, name):
if not data:
return

hash_sum = get_sha1sum(copying)
hash_sum = util.get_sha1sum(copying)

if config.license_fetch:
values = {'hash': hash_sum, 'text': data, 'package': name}
Expand Down Expand Up @@ -142,9 +142,20 @@ def license_from_copying_hash(copying, srcdir, config, name):
else:
if not config.license_show:
return
print_warning("Unknown license {0} with hash {1}".format(copying, hash_sum))
util.print_warning("Unknown license {0} with hash {1}".format(copying, hash_sum))
hash_url = config.license_show % {'HASH': hash_sum}
print_warning("Visit {0} to enter".format(hash_url))
util.print_warning("Visit {0} to enter".format(hash_url))


def skip_license(license_path, config):
    """Tell whether license_path matches an entry in config.license_skips.

    Each entry of config.license_skips is tried in order with
    util.globlike_match. On the first match a warning naming the file is
    printed and True is returned; False is returned when nothing matches.
    """
    for pattern in config.license_skips:
        if not util.globlike_match(license_path, pattern):
            continue
        util.print_warning(f"Skip license detected for file at {license_path}")
        return True
    return False


def scan_for_licenses(srcdir, config, pkg_name):
Expand All @@ -166,8 +177,9 @@ def scan_for_licenses(srcdir, config, pkg_name):
for dirpath, dirnames, files in os.walk(srcdir):
for name in files:
if name.lower() in targets or target_pat.search(name.lower()):
license_from_copying_hash(os.path.join(dirpath, name),
srcdir, config, pkg_name)
license_path = os.path.join(dirpath, name)
if not skip_license(license_path, config):
license_from_copying_hash(license_path, srcdir, config, pkg_name)
# Also search for license texts in project trees that are
# REUSE-compliant, or are in process of adopting this standard (for
# example, KDE ecosystem packages). See https://reuse.software for
Expand All @@ -179,11 +191,12 @@ def scan_for_licenses(srcdir, config, pkg_name):
# named `license` instead.
dirbase = os.path.basename(dirpath)
if re.search(r'^(LICENSES|licenses?|licensing)$', dirbase) and re.search(r'\.txt$', name):
license_from_copying_hash(os.path.join(dirpath, name),
srcdir, config, pkg_name)
license_path = os.path.join(dirpath, name)
if not skip_license(license_path, config):
license_from_copying_hash(license_path, srcdir, config, pkg_name)

if not licenses:
print_fatal(" Cannot find any license or a valid {}.license file!\n".format(pkg_name))
util.print_fatal(" Cannot find any license or a valid {}.license file!\n".format(pkg_name))
sys.exit(1)

print("Licenses : ", " ".join(sorted(licenses)))
Expand Down
28 changes: 28 additions & 0 deletions autospec/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,31 @@ def open_auto(*args, **kwargs):
assert 'encoding' not in kwargs
assert 'errors' not in kwargs
return open(*args, encoding="utf-8", errors="surrogateescape", **kwargs)


def globlike_match(filename, match_name):
    """Compare the filename to the match_name in a way that simulates the shell glob '*'.

    Arguments:
        filename: '/'-separated path string to test.
        match_name: sequence of path components forming the pattern. A
            component is either a literal or contains exactly one '*',
            which matches any (possibly empty) run of characters inside
            that single component.

    Returns:
        True when filename and match_name have the same number of
        components and each component matches, False otherwise. A
        component holding more than one '*' is unsupported and is treated
        as a non-match instead of raising ValueError while unpacking the
        split result.
    """
    fsplit = filename.split('/')
    if len(fsplit) != len(match_name):
        return False
    for fpart, mpart in zip(fsplit, match_name):
        if fpart == mpart:
            continue
        if '*' not in mpart:
            # plain literal component that differs
            return False
        pieces = mpart.split('*')
        if len(pieces) != 2:
            # more than one '*' in a single component is not supported
            return False
        head, tail = pieces
        # head and tail must fit side by side within fpart; this stops
        # overlapping matches such as 'ab*ba' against 'aba'.
        if len(head) + len(tail) > len(fpart):
            return False
        if not (fpart.startswith(head) and fpart.endswith(tail)):
            return False
    return True
30 changes: 29 additions & 1 deletion tests/test_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_license_from_copying_hash_no_license_show(self):
conf = config.Config("")
conf.setup_patterns()
# remove the hash from license_hashes
del(conf.license_hashes[license.get_sha1sum('tests/COPYING_TEST')])
del(conf.license_hashes[license.util.get_sha1sum('tests/COPYING_TEST')])
conf.license_show = "license.show.url"
license.license_from_copying_hash('tests/COPYING_TEST', '', conf, '')

Expand Down Expand Up @@ -233,6 +233,34 @@ def test_scan_for_licenses_none(self):
self.assertIn("Cannot find any license", out.getvalue())
self.assertEqual(license.licenses, [])

def test_scan_for_licenses_skip(self):
    """
    Test scan_for_licenses in temporary directory with licenses to skip
    """
    conf = config.Config("")
    conf.setup_patterns()
    with open('tests/COPYING_TEST', 'rb') as copyingf:
        content = copyingf.read()

    with tempfile.TemporaryDirectory() as tmpd:
        # Derive the skip pattern from the directory that was actually
        # created instead of hard-coding /tmp: the temporary root depends
        # on the environment (e.g. TMPDIR), and a fixed /tmp prefix would
        # then fail to match. The last path component (the random
        # directory name) is replaced by a glob.
        conf.license_skips = [tmpd.split('/')[:-1] + ['*', 'COPYING']]
        # create the copying file
        with open(os.path.join(tmpd, 'COPYING'), 'w') as newcopyingf:
            newcopyingf.write(content.decode('utf-8'))
        # create some cruft for testing
        for testf in ['testlib.c', 'testmain.c', 'testheader.h']:
            with open(os.path.join(tmpd, testf), 'w') as newtestf:
                newtestf.write('test content')
        # let's check that the proper thing is being printed as well
        out = StringIO()
        with redirect_stdout(out):
            with self.assertRaises(SystemExit) as thread:
                license.scan_for_licenses(tmpd, conf, '')

    # The only license file was skipped, so the scan must have bailed out.
    self.assertEqual(thread.exception.code, 1)
    self.assertIn("Cannot find any license", out.getvalue())
    self.assertEqual(license.licenses, [])

def test_load_specfile(self):
"""
Test load_specfile with populated license list. This method is not
Expand Down
47 changes: 47 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,53 @@ def isfile_mock(_):
util.call = call_backup
self.assertTrue(len(mock_call.mock_calls) == 3)

def test_globlike_match(self):
    """
    Test globlike_match
    """
    # (file_path, match_name, expected result)
    cases = [
        # different number of path components
        ('a/b', ['a', 'b', 'c'], False),
        # literal component mismatch
        ('a/b', ['a', 'c'], False),
        # pattern literal part longer than the file component
        ('a/b', ['a', 'bb*'], False),
        # prefix present but not at the start
        ('a/ab', ['a', 'b*'], False),
        # suffix present but not at the end
        ('a/ba', ['a', '*b'], False),
        ('a/ab', ['a', '*a'], False),
        # prefix / suffix not present at all
        ('a/b', ['a', 'c*'], False),
        ('a/b', ['a', '*c'], False),
        # prefix and suffix would have to overlap
        ('a/aba', ['a', 'ab*ba'], False),
        # glob matching the empty string
        ('a/b', ['a', 'b*'], True),
        ('a/b', ['a', '*b'], True),
        # glob in the middle of a component
        ('a/abc', ['a', 'a*c'], True),
        # exact literal match
        ('a/b', ['a', 'b'], True),
    ]
    for file_path, match_name, expected in cases:
        # subTest keeps going after a failure and reports the failing case
        with self.subTest(file_path=file_path, match_name=match_name):
            self.assertEqual(util.globlike_match(file_path, match_name), expected)


if __name__ == '__main__':
Expand Down

0 comments on commit 53fe209

Please sign in to comment.