Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for skipping license paths #851

Merged
merged 1 commit into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ release
``.spec``. This is also bumped and generated on existing and new packages,
respectively. This results in less manual work via automatic management.

license_skips
Each line in the file should be a full path. That path is prefixed with a
tempfile directory plus the package tarfile prefix, so it requires using '*'
to be effective (e.g. ``/tmp/*/pkgname-*/path/to/license``).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we hide some of this prefix? Especially if it's going to be boilerplate /tmp/*/... we should just prepend it when skip_license calls globlike_match, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh that is nicer, thanks.


File paths can contain a single '*' per directory, such that
a line of ``/foo*/bar*`` is allowed but ``/foo*bar*`` is not.

$package.license
In certain cases, the package license may not be automatically discovered. In
this instance, ``autospec`` will exit with an error. Update this file to
Expand Down
5 changes: 5 additions & 0 deletions autospec/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def __init__(self, download_path):
self.custom_summ = ""
self.license_fetch = None
self.license_show = None
self.license_skips = []
self.git_uri = None
self.os_packages = set()
self.config_file = None
Expand Down Expand Up @@ -996,6 +997,10 @@ def parse_config_files(self, bump, filemanager, version, requirements):
if not license.add_license(word, self.license_translations, self.license_blacklist):
print_warning("{}: blacklisted license {} ignored.".format(self.content.name + ".license", word))

content = self.read_conf_file(os.path.join(self.download_path, "license_skips"))
if content:
self.license_skips = self.validate_extras_content(content, "license_skips")

# cargo_vendors is the output of 'cargo vendor' and should be read as is
content = self.read_file(os.path.join(self.download_path, "cargo_vendors"), track=True)
if content:
Expand Down
29 changes: 1 addition & 28 deletions autospec/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,33 +197,6 @@ def clean_directories(self, root):

return removed

def globlike_match(self, filename, match_name):
    """Compare the filename to the match_name in a way that simulates the shell glob '*'.

    filename is a '/'-separated path string. match_name is the pattern,
    already split into its path components (e.g. ['', 'usr', 'lib*']).

    A component matches when it equals the corresponding filename component,
    or when it contains exactly one '*' and the text before/after the '*' is
    a prefix/suffix of the filename component ('*' may match the empty
    string). Components with more than one '*' are unsupported and are
    treated as a non-match rather than raising.

    Returns True only if both paths have the same number of components and
    every component matches.
    """
    fsplit = filename.split('/')
    if len(fsplit) != len(match_name):
        return False
    for fpart, mpart in zip(fsplit, match_name):
        if fpart == mpart:
            continue
        prefix, star, suffix = mpart.partition('*')
        # No glob at all, or a second glob in the same component.
        if not star or '*' in suffix:
            return False
        # The literal prefix and suffix must not overlap inside fpart.
        if len(prefix) + len(suffix) > len(fpart):
            return False
        if not (fpart.startswith(prefix) and fpart.endswith(suffix)):
            return False
    return True

def push_file(self, filename, pkg_name):
"""Perform a number of checks against the filename and push the filename if appropriate."""
if filename in self.files or filename in self.files_blacklist:
Expand All @@ -245,7 +218,7 @@ def push_file(self, filename, pkg_name):
elif len('/'.join(match_name)) <= (len(norm_filename) + 1):
# the match name may be 1 longer due to a glob
# being able to match an empty string
if self.globlike_match(norm_filename, match_name):
if util.globlike_match(norm_filename, match_name):
path_prefix = '/' if not match else match.group()
self.push_package_file(os.path.join(path_prefix, *match_name), k)
return
Expand Down
33 changes: 23 additions & 10 deletions autospec/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

import chardet
import download
from util import get_contents, get_sha1sum, print_fatal, print_warning
import util

default_license = "TO BE DETERMINED"

Expand Down Expand Up @@ -96,7 +96,7 @@ def try_with_charset(license, charset):
def license_from_copying_hash(copying, srcdir, config, name):
"""Add licenses based on the hash of the copying file."""
try:
data = get_contents(copying)
data = util.get_contents(copying)
except FileNotFoundError:
# LICENSE file is a bad symlink (qemu-4.2.0!)
return
Expand All @@ -109,7 +109,7 @@ def license_from_copying_hash(copying, srcdir, config, name):
if not data:
return

hash_sum = get_sha1sum(copying)
hash_sum = util.get_sha1sum(copying)

if config.license_fetch:
values = {'hash': hash_sum, 'text': data, 'package': name}
Expand Down Expand Up @@ -142,9 +142,20 @@ def license_from_copying_hash(copying, srcdir, config, name):
else:
if not config.license_show:
return
print_warning("Unknown license {0} with hash {1}".format(copying, hash_sum))
util.print_warning("Unknown license {0} with hash {1}".format(copying, hash_sum))
hash_url = config.license_show % {'HASH': hash_sum}
print_warning("Visit {0} to enter".format(hash_url))
util.print_warning("Visit {0} to enter".format(hash_url))


def skip_license(license_path, config):
    """Report whether license_path matches any configured skip pattern.

    Each entry of config.license_skips is tried in order with
    util.globlike_match. On the first match a warning is emitted through
    util.print_warning and True is returned; False is returned when no
    entry matches.
    """
    for pattern in config.license_skips:
        if not util.globlike_match(license_path, pattern):
            continue
        util.print_warning(f"Skip license detected for file at {license_path}")
        return True
    return False


def scan_for_licenses(srcdir, config, pkg_name):
Expand All @@ -166,8 +177,9 @@ def scan_for_licenses(srcdir, config, pkg_name):
for dirpath, dirnames, files in os.walk(srcdir):
for name in files:
if name.lower() in targets or target_pat.search(name.lower()):
license_from_copying_hash(os.path.join(dirpath, name),
srcdir, config, pkg_name)
license_path = os.path.join(dirpath, name)
if not skip_license(license_path, config):
license_from_copying_hash(license_path, srcdir, config, pkg_name)
# Also search for license texts in project trees that are
# REUSE-compliant, or are in process of adopting this standard (for
# example, KDE ecosystem packages). See https://reuse.software for
Expand All @@ -179,11 +191,12 @@ def scan_for_licenses(srcdir, config, pkg_name):
# named `license` instead.
dirbase = os.path.basename(dirpath)
if re.search(r'^(LICENSES|licenses?|licensing)$', dirbase) and re.search(r'\.txt$', name):
license_from_copying_hash(os.path.join(dirpath, name),
srcdir, config, pkg_name)
license_path = os.path.join(dirpath, name)
if not skip_license(license_path, config):
license_from_copying_hash(license_path, srcdir, config, pkg_name)

if not licenses:
print_fatal(" Cannot find any license or a valid {}.license file!\n".format(pkg_name))
util.print_fatal(" Cannot find any license or a valid {}.license file!\n".format(pkg_name))
sys.exit(1)

print("Licenses : ", " ".join(sorted(licenses)))
Expand Down
28 changes: 28 additions & 0 deletions autospec/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,31 @@ def open_auto(*args, **kwargs):
assert 'encoding' not in kwargs
assert 'errors' not in kwargs
return open(*args, encoding="utf-8", errors="surrogateescape", **kwargs)


def globlike_match(filename, match_name):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming you didn't edit this aside from moving it to util?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, same function though the signature had to change since it isn't a class method (though it wasn't using any of the object's fields).

"""Compare the filename to the match_name in a way that simulates the shell glob '*'."""
fsplit = filename.split('/')
if len(fsplit) != len(match_name):
return False
match = True
for fpart, mpart in zip(fsplit, match_name):
if fpart != mpart:
if '*' not in mpart:
match = False
break
if len(mpart) > len(fpart) + 1:
match = False
break
mpl, mpr = mpart.split('*')
try:
if fpart.index(mpl) != 0:
match = False
break
if fpart.rindex(mpr) != len(fpart) - len(mpr):
match = False
break
except ValueError:
match = False
break
return match
30 changes: 29 additions & 1 deletion tests/test_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_license_from_copying_hash_no_license_show(self):
conf = config.Config("")
conf.setup_patterns()
# remove the hash from license_hashes
del(conf.license_hashes[license.get_sha1sum('tests/COPYING_TEST')])
del(conf.license_hashes[license.util.get_sha1sum('tests/COPYING_TEST')])
conf.license_show = "license.show.url"
license.license_from_copying_hash('tests/COPYING_TEST', '', conf, '')

Expand Down Expand Up @@ -233,6 +233,34 @@ def test_scan_for_licenses_none(self):
self.assertIn("Cannot find any license", out.getvalue())
self.assertEqual(license.licenses, [])

def test_scan_for_licenses_skip(self):
    """
    Test scan_for_licenses in temporary directory with licenses to skip
    """
    conf = config.Config("")
    conf.setup_patterns()
    # Pattern only matches /tmp/<anything>/COPYING, so this presumes the
    # temporary directory is created directly under /tmp.
    conf.license_skips = [['', 'tmp', '*', 'COPYING']]
    with open('tests/COPYING_TEST', 'rb') as source:
        license_text = source.read().decode('utf-8')

    with tempfile.TemporaryDirectory() as tmpd:
        # the license file that is expected to be skipped
        with open(os.path.join(tmpd, 'COPYING'), 'w') as copying_out:
            copying_out.write(license_text)
        # unrelated source files that must not count as licenses
        for cruft_name in ['testlib.c', 'testmain.c', 'testheader.h']:
            with open(os.path.join(tmpd, cruft_name), 'w') as cruft_out:
                cruft_out.write('test content')
        # capture stdout so the printed message can be checked too
        out = StringIO()
        with redirect_stdout(out), self.assertRaises(SystemExit) as thread:
            license.scan_for_licenses(tmpd, conf, '')

    self.assertEqual(thread.exception.code, 1)
    self.assertIn("Cannot find any license", out.getvalue())
    self.assertEqual(license.licenses, [])

def test_load_specfile(self):
"""
Test load_specfile with populated license list. This method is not
Expand Down
47 changes: 47 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,53 @@ def isfile_mock(_):
util.call = call_backup
self.assertTrue(len(mock_call.mock_calls) == 3)

def test_globlike_match(self):
"""
Test globlike_match
"""
match_name = ['a', 'b', 'c']
file_path = 'a/b'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', 'c']
file_path = 'a/b'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', 'bb*']
file_path = 'a/b'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', 'b*']
file_path = 'a/ab'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', 'b*']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate test?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oopsie, will fix.

file_path = 'a/ab'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', '*a']
file_path = 'a/ab'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', 'c*']
file_path = 'a/b'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', '*c']
file_path = 'a/b'
self.assertFalse(util.globlike_match(file_path, match_name))

match_name = ['a', 'b*']
file_path = 'a/b'
self.assertTrue(util.globlike_match(file_path, match_name))

match_name = ['a', '*b']
file_path = 'a/b'
self.assertTrue(util.globlike_match(file_path, match_name))

match_name = ['a', 'b']
file_path = 'a/b'
self.assertTrue(util.globlike_match(file_path, match_name))


if __name__ == '__main__':
Expand Down
Loading