diff --git a/README.rst b/README.rst index cf6f91c4..55175667 100644 --- a/README.rst +++ b/README.rst @@ -128,6 +128,14 @@ release ``.spec``. This is also bumped and generated on existing and new packages, respectively. This results in less manual work via automatic management. +license_skips + Each line in the file should be the full path of a license file to skip. The + scanned path begins with a tempfile directory plus the package tarfile prefix, + so a '*' is required to be effective (e.g. /tmp/*/pkgname-*/path/to/license). + + File paths can contain a single '*' per directory such that + a line of ``/foo*/bar*`` is allowed but ``/foo*bar*`` is not. + $package.license In certain cases, the package license may not be automatically discovered. In this instance, ``autospec`` will exit with an error. Update this file to diff --git a/autospec/config.py b/autospec/config.py index c4d3e496..4b44beb6 100644 --- a/autospec/config.py +++ b/autospec/config.py @@ -110,6 +110,7 @@ def __init__(self, download_path): self.custom_summ = "" self.license_fetch = None self.license_show = None + self.license_skips = [] self.git_uri = None self.os_packages = set() self.config_file = None @@ -996,6 +997,10 @@ def parse_config_files(self, bump, filemanager, version, requirements): if not license.add_license(word, self.license_translations, self.license_blacklist): print_warning("{}: blacklisted license {} ignored.".format(self.content.name + ".license", word)) + content = self.read_conf_file(os.path.join(self.download_path, "license_skips")) + if content: + self.license_skips = self.validate_extras_content(content, "license_skips") + # cargo_vendors is the output of 'cargo vendor' and should be read as is content = self.read_file(os.path.join(self.download_path, "cargo_vendors"), track=True) if content: diff --git a/autospec/files.py b/autospec/files.py index d9e4a936..e7f64e94 100644 --- a/autospec/files.py +++ b/autospec/files.py @@ -197,33 +197,6 @@ def clean_directories(self, root): return removed - def 
globlike_match(self, filename, match_name): - """Compare the filename to the match_name in a way that simulates the shell glob '*'.""" - fsplit = filename.split('/') - if len(fsplit) != len(match_name): - return False - match = True - for fpart, mpart in zip(fsplit, match_name): - if fpart != mpart: - if '*' not in mpart: - match = False - break - if len(mpart) > len(fpart) + 1: - match = False - break - mpl, mpr = mpart.split('*') - try: - if fpart.index(mpl) != 0: - match = False - break - if fpart.rindex(mpr) != len(fpart) - len(mpr): - match = False - break - except ValueError: - match = False - break - return match - def push_file(self, filename, pkg_name): """Perform a number of checks against the filename and push the filename if appropriate.""" if filename in self.files or filename in self.files_blacklist: @@ -245,7 +218,7 @@ def push_file(self, filename, pkg_name): elif len('/'.join(match_name)) <= (len(norm_filename) + 1): # the match name may be 1 longer due to a glob # being able to match an empty string - if self.globlike_match(norm_filename, match_name): + if util.globlike_match(norm_filename, match_name): path_prefix = '/' if not match else match.group() self.push_package_file(os.path.join(path_prefix, *match_name), k) return diff --git a/autospec/license.py b/autospec/license.py index 18bc1115..3ce56632 100644 --- a/autospec/license.py +++ b/autospec/license.py @@ -29,7 +29,7 @@ import chardet import download -from util import get_contents, get_sha1sum, print_fatal, print_warning +import util default_license = "TO BE DETERMINED" @@ -96,7 +96,7 @@ def try_with_charset(license, charset): def license_from_copying_hash(copying, srcdir, config, name): """Add licenses based on the hash of the copying file.""" try: - data = get_contents(copying) + data = util.get_contents(copying) except FileNotFoundError: # LICENSE file is a bad symlink (qemu-4.2.0!) 
def skip_license(license_path, config):
    """Report whether license_path matches one of the configured skip patterns.

    Every entry of ``config.license_skips`` is handed to
    ``util.globlike_match`` together with ``license_path``. On the first
    entry that matches, a warning naming the file is printed and True is
    returned; the remaining entries are not examined. False is returned
    when nothing matches, including when the skip list is empty.
    """
    for pattern in config.license_skips:
        if not util.globlike_match(license_path, pattern):
            continue
        # First hit wins: warn once and stop looking.
        util.print_warning(f"Skip license detected for file at {license_path}")
        return True
    return False
def globlike_match(filename, match_name):
    """Compare the filename to the match_name in a way that simulates the shell glob '*'.

    Args:
        filename: a '/'-separated path string.
        match_name: a sequence of path components to compare against the
            components of ``filename``, position by position. A component
            may contain a single '*', which stands for any run of
            characters (including none) inside that one component.

    Returns:
        True when both have the same number of components and every
        component of ``filename`` equals, or is matched by, the component
        at the same position in ``match_name``; otherwise False.

    A pattern component containing more than one '*' is not supported and
    never matches a differing name; it is reported as a non-match rather
    than raising.
    """
    fsplit = filename.split('/')
    if len(fsplit) != len(match_name):
        return False
    for fpart, mpart in zip(fsplit, match_name):
        if fpart == mpart:
            continue
        # Anything other than exactly one '*' cannot match a differing
        # name: zero means a plain literal mismatch, two or more is an
        # unsupported pattern.
        if mpart.count('*') != 1:
            return False
        prefix, _, suffix = mpart.partition('*')
        # The literal text on both sides of the '*' must fit in the name
        # without overlapping (the '*' itself may match nothing).
        if len(prefix) + len(suffix) > len(fpart):
            return False
        if not (fpart.startswith(prefix) and fpart.endswith(suffix)):
            return False
    return True
def test_scan_for_licenses_skip(self):
    """
    Test scan_for_licenses in temporary directory with licenses to skip
    """
    conf = config.Config("")
    conf.setup_patterns()
    with open('tests/COPYING_TEST', 'rb') as copyingf:
        content = copyingf.read()

    with tempfile.TemporaryDirectory() as tmpd:
        # Derive the skip pattern from the directory we were actually
        # given instead of assuming it lives directly under /tmp; the
        # '*' still stands in for the randomly named last component.
        conf.license_skips = [os.path.dirname(tmpd).split('/') + ['*', 'COPYING']]
        # create the copying file as an exact byte copy of the fixture
        with open(os.path.join(tmpd, 'COPYING'), 'wb') as newcopyingf:
            newcopyingf.write(content)
        # create some cruft for testing
        for testf in ['testlib.c', 'testmain.c', 'testheader.h']:
            with open(os.path.join(tmpd, testf), 'w') as newtestf:
                newtestf.write('test content')
        # let's check that the proper thing is being printed as well
        out = StringIO()
        with redirect_stdout(out):
            with self.assertRaises(SystemExit) as thread:
                license.scan_for_licenses(tmpd, conf, '')

    # The only license file is skipped, so the scan must give up.
    self.assertEqual(thread.exception.code, 1)
    self.assertIn("Cannot find any license", out.getvalue())
    self.assertEqual(license.licenses, [])
def test_globlike_match(self):
    """
    Test globlike_match
    """
    # (file_path, match_name, expected result)
    cases = [
        ('a/b', ['a', 'b', 'c'], False),  # different number of components
        ('a/b', ['a', 'c'], False),       # literal mismatch, no glob
        ('a/b', ['a', 'bb*'], False),     # pattern needs more characters than the name has
        ('a/ab', ['a', 'b*'], False),     # text before the glob is not a prefix
        ('a/ab', ['a', '*a'], False),     # text after the glob is not a suffix
        ('a/b', ['a', 'c*'], False),      # prefix absent from the name
        ('a/b', ['a', '*c'], False),      # suffix absent from the name
        ('a/b', ['a', 'b*'], True),       # glob matches the empty string
        ('a/b', ['a', '*b'], True),
        ('a/b', ['a', 'b'], True),        # exact match
    ]
    for file_path, match_name, expected in cases:
        # subTest keeps going after a failure and reports the failing case
        with self.subTest(file_path=file_path, match_name=match_name):
            self.assertEqual(util.globlike_match(file_path, match_name), expected)