Skip to content

Commit

Permalink
Merge pull request fosslinux#445 from fosslinux/check-substitutes
Browse files Browse the repository at this point in the history
Check "substitutes"
  • Loading branch information
fosslinux authored Feb 24, 2024
2 parents d6e49c1 + b80e1f6 commit 2261098
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 4 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Install pylint
run: sudo pip3 install pylint
- name: pylint
run: pylint rootfs.py lib/utils.py lib/generator.py lib/target.py --disable=duplicate-code
run: pylint rootfs.py lib/utils.py lib/generator.py lib/target.py check_substitutes.py --disable=duplicate-code

shellcheck:
name: Lint shell files
Expand All @@ -42,3 +42,12 @@ jobs:
run: sudo pip3 install reuse
- name: reuse
run: reuse lint

substitutes:
name: Check validity of substituted tarballs
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v3
- name: Check validity of substitutes
run: ./check_substitutes.py
162 changes: 162 additions & 0 deletions check_substitutes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-3.0-or-later
#
# SPDX-FileCopyrightText: 2024 fosslinux <[email protected]>

"""Check that substituted files are the same."""
import bz2
import filecmp
import gzip
import itertools
import lzma
import shutil
import tarfile
import tempfile
import sys
import os

from lib.generator import Generator

# Get a temporary directory to work in
working = tempfile.mkdtemp()

# Colour constants
# pylint: disable=too-few-public-methods
class Colors():
"""ANSI Color Codes"""
GREY = "\033[90m"
RED = "\033[91m"
GREEN = "\033[92m"
ORANGE = "\033[91m\033[93m"
YELLOW = "\033[93m"
END = "\033[0m"

def traverse_path(base_root):
"""Takes a path and returns a set of all directories and files in that path."""
all_dirs = set()
all_files = set()
for root, directories, files in os.walk(base_root, topdown=True):
for d in directories:
all_dirs.add(os.path.join(root, d).lstrip(base_root))
for f in files:
all_files.add(os.path.join(root, f).lstrip(base_root))
return (all_dirs, all_files)

class Distfile():
"""Represents one distfile and operations performed on it."""
def __init__(self, i, url):
self.i = i
self.url = url
self.out_file = f"{i}-{os.path.basename(url)}"
self.filepath = ""

def download(self):
"""Downloads the distfile."""
Generator.download_file(self.url, working, self.out_file, silent=True)
self.filepath = os.path.join(working, self.out_file)

def decompress(self):
"""Decompresses the distfile."""
compression = self.out_file.split('.')[-1]
decompress_func = {
"gz": gzip.open,
"tgz": gzip.open,
"bz2": bz2.open,
"xz": lzma.open,
"lzma": lzma.open
}
if compression not in decompress_func:
# No decompression needed
return
# Remove the compression extension
new_path = '.'.join(self.filepath.split('.')[:-1])
# tgz -> .tar
if compression == "tgz":
new_path = f"{new_path}.tar"
# Move the decompressed binary stream to a new file
with decompress_func[compression](self.filepath, 'rb') as fin:
with open(new_path, 'wb') as fout:
shutil.copyfileobj(fin, fout)
self.filepath = new_path

def extract(self):
"""Extracts the distfile."""
# Sanity check
if not tarfile.is_tarfile(self.filepath):
return
out_dir = os.path.join(working, f"{self.i}")
os.mkdir(out_dir)
with tarfile.open(self.filepath, 'r') as f:
f.extractall(path=out_dir)
self.filepath = out_dir

# It makes more sense here to label them d1 and d2 rather than have one be self.
# pylint: disable=no-self-argument
def compare(d1, d2):
"""Compares the distfile to another distfile."""
if not os.path.isdir(d1.filepath):
# Compare files
return filecmp.cmp(d1.filepath, d2.filepath, shallow=False)
if not os.path.isdir(d2.filepath):
# Then, d2 is a file and d1 is a directory
return False
# Otherwise it's two directories
dirnames1, filenames1 = traverse_path(d1.filepath)
dirnames2, filenames2 = traverse_path(d2.filepath)
if dirnames1 != dirnames2:
return False
if filenames1 != filenames2:
return False
return filecmp.cmpfiles(d1.filepath, d2.filepath, filenames1, shallow=False)

def check(*args):
"""Check if a list of distfiles are equivalent."""
notequiv = []
# Find all pairs that are not equivalent
for pair in itertools.combinations(args, 2):
if pair[0].compare(pair[1]):
print(f"{Colors.GREY}DEBUG: {pair[0].url} is equivalent to {pair[1].url}{Colors.END}")
else:
notequiv.append(pair)

# Decompress all, and check again
for d in {y for x in notequiv for y in x}:
d.decompress()
for pair in notequiv.copy():
if pair[0].compare(pair[1]):
# pylint: disable=line-too-long
print(f"{Colors.YELLOW}NOTE: {pair[0].url} is equivalent to {pair[1].url} when decompressed{Colors.END}")
notequiv.remove(pair)

# Extract all, and check again
for d in {y for x in notequiv for y in x}:
d.extract()
has_error = False
for pair in notequiv:
if pair[0].compare(pair[1]):
# pylint: disable=line-too-long
print(f"{Colors.ORANGE}WARN: {pair[0].url} is equivalent to {pair[1].url} when extracted{Colors.END}")
else:
has_error = True
# pylint: disable=line-too-long
print(f"{Colors.RED}ERROR: {pair[0].url} is not equivalent to {pair[1].url}!{Colors.END}")

return has_error

def main():
"""Main function."""
has_error = False
with open("substitutes", 'r', encoding="utf-8") as f:
for line in f.readlines():
urls = line.strip().split(' ')
distfiles = []
for i, url in enumerate(urls):
distfiles.append(Distfile(i, url))
for distfile in distfiles:
distfile.download()
if check(*distfiles):
has_error = True
sys.exit(has_error)

if __name__ == "__main__":
main()
5 changes: 3 additions & 2 deletions lib/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def check_file(file_name, expected_hash):
this script the next time")

@staticmethod
def download_file(url, directory, file_name):
def download_file(url, directory, file_name, silent=False):
"""
Download a single source archive.
"""
Expand All @@ -290,7 +290,8 @@ def download_file(url, directory, file_name):
"User-Agent": "curl/7.88.1"
}
if not os.path.isfile(abs_file_name):
print(f"Downloading: {file_name}")
if not silent:
print(f"Downloading: {file_name}")
response = requests.get(url, allow_redirects=True, stream=True,
headers=headers, timeout=20)
if response.status_code == 200:
Expand Down
2 changes: 1 addition & 1 deletion steps/diffutils-2.7/sources
Original file line number Diff line number Diff line change
@@ -1 +1 @@
https://dept.rpi.edu/acm/packages/diffutils/2.7/distrib/diffutils-2.7.tar.bz2 fd6c44f7cbd0a942a3f0c012365997965451197ad4faeb0b8aac1fe03192de58
https://mirrors.kernel.org/gnu/diffutils/diffutils-2.7.tar.gz d5f2489c4056a31528e3ada4adacc23d498532b0af1a980f2f76158162b139d6
3 changes: 3 additions & 0 deletions substitutes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://mirrors.kernel.org/gnu/bash/bash-2.05b.tar.gz https://src.fedoraproject.org/repo/pkgs/bash/bash-2.05b.tar.bz2/f3e5428ed52a4f536f571a945d5de95d/bash-2.05b.tar.bz2
https://mirrors.kernel.org/gnu/bc/bc-1.07.1.tar.gz https://mirrors.kernel.org/slackware/slackware64-15.0/source/ap/bc/bc-1.07.1.tar.xz
https://mirrors.kernel.org/gnu/ed/ed-1.4.tar.gz https://mirrors.kernel.org/slackware/slackware-13.37/source/a/ed/ed-1.4.tar.xz
3 changes: 3 additions & 0 deletions substitutes.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 fosslinux <[email protected]>

SPDX-License-Identifier: GPL-3.0-or-later

0 comments on commit 2261098

Please sign in to comment.