From c4931c1e026dd8a3a5338c831fef6eb3ca5f386c Mon Sep 17 00:00:00 2001 From: jack9603301 Date: Sun, 18 Jul 2021 22:33:10 +0800 Subject: [PATCH] Add sha256 validation --- apt-sync.py | 96 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/apt-sync.py b/apt-sync.py index 1eb9dcd..26a1d02 100755 --- a/apt-sync.py +++ b/apt-sync.py @@ -34,6 +34,8 @@ DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800')) REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '') +package_info = {} + pattern_os_template = re.compile(r"@\{(.+)\}") pattern_package_name = re.compile(r"^Filename: (.+)$", re.MULTILINE) pattern_package_size = re.compile(r"^Size: (\d+)$", re.MULTILINE) @@ -98,6 +100,39 @@ def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]: shutil.rmtree(str(tmpdir)) tmpdir.mkdir(parents=True, exist_ok=True) return (folder, tmpdir) + +def flush_package_info(content): + + global package_info + + for pkg in content.split('\n\n'): + if len(pkg) < 10: # ignore blanks + continue + try: + pkg_filename = pattern_package_name.search(pkg).group(1) + pkg_size = int(pattern_package_size.search(pkg).group(1)) + pkg_checksum = pattern_package_sha256.search(pkg).group(1) + if pkg_filename not in package_info: + pkg_info = { + 'size': pkg_size, + 'sha256': { + 'new': pkg_checksum, + 'old': None + } + } + else: + pkg_info = package_info[pkg_filename] + pkg_info['size'] = pkg_size + if pkg_info['sha256']['new'] != None and pkg_info['sha256']['old'] == None: + pkg_info['sha256']['old'] = pkg_info['sha256']['new'] + pkg_info['sha256']['new'] = pkg_checksum + package_info.update({ + pkg_filename: pkg_info + }) + except: + print("Failed to parse one package description", flush=True) + traceback.print_exc() + return 1 def move_files_in(src: Path, dst: Path): empty = True @@ -110,6 +145,9 @@ def move_files_in(src: Path, dst: Path): print(f"{src} is empty") def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Path, deb_set: Dict[str, int])->int: + + global package_info + if not dest_base_dir.is_dir(): print("Destination directory is empty, cannot continue") return 1 @@ -134,6 +172,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa pkgidx_dir,pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir) with open(release_file, "r") as fd: pkgidx_content=None + pkgidx_file_old = None + package_info = {} cnt_start=False for line in fd: if cnt_start: @@ -164,6 +204,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa pkgidx_file.unlink() continue if pkgidx_content is None and pkgidx_file.stem == 'Packages': + pkgidx_file_old = Path(f'{dist_dir}/{filename}.old') + shutil.copy(pkgidx_file, pkgidx_file_old) print(f"getting packages index content from {pkgidx_file.name}", flush=True) suffix = pkgidx_file.suffix if suffix == '.xz': @@ -176,12 +218,43 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa pkgidx_content = content.decode('utf-8') else: print("unsupported format") + continue + + flush_package_info(pkgidx_content) + + with pkgidx_file_old.open('rb') as t: content = t.read() + if len(content) != int(filesize): + print(f"Invalid size of {pkgidx_file}, expected {filesize}, skipped") + pkgidx_file.unlink() + continue + if hashlib.sha256(content).hexdigest() != checksum: + print(f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped") + pkgidx_file.unlink() + continue + if pkgidx_file_old.stem == 'Packages': + print(f"getting packages index content from {pkgidx_file_old.name}", flush=True) + suffix = pkgidx_file_old.suffix + if suffix == '.xz.old': + pkgidx_content_old = lzma.decompress(content).decode('utf-8') + elif suffix == '.bz2.old': + pkgidx_content_old = bz2.decompress(content).decode('utf-8') + elif suffix == '.gz.old': + pkgidx_content_old = gzip.decompress(content).decode('utf-8') + elif suffix == '.old': + pkgidx_content_old = content.decode('utf-8') + else: + print("unsupported format") + continue + + flush_package_info(pkgidx_content_old) + # Currently only support SHA-256 checksum, because # "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field." # from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files if line.startswith('SHA256:'): cnt_start = True + if not cnt_start: print("Cannot find SHA-256 checksum") return 1 @@ -216,18 +289,9 @@ def collect_tmp_dir(): err = 0 deb_count = 0 deb_size = 0 - for pkg in pkgidx_content.split('\n\n'): - if len(pkg) < 10: # ignore blanks - continue - try: - pkg_filename = pattern_package_name.search(pkg).group(1) - pkg_size = int(pattern_package_size.search(pkg).group(1)) - pkg_checksum = pattern_package_sha256.search(pkg).group(1) - except: - print("Failed to parse one package description", flush=True) - traceback.print_exc() - err = 1 - continue + for pkg_filename, pkg_info in package_info.items(): + pkg_size = pkg_info['size'] + pkg_checksum = pkg_info['sha256'] deb_count += 1 deb_size += pkg_size @@ -237,8 +301,8 @@ def collect_tmp_dir(): dest_dir.mkdir(parents=True, exist_ok=True) if dest_filename.suffix == '.deb': deb_set[str(dest_filename.relative_to(dest_base_dir))] = pkg_size - if dest_filename.is_file() and dest_filename.stat().st_size == pkg_size: - print(f"Skipping {pkg_filename}, size {pkg_size}") + if dest_filename.is_file() and ( dest_filename.stat().st_size == pkg_size and pkg_checksum['old'] == pkg_checksum['new']): + print(f"Skipping {pkg_filename}, size {pkg_size}, old sha256 {pkg_checksum['old']}, new sha256 {pkg_checksum['new']}") continue pkg_url=f"{base_url}/{pkg_filename}" @@ -253,8 +317,8 @@ def collect_tmp_dir(): with dest_tmp_filename.open("rb") as f: for block in iter(lambda: f.read(1024**2), b""): sha.update(block) - if sha.hexdigest() != pkg_checksum: - print(f"Invalid checksum of {dest_filename}, expected {pkg_checksum}") + if sha.hexdigest() != pkg_checksum['new']: + print(f"Invalid checksum of {dest_filename}, expected {pkg_checksum['new']}") dest_tmp_filename.unlink() continue dest_tmp_filename.rename(dest_filename)