Skip to content

Commit 6b9c356

Browse files
committed
add support to compare already unpacked directories
1 parent 61bd211 commit 6b9c356

File tree

1 file changed

+59
-43
lines changed

1 file changed

+59
-43
lines changed

src/gardenlinux/features/reproducibility/comparator.py

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
class Comparator(object):
2323
"""
24-
This class takes either two .tar or two .oci files and identifies differences in the filesystems
24+
This class takes two .tar files, two .oci files, or two directories and identifies differences between their filesystems
2525
2626
:author: Garden Linux Maintainers
2727
:copyright: Copyright 2026 SAP SE
@@ -194,58 +194,74 @@ def _diff_files(
194194
)
195195
return result
196196

197-
def generate(
197+
def _compare_directories(
    self, a: PathLike[str], b: PathLike[str]
) -> tuple[dict[str, Any], bool]:
    """
    Compare two directories with each other

    Differing paths that match one of the whitelist patterns are dropped. If
    both sides of a differing path are archive files, they are compared
    recursively and only reported when their contents differ.

    :param a: First folder
    :param b: Second folder

    :return: dict[str, Any], bool Filtered recursive dict of paths with different content and flag indicating if whitelist was applied
    :since:  1.0.0
    """

    cmp = filecmp.dircmp(a, b, shallow=False)

    diff_files = self._diff_files(cmp)

    # Keep only the differing paths that no whitelist pattern matches
    filtered: list[tuple[str, Optional[str], Optional[str]]] = [
        (general_name, diff_files[general_name][0], diff_files[general_name][1])
        for general_name in diff_files
        if not any(re.match(pattern, general_name) for pattern in self.whitelist)
    ]
    whitelist = len(diff_files) != len(filtered)

    result: dict[str, Any] = {}
    for general_name, left_name, right_name in filtered:
        nested: dict[str, Any] = {}
        if left_name and right_name:
            # The first character is stripped before joining - presumably a
            # leading "/", which would otherwise make joinpath() discard the
            # base directory (TODO confirm against _diff_files)
            file_a = Path(a).joinpath(left_name[1:])
            file_b = Path(b).joinpath(right_name[1:])
            if (
                file_a.is_file()
                and file_b.is_file()
                and patoolib.is_archive(file_a)
                and patoolib.is_archive(file_b)
            ):
                nested, whitelist_rec = self.generate(file_a, file_b)
                whitelist = whitelist or whitelist_rec
                if not nested:
                    # Skip if no files found in an archive to not count different timestamps inside the archives as a difference
                    continue
        result[general_name] = nested

    return result, whitelist
215246

216-
diff_files = self._diff_files(cmp)
247+
def generate(
    self, a: PathLike[str], b: PathLike[str]
) -> tuple[dict[str, Any], bool]:
    """
    Compare two .tar/.oci images or directories with each other

    An input that is a regular file is unpacked before the comparison, any
    other input is handed to the directory comparison as it is. This also
    allows comparing an image against an already unpacked directory.

    :param a: First .tar/.oci file or directory
    :param b: Second .tar/.oci file or directory

    :return: dict[str, Any], bool Filtered recursive dict of paths with different content and flag indicating if whitelist was applied
    :since:  1.0.0
    """

    # Imported locally so this method does not depend on the module header
    from contextlib import nullcontext

    a_is_file = Path(a).is_file()
    b_is_file = Path(b).is_file()

    # Byte-identical files cannot contain differences, so skip unpacking
    if a_is_file and b_is_file and filecmp.cmp(a, b, shallow=False):
        return {}, False

    # The second context is only created after the first one was entered,
    # matching the evaluation order of a plain two-item "with" statement
    with (
        self._unpack(a) if a_is_file else nullcontext(a)
    ) as dir_a, (
        self._unpack(b) if b_is_file else nullcontext(b)
    ) as dir_b:
        return self._compare_directories(dir_a, dir_b)

0 commit comments

Comments
 (0)