Skip to content

Commit

Permalink
Additional output to show the script's progress
Browse files Browse the repository at this point in the history
  • Loading branch information
niradar committed Jul 21, 2024
1 parent 99bd00a commit 1cd1eb0
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
3 changes: 2 additions & 1 deletion df_finder3.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def main(args):
confirm_script_execution(args)
hash_manager = setup_hash_manager(args.reference_dir, args.full_hash, args.clear_cache)

duplicates, scan_stats, ref_stats = find_duplicates_files_v3(args, args.scan_dir, args.reference_dir)
duplicates, scan_stats, ref_stats = find_duplicates_files_v3(args, args.scan_dir, args.reference_dir,
output_progress=True)

if args.action == 'move_duplicates':
files_moved, files_created = process_duplicates(duplicates, args)
Expand Down
14 changes: 13 additions & 1 deletion duplicate_files_in_folders/duplicates_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,23 @@ def aggregate_duplicate_candidates(potential_duplicates: List[Dict], combined: D
return combined


def find_duplicates_files_v3(args: Namespace, scan_dir: str, ref_dir: str) -> (Dict, List[Dict], List[Dict]):
def find_duplicates_files_v3(args: Namespace, scan_dir: str, ref_dir: str, output_progress=False) \
-> (Dict, List[Dict], List[Dict]):
"""
Find duplicate files between scan_dir and ref directories.
Returns a dictionary of duplicates and the file stats for both directories.
:param args: parsed arguments
:param scan_dir: the directory to scan for duplicates
:param ref_dir: the reference directory
:param output_progress: whether to output progress
:return: a dictionary of duplicates, the file stats for the scan directory, and the file stats for the reference directory
Dictionary format: {file_key: {'scan': [file_info], 'ref': [file_info]}}
"""
hash_manager = HashManager.get_instance()

if output_progress:
print(f"Scanning directories for duplicates: {scan_dir} and {ref_dir}")

# Get the file stats for both directories and filter them based on the arguments
scan_stats = filter_files_by_args(args, FileManager.get_files_and_stats(scan_dir))
ref_stats = filter_files_by_args(args, FileManager.get_files_and_stats(ref_dir))
Expand All @@ -146,6 +151,13 @@ def find_duplicates_files_v3(args: Namespace, scan_dir: str, ref_dir: str) -> (D
potential_scan_duplicates = find_potential_duplicates(ref_stats, scan_stats, args.ignore_diff)
potential_ref_duplicates = find_potential_duplicates(scan_stats, ref_stats, args.ignore_diff)

if output_progress:
print(f"Found {len(potential_scan_duplicates)} potential duplicates in the scan directory out of " +
f"{len(scan_stats)} files.")
print(f"Found {len(potential_ref_duplicates)} potential duplicates in the reference directory out of " +
f"{len(ref_stats)} files.")
print("Aggregating potential duplicates...")

# Aggregate the potential duplicates into one dictionary
combined = {}
combined = aggregate_duplicate_candidates(potential_scan_duplicates, combined, 'scan', args)
Expand Down

0 comments on commit 1cd1eb0

Please sign in to comment.