From 20ed00de2de47a6bbfba9398e0851a651fe26e47 Mon Sep 17 00:00:00 2001
From: Nir
Date: Sun, 23 Jun 2024 15:34:08 +0300
Subject: [PATCH] output summary also when outputting csv

---
 df_finder3.py                          |  4 +-
 .../duplicates_finder.py               | 12 ++++-
 duplicate_files_in_folders/utils_io.py | 51 ++++++++++++++-----
 3 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/df_finder3.py b/df_finder3.py
index 2ba3503..0781cbf 100644
--- a/df_finder3.py
+++ b/df_finder3.py
@@ -5,7 +5,8 @@
     clean_scan_dir_duplications, create_csv_file
 from duplicate_files_in_folders.initializer import setup_logging, setup_hash_manager, setup_file_manager
 from duplicate_files_in_folders.utils import parse_arguments
-from duplicate_files_in_folders.utils_io import display_initial_config, output_results, confirm_script_execution
+from duplicate_files_in_folders.utils_io import display_initial_config, output_results, confirm_script_execution, \
+    output_csv_file_creation_results
 
 
 def main(args):
@@ -27,6 +28,7 @@ def main(args):
     elif args.action == 'create_csv':
         # Always run in run mode as it creates a file and maybe a folder.
         fm.with_run_mode(create_csv_file, args, duplicates)
+        output_csv_file_creation_results(args, duplicates, scan_stats, ref_stats)
 
     hash_manager.save_data()
 
diff --git a/duplicate_files_in_folders/duplicates_finder.py b/duplicate_files_in_folders/duplicates_finder.py
index 14bdf12..239f416 100644
--- a/duplicate_files_in_folders/duplicates_finder.py
+++ b/duplicate_files_in_folders/duplicates_finder.py
@@ -199,6 +199,15 @@ def process_duplicates(combined: Dict, args: Namespace) -> (int, int):
     return files_moved, files_created
 
 
+def get_csv_file_path(args: Namespace) -> str:
+    """
+    Get the path of the CSV file to create.
+    :param args: parsed arguments
+    :return: the path of the CSV file
+    """
+    return str(os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups.csv"))
+
+
 def create_csv_file(args: Namespace, combined: Dict) -> None:
     """
     Create a CSV file with the duplicate files' information.
@@ -206,7 +215,7 @@ def create_csv_file(args: Namespace, combined: Dict) -> None:
     :param combined: the dictionary of duplicates returned by find_duplicates_files_v3
     :return: number of files moved
     """
-    csv_file = os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups.csv")
+    csv_file = get_csv_file_path(args)
 
     if not os.path.exists(args.move_to):
         FileManager.get_instance().make_dirs(args.move_to)
@@ -222,7 +231,6 @@ def create_csv_file(args: Namespace, combined: Dict) -> None:
             key += 1
 
 
-
 def clean_scan_dir_duplications(args: Namespace, combined: Dict) -> int:
     """
     Clean up the scan_dir duplications after moving files to the move_to folder.
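An aside on the refactor above: get_csv_file_path centralizes the path that create_csv_file previously built inline, so the summary code in utils_io.py (next diff) can report the exact file that was written. A minimal sketch of the helper's behavior; the Namespace values below are invented for illustration and only move_to and scan_dir are read:

from argparse import Namespace
import os

def get_csv_file_path(args: Namespace) -> str:
    # Same join as the helper added to duplicates_finder.py above.
    return str(os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups.csv"))

# Hypothetical arguments for the sketch.
args = Namespace(move_to="/backups/dups_out", scan_dir="/photos/camera_roll")
print(get_csv_file_path(args))  # /backups/dups_out/camera_roll_dups.csv

The CSV thus lands inside move_to, named after the scanned folder's base name with a "_dups.csv" suffix.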
diff --git a/duplicate_files_in_folders/utils_io.py b/duplicate_files_in_folders/utils_io.py
index 26989d3..b546f7e 100644
--- a/duplicate_files_in_folders/utils_io.py
+++ b/duplicate_files_in_folders/utils_io.py
@@ -2,6 +2,7 @@
 import sys
 from argparse import Namespace
 
+from duplicate_files_in_folders.duplicates_finder import get_csv_file_path
 from duplicate_files_in_folders.hash_manager import HashManager
 from duplicate_files_in_folders.utils import detect_pytest
 
@@ -108,18 +109,6 @@ def output_results(args: Namespace, files_moved: int, files_created: int, delete
     :return: None
     """
     summary_header = "Summary (Test Mode):" if not args.run else "Summary:"
-    separator = "-" * max(len(summary_header), 40)
-    fixed_width = 25
-
-    # Header
-    log_and_print("")
-    log_and_print(summary_header)
-    log_and_print(separator)
-    hash_manager = HashManager.get_instance()
-    # Cache hits information
-    cache_hits = f"Hash requests: {hash_manager.persistent_cache_requests + hash_manager.temporary_cache_requests}, " \
-                 f"Cache hits: {hash_manager.persistent_cache_hits + hash_manager.temporary_cache_hits}"
-    logger.debug(cache_hits)
     files_left = len(scan_stats) - files_moved - duplicate_scan_files_moved
 
     # Detailed summary
@@ -137,6 +126,44 @@
     if deleted_scan_folders:
         summary_lines['Empty Folders Deleted'] = f"{deleted_scan_folders} empty folders in the scan folder"
 
+    common_output_results(summary_header, summary_lines)
+
+
+def output_csv_file_creation_results(args: Namespace, combined_duplicates: dict, scan_stats=None, ref_stats=None):
+    """ Output the results of the CSV file creation.
+    :param args: The parsed arguments
+    :param combined_duplicates: The combined duplicates dictionary
+    :param scan_stats: Output of get_files_and_stats() for the scan folder
+    :param ref_stats: Output of get_files_and_stats() for the reference folder
+    """
+    summary_header = "CSV File Creation Summary:"
+
+    # Detailed summary
+    summary_lines = {
+        'CSV File Path': get_csv_file_path(args),
+        'Scan Folder Files': f"{format_number_with_commas(len(scan_stats)) if scan_stats else 'N/A'} files",
+        'Reference Folder Files': f"{format_number_with_commas(len(ref_stats)) if ref_stats else 'N/A'} files",
+        'Total Duplicate Files': len(combined_duplicates),
+    }
+
+    common_output_results(summary_header, summary_lines)
+
+
+def common_output_results(title: str, summary_lines: dict):
+    """ Output the common results of the script execution.
+    :param title: The title of the summary; callers pass it with a trailing colon.
+    :param summary_lines: Summary lines to output.
+    """
+    summary_header = title
+    separator = "-" * max(len(summary_header), 40)
+    fixed_width = 25
+
+    # Header
+    log_and_print("")
+    log_and_print(summary_header)
+    log_and_print(separator)
+
+    # Detailed summary
     for key, value in summary_lines.items():
         log_and_print(f"{key.ljust(fixed_width)}: {value}")
 
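To see how the extracted pieces fit together, here is a self-contained sketch of the rendering path; log_and_print is stubbed with print, and the summary values are invented (in the real code they come from get_csv_file_path and the folder stats):

def log_and_print(message: str) -> None:
    # Stand-in for the project's log_and_print helper.
    print(message)

def common_output_results(title: str, summary_lines: dict) -> None:
    # Mirrors the shared renderer added to utils_io.py above.
    separator = "-" * max(len(title), 40)
    fixed_width = 25
    log_and_print("")
    log_and_print(title)
    log_and_print(separator)
    for key, value in summary_lines.items():
        log_and_print(f"{key.ljust(fixed_width)}: {value}")

common_output_results("CSV File Creation Summary:", {
    "CSV File Path": "/backups/dups_out/camera_roll_dups.csv",  # example value
    "Scan Folder Files": "1,204 files",
    "Reference Folder Files": "987 files",
    "Total Duplicate Files": 42,
})

Each entry renders as its key left-padded to 25 characters, a colon, and the value, beneath the title and a 40-dash separator, so both the move/copy summary and the new CSV summary share one layout.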