
Commit

output summary also when output csv
niradar committed Jun 23, 2024
1 parent 33932cf commit 20ed00d
Showing 3 changed files with 52 additions and 15 deletions.
4 changes: 3 additions & 1 deletion df_finder3.py
@@ -5,7 +5,8 @@
     clean_scan_dir_duplications, create_csv_file
 from duplicate_files_in_folders.initializer import setup_logging, setup_hash_manager, setup_file_manager
 from duplicate_files_in_folders.utils import parse_arguments
-from duplicate_files_in_folders.utils_io import display_initial_config, output_results, confirm_script_execution
+from duplicate_files_in_folders.utils_io import display_initial_config, output_results, confirm_script_execution, \
+    output_csv_file_creation_results
 
 
 def main(args):
@@ -27,6 +28,7 @@ def main(args):
     elif args.action == 'create_csv':
         # Always run in run mode as it creates a file and maybe a folder.
         fm.with_run_mode(create_csv_file, args, duplicates)
+        output_csv_file_creation_results(args, duplicates, scan_stats, ref_stats)
 
     hash_manager.save_data()
 
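A minimal, self-contained sketch of what the new call does in isolation. The Namespace fields mirror what parse_arguments() is expected to provide, while the duplicate groups and file lists are made-up placeholders rather than real scanner output (only their lengths matter to the summary):

from argparse import Namespace
from duplicate_files_in_folders.utils_io import output_csv_file_creation_results

# Made-up inputs standing in for parse_arguments() and the scanner's results.
args = Namespace(move_to="/dest", scan_dir="/data/photos")
duplicates = {"hash-a": ["file1.jpg", "file2.jpg"], "hash-b": ["song.mp3", "song (1).mp3"]}
scan_stats = [f"/data/photos/f{i}" for i in range(1500)]   # 1,500 scanned entries
ref_stats = [f"/dest/f{i}" for i in range(1200)]           # 1,200 reference entries

# Prints a header plus aligned lines such as:
#   CSV File Path            : /dest/photos_dups.csv
#   Scan Folder Files        : 1,500 files
output_csv_file_creation_results(args, duplicates, scan_stats, ref_stats)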
12 changes: 10 additions & 2 deletions duplicate_files_in_folders/duplicates_finder.py
@@ -199,14 +199,23 @@ def process_duplicates(combined: Dict, args: Namespace) -> (int, int):
     return files_moved, files_created
 
 
+def get_csv_file_path(args: Namespace) -> str:
+    """
+    Get the path of the CSV file to create.
+    :param args: parsed arguments
+    :return: the path of the CSV file
+    """
+    return str(os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups.csv"))
+
+
 def create_csv_file(args: Namespace, combined: Dict) -> None:
     """
     Create a CSV file with the duplicate files' information.
     :param args: parsed arguments
     :param combined: the dictionary of duplicates returned by find_duplicates_files_v3
     :return: None
     """
-    csv_file = os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups.csv")
+    csv_file = get_csv_file_path(args)
     if not os.path.exists(args.move_to):
         FileManager.get_instance().make_dirs(args.move_to)
 
@@ -222,7 +231,6 @@ def create_csv_file(args: Namespace, combined: Dict) -> None:
         key += 1
 
 
-
 def clean_scan_dir_duplications(args: Namespace, combined: Dict) -> int:
     """
     Clean up the scan_dir duplications after moving files to the move_to folder.
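As a quick illustration of the extracted helper's behavior (the paths below are illustrative, not from the repository):

import os
from argparse import Namespace
from duplicate_files_in_folders.duplicates_finder import get_csv_file_path

args = Namespace(move_to="/dest", scan_dir="/data/photos")
print(get_csv_file_path(args))   # /dest/photos_dups.csv

# Worth noting: os.path.basename() returns "" for a path with a trailing
# separator, so "/data/photos/" yields "/dest/_dups.csv". Callers may want
# to normalize scan_dir (e.g. with os.path.normpath) before building the name.
args.scan_dir = "/data/photos/"
print(get_csv_file_path(args))   # /dest/_dups.csv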
51 changes: 39 additions & 12 deletions duplicate_files_in_folders/utils_io.py
@@ -2,6 +2,7 @@
 import sys
 from argparse import Namespace
 
+from duplicate_files_in_folders.duplicates_finder import get_csv_file_path
 from duplicate_files_in_folders.hash_manager import HashManager
 from duplicate_files_in_folders.utils import detect_pytest
 
@@ -108,18 +109,6 @@ def output_results(args: Namespace, files_moved: int, files_created: int, delete
     :return: None
     """
     summary_header = "Summary (Test Mode):" if not args.run else "Summary:"
-    separator = "-" * max(len(summary_header), 40)
-    fixed_width = 25
-
-    # Header
-    log_and_print("")
-    log_and_print(summary_header)
-    log_and_print(separator)
-    hash_manager = HashManager.get_instance()
-    # Cache hits information
-    cache_hits = f"Hash requests: {hash_manager.persistent_cache_requests + hash_manager.temporary_cache_requests}, " \
-                 f"Cache hits: {hash_manager.persistent_cache_hits + hash_manager.temporary_cache_hits}"
-    logger.debug(cache_hits)
 
     files_left = len(scan_stats) - files_moved - duplicate_scan_files_moved
     # Detailed summary
@@ -137,6 +126,44 @@
     if deleted_scan_folders:
         summary_lines['Empty Folders Deleted'] = f"{deleted_scan_folders} empty folders in the scan folder"
 
+    common_output_results(summary_header, summary_lines)
+
+
+def output_csv_file_creation_results(args: Namespace, combined_duplicates: dict, scan_stats=None, ref_stats=None):
+    """ Output the results of the CSV file creation.
+    :param args: The parsed arguments
+    :param combined_duplicates: The combined duplicates dictionary
+    :param scan_stats: Output of get_files_and_stats() for the scan folder
+    :param ref_stats: Output of get_files_and_stats() for the reference folder
+    """
+    summary_header = "CSV File Creation Summary:"
+
+    # Detailed summary
+    summary_lines = {
+        'CSV File Path': get_csv_file_path(args),
+        'Scan Folder Files': f"{format_number_with_commas(len(scan_stats)) if scan_stats else 'N/A'} files",
+        'Reference Folder Files': f"{format_number_with_commas(len(ref_stats)) if ref_stats else 'N/A'} files",
+        'Total Duplicate Files': len(combined_duplicates),
+    }
+
+    common_output_results(summary_header, summary_lines)
+
+
+def common_output_results(title: str, summary_lines: dict):
+    """ Output the common results of the script execution.
+    :param title: The title of the summary (callers already include the trailing colon).
+    :param summary_lines: Summary lines to output.
+    """
+    summary_header = title
+    separator = "-" * max(len(summary_header), 40)
+    fixed_width = 25
+
+    # Header
+    log_and_print("")
+    log_and_print(summary_header)
+    log_and_print(separator)
+
+    # Detailed summary
     for key, value in summary_lines.items():
         log_and_print(f"{key.ljust(fixed_width)}: {value}")
 
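The shared helper reduces both summaries to the same fixed-width layout; here is a standalone sketch of the formatting it applies, with plain print standing in for log_and_print and made-up summary values:

summary_header = "CSV File Creation Summary:"
separator = "-" * max(len(summary_header), 40)   # at least 40 dashes wide
fixed_width = 25                                 # label column width

print(summary_header)
print(separator)
for key, value in {'CSV File Path': '/dest/photos_dups.csv',
                   'Total Duplicate Files': 2}.items():
    print(f"{key.ljust(fixed_width)}: {value}")

# CSV File Creation Summary:
# ----------------------------------------
# CSV File Path            : /dest/photos_dups.csv
# Total Duplicate Files    : 2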
