From f5942f41e941fd6a82b3a72df808c57b39207914 Mon Sep 17 00:00:00 2001 From: "HOME-2022\\User" Date: Tue, 11 Jun 2024 14:27:12 +0300 Subject: [PATCH] Removed the old version from the code --- df_finder3.py | 13 +- .../old_duplicates_finder.py | 176 --------- duplicate_files_in_folders/utils.py | 1 - tests/test_functions.py | 1 - tests/test_old_functions.py | 360 ------------------ tests/test_simple_usecases.py | 11 - 6 files changed, 4 insertions(+), 558 deletions(-) delete mode 100644 duplicate_files_in_folders/old_duplicates_finder.py delete mode 100644 tests/test_old_functions.py diff --git a/df_finder3.py b/df_finder3.py index eb282c7..0b0b25d 100644 --- a/df_finder3.py +++ b/df_finder3.py @@ -4,7 +4,6 @@ from duplicate_files_in_folders.duplicates_finder import find_duplicates_files_v3, process_duplicates, \ clean_scan_dir_duplications from duplicate_files_in_folders.logging_config import setup_logging -from duplicate_files_in_folders.old_duplicates_finder import find_and_process_duplicates from duplicate_files_in_folders.utils import parse_arguments, setup_hash_manager, setup_file_manager from duplicate_files_in_folders.utils_io import display_initial_config, output_results, confirm_script_execution @@ -15,16 +14,12 @@ def main(args): display_initial_config(args) confirm_script_execution(args) hash_manager = setup_hash_manager(args) - if args.old_script is True: - (files_moved, files_created, unique_scan_duplicate_files_found, duplicate_scan_files_moved) = ( - find_and_process_duplicates(args)) - scan_stats = ref_stats = [] - else: - duplicates, scan_stats, ref_stats = find_duplicates_files_v3(args, args.scan_dir, args.reference_dir) - files_moved, files_created = process_duplicates(duplicates, args) - duplicate_scan_files_moved = clean_scan_dir_duplications(args, duplicates) + duplicates, scan_stats, ref_stats = find_duplicates_files_v3(args, args.scan_dir, args.reference_dir) + files_moved, files_created = process_duplicates(duplicates, args) + duplicate_scan_files_moved = clean_scan_dir_duplications(args, duplicates) deleted_scan_folders = fm.delete_empty_folders_in_tree(args.scan_dir, True) if args.delete_empty_folders else 0 + hash_manager.save_data() output_results(args, files_moved, files_created, deleted_scan_folders, duplicate_scan_files_moved, scan_stats, ref_stats) diff --git a/duplicate_files_in_folders/old_duplicates_finder.py b/duplicate_files_in_folders/old_duplicates_finder.py deleted file mode 100644 index aca2f5c..0000000 --- a/duplicate_files_in_folders/old_duplicates_finder.py +++ /dev/null @@ -1,176 +0,0 @@ -import logging -import os -from collections import defaultdict -from typing import Dict, List, Tuple - -import tqdm - -from duplicate_files_in_folders.file_manager import FileManager -from duplicate_files_in_folders.hash_manager import HashManager -from duplicate_files_in_folders.utils import check_and_update_filename, copy_or_move_file, get_file_key - -logger = logging.getLogger(__name__) - - -def compare_files(src_filepath, tgt_filepath, ignore_diffs): - ignore_diffs = ignore_diffs if ignore_diffs else set('mdate') - if ('filename' not in ignore_diffs and - src_filepath[src_filepath.rfind(os.sep) + 1:] != tgt_filepath[tgt_filepath.rfind(os.sep) + 1:]): - return False - if 'mdate' not in ignore_diffs and not os.path.getmtime(src_filepath) == os.path.getmtime(tgt_filepath): - return False - if os.path.getsize(src_filepath) != os.path.getsize(tgt_filepath): - return False - return get_file_hash(src_filepath) == get_file_hash(tgt_filepath) - - -def clean_scan_duplications(args, keys_to_clean=None, given_duplicates: Dict[str, List[Tuple[str, int]]] = None): - """ - Clean the scan_dir folder from duplicate files. Move the duplicates to a new folder under the move_to folder. - :param given_duplicates: if not None, use this dictionary of duplicates instead of finding them again. - :param args: - :param keys_to_clean: List of key to clean. If None, clean all duplicates but the first one from each group. \ - If not None, clean only the duplicates with the hashes in the list but clean all the duplicates from the group. - - :return: - """ - scan_duplicates = given_duplicates if given_duplicates else { - src_key: src_filepaths for src_key, src_filepaths in collect_scan_files(args).items() - if len(src_filepaths) > 1 - } - source: str = args.scan_dir - scan_dups_move_to = os.path.join(args.move_to, os.path.basename(source) + "_dups") - unique_duplicate_files_found = duplicate_files_moved = 0 - - for group_key, group in scan_duplicates.items(): - if keys_to_clean and group_key not in keys_to_clean: - continue - logger.debug(f"Found {len(group)} duplicate files for {group[0][0]}") - - # Sort the files by their depth, then by their modification time or name - group.sort(key=lambda x: (x[1], x[0] if 'mdate' in args.ignore_diff else os.path.getmtime(x[0]))) - - unique_duplicate_files_found += 1 - start_index = 1 if not keys_to_clean else 0 - fm = FileManager.get_instance() - # Move all the other files to a new folder under the move_to folder - for src_filepath, _ in group[start_index:]: - new_src_path = os.path.join(scan_dups_move_to, os.path.relpath(src_filepath, source)) - new_src_dir = os.path.dirname(new_src_path) - if not os.path.exists(new_src_dir): - fm.make_dirs(new_src_dir) - new_filename = check_and_update_filename(new_src_path) - fm.move_file(src_filepath, new_filename) - duplicate_files_moved += 1 - - if unique_duplicate_files_found: - logger.info( - f"Cleaning scan_dir folder: Found {unique_duplicate_files_found} unique duplicate files in the scan_dir folder," - f" moved {duplicate_files_moved} files to {scan_dups_move_to}") - return unique_duplicate_files_found, duplicate_files_moved - - -def find_and_process_duplicates(args): - scan_files = collect_scan_files(args) - total_scan_files = sum(len(paths) for paths in scan_files.values()) - logger.info(f"Source folder: Found {total_scan_files} files ({len(scan_files)} unique files) in {args.scan_dir}") - - ref_files = collect_ref_files(args) # key is hash or filename, value is list of file paths - total_files = sum(len(paths) for paths in ref_files.values()) - key_type = "filenames" if 'filename' not in args.ignore_diff else "hashes" - logger.info(f"Found {total_files} files ({len(ref_files)} unique {key_type}) in {args.reference_dir}") - - # Store the scan_dir duplicates before processing - scan_duplicates: Dict[str, List[Tuple[str, int]]] = \ - {src_key: src_filepaths for src_key, src_filepaths in scan_files.items() if len(src_filepaths) > 1} - - files_moved = files_created = 0 - scan_duplicates_to_process = {} - - for src_key, src_filepaths in tqdm.tqdm(scan_files.items(), desc="Finding duplicate files"): - src_filepath, _ = src_filepaths[0] - ref_key = get_file_hash(src_filepath) \ - if 'filename' in args.ignore_diff else src_filepath[src_filepath.rfind(os.sep) + 1:] - if ref_key not in ref_files: # if the file is not found in the ref folder, no need to process it - continue - ref_paths = ref_files[ref_key] # all possible ref paths for the scan_dir file - ref_paths_to_copy = [] - try: - for tgt_filepath in ref_paths: - if compare_files(src_filepath, tgt_filepath, args.ignore_diff): - ref_paths_to_copy.append(tgt_filepath) - if ref_paths_to_copy: - srcs_to_move = scan_duplicates[src_key].copy() if src_key in scan_duplicates else [] - files_created, files_moved = move_to_ref_paths(args, src_filepath, ref_paths_to_copy, - srcs_to_move, files_created, files_moved) - filtered_group = [(src_path, depth) for src_path, depth in srcs_to_move if os.path.exists(src_path)] - if filtered_group: - scan_duplicates_to_process[src_key] = filtered_group - except Exception as e: - logger.exception(f"Error handling {src_filepath}: {e}") - raise - - # clean scan_dir duplicates of files moved to the move_to folder - unique_scan_duplicate_files_found, duplicate_scan_files_moved = ( - clean_scan_duplications(args, scan_duplicates_to_process.keys(), scan_duplicates_to_process)) \ - if scan_duplicates_to_process else (0, 0) - - return files_moved, files_created, unique_scan_duplicate_files_found, duplicate_scan_files_moved - - -def move_to_ref_paths(args, src_filepath, ref_paths_to_copy, scan_duplicates, files_created, files_moved): - # future improvement: smarter move - we might have same folder structure between copies in scan_dir and target - if not scan_duplicates: # If scan_duplicates is empty, use src_filepath for copying and moving - scan_duplicates = [(src_filepath, 0)] - scan_duplicates.sort(key=lambda x: x[0], reverse=True) # sort by path name reverse for easier testing - - if not args.copy_to_all: - copy_or_move_file(src_filepath, args.move_to, ref_paths_to_copy[0], args.reference_dir) - return files_created, files_moved + 1 - - num_to_copy = max(0, len(ref_paths_to_copy) - len(scan_duplicates)) - if num_to_copy: # Copy first scan_dir to make up for fewer scan_dir duplicates - for i in range(num_to_copy): - copy_or_move_file(src_filepath, args.move_to, ref_paths_to_copy[i], args.reference_dir, False) - files_created += 1 - - # Move each scan_dir duplicate to the corresponding ref path - for (src, _), tgt in zip(scan_duplicates, ref_paths_to_copy[num_to_copy:]): - copy_or_move_file(src, args.move_to, tgt, args.reference_dir, move=True) - files_moved += 1 - - return files_created, files_moved - - -def collect_ref_files(args): - ref_files = defaultdict(list) - # list so it won't be lazy - walk = list(os.walk(args.reference_dir)) - for root, dirs, files in tqdm.tqdm(walk, desc="Scanning ref folders"): - for f in files: - full_path = str(os.path.join(root, f)) - key = f if 'filename' not in args.ignore_diff else get_file_hash(full_path) - ref_files[key].append(full_path) - if args.extra_logging: - for key, paths in ref_files.items(): - logger.debug(f"{key}: {paths}") - return ref_files - - -def collect_scan_files(args) -> Dict[str, List[Tuple[str, int]]]: - scan_files = defaultdict(list) - scan_depth = args.scan_dir.count(os.sep) - walk = list(os.walk(args.scan_dir)) - for root, dirs, files in tqdm.tqdm(walk, desc="Scanning scan_dir folders"): - for f in files: - full_path = str(os.path.join(root, f)) - if os.path.isfile(full_path): - depth = full_path.count(os.sep) - scan_depth - scan_files[get_file_key(args, full_path)].append((full_path, depth)) - return scan_files - - -def get_file_hash(file_path: str) -> str: - """Retrieve the hash of the given file.""" - hash_manager = HashManager.get_instance() - return hash_manager.get_hash(file_path) diff --git a/duplicate_files_in_folders/utils.py b/duplicate_files_in_folders/utils.py index 1a6f23f..b9a04cd 100644 --- a/duplicate_files_in_folders/utils.py +++ b/duplicate_files_in_folders/utils.py @@ -94,7 +94,6 @@ def parse_arguments(cust_args=None, check_folders=True): parser.set_defaults(delete_empty_folders=True) parser.add_argument('--clear_cache', action='store_true', help=argparse.SUPPRESS) # for testing parser.add_argument('--extra_logging', action='store_true', help=argparse.SUPPRESS) # for testing - parser.add_argument('--old_script', action='store_true', help=argparse.SUPPRESS) # for testing args = parser.parse_args(cust_args if cust_args else None) # Validate the folders given in the arguments diff --git a/tests/test_functions.py b/tests/test_functions.py index 9f5810a..7045b5c 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -37,7 +37,6 @@ def test_parse_arguments(): assert args.whitelist_ext is None assert args.blacklist_ext is None assert args.full_hash is False - assert args.old_script is False # Test case 3: Many arguments provided args = parse_arguments(['--scan', scan_dir, '--reference_dir', reference_dir, '--move_to', move_to_folder, diff --git a/tests/test_old_functions.py b/tests/test_old_functions.py deleted file mode 100644 index 04228ae..0000000 --- a/tests/test_old_functions.py +++ /dev/null @@ -1,360 +0,0 @@ -import os -import shutil -import time - -from duplicate_files_in_folders.file_manager import FileManager -from duplicate_files_in_folders.old_duplicates_finder import compare_files, clean_scan_duplications, \ - collect_scan_files -from duplicate_files_in_folders.utils import parse_arguments, get_file_key -from tests.helpers_testing import copy_files, img_files, IMG_DIR, setup_teardown - - -def test_compare_files(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - # Setup the files in the scan_dir directory - copy_files(range(1, 3), scan_dir) - - # sleep for 0.5 second to make sure the mdate is different - time.sleep(0.5) - copy_files(range(1, 3), reference_dir) - - src1_file = os.path.join(scan_dir, "1.jpg") - tgt1_file = os.path.join(reference_dir, "1.jpg") - src2_file = os.path.join(scan_dir, "2.jpg") - dup1_file = str(os.path.join(scan_dir, img_files[1]['original_name'])) - - # copy file 1 also with original name to scan_dir folder - shutil.copy(src1_file, dup1_file) - - # Test case 1: same file, compare by filename True - assert compare_files(src1_file, src1_file, None) is True - - # Test case 2: same file, compare by filename False - assert compare_files(src1_file, src1_file, {'filename'}) is True - - # Test case 3: different files, compare by filename True - assert compare_files(src1_file, src2_file, None) is False - - # Test case 4: different files, compare by filename False - assert compare_files(src1_file, src2_file, {'mdate', 'filename'}) is False - - # Test case 5: same file, different folders, compare by filename True, mdate different - assert compare_files(src1_file, tgt1_file, None) is False - - # Test case 6: same file, different folders, compare by filename False, ignore mdate - assert compare_files(src1_file, tgt1_file, {'mdate'}) is True - - # Test case 7: same file, different folders, compare by filename True, ignore mdate - assert compare_files(src1_file, tgt1_file, {'mdate'}) is True - - # Test case 8: same file, different name, compare by filename True, ignore mdate - # It is by design that it won't compare the files names, it assumes it was already done - assert compare_files(src1_file, dup1_file, {'mdate'}) is False - - # Test case 9: same file, different name, compare by filename False, ignore mdate - assert compare_files(src1_file, dup1_file, {'mdate', 'filename'}) is True - - -def test_clean_scan_duplications(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - # Create the necessary subdirectories in the scan_dir and ref directories - os.makedirs(os.path.join(scan_dir, "sub1")) - - # Setup the files in the scan_dir directory - copy_files(range(1, 6), scan_dir) - copy_files(range(1, 6), os.path.join(scan_dir, "sub1")) - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - args = parse_arguments(common_args) - unique_duplicate_files_found, duplicate_files_moved = clean_scan_duplications(args) - - # Check if all files from scan_dir subdirectory are now in base folder of move_to - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub1"))) - assert not scan_sub_files, "Scan subdirectory is not empty" - - # Check scan_dir folder has files 1-5 and sub1 folder is empty - scan_files = set(os.listdir(scan_dir)) - assert scan_files == set([f"{i}.jpg" for i in range(1, 6)] + ['sub1']), "Scan directory files not correct" - - # Check move_to folder has files 1-5 under move_to/scan_dups/sub1 folder - move_to_files = set(os.listdir(os.path.join(move_to_dir, "scan_dups", "sub1"))) - assert move_to_files == set([f"{i}.jpg" for i in range(1, 6)]), "Not all files have been moved to move_to directory" - - assert unique_duplicate_files_found == 5, "Unique duplicate files found" - assert duplicate_files_moved == 5, "Not all duplicate files have been moved to move_to directory" - - -def test_clean_scan_duplications_several_subfolders(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - # Create the necessary subdirectories in the scan_dir and ref directories - os.makedirs(os.path.join(scan_dir, "sub1")) - os.makedirs(os.path.join(scan_dir, "sub2")) - - # Setup the files in the scan_dir directory - copy_files(range(1, 6), scan_dir) - copy_files(range(1, 6), os.path.join(scan_dir, "sub1")) - copy_files(range(1, 6), os.path.join(scan_dir, "sub2")) - - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - args = parse_arguments(common_args) - unique_duplicate_files_found, duplicate_files_moved = clean_scan_duplications(args) - - # Check if all files from scan_dir subdirectory are now in base folder of move_to - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub1"))) - assert not scan_sub_files, "Scan subdirectory is not empty" - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub2"))) - assert not scan_sub_files, "Scan subdirectory is not empty" - - # Check scan_dir folder has files 1-5 and sub1, sub2 folders are empty - scan_files = set(os.listdir(scan_dir)) - assert scan_files == set([f"{i}.jpg" for i in range(1, 6)] + ['sub1', 'sub2']), "Source files not correct" - - # Check move_to folder has files 1-5 under move_to/scan_dups/sub1 and sub2 folders - move_to_files = set(os.listdir(os.path.join(move_to_dir, "scan_dups", "sub1"))) - assert move_to_files == set([f"{i}.jpg" for i in range(1, 6)]), "Not all files have been moved to move_to directory" - move_to_files = set(os.listdir(os.path.join(move_to_dir, "scan_dups", "sub2"))) - assert move_to_files == set([f"{i}.jpg" for i in range(1, 6)]), "Not all files have been moved to move_to directory" - - assert unique_duplicate_files_found == 5, "Unique duplicate files found" - assert duplicate_files_moved == 10, "Not all duplicate files have been moved to move_to directory" - - -def test_clean_scan_duplications_test_mode(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - # Create the necessary subdirectories in the scan_dir and ref directories - os.makedirs(os.path.join(scan_dir, "sub1")) - - # Setup the files in the scan_dir directory - copy_files(range(1, 6), scan_dir) - copy_files(range(1, 6), os.path.join(scan_dir, "sub1")) - - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - common_args.remove("--run") - FileManager._instance = None # reset the singleton instance to make sure it is not used - fm = FileManager(False).reset_all() - - args = parse_arguments(common_args) - unique_duplicate_files_found, duplicate_files_moved = clean_scan_duplications(args) - - # Check if all files from scan_dir subdirectory are still there - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub1"))) - assert scan_sub_files == set([f"{i}.jpg" for i in range(1, 6)]), "Source subdirectory files have been moved" - - # Check scan_dir folder has files 1-5 and sub1 folder - scan_files = set(os.listdir(scan_dir)) - assert scan_files == set([f"{i}.jpg" for i in range(1, 6)] + ['sub1']), "Scan directory files not correct" - - # Check that os.path.join(move_to_dir, "scan_dups") does not exist - assert not os.path.exists(os.path.join(move_to_dir, "scan_dups")), "move_to directory exists" - - # Check move_to folder is empty - move_to_files = set(os.listdir(move_to_dir)) - assert not move_to_files, "move_to directory is not empty" - - assert unique_duplicate_files_found == 5, "Unique duplicate files found" - assert duplicate_files_moved == 5, "Wrong calculation of files to be moved to move_to directory" - - -def test_clean_scan_duplications_same_name_different_files(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - os.makedirs(os.path.join(scan_dir, "sub1")) - os.makedirs(os.path.join(scan_dir, "sub2")) - - # Setup the files in the scan_dir directory - copy_files(range(1, 3), os.path.join(scan_dir, "sub1")) - - # copy files 3 and 4 to sub2 folder but call them 1.jpg and 2.jpg - for file_number in range(3, 5): - src_file = os.path.join(IMG_DIR, f"{file_number}.jpg") - dst_file = os.path.join(scan_dir, "sub2", f"{file_number - 2}.jpg") - shutil.copy(src_file, dst_file) - - # copy file 5 to both sub1 and sub2 folders - src_file = os.path.join(IMG_DIR, "5.jpg") - shutil.copy(src_file, os.path.join(scan_dir, "sub1", "5.jpg")) - shutil.copy(src_file, os.path.join(scan_dir, "sub2", "5.jpg")) - - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - common_args.append("--extra_logging") - - args = parse_arguments(common_args) - unique_duplicate_files_found, duplicate_files_moved = clean_scan_duplications(args) - - # sub1 folder should be the same - files 1, 2 and 5 - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub1"))) - assert scan_sub_files == set([f"{i}.jpg" for i in range(1, 3)] + ['5.jpg']), "Source sub1 files have been moved" - - # sub2 folder should be - files 1 and 2 - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub2"))) - assert scan_sub_files == set([f"{i}.jpg" for i in range(1, 3)]), "Source sub2 files is not correct" - - assert unique_duplicate_files_found == 1, "Unique duplicate files found" - assert duplicate_files_moved == 1, "Wrong calculation of files to be moved to move_to directory" - - -def test_clean_scan_duplications_same_name_different_files_ignore_filename(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - os.makedirs(os.path.join(scan_dir, "sub1")) - os.makedirs(os.path.join(scan_dir, "sub2")) - - # Setup the files in the scan_dir directory - copy_files(range(1, 3), os.path.join(scan_dir, "sub1")) - - # copy files 3 and 4 to sub2 folder but call them 1.jpg and 2.jpg - for file_number in range(3, 5): - src_file = os.path.join(IMG_DIR, f"{file_number}.jpg") - dst_file = os.path.join(scan_dir, "sub2", f"{file_number - 2}.jpg") - shutil.copy(src_file, dst_file) - - # copy file 5 to both sub1 and sub2 folders - src_file = os.path.join(IMG_DIR, "5.jpg") - shutil.copy(src_file, os.path.join(scan_dir, "sub1", "5.jpg")) - shutil.copy(src_file, os.path.join(scan_dir, "sub2", "5.jpg")) - - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - common_args.append("--extra_logging") - common_args.append("--ignore_diff") - common_args.append("filename,mdate") - - # scan_dir content: - # sub1: 1.jpg, 2.jpg, 5.jpg - # sub2: 1.jpg (different file), 2.jpg (different file), 5.jpg - - args = parse_arguments(common_args) - unique_duplicate_files_found, duplicate_files_moved = clean_scan_duplications(args) - - # sub1 folder should be the same - files 1, 2 and 5 - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub1"))) - assert scan_sub_files == set([f"{i}.jpg" for i in range(1, 3)] + ['5.jpg']), "Scan sub1 files have been moved" - - # sub2 folder should be - files 1 and 2 - scan_sub_files = set(os.listdir(os.path.join(scan_dir, "sub2"))) - assert scan_sub_files == set([f"{i}.jpg" for i in range(1, 3)]), "Scan sub2 files is not correct" - - assert unique_duplicate_files_found == 1, "Unique duplicate files found" - assert duplicate_files_moved == 1, "Wrong calculation of files to be moved to move_to directory" - - -def test_collect_scan_files_simple(setup_teardown): - # files 1 to 4 in root, 3 to 6 in sub1 - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - os.makedirs(os.path.join(scan_dir, "sub1")) - copy_files(range(1, 5), scan_dir) - copy_files(range(3, 7), os.path.join(scan_dir, "sub1")) - - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - args = parse_arguments(common_args) - scan_files = collect_scan_files(args) - scan_duplicates = {src_key: src_filepaths for src_key, src_filepaths in scan_files.items() - if len(src_filepaths) > 1} - assert len(scan_duplicates) == 2, "Unique duplicate files found" - assert scan_duplicates == { - get_file_key(args, os.path.join(scan_dir, "3.jpg")): [(os.path.join(scan_dir, "3.jpg"), 1), (os.path.join(scan_dir, "sub1", "3.jpg"), 2)], - get_file_key(args, os.path.join(scan_dir, "4.jpg")): [(os.path.join(scan_dir, "4.jpg"), 1), (os.path.join(scan_dir, "sub1", "4.jpg"), 2)]}, "Wrong calculation of files to be moved to move_to directory" - - -# def test_validate_duplicate_files_destination(setup_teardown): -# scan_dir, reference_dir, move_to_dir, common_args = setup_teardown -# -# # test case 1: folder doesn't exist but can be created under the scan_dir folder -# file_manager.FileManager.reset_file_manager([reference_dir], [scan_dir, move_to_dir], True) -# assert validate_duplicate_files_destination(os.path.join(scan_dir, "sub1"), run_mode=True) is True -# -# # test case 2: folder doesn't exist and cannot be created -# with pytest.raises(SystemExit) as excinfo: -# file_manager.FileManager.reset_file_manager([reference_dir], [scan_dir, move_to_dir], True) -# validate_duplicate_files_destination(os.path.join(scan_dir, "\"^&%/#$^\0%&!@"), run_mode=True) -# assert excinfo.type == SystemExit -# assert excinfo.value.code == 1 -# -# # test case 3: folder exist -# file_manager.FileManager.reset_file_manager([reference_dir], [scan_dir, move_to_dir], True) -# assert validate_duplicate_files_destination(scan_dir, run_mode=True) is True -# -# # test case 4: same as test case 1 but with run_mode=False -# file_manager.FileManager.reset_file_manager([reference_dir], [scan_dir, move_to_dir], True) -# assert validate_duplicate_files_destination(os.path.join(scan_dir, "sub1"), run_mode=False) is True -# -# # test case 5: non-existing folder but can be created, run_mode=False -# file_manager.FileManager.reset_file_manager([reference_dir], [scan_dir, move_to_dir], True) -# assert validate_duplicate_files_destination(os.path.join(scan_dir, "sub_new"), run_mode=False) is True - - -def test_delete_empty_folders_in_tree(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - - # Create the necessary subdirectories in the scan_dir and ref directories - os.makedirs(os.path.join(scan_dir, "sub1")) - os.makedirs(os.path.join(scan_dir, "sub2")) - os.makedirs(os.path.join(scan_dir, "sub2", "sub2_2")) - - # Setup the files in the scan_dir directory - copy_files(range(1, 6), scan_dir) - copy_files(range(1, 6), os.path.join(scan_dir, "sub1")) - copy_files(range(1, 6), os.path.join(scan_dir, "sub2")) - copy_files(range(1, 6), os.path.join(scan_dir, "sub2", "sub2_2")) - - copy_files([7], reference_dir) # copy one file to ref folder to avoid argument error - - args = parse_arguments(common_args) - unique_duplicate_files_found, duplicate_files_moved = clean_scan_duplications(args) - fm = FileManager.get_instance() - fm.delete_empty_folders_in_tree(scan_dir) - - assert unique_duplicate_files_found == 5, "Unique duplicate files found" - - # check if all empty folders have been deleted - assert not os.path.exists(os.path.join(scan_dir, "sub1")), "sub1 folder is not empty" - assert not os.path.exists(os.path.join(scan_dir, "sub2")), "sub2 folder is not empty" # no need to check sub2_2 - - # check that scan_dir folder was not deleted - assert os.path.exists(scan_dir), "scan_dir folder does not exist" - - -# def test_validate_folder(setup_teardown): -# scan_dir, _, _, _ = setup_teardown -# -# # test case 1: folder not existing -# with pytest.raises(SystemExit) as excinfo: -# validate_folder(os.path.join(scan_dir, "sub1"), "sub1") -# assert excinfo.type == SystemExit -# assert excinfo.value.code == 1 -# -# # test case 2: folder existing but empty -# os.makedirs(os.path.join(scan_dir, "sub1")) -# with pytest.raises(SystemExit) as excinfo: -# validate_folder(os.path.join(scan_dir, "sub1"), "sub1") -# assert excinfo.type == SystemExit -# assert excinfo.value.code == 1 -# -# # test case 3: folder existing and not empty -# copy_files(range(1, 6), os.path.join(scan_dir, "sub1")) -# assert validate_folder(os.path.join(scan_dir, "sub1"), "sub1") is True -# -# -# -# def print_error(message): -# print(f"Error: {message}") -# logger.critical(f"{message}") -# sys.exit(1) -# -# -# def validate_folder(folder, name): -# """ Validate if a folder exists and is not empty. """ -# if not os.path.isdir(folder) or not os.path.exists(folder): -# print_error(f"{name} folder does not exist.") -# if not os.listdir(folder): -# print_error(f"{name} folder is empty.") -# return True diff --git a/tests/test_simple_usecases.py b/tests/test_simple_usecases.py index 848d3b6..d269216 100644 --- a/tests/test_simple_usecases.py +++ b/tests/test_simple_usecases.py @@ -218,14 +218,3 @@ def test_scan_argument_instead_of_src_to_instead_of_move_to(setup_teardown): main(args) simple_usecase_test(scan_dir, reference_dir, move_to_dir, 3) - - -def test_old_script_sanity(setup_teardown): - scan_dir, reference_dir, move_to_dir, common_args = setup_teardown - setup_test_files(range(1, 4), range(1, 4)) - common_args.append("--old_script") - args = parse_arguments(common_args) - assert args.old_script, "Old script flag not set" - main(args) - - simple_usecase_test(scan_dir, reference_dir, move_to_dir, 3)