Skip to content

Commit

Permalink
Merge pull request #6 from niradar/aider
Browse files: browse the repository at this point in the history
close: #3
  • Loading branch information
niradar authored Aug 20, 2024
2 parents c8d40fe + 4702f68 commit 827b0da
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
.coverage
conda.bat
profile.*
.aider*
12 changes: 8 additions & 4 deletions duplicate_files_in_folders/duplicates_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,19 @@ def process_duplicates(combined: Dict, args: Namespace) -> (int, int):

# Copy or move files to reference locations
if not args.copy_to_all:
copy_or_move_file(src_filepath, args.move_to, ref_files[0]['path'], args.reference_dir, move=True)
copy_or_move_file(src_filepath, args.move_to, ref_files[0]['path'], args.reference_dir, move=True,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
files_moved += 1
else:
num_to_copy = max(0, len(ref_files) - len(srcs_to_move))
for i in range(num_to_copy):
copy_or_move_file(src_filepath, args.move_to, ref_files[i]['path'], args.reference_dir, False)
copy_or_move_file(src_filepath, args.move_to, ref_files[i]['path'], args.reference_dir, move=False,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
files_created += 1

for (src, _), tgt in zip(srcs_to_move, ref_files[num_to_copy:]):
copy_or_move_file(src, args.move_to, tgt['path'], args.reference_dir, move=True)
copy_or_move_file(src, args.move_to, tgt['path'], args.reference_dir, move=True,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
files_moved += 1

return files_moved, files_created
Expand Down Expand Up @@ -254,5 +257,6 @@ def clean_scan_dir_duplications(args: Namespace, combined: Dict) -> int:
locations['scan'] if os.path.exists(file_info['path'])]
scan_dups_move_to: str = str(os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups"))
for src_path in scan_paths:
copy_or_move_file(src_path, scan_dups_move_to, src_path, args.scan_dir, move=True)
copy_or_move_file(src_path, scan_dups_move_to, src_path, args.scan_dir, move=True,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
return len(scan_paths)
15 changes: 13 additions & 2 deletions duplicate_files_in_folders/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def initialize_arguments():
help='Do not delete empty folders in the scan_dir folder. Default is to delete.')
parser.add_argument('--full_hash', action='store_true',
help='Use full file hash for comparison. Default is partial.')
parser.add_argument('--keep_structure', action='store_true',
help='Keep the original scan folder structure in the destination folder.')
parser.set_defaults(delete_empty_folders=True)
parser.add_argument('--clear_cache', action='store_true', help=argparse.SUPPRESS) # for testing
parser.add_argument('--extra_logging', action='store_true', help=argparse.SUPPRESS) # for testing
Expand Down Expand Up @@ -155,19 +157,28 @@ def parse_arguments(cust_args=None, check_folders=True):


def copy_or_move_file(scan_file_path: str, destination_base_path: str, ref_file_path: str, base_ref_path: str,
move: bool = True) -> str:
move: bool = True, keep_structure: bool = False, scan_base_path: str = None) -> str:
"""
Copy or move a file from the scan directory to the destination directory based on the reference file path.
:param scan_base_path: The base path of the scan directory. Required if keep_structure is True.
:param scan_file_path: Full path of the file we want to copy/move
:param ref_file_path: The full path to the reference file within the base reference directory.
This path is used to determine the relative path for the destination.
:param destination_base_path: The base path where the file should be copied or moved to.
:param base_ref_path: The base directory path of the reference files.
This is used to calculate the relative path of the ref_file_path.
:param move: True to move the file, False to copy it
:param keep_structure: True to keep the original scan folder structure in the destination folder, False otherwise
:return: the final destination path
"""
destination_path = os.path.join(destination_base_path, os.path.relpath(ref_file_path, base_ref_path))
if keep_structure:
if scan_base_path is None:
raise ValueError("scan_base_path must be provided if keep_structure is True.")
sub_path = os.path.relpath(scan_file_path, scan_base_path)
destination_path = os.path.join(destination_base_path, sub_path)
else:
destination_path = os.path.join(destination_base_path, os.path.relpath(ref_file_path, base_ref_path))
logger.debug(f"Copying {scan_file_path} to {destination_path}")
destination_dir = os.path.dirname(destination_path)
file_manager = FileManager.get_instance()
if not os.path.exists(destination_dir):
Expand Down
13 changes: 10 additions & 3 deletions tests/helpers_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,16 @@ def setup_teardown():
shutil.rmtree(TEMP_DIR)


def setup_test_files(scan_files, ref_files):
copy_files(scan_files, os.path.join(TEMP_DIR, SCAN_DIR_NAME))
copy_files(ref_files, os.path.join(TEMP_DIR, REF_DIR_NAME))
def setup_test_files(scan_files, ref_files, subfolder=None):
    """
    Hand the given file lists to copy_files, targeting the scan and reference test folders.
    :param scan_files: passed to copy_files with the scan folder (TEMP_DIR/SCAN_DIR_NAME) as target
    :param ref_files: passed to copy_files with the reference folder (TEMP_DIR/REF_DIR_NAME) as target
    :param subfolder: optional sub-path; when truthy, it is appended to both target folders and the
                      resulting folders are created (if missing) before copy_files is called
    """
    # Order matters: index 0 is the scan target, index 1 is the reference target.
    targets = [os.path.join(TEMP_DIR, SCAN_DIR_NAME), os.path.join(TEMP_DIR, REF_DIR_NAME)]
    if subfolder:
        targets = [os.path.join(base, subfolder) for base in targets]
        for nested_dir in targets:
            # exist_ok so repeated calls with the same subfolder do not fail
            os.makedirs(nested_dir, exist_ok=True)
    for file_list, target_dir in zip((scan_files, ref_files), targets):
        copy_files(file_list, target_dir)


def get_folder_structure_include_subfolders(folder):
Expand Down
25 changes: 25 additions & 0 deletions tests/test_duplicates_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,28 @@ def test_process_duplicates(setup_teardown):
files_moved, files_created = process_duplicates(duplicates, args)
assert files_created == 0
assert files_moved == 5


def test_process_duplicates_keep_structure(setup_teardown):
    """
    With --keep_structure, every moved file must land under move_to at the same path it had
    relative to the scan folder, even though all reference copies live in the reference root.
    """
    # The fixture's own argument list is not used here; this test builds its own below.
    scan_dir, reference_dir, move_to_dir, _ = setup_teardown
    nested = "subfolder"
    # exist_ok: setup_test_files(subfolder=...) creates the same folder, so never fail on it.
    os.makedirs(os.path.join(scan_dir, nested), exist_ok=True)
    setup_test_files(range(1, 6), [])
    setup_test_files(range(6, 11), [], subfolder=nested)
    time.sleep(0.1)  # sleep to make sure the modified date is different
    setup_test_files([], range(1, 11))

    cli_args = ["--scan", scan_dir, "--reference_dir", reference_dir, "--move_to", move_to_dir, "--run",
                "--keep_structure"]
    args = parse_arguments(cli_args)
    duplicates, _scan_stats, _ref_stats = find_duplicates_files_v3(args, scan_dir, reference_dir)
    files_moved, files_created = process_duplicates(duplicates, args)

    assert files_created == 0
    assert files_moved == 10

    print_all_folders(scan_dir, reference_dir, move_to_dir)
    # Files that sat directly in the scan root stay at the top level of move_to.
    for i in range(1, 6):
        expected = os.path.join(move_to_dir, f"{i}.jpg")
        assert os.path.exists(expected), f"expected moved file at {expected}"
    # Files that sat in scan/subfolder keep that sub-path instead of following the
    # (flat) reference layout.
    for i in range(6, 11):
        expected = os.path.join(move_to_dir, nested, f"{i}.jpg")
        assert os.path.exists(expected), f"expected moved file at {expected}"

0 comments on commit 827b0da

Please sign in to comment.