Skip to content

Commit

Permalink
fix: now seems to work correctly
Browse files (browse the repository at this point in the history)
  • Loading branch information
niradar committed Aug 20, 2024
1 parent a3dc51d commit 4702f68
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
.coverage
conda.bat
profile.*
.aider*
12 changes: 8 additions & 4 deletions duplicate_files_in_folders/duplicates_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,19 @@ def process_duplicates(combined: Dict, args: Namespace) -> (int, int):

# Copy or move files to reference locations
if not args.copy_to_all:
copy_or_move_file(src_filepath, args.move_to, ref_files[0]['path'], args.reference_dir, move=True, keep_structure=args.keep_structure)
copy_or_move_file(src_filepath, args.move_to, ref_files[0]['path'], args.reference_dir, move=True,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
files_moved += 1
else:
num_to_copy = max(0, len(ref_files) - len(srcs_to_move))
for i in range(num_to_copy):
copy_or_move_file(src_filepath, args.move_to, ref_files[i]['path'], args.reference_dir, move=False, keep_structure=args.keep_structure)
copy_or_move_file(src_filepath, args.move_to, ref_files[i]['path'], args.reference_dir, move=False,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
files_created += 1

for (src, _), tgt in zip(srcs_to_move, ref_files[num_to_copy:]):
copy_or_move_file(src, args.move_to, tgt['path'], args.reference_dir, move=True, keep_structure=args.keep_structure)
copy_or_move_file(src, args.move_to, tgt['path'], args.reference_dir, move=True,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
files_moved += 1

return files_moved, files_created
Expand Down Expand Up @@ -254,5 +257,6 @@ def clean_scan_dir_duplications(args: Namespace, combined: Dict) -> int:
locations['scan'] if os.path.exists(file_info['path'])]
scan_dups_move_to: str = str(os.path.join(args.move_to, os.path.basename(args.scan_dir) + "_dups"))
for src_path in scan_paths:
copy_or_move_file(src_path, scan_dups_move_to, src_path, args.scan_dir, move=True, keep_structure=args.keep_structure)
copy_or_move_file(src_path, scan_dups_move_to, src_path, args.scan_dir, move=True,
keep_structure=args.keep_structure, scan_base_path=args.scan_dir)
return len(scan_paths)
9 changes: 7 additions & 2 deletions duplicate_files_in_folders/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,10 @@ def parse_arguments(cust_args=None, check_folders=True):


def copy_or_move_file(scan_file_path: str, destination_base_path: str, ref_file_path: str, base_ref_path: str,
move: bool = True, keep_structure: bool = False) -> str:
move: bool = True, keep_structure: bool = False, scan_base_path: str = None) -> str:
"""
Copy or move a file from the scan directory to the destination directory based on the reference file path.
:param scan_base_path: The base path of the scan directory. Required if keep_structure is True.
:param scan_file_path: Full path of the file we want to copy/move
:param ref_file_path: The full path to the reference file within the base reference directory.
This path is used to determine the relative path for the destination.
Expand All @@ -171,9 +172,13 @@ def copy_or_move_file(scan_file_path: str, destination_base_path: str, ref_file_
:return: the final destination path
"""
if keep_structure:
destination_path = os.path.join(destination_base_path, os.path.relpath(scan_file_path, base_ref_path))
if scan_base_path is None:
raise ValueError("scan_base_path must be provided if keep_structure is True.")
sub_path = os.path.relpath(scan_file_path, scan_base_path)
destination_path = os.path.join(destination_base_path, sub_path)
else:
destination_path = os.path.join(destination_base_path, os.path.relpath(ref_file_path, base_ref_path))
logger.debug(f"Copying {scan_file_path} to {destination_path}")
destination_dir = os.path.dirname(destination_path)
file_manager = FileManager.get_instance()
if not os.path.exists(destination_dir):
Expand Down
9 changes: 7 additions & 2 deletions tests/test_duplicates_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,21 +244,26 @@ def test_process_duplicates(setup_teardown):
files_moved, files_created = process_duplicates(duplicates, args)
assert files_created == 0
assert files_moved == 5


def test_process_duplicates_keep_structure(setup_teardown):
scan_dir, reference_dir, move_to_dir, common_args = setup_teardown
os.makedirs(os.path.join(scan_dir, "subfolder"))
setup_test_files(range(1, 6), [], subfolder="subfolder")
setup_test_files(range(1, 6), [])
setup_test_files(range(6, 11), [], subfolder="subfolder")
time.sleep(0.1) # sleep to make sure the modified date is different
setup_test_files([], range(1, 11))

common_args = ["--scan", scan_dir, "--reference_dir", reference_dir, "--move_to", move_to_dir, "--run", "--keep_structure"]
common_args = ["--scan", scan_dir, "--reference_dir", reference_dir, "--move_to", move_to_dir, "--run",
"--keep_structure"]
args = parse_arguments(common_args)
duplicates, scan_stats, ref_stats = find_duplicates_files_v3(args, scan_dir, reference_dir)
files_moved, files_created = process_duplicates(duplicates, args)

assert files_created == 0
assert files_moved == 10

print_all_folders(scan_dir, reference_dir, move_to_dir)
assert os.path.exists(os.path.join(move_to_dir, "subfolder", "6.jpg"))
assert os.path.exists(os.path.join(move_to_dir, "subfolder", "7.jpg"))
assert os.path.exists(os.path.join(move_to_dir, "subfolder", "8.jpg"))
Expand Down

0 comments on commit 4702f68

Please sign in to comment.