Skip to content

Commit 054a98b

Browse files
committed
Refactoring changes requested by the maintainer, including centralising code by adding functions to validation.py and using them in the search_with_tags feature in the folders file
1 parent 3503808 commit 054a98b

File tree

5 files changed

+354
-110
lines changed

5 files changed

+354
-110
lines changed

datashuttle/configs/canonical_tags.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,40 @@ def tags(tag_name: str) -> str:
1111
"datetime": "@DATETIME@",
1212
"to": "@TO@",
1313
"*": "@*@",
14+
"DATETO": "@DATETO@",
15+
"TIMETO": "@TIMETO@",
16+
"DATETIMETO": "@DATETIMETO@",
1417
}
1518
return tags[tag_name]
19+
20+
21+
# Canonical ``strptime`` / ``strftime`` format string for each supported
# value type. Accessed through `get_datetime_format()`.
_DATETIME_FORMATS = {
    "datetime": "%Y%m%dT%H%M%S",
    "time": "%H%M%S",
    "date": "%Y%m%d",
}


def get_datetime_format(format_type: str) -> str:
    """
    Get the datetime format string for a given format type.

    Parameters
    ----------
    format_type : str
        One of "datetime", "time", or "date"

    Returns
    -------
    str
        The format string for the specified type

    Raises
    ------
    ValueError
        If format_type is not one of the supported types. This includes
        values that cannot be used as a dictionary key at all (e.g. a
        list), which would otherwise surface as a `TypeError`.
    """
    try:
        return _DATETIME_FORMATS[format_type]
    except (KeyError, TypeError):
        # KeyError: unknown type. TypeError: unhashable argument.
        # Both are reported uniformly as the documented ValueError.
        raise ValueError(
            f"Invalid format type: {format_type}. Must be one of {list(_DATETIME_FORMATS.keys())}"
        ) from None
50+

datashuttle/utils/data_transfer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ def get_processed_names(
462462
processed_names = formatting.check_and_format_names(
463463
names_checked, prefix
464464
)
465-
processed_names = folders.search_for_wildcards(
465+
processed_names = folders.search_with_tags(
466466
self.__cfg,
467467
self.__base_folder,
468468
self.__local_or_central,

datashuttle/utils/folders.py

Lines changed: 102 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,49 @@ def process_glob_to_find_datatype_folders(
362362
# -----------------------------------------------------------------------------
363363

364364

365-
def search_for_wildcards(
365+
def filter_names_by_datetime_range(
    names: List[str],
    format_type: str,
    start_timepoint: datetime,
    end_timepoint: datetime,
) -> List[str]:
    """
    Filter a list of names based on a datetime range.

    Names are expected to contain the format_type key (e.g., date-*,
    time-*) because they were found by searching with that pattern.
    A name for which no value can be extracted, or whose value does not
    parse with the canonical format, is skipped rather than raising.

    Parameters
    ----------
    names : List[str]
        List of names to filter, all containing the datetime pattern
    format_type : str
        One of "datetime", "time", or "date"
    start_timepoint : datetime
        Start of the datetime range (inclusive)
    end_timepoint : datetime
        End of the datetime range (inclusive)

    Returns
    -------
    List[str]
        The entries of `names` whose value falls within the range,
        in their original order.

    Raises
    ------
    ValueError
        If format_type is not a supported type (raised by
        `canonical_tags.get_datetime_format`).
    """
    # Loop-invariant, so look it up once. Doing this outside the `try`
    # below also means an unsupported `format_type` is reported to the
    # caller instead of silently discarding every candidate.
    strptime_format = canonical_tags.get_datetime_format(format_type)

    filtered_names: List[str] = []
    for candidate in names:
        # Entries may be path-like objects rather than plain strings.
        candidate_basename = (
            candidate if isinstance(candidate, str) else candidate.name
        )
        values = get_values_from_bids_formatted_name(
            [candidate_basename], format_type
        )
        if not values:
            # Nothing extracted for this name, so it cannot be in range.
            continue

        try:
            candidate_timepoint = datetime.strptime(
                values[0], strptime_format
            )
        except ValueError:
            # The value is not a valid date / time in the canonical format.
            continue

        if start_timepoint <= candidate_timepoint <= end_timepoint:
            filtered_names.append(candidate)

    return filtered_names
405+
406+
407+
def search_with_tags(
366408
cfg: Configs,
367409
base_folder: Path,
368410
local_or_central: str,
@@ -400,68 +442,69 @@ def search_for_wildcards(
400442
401443
sub : optional subject to search for sessions in. If not provided,
402444
will search for subjects rather than sessions.
403-
404445
"""
405446
new_all_names: List[str] = []
406447
for name in all_names:
407-
if canonical_tags.tags("*") in name or "@DATETO@" in name:
408-
search_str = name.replace(canonical_tags.tags("*"), "*")
409-
# If a date-range tag is present, extract dates and update the search string.
410-
if "@DATETO@" in name:
411-
m = re.search(r"(\d{8})@DATETO@(\d{8})", name)
412-
if not m:
413-
raise ValueError(
414-
"Invalid date range format in name: " + name
415-
)
416-
start_str, end_str = m.groups()
417-
try:
418-
start_date = datetime.strptime(start_str, "%Y%m%d")
419-
end_date = datetime.strptime(end_str, "%Y%m%d")
420-
except ValueError as e:
421-
raise ValueError("Invalid date in date range: " + str(e))
422-
# Replace the date-range substring with "date-*"
423-
search_str = re.sub(r"\d{8}@DATETO@\d{8}", "date-*", name)
424-
# Use the helper function to perform the glob search.
425-
if sub:
426-
matching_names: List[str] = search_sub_or_ses_level(
427-
cfg,
428-
base_folder,
429-
local_or_central,
430-
sub,
431-
search_str=search_str,
432-
)[0]
433-
else:
434-
matching_names = search_sub_or_ses_level(
435-
cfg, base_folder, local_or_central, search_str=search_str
436-
)[0]
437-
# If a date-range tag was provided, further filter the results.
438-
if "@DATETO@" in name:
439-
filtered_names: List[str] = []
440-
for candidate in matching_names:
441-
candidate_basename = (
442-
candidate
443-
if isinstance(candidate, str)
444-
else candidate.name
445-
)
446-
values_list = get_values_from_bids_formatted_name(
447-
[candidate_basename], "date"
448-
)
449-
if not values_list:
450-
continue
451-
candidate_date_str = values_list[0]
452-
try:
453-
candidate_date = datetime.strptime(
454-
candidate_date_str, "%Y%m%d"
455-
)
456-
except ValueError:
457-
continue
458-
if start_date <= candidate_date <= end_date:
459-
filtered_names.append(candidate)
460-
matching_names = filtered_names
461-
new_all_names += matching_names
448+
if not (canonical_tags.tags("*") in name or
449+
canonical_tags.tags("DATETO") in name or
450+
canonical_tags.tags("TIMETO") in name or
451+
canonical_tags.tags("DATETIMETO") in name):
452+
new_all_names.append(name)
453+
continue
454+
455+
# Initialize search string
456+
search_str = name
457+
458+
# Handle wildcard replacement first if present
459+
if canonical_tags.tags("*") in name:
460+
search_str = search_str.replace(canonical_tags.tags("*"), "*")
461+
462+
# Handle datetime ranges
463+
format_type = tag = None
464+
if canonical_tags.tags("DATETO") in search_str:
465+
format_type = "date"
466+
tag = canonical_tags.tags("DATETO")
467+
elif canonical_tags.tags("TIMETO") in search_str:
468+
format_type = "time"
469+
tag = canonical_tags.tags("TIMETO")
470+
elif canonical_tags.tags("DATETIMETO") in search_str:
471+
format_type = "datetime"
472+
tag = canonical_tags.tags("DATETIMETO")
473+
474+
if format_type is not None:
475+
assert tag is not None, "format and tag should be set together"
476+
search_str = validation.format_and_validate_datetime_search_str(search_str, format_type, tag)
477+
478+
# Use the helper function to perform the glob search
479+
if sub:
480+
matching_names: List[str] = search_sub_or_ses_level(
481+
cfg,
482+
base_folder,
483+
local_or_central,
484+
sub,
485+
search_str=search_str,
486+
)[0]
462487
else:
463-
new_all_names += [name]
464-
# Remove duplicates in case of wildcard overlap.
488+
matching_names = search_sub_or_ses_level(
489+
cfg, base_folder, local_or_central, search_str=search_str
490+
)[0]
491+
492+
# Filter results by datetime range if one was present
493+
if format_type is not None and tag is not None:
494+
expected_values = validation.get_expected_num_datetime_values(format_type)
495+
full_tag_regex = fr"(\d{{{expected_values}}}){re.escape(tag)}(\d{{{expected_values}}})"
496+
match = re.search(full_tag_regex, name)
497+
if match: # We know this is true because format_and_validate_datetime_search_str succeeded
498+
start_str, end_str = match.groups()
499+
start_timepoint = datetime.strptime(start_str, canonical_tags.get_datetime_format(format_type))
500+
end_timepoint = datetime.strptime(end_str, canonical_tags.get_datetime_format(format_type))
501+
matching_names = filter_names_by_datetime_range(
502+
matching_names, format_type, start_timepoint, end_timepoint
503+
)
504+
505+
new_all_names.extend(matching_names)
506+
507+
# Remove duplicates in case of wildcard overlap
465508
new_all_names = list(set(new_all_names))
466509
return new_all_names
467510

datashuttle/utils/validation.py

Lines changed: 114 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from itertools import chain
2525
from pathlib import Path
2626

27-
from datashuttle.configs import canonical_configs, canonical_folders
27+
from datashuttle.configs import canonical_configs, canonical_folders, canonical_tags
2828
from datashuttle.utils import formatting, getters, utils
2929
from datashuttle.utils.custom_exceptions import NeuroBlueprintError
3030

@@ -432,18 +432,11 @@ def datetime_are_iso_format(
432432
"""
433433
Check formatting for date-, time-, or datetime- tags.
434434
"""
435-
formats = {
436-
"datetime": "%Y%m%dT%H%M%S",
437-
"time": "%H%M%S",
438-
"date": "%Y%m%d",
439-
}
440-
441-
key = next((key for key in formats if key in name), None)
435+
key = next((key for key in ["datetime", "time", "date"] if key in name), None)
442436

443437
error_message: List[str]
444438
if not key:
445439
error_message = []
446-
447440
else:
448441
try:
449442
format_to_check = utils.get_values_from_bids_formatted_name(
@@ -452,17 +445,122 @@ def datetime_are_iso_format(
452445
except:
453446
return []
454447

455-
strfmt = formats[key]
456-
457448
try:
458-
datetime.strptime(format_to_check, strfmt)
459-
error_message = []
449+
if not validate_datetime(format_to_check, key):
450+
error_message = [get_datetime_error(
451+
key, name, canonical_tags.get_datetime_format(key), path_
452+
)]
453+
else:
454+
error_message = []
460455
except ValueError:
461-
error_message = [get_datetime_error(key, name, strfmt, path_)]
456+
error_message = [get_datetime_error(
457+
key, name, canonical_tags.get_datetime_format(key), path_
458+
)]
462459

463460
return error_message
464461

465462

463+
def validate_datetime(datetime_str: str, format_type: str) -> bool:
    """
    Check whether a string parses with the canonical format of a type.

    Parameters
    ----------
    datetime_str : str
        The datetime string to validate
    format_type : str
        One of "datetime", "time", or "date"

    Returns
    -------
    bool
        True if `datetime_str` parses with the format registered for
        `format_type`, False otherwise. An unsupported `format_type`
        also gives False, because the `ValueError` raised by
        `canonical_tags.get_datetime_format` is caught here too.
    """
    try:
        expected_format = canonical_tags.get_datetime_format(format_type)
        datetime.strptime(datetime_str, expected_format)
    except ValueError:
        return False
    return True
484+
485+
486+
def get_expected_num_datetime_values(format_type: str) -> int:
    """
    Get the expected number of characters for a datetime format.

    The length is measured by formatting a fixed reference timestamp, so
    the result does not depend on the current wall-clock time.

    Parameters
    ----------
    format_type : str
        One of "datetime", "time", or "date"

    Returns
    -------
    int
        The number of characters expected for the format

    Raises
    ------
    ValueError
        If format_type is not a supported type (raised by
        `canonical_tags.get_datetime_format`).
    """
    # NOTE(review): this is a character count, not a digit count. For
    # "datetime" it includes the literal "T" separator of the canonical
    # format, so interpolating the result into a `\d{N}` pattern cannot
    # match a canonical datetime value.
    format_str = canonical_tags.get_datetime_format(format_type)
    # Any four-digit-year timestamp gives the same length; a fixed one
    # keeps the function deterministic.
    reference_timepoint = datetime(2000, 1, 1)
    return len(reference_timepoint.strftime(format_str))
503+
504+
505+
def format_and_validate_datetime_search_str(search_str: str, format_type: str, tag: str) -> str:
    """
    Validate a datetime range in a search string and turn it into a wildcard.

    The range is expected as ``<start><tag><end>``. Both ends are checked
    against the canonical format for `format_type`, and the end must not
    be earlier than the start. The range text is then replaced with
    ``<format_type>-*`` so the result can be used as a glob pattern.

    Parameters
    ----------
    search_str : str
        The search string containing the datetime range
    format_type : str
        One of "datetime", "time", or "date"
    tag : str
        The tag used for the range (e.g. @DATETO@)

    Returns
    -------
    str
        The search string with the datetime range replaced

    Raises
    ------
    NeuroBlueprintError
        If no range is found, if either end is not a valid value for
        `format_type`, or if the end is before the start.
    """
    expected_values = get_expected_num_datetime_values(format_type)
    # NOTE(review): both groups accept digits only. The canonical
    # "datetime" format contains a literal "T", so a well-formed
    # datetime range cannot match this pattern and is rejected below.
    # `search_with_tags` rebuilds the same pattern to filter results, so
    # any change to the pattern has to be made in both places together.
    full_tag_regex = fr"(\d{{{expected_values}}}){re.escape(tag)}(\d{{{expected_values}}})"
    range_pattern = re.compile(full_tag_regex)
    match = range_pattern.search(search_str)

    if match is None:
        utils.log_and_raise_error(
            f"Invalid {format_type} range format in search string: {search_str}",
            NeuroBlueprintError,
        )

    start_str, end_str = match.groups()

    start_is_valid = validate_datetime(start_str, format_type)
    if not start_is_valid:
        utils.log_and_raise_error(
            f"Invalid start {format_type} format: {start_str}",
            NeuroBlueprintError,
        )

    end_is_valid = validate_datetime(end_str, format_type)
    if not end_is_valid:
        utils.log_and_raise_error(
            f"Invalid end {format_type} format: {end_str}",
            NeuroBlueprintError,
        )

    # Both ends are known to parse at this point; convert them to compare.
    strptime_format = canonical_tags.get_datetime_format(format_type)
    start_timepoint = datetime.strptime(start_str, strptime_format)
    end_timepoint = datetime.strptime(end_str, strptime_format)

    if start_timepoint > end_timepoint:
        utils.log_and_raise_error(
            f"End {format_type} is before start {format_type}",
            NeuroBlueprintError,
        )

    return range_pattern.sub(f"{format_type}-*", search_str)
562+
563+
466564
def raise_display_mode(
467565
message: str, display_mode: DisplayMode, log: bool
468566
) -> None:
@@ -981,3 +1079,5 @@ def check_datatypes_are_valid(
9811079
return message
9821080

9831081
return None
1082+
1083+

0 commit comments

Comments
 (0)