Skip to content

Commit

Permalink
Fix: AC debug msg during download, refactor code and update workflow to use max workers.
Browse files Browse the repository at this point in the history
  • Loading branch information
Labbeti committed Jan 5, 2024
1 parent 0837668 commit e556851
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package-pip.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ jobs:
- name: Try to download AudioCaps val
run: |
aac-datasets-download --verbose 2 audiocaps --subsets val --max_workers 2 --with_tags true
aac-datasets-download --verbose 2 audiocaps --subsets val --max_workers none --with_tags true
- name: Check data root
run: |
Expand Down
38 changes: 15 additions & 23 deletions src/aac_datasets/datasets/functional/audiocaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ def _cast_line(line: Dict[str, Any], audio_format: str) -> Dict[str, Any]:
verbose=verbose,
)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
if verbose >= 2:
pylog.debug(f"Using {executor._max_workers} workers.")

submitted_dict = {
fname: executor.submit(
_download_from_youtube_and_verify,
Expand All @@ -409,35 +412,24 @@ def _cast_line(line: Dict[str, Any], audio_format: str) -> Dict[str, Any]:
if verbose < 2:
continue

prefix = f"[{i:5d}/{len(download_kwds)}] "

if not file_exists:
if download_success:
if valid_file:
pylog.debug(
f"{prefix}File '{fname}' has been downloaded and verified."
)
else:
pylog.debug(
f"{prefix}File '{fname}' has been downloaded but it was not valid and has been removed."
)
if not download_success:
msg = f"File '{fname}' cannot be downloaded. (maybe the source video has been removed?)"
elif valid_file:
msg = f"File '{fname}' has been downloaded and verified."
elif verify_files:
msg = f"File '{fname}' has been downloaded but it was not valid and has been removed."
else:
pylog.debug(
f"{prefix}Cannot extract audio '{fname}'. (maybe the source video has been removed?)"
)
msg = f"File '{fname}' has been downloaded."
else:
if valid_file:
pylog.debug(
f"{prefix}File '{fname}' is already downloaded and has been verified."
)
msg = f"File '{fname}' is already downloaded and has been verified."
elif verify_files:
pylog.debug(
f"{prefix}File '{fname}' is already downloaded but it was not valid and has been removed."
)
msg = f"File '{fname}' is already downloaded but it was not valid and has been removed."
else:
pylog.debug(
f"{prefix}File '{fname}' is already downloaded but it is not verified due to verify_files={verify_files}."
)
msg = f"File '{fname}' is already downloaded."

pylog.debug(f"[{i+1:5d}/{len(download_kwds)}] {msg}")

if verbose >= 1:
duration = int(time.perf_counter() - start)
Expand Down
5 changes: 3 additions & 2 deletions src/aac_datasets/utils/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

_TRUE_VALUES = ("true", "t", "yes", "y", "1")
_FALSE_VALUES = ("false", "f", "no", "n", "0")
_NONE_VALUES = ("none",)


def _str_to_bool(s: str) -> bool:
Expand All @@ -25,15 +26,15 @@ def _str_to_bool(s: str) -> bool:

def _str_to_opt_int(s: str) -> Optional[int]:
    """Parse a command-line value into an optional integer.

    The value is converted with ``str``, stripped of surrounding whitespace
    and lower-cased before it is interpreted.

    :param s: Raw command-line value.
    :returns: ``None`` when the normalized value is one of ``_NONE_VALUES``,
        otherwise the normalized value converted with ``int``.
    :raises ValueError: If the normalized value is neither a none-value nor
        something ``int`` can parse.
    """
    s = str(s).strip().lower()
    # Membership in the shared tuple keeps every optional-value parser in
    # this module agreeing on which spellings mean "no value".
    if s in _NONE_VALUES:
        return None
    else:
        return int(s)


def _str_to_opt_str(s: str) -> Optional[str]:
    """Parse a command-line value into an optional string.

    :param s: Raw command-line value; it is converted with ``str`` first.
    :returns: ``None`` when the lower-cased value is one of ``_NONE_VALUES``,
        otherwise the string with its original casing and whitespace.
    """
    s = str(s)
    # Only the comparison is case-insensitive; the returned string is not
    # normalized (and, unlike the integer parser, it is not stripped).
    if s.lower() in _NONE_VALUES:
        return None
    else:
        return s
Expand Down

0 comments on commit e556851

Please sign in to comment.