Skip to content

Commit

Permalink
Merge branch 'kevin-cgc-main' into dev (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
Labbeti committed Jan 23, 2024
2 parents 36a60ac + 26774e9 commit 0a00ec1
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
9 changes: 9 additions & 0 deletions src/aac_datasets/datasets/functional/audiocaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,15 @@ def _download_from_youtube(
return False
_video_link, audio_link = lines[:2]

# If yt-dlp returns only one link, it is a combined video+audio stream.
if len(audio_link) == 0:
if verbose >= 2:
pylog.debug(
f"youtube_id={youtube_id} is combined video audio only (cant download)"
)
# NOTE: falling back to `audio_link = _video_link` does not work (reason unknown); it probably requires changes to the ffmpeg command.
return False

# Download and extract audio from audio_link to fpath_out with ffmpeg
extract_command = [
ffmpeg_path,
Expand Down
4 changes: 2 additions & 2 deletions src/aac_datasets/datasets/functional/wavcaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def download_wavcaps_dataset(
ign_patterns = [
pattern
for source in ign_sources
for pattern in (f"json_files/{source}/*.json", "Zip_files/*") # {source}/
for pattern in (f"json_files/{source}/*.json", f"Zip_files/{source}/*")
]
if verbose >= 2:
pylog.debug(f"ign_sources={ign_sources}")
Expand Down Expand Up @@ -395,7 +395,7 @@ def download_wavcaps_dataset(
del snapshot_dpath

# Build symlink to hf cache
if osp.exists(wavcaps_root):
if osp.lexists(wavcaps_root):
if not osp.islink(wavcaps_root):
raise RuntimeError("WavCaps root exists but it is not a symlink.")
link_target_abspath = osp.abspath(osp.realpath(wavcaps_root))
Expand Down
5 changes: 4 additions & 1 deletion src/aac_datasets/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ def safe_rmdir(
):
if not rm_root and dpath == root:
continue
elif len(fnames) == 0 and len(dnames) == 0:
elif len(fnames) == 0 and (
len(dnames) == 0
or all(osp.join(dpath, dname) in to_delete for dname in dnames)
):
to_delete.append(dpath)
elif error_on_non_empty_dir:
raise RuntimeError(f"Cannot remove non-empty directory '{dpath}'.")
Expand Down

0 comments on commit 0a00ec1

Please sign in to comment.