Skip to content

Commit

Permalink
Mod: Refactor datasets download functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
Labbeti committed Dec 20, 2023
1 parent 1b3e6e9 commit 3af8dcd
Show file tree
Hide file tree
Showing 13 changed files with 284 additions and 238 deletions.
8 changes: 4 additions & 4 deletions src/aac_datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
from .utils.globals import (
get_default_ffmpeg_path,
get_default_root,
get_default_ytdl_path,
get_default_ytdlp_path,
set_default_ffmpeg_path,
set_default_root,
set_default_ytdl_path,
set_default_ytdlp_path,
)


Expand All @@ -34,8 +34,8 @@
"WavCaps",
"get_default_ffmpeg_path",
"get_default_root",
"get_default_ytdl_path",
"get_default_ytdlp_path",
"set_default_ffmpeg_path",
"set_default_root",
"set_default_ytdl_path",
"set_default_ytdlp_path",
]
39 changes: 19 additions & 20 deletions src/aac_datasets/datasets/audiocaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
Dict,
List,
Optional,
Tuple,
Union,
)

Expand All @@ -32,13 +31,13 @@
from aac_datasets.datasets.base import AACDataset
from aac_datasets.datasets.functional.audiocaps import (
AudioCapsCard,
download_audiocaps_dataset,
download_class_labels_indices,
load_audiocaps_dataset,
load_class_labels_indices,
prepare_audiocaps_dataset,
prepare_class_labels_indices,
_get_audio_subset_dpath,
)
from aac_datasets.utils.globals import _get_root, _get_ffmpeg_path, _get_ytdl_path
from aac_datasets.utils.globals import _get_root, _get_ffmpeg_path, _get_ytdlp_path


pylog = logging.getLogger(__name__)
Expand Down Expand Up @@ -118,7 +117,7 @@ def __init__(
flat_captions: bool = False,
sr: int = 32_000,
with_tags: bool = False,
ytdl_path: Union[str, Path, None] = None,
ytdlp_path: Union[str, Path, None] = None,
) -> None:
"""
:param root: Dataset root directory.
Expand Down Expand Up @@ -146,7 +145,7 @@ def __init__(
:param with_tags: If True, load the tags from AudioSet dataset.
Note: tags needs to be downloaded with download=True & with_tags=True before being used.
defaults to False.
:param ytdl_path: Path to yt-dlp or ytdlp executable.
:param ytdlp_path: Path to yt-dlp or ytdlp executable.
defaults to "yt-dlp".
"""
if subset not in AudioCapsCard.SUBSETS:
Expand All @@ -155,34 +154,34 @@ def __init__(
)

root = _get_root(root)
ytdl_path = _get_ytdl_path(ytdl_path)
ytdlp_path = _get_ytdlp_path(ytdlp_path)
ffmpeg_path = _get_ffmpeg_path(ffmpeg_path)

if download:
prepare_audiocaps_dataset(
download_audiocaps_dataset(
root=root,
subset=subset,
sr=sr,
with_tags=with_tags,
verbose=verbose,
force=AudioCaps.FORCE_PREPARE_DATA,
ytdl_path=ytdl_path,
ffmpeg_path=ffmpeg_path,
audio_format=AudioCaps.AUDIO_FORMAT,
verbose=verbose,
audio_duration=AudioCaps.AUDIO_DURATION,
audio_format=AudioCaps.AUDIO_FORMAT,
download_audio=AudioCaps.DOWNLOAD_AUDIO,
ffmpeg_path=ffmpeg_path,
n_channels=AudioCaps.AUDIO_N_CHANNELS,
sr=sr,
verify_files=AudioCaps.VERIFY_FILES,
download_audio=AudioCaps.DOWNLOAD_AUDIO,
with_tags=with_tags,
ytdlp_path=ytdlp_path,
)

raw_data, index_to_tagname = load_audiocaps_dataset(
root=root,
subset=subset,
sr=sr,
with_tags=with_tags,
exclude_removed_audio=exclude_removed_audio,
verbose=verbose,
audio_format=AudioCaps.AUDIO_FORMAT,
exclude_removed_audio=exclude_removed_audio,
sr=sr,
with_tags=with_tags,
)
audio_subset_dpath = _get_audio_subset_dpath(root, subset, sr)
size = len(next(iter(raw_data.values())))
Expand Down Expand Up @@ -266,12 +265,12 @@ def load_class_labels_indices(
return load_class_labels_indices(root, sr)

@classmethod
def prepare_class_labels_indices(
def download_class_labels_indices(
cls,
root: Union[str, Path, None] = None,
sr: int = 32_000,
) -> None:
return prepare_class_labels_indices(root, sr)
return download_class_labels_indices(root, sr)

# Magic methods
def __repr__(self) -> str:
Expand Down
11 changes: 7 additions & 4 deletions src/aac_datasets/datasets/clotho.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from aac_datasets.datasets.functional.clotho import (
ClothoCard,
load_clotho_dataset,
prepare_clotho_dataset,
download_clotho_dataset,
_get_audio_subset_dpath,
)
from aac_datasets.utils.globals import _get_root
Expand Down Expand Up @@ -154,12 +154,12 @@ def __init__(
root = _get_root(root)

if download:
prepare_clotho_dataset(
download_clotho_dataset(
root=root,
subset=subset,
force=Clotho.FORCE_PREPARE_DATA,
verbose=verbose,
clean_archives=Clotho.CLEAN_ARCHIVES,
force=Clotho.FORCE_PREPARE_DATA,
verify_files=Clotho.VERIFY_FILES,
version=version,
)
Expand Down Expand Up @@ -200,7 +200,10 @@ def __init__(
column_names.remove(name)

raw_data = load_clotho_dataset(
root=root, version=version, subset=subset, verbose=verbose
root=root,
subset=subset,
verbose=verbose,
version=version,
)

size = len(next(iter(raw_data.values())))
Expand Down
8 changes: 4 additions & 4 deletions src/aac_datasets/datasets/functional/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from .audiocaps import AudioCapsCard, load_audiocaps_dataset, prepare_audiocaps_dataset
from .clotho import ClothoCard, load_clotho_dataset, prepare_clotho_dataset
from .macs import MACSCard, load_macs_dataset, prepare_macs_dataset
from .wavcaps import WavCapsCard, load_wavcaps_dataset, prepare_wavcaps_dataset
from .audiocaps import AudioCapsCard, load_audiocaps_dataset, download_audiocaps_dataset
from .clotho import ClothoCard, load_clotho_dataset, download_clotho_dataset
from .macs import MACSCard, load_macs_dataset, download_macs_dataset
from .wavcaps import WavCapsCard, load_wavcaps_dataset, download_wavcaps_dataset
81 changes: 63 additions & 18 deletions src/aac_datasets/datasets/functional/audiocaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from typing import (
Any,
Dict,
Iterable,
List,
Optional,
Tuple,
Expand All @@ -25,7 +26,7 @@
from torch.hub import download_url_to_file

from aac_datasets.datasets.functional.common import DatasetCard
from aac_datasets.utils.globals import _get_root, _get_ffmpeg_path, _get_ytdl_path
from aac_datasets.utils.globals import _get_root, _get_ffmpeg_path, _get_ytdlp_path


pylog = logging.getLogger(__name__)
Expand Down Expand Up @@ -250,21 +251,21 @@ def load_audiocaps_dataset(
return raw_data, index_to_tagname


def prepare_audiocaps_dataset(
def download_audiocaps_dataset(
# Common args
root: Union[str, Path, None] = None,
subset: str = AudioCapsCard.DEFAULT_SUBSET,
force: bool = False,
verbose: int = 0,
# AudioCaps-specific args
audio_duration: float = 10.0,
audio_format: str = "flac",
download_audio: bool = True,
ffmpeg_path: Union[str, Path, None] = None,
force: bool = False,
n_channels: int = 1,
sr: int = 32_000,
verify_files: bool = False,
ytdl_path: Union[str, Path, None] = None,
ytdlp_path: Union[str, Path, None] = None,
with_tags: bool = False,
) -> None:
"""Prepare AudioCaps data (audio, labels, metadata).
Expand All @@ -274,6 +275,8 @@ def prepare_audiocaps_dataset(
defaults to ".".
:param subset: The subset of AudioCaps to use. Can be one of :attr:`~AudioCapsCard.SUBSETS`.
defaults to "train".
:param force: If True, force to download again all files.
defaults to False.
:param verbose: Verbose level.
defaults to 0.
Expand All @@ -285,8 +288,6 @@ def prepare_audiocaps_dataset(
defaults to True.
:param ffmpeg_path: Path to ffmpeg executable file.
defaults to "ffmpeg".
:param force: If True, force to download again all files.
defaults to False.
:param n_channels: Number of channels extracted for each audio file.
defaults to 1.
:param sr: The sample rate used for audio files in the dataset (in Hz).
Expand All @@ -296,18 +297,18 @@ def prepare_audiocaps_dataset(
defaults to False.
:param with_tags: If True, download the tags from AudioSet dataset.
defaults to False.
:param ytdl_path: Path to yt-dlp or ytdlp executable.
:param ytdlp_path: Path to yt-dlp or ytdlp executable.
defaults to "yt-dlp".
"""

root = _get_root(root)
ytdl_path = _get_ytdl_path(ytdl_path)
ytdlp_path = _get_ytdlp_path(ytdlp_path)
ffmpeg_path = _get_ffmpeg_path(ffmpeg_path)

if not osp.isdir(root):
raise RuntimeError(f"Cannot find root directory '{root}'.")

_check_ytdl(ytdl_path)
_check_ytdl(ytdlp_path)
_check_ffmpeg(ffmpeg_path)

if _is_prepared(root, subset, sr, -1) and not force:
Expand Down Expand Up @@ -373,7 +374,7 @@ def prepare_audiocaps_dataset(
start_time=start_time,
duration=audio_duration,
sr=sr,
ytdl_path=ytdl_path,
ytdlp_path=ytdlp_path,
ffmpeg_path=ffmpeg_path,
n_channels=n_channels,
)
Expand Down Expand Up @@ -433,14 +434,58 @@ def prepare_audiocaps_dataset(
pylog.info(f"- {n_samples} total samples.")

if with_tags:
prepare_class_labels_indices(root)
download_class_labels_indices(root)

if verbose >= 2:
pylog.debug(
f"Dataset {AudioCapsCard.PRETTY_NAME} (subset={subset}) has been prepared."
)


def download_audiocaps_datasets(
    # Common args
    root: Union[str, Path, None] = None,
    subsets: Union[str, Iterable[str]] = AudioCapsCard.DEFAULT_SUBSET,
    force: bool = False,
    verbose: int = 0,
    # AudioCaps-specific args
    audio_duration: float = 10.0,
    audio_format: str = "flac",
    download_audio: bool = True,
    ffmpeg_path: Union[str, Path, None] = None,
    n_channels: int = 1,
    sr: int = 32_000,
    verify_files: bool = False,
    ytdlp_path: Union[str, Path, None] = None,
    with_tags: bool = False,
) -> None:
    """Download one or several AudioCaps subsets.

    Thin helper that calls
    :func:`~aac_datasets.datasets.functional.audiocaps.download_audiocaps_dataset`
    once per requested subset, in the order given. See that function for the
    meaning of each argument.

    :param subsets: A single subset name or an iterable of subset names.
        A plain string is treated as one subset, not as an iterable of characters.

    Every other argument is forwarded unchanged to each per-subset call.
    """
    # Materialize the names up front so a one-shot iterable is consumed
    # entirely before the first download starts.
    subset_names = [subsets] if isinstance(subsets, str) else list(subsets)

    for subset_name in subset_names:
        download_audiocaps_dataset(
            subset=subset_name,
            root=root,
            force=force,
            verbose=verbose,
            audio_duration=audio_duration,
            audio_format=audio_format,
            download_audio=download_audio,
            ffmpeg_path=ffmpeg_path,
            n_channels=n_channels,
            sr=sr,
            verify_files=verify_files,
            ytdlp_path=ytdlp_path,
            with_tags=with_tags,
        )


def load_class_labels_indices(
root: Union[str, Path, None] = None,
sr: int = 32_000,
Expand All @@ -462,7 +507,7 @@ def load_class_labels_indices(
return audioset_classes_data


def prepare_class_labels_indices(
def download_class_labels_indices(
root: Union[str, Path, None] = None,
sr: int = 32_000,
verbose: int = 0,
Expand Down Expand Up @@ -522,17 +567,17 @@ def _download_and_extract_from_youtube(
n_channels: int = 1,
target_format: str = "flac",
acodec: str = "flac",
ytdl_path: Union[str, Path, None] = None,
ytdlp_path: Union[str, Path, None] = None,
ffmpeg_path: Union[str, Path, None] = None,
) -> bool:
"""Download audio from youtube with yt-dlp and ffmpeg."""
ytdl_path = _get_ytdl_path(ytdl_path)
ytdlp_path = _get_ytdlp_path(ytdlp_path)
ffmpeg_path = _get_ffmpeg_path(ffmpeg_path)

# Get audio download link with yt-dlp, without start time
link = _get_youtube_link(youtube_id, None)
get_url_command = [
ytdl_path,
ytdlp_path,
"--youtube-skip-dash-manifest",
"-g",
link,
Expand Down Expand Up @@ -582,15 +627,15 @@ def _download_and_extract_from_youtube(
return False


def _check_ytdl(ytdl_path: str) -> None:
def _check_ytdl(ytdlp_path: str) -> None:
try:
subprocess.check_call(
[ytdl_path, "--help"],
[ytdlp_path, "--help"],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
except (CalledProcessError, PermissionError, FileNotFoundError) as err:
pylog.error(f"Invalid ytdlp path '{ytdl_path}'. ({err})")
pylog.error(f"Invalid ytdlp path '{ytdlp_path}'. ({err})")
raise err


Expand Down
Loading

0 comments on commit 3af8dcd

Please sign in to comment.