Skip to content

Commit

Permalink
Mod: Update CARD info and typing.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Labbeti committed Dec 18, 2023
1 parent 1cab987 commit 6cacaec
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 22 deletions.
5 changes: 2 additions & 3 deletions src/aac_datasets/datasets/functional/audiocaps.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -60,7 +60,7 @@ class AudioCapsCard(DatasetCard):
NAME: str = "audiocaps"
PRETTY_NAME: str = "AudioCaps"
SIZE_CATEGORIES: Tuple[str, ...] = ("10K<n<100K",)
SUBSETS: Tuple[str, ...] = ("train", "val", "test", "train_v2")
SUBSETS: Tuple[str, ...] = tuple(CAPTIONS_PER_AUDIO.keys())
TASK_CATEGORIES: Tuple[str, ...] = ("audio-to-text", "text-to-audio")


Expand Down Expand Up @@ -390,8 +390,7 @@ def load_class_labels_indices(
root = _get_root(root)
class_labels_indices_fname = _AUDIOSET_LINKS["class_labels_indices"]["fname"]
class_labels_indices_fpath = osp.join(
_get_audiocaps_dpath(root, sr),
class_labels_indices_fname
_get_audiocaps_dpath(root, sr), class_labels_indices_fname
)
if not osp.isfile(class_labels_indices_fpath):
raise ValueError(
Expand Down
16 changes: 2 additions & 14 deletions src/aac_datasets/datasets/functional/clotho.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,8 +10,6 @@
from pathlib import Path
from typing import (
Any,
Callable,
ClassVar,
Dict,
List,
Optional,
Expand All @@ -21,9 +19,7 @@
from zipfile import ZipFile

from py7zr import SevenZipFile
from torch import Tensor
from torch.hub import download_url_to_file
from typing_extensions import TypedDict, NotRequired

from aac_datasets.datasets.functional.common import DatasetCard
from aac_datasets.utils.download import hash_file
Expand All @@ -35,7 +31,7 @@

class ClothoCard(DatasetCard):
ANNOTATIONS_CREATORS: Tuple[str, ...] = ("crowdsourced",)
CAPTIONS_PER_AUDIO = {
CAPTIONS_PER_AUDIO: Dict[str, int] = {
"dev": 5,
"val": 5,
"eval": 5,
Expand Down Expand Up @@ -64,15 +60,7 @@ class ClothoCard(DatasetCard):
PRETTY_NAME: str = "Clotho"
SAMPLE_RATE: int = 44_100 # Hz
SIZE_CATEGORIES: Tuple[str, ...] = ("1K<n<10K",)
SUBSETS: Tuple[str, ...] = (
"dev",
"val",
"eval",
"dcase_aac_test",
"dcase_aac_analysis",
"dcase_t2a_audio",
"dcase_t2a_captions",
)
SUBSETS: Tuple[str, ...] = tuple(CAPTIONS_PER_AUDIO.keys())
TASK_CATEGORIES: Tuple[str, ...] = ("audio-to-text", "text-to-audio")
VERSIONS: Tuple[str, ...] = ("v1", "v2", "v2.1")

Expand Down
10 changes: 5 additions & 5 deletions src/aac_datasets/datasets/functional/wavcaps.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,7 @@
import zipfile

from pathlib import Path
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import tqdm

Expand Down Expand Up @@ -54,7 +54,7 @@ class WavCapsCard(DatasetCard):
DEFAULT_REVISION: str = "85a0c21e26fa7696a5a74ce54fada99a9b43c6de"
DEFAULT_SUBSET: str = "as_noac"
DESCRIPTION: str = "WavCaps: A ChatGPT-Assisted Weakly-Labelled Audio Captioning Dataset for Audio-Language Multimodal Research."
EXPECTED_SIZES: ClassVar[Dict[str, int]] = {
EXPECTED_SIZES: Dict[str, int] = {
"AudioSet_SL": 108317,
"BBC_Sound_Effects": 31201,
"FreeSound": 262300,
Expand All @@ -65,9 +65,9 @@ class WavCapsCard(DatasetCard):
LANGUAGE_DETAILS: Tuple[str, ...] = ("en-US",)
NAME: str = "wavcaps"
PRETTY_NAME: str = "WavCaps"
REPO_ID: ClassVar[str] = "cvssp/WavCaps"
SOURCES: ClassVar[Tuple[str, ...]] = tuple(EXPECTED_SIZES.keys())
SUBSETS: Tuple[str, ...] = ("as", "bbc", "fsd", "sb", "as_noac", "fsd_nocl")
REPO_ID: str = "cvssp/WavCaps"
SOURCES: Tuple[str, ...] = tuple(EXPECTED_SIZES.keys())
SUBSETS: Tuple[str, ...] = tuple(CAPTIONS_PER_AUDIO.keys())
SAMPLE_RATE: int = 32_000 # Hz
TASK_CATEGORIES: Tuple[str, ...] = ("audio-to-text", "text-to-audio")

Expand Down

0 comments on commit 6cacaec

Please sign in to comment.