Version 0.4.1

Labbeti · Oct 25, 2023 · c9312b5 · c9312b5
1 parent a467d9c
commit c9312b5
Show file tree

Hide file tree

Showing 17 changed files with 399 additions and 236 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,15 @@
 # Change log
 
 All notable changes to this project will be documented in this file.
+## [0.4.1] 2023-10-25
+### Added
+- `AudioCaps.DOWNLOAD_AUDIO` class variable for compatibility with [audiocaps-download 1.0](https://github.com/MorenoLaQuatra/audiocaps-download).
+
+### Changed
+- Set log level to WARNING if verbose<=0 in check.py and download.py scripts.
+- Use `yt-dlp` instead of `youtube-dl` as backend to download AudioCaps audio files. ([#1](https://github.com/Labbeti/aac-datasets/issues/1))
+- Update default download message for AudioCaps. ([#1](https://github.com/Labbeti/aac-datasets/issues/1))
+- Update error message when checksum is invalid for Clotho and MACS datasets. ([#2](https://github.com/Labbeti/aac-datasets/issues/2))
 
 ## [0.4.0] 2023-09-25
 ### Added

diff --git a/CITATION.cff b/CITATION.cff
@@ -22,5 +22,5 @@ keywords:
   - captioning
   - audio-captioning
 license: MIT
-version: 0.4.0
-date-released: '2023-09-25'
+version: 0.4.1
+date-released: '2023-10-25'
diff --git a/README.md b/README.md
@@ -102,16 +102,17 @@ numpy >= 1.21.2
 
 ### External requirements (AudioCaps only)
 
-The external requirements needed to download **AudioCaps** are **ffmpeg** and **youtube-dl** (yt-dlp should work too).
-These two programs can be download on Ubuntu using `sudo apt install ffmpeg youtube-dl`.
+The external requirements needed to download **AudioCaps** are **ffmpeg** and **yt-dlp**.
+**ffmpeg** can be installed on Ubuntu using `sudo apt install ffmpeg` and **yt-dlp** from the [official repo](https://github.com/yt-dlp/yt-dlp).
+ <!-- programs can be downloaded on Ubuntu using `sudo apt install ffmpeg`. -->
 
 You can also override their paths for AudioCaps:
 ```python
 from aac_datasets import AudioCaps
 dataset = AudioCaps(
     download=True,
     ffmpeg_path="/my/path/to/ffmpeg",
-    ytdl_path="/my/path/to/youtube_dl",
+    ytdl_path="/my/path/to/ytdlp",
 )
 ```
 
@@ -125,6 +126,24 @@ However, if you want to download datasets from a script, you can also use the fo
 aac-datasets-download --root "." clotho --subsets "dev"
 ```
 
+## Additional information
+### Compatibility with audiocaps-download
+If you want to use the [audiocaps-download 1.0](https://github.com/MorenoLaQuatra/audiocaps-download) package to download AudioCaps, you will have to respect the AudioCaps folder tree:
+```python
+from audiocaps_download import Downloader
+root = "your/path/to/root"
+downloader = Downloader(root_path=f"{root}/AUDIOCAPS/audio_32000Hz/", n_jobs=16)
+downloader.download(format="wav")
+```
+
+Then disable audio download and set the correct audio format before initializing AudioCaps:
+```python
+from aac_datasets import AudioCaps
+AudioCaps.AUDIO_FORMAT = "wav"
+AudioCaps.DOWNLOAD_AUDIO = False  # this will only download labels and metadata files
+dataset = AudioCaps(root=root, subset="train", download=True)
+```
+
 ## References
 #### AudioCaps
 [1] C. D. Kim, B. Kim, H. Lee, and G. Kim, “Audiocaps: Generating captions for audios in the wild,” in NAACL-HLT, 2019. Available: https://aclanthology.org/N19-1011/
@@ -139,17 +158,17 @@ aac-datasets-download --root "." clotho --subsets "dev"
 [1] X. Mei et al., “WavCaps: A ChatGPT-Assisted Weakly-Labelled Audio Captioning Dataset for Audio-Language Multimodal Research,” arXiv preprint arXiv:2303.17395, 2023, [Online]. Available: https://arxiv.org/pdf/2303.17395.pdf 
 
 ## Cite the aac-datasets package
-If you use this software, please consider cite it as below :
+If you use this software, please consider citing it as "Labbe, E. (2023). aac-datasets: Audio Captioning datasets for PyTorch.", or use the following BibTeX citation:
 
 ```
 @software{
-    Labbe_aac_datasets_2022,
+    Labbe_aac_datasets_2023,
     author = {Labbé, Etienne},
     license = {MIT},
-    month = {09},
+    month = {10},
     title = {{aac-datasets}},
     url = {https://github.com/Labbeti/aac-datasets/},
-    version = {0.4.0},
+    version = {0.4.1},
     year = {2023}
 }
 ```

diff --git a/docs/aac_datasets.utils.cmdline.rst b/docs/aac_datasets.utils.cmdline.rst
@@ -0,0 +1,7 @@
+aac\_datasets.utils.cmdline module
+==================================
+
+.. automodule:: aac_datasets.utils.cmdline
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/aac_datasets.utils.dcase.rst b/docs/aac_datasets.utils.dcase.rst
diff --git a/docs/package_tree.rst b/docs/package_tree.rst
diff --git a/src/aac_datasets/__init__.py b/src/aac_datasets/__init__.py
@@ -1,9 +1,17 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-"""
-Audio Captioning Datasets package.
-"""
+"""Audio Captioning datasets for PyTorch.  """
+
+
+__author__ = "Etienne Labbé (Labbeti)"
+__author_email__ = "[email protected]"
+__license__ = "MIT"
+__maintainer__ = "Etienne Labbé (Labbeti)"
+__name__ = "aac-datasets"
+__status__ = "Development"
+__version__ = "0.4.1"
+
 
 from .datasets.audiocaps import AudioCaps
 from .datasets.clotho import Clotho
@@ -19,10 +27,15 @@
 )
 
 
-__author__ = "Etienne Labbé (Labbeti)"
-__author_email__ = "[email protected]"
-__license__ = "MIT"
-__maintainer__ = "Etienne Labbé (Labbeti)"
-__name__ = "aac-datasets"
-__status__ = "Development"
-__version__ = "0.4.0"
+__all__ = [
+    "AudioCaps",
+    "Clotho",
+    "MACS",
+    "WavCaps",
+    "get_default_ffmpeg_path",
+    "get_default_root",
+    "get_default_ytdl_path",
+    "set_default_ffmpeg_path",
+    "set_default_root",
+    "set_default_ytdl_path",
+]
diff --git a/src/aac_datasets/check.py b/src/aac_datasets/check.py
@@ -3,18 +3,20 @@
 
 import logging
 import os.path as osp
-import sys
 
 from argparse import ArgumentParser, Namespace
 from typing import Dict, Iterable
 
 import yaml
 
+import aac_datasets
+
 from aac_datasets.datasets.audiocaps import AudioCaps, AudioCapsCard
 from aac_datasets.datasets.clotho import Clotho, ClothoCard
 from aac_datasets.datasets.macs import MACS, MACSCard
 from aac_datasets.datasets.wavcaps import WavCaps, WavCapsCard
 from aac_datasets.utils.paths import get_default_root
+from aac_datasets.download import _setup_logging
 
 
 pylog = logging.getLogger(__name__)
@@ -112,14 +114,8 @@ def _get_main_check_args() -> Namespace:
 
 
 def _main_check() -> None:
-    format_ = "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
-    handler = logging.StreamHandler(sys.stdout)
-    handler.setFormatter(logging.Formatter(format_))
-    pkg_logger = logging.getLogger("aac_datasets")
-    pkg_logger.setLevel(logging.DEBUG)
-    pkg_logger.addHandler(handler)
-
     args = _get_main_check_args()
+    _setup_logging(aac_datasets.__package__, args.verbose)
 
     if args.verbose >= 2:
         pylog.debug(yaml.dump({"Arguments": args.__dict__}, sort_keys=False))