Version 0.4.4
Labbeti committed Aug 14, 2023
1 parent f087ae9 commit 228d77b
Showing 46 changed files with 840 additions and 346 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/python-package-pip.yaml
@@ -8,13 +8,21 @@ on:
pull_request:
branches: [ main, dev ]

env:
CACHE_NUMBER: 0 # increase to reset cache manually

# Cancel workflow if a new push occurs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
build:
runs-on: ${{ matrix.os }}

strategy:
matrix:
os: [ubuntu-latest]
os: [ubuntu-latest, windows-latest]
python-version: ["3.9"]
java-version: ["11"]

@@ -39,9 +47,10 @@ jobs:
java-package: jre

- name: Install package
shell: bash
# note: ${GITHUB_REF##*/} gives the branch name
run: |
python -m pip install "aac-metrics[dev] @ git+https://github.com/Labbeti/aac-metrics@${GITHUB_REF##*/}"
python -m pip install "aac-metrics[${GITHUB_REF_NAME}] @ git+https://github.com/Labbeti/aac-metrics@${GITHUB_REF##*/}"
- name: Load cache of external code and data
uses: actions/cache@master
2 changes: 2 additions & 0 deletions .gitignore
@@ -136,3 +136,5 @@ tests/fense
tmp/
tmp*/
*.mdb
core-python*
core-srun*
19 changes: 15 additions & 4 deletions CHANGELOG.md
@@ -2,14 +2,25 @@

All notable changes to this project will be documented in this file.

## [0.4.4] UNRELEASED
## [0.4.4] 2023-08-14
### Added
- `Evaluate` class now implements `__hash__` and `tolist()` methods.
- `BLEU1` to `BLEU4` classes and functions.
- Get and set global user paths for cache, java and tmp.

### Changed
- TODO
- Function `get_install_info` now returns `package_path`.
- `AACMetric` now indicates the output type when using the `__call__` method.
- Renamed `AACEvaluate` to `DCASE2023Evaluate`, which uses the `dcase2023` metric set instead of the `all` metric set.

### Fixed
- `sbert_sim` name in internal instantiation functions.
- Path management for Windows.

## [0.4.3] 2023-06-15
### Changed
- `AACMetric` is no longer a subclass of `torchmetrics.Metric` even when it is installed. It avoid dependency to this package and remove potential errors due to Metric.
- Java 12 and 13 are now allowed.
- `AACMetric` is no longer a subclass of `torchmetrics.Metric` even when it is installed. This avoids a dependency on that package and removes potential errors due to the `Metric` base class.
- Java 12 and 13 are now allowed in this package.

### Fixed
- Output name `sbert_sim` in FENSE and SBERTSim classes.
4 changes: 2 additions & 2 deletions CITATION.cff
@@ -19,5 +19,5 @@ keywords:
- captioning
- audio-captioning
license: MIT
version: 0.4.3
date-released: '2023-06-15'
version: 0.4.4
date-released: '2023-08-14'
33 changes: 18 additions & 15 deletions README.md
@@ -38,6 +38,11 @@ Install the pip package:
pip install aac-metrics
```

To check that the package is installed and display its version, you can use this command:
```bash
aac-metrics-info
```

Download the external code and models needed for METEOR, SPICE, SPIDEr, SPIDEr-max, PTBTokenizer, SBERTSim, FluencyError, FENSE and SPIDEr-FL:
```bash
aac-metrics-download
@@ -114,8 +119,10 @@ Each metric also exists as a Python class version, like `aac_metrics.classes.ci
| SPIDEr-FL [[9]](#spider-fl) | `SPIDErFL` | audio captioning | [0, 5.5] | Combines SPIDEr and Fluency Error |

## Requirements
This package has been developed for Ubuntu 20.04, and it is expected to work on most Linux distributions.
### Python packages


The pip requirements are automatically installed when using `pip install` on this repository.
```
torch >= 1.10.1
@@ -126,7 +133,7 @@ sentence-transformers >= 2.2.2
```

### External requirements
- `java` **>= 1.8 and <= 1.11** is required to compute METEOR, SPICE and use the PTBTokenizer.
- `java` **>= 1.8 and <= 1.13** is required to compute METEOR, SPICE and use the PTBTokenizer.
Most of these functions can specify a java executable path with the `java_path` argument.

- `unzip` command to extract SPICE zipped files.
@@ -191,18 +198,14 @@ arXiv: 1612.00370. [Online]. Available: http://arxiv.org/abs/1612.00370
## Citation
If you use **SPIDEr-max**, you can cite the following paper using BibTeX:
```
@inproceedings{labbe:hal-03810396,
TITLE = {{Is my automatic audio captioning system so bad? spider-max: a metric to consider several caption candidates}},
AUTHOR = {Labb{\'e}, Etienne and Pellegrini, Thomas and Pinquier, Julien},
URL = {https://hal.archives-ouvertes.fr/hal-03810396},
BOOKTITLE = {{Workshop DCASE}},
ADDRESS = {Nancy, France},
YEAR = {2022},
MONTH = Nov,
KEYWORDS = {audio captioning ; evaluation metric ; beam search ; multiple candidates},
PDF = {https://hal.archives-ouvertes.fr/hal-03810396/file/Labbe_DCASE2022.pdf},
HAL_ID = {hal-03810396},
HAL_VERSION = {v1},
@inproceedings{Labbe2022,
title = {Is my Automatic Audio Captioning System so Bad? SPIDEr-max: A Metric to Consider Several Caption Candidates},
author = {Labb\'{e}, Etienne and Pellegrini, Thomas and Pinquier, Julien},
year = 2022,
month = {November},
booktitle = {Proceedings of the 7th Detection and Classification of Acoustic Scenes and Events 2022 Workshop (DCASE2022)},
address = {Nancy, France},
url = {https://dcase.community/documents/workshop2022/proceedings/DCASE2022Workshop_Labbe_46.pdf}
}
```

@@ -212,10 +215,10 @@ If you use this software, please consider citing it as below:
Labbe_aac-metrics_2023,
author = {Labbé, Etienne},
license = {MIT},
month = {6},
month = {8},
title = {{aac-metrics}},
url = {https://github.com/Labbeti/aac-metrics/},
version = {0.4.3},
version = {0.4.4},
year = {2023},
}
```
7 changes: 7 additions & 0 deletions docs/aac_metrics.utils.paths.rst
@@ -0,0 +1,7 @@
aac\_metrics.utils.paths module
===============================

.. automodule:: aac_metrics.utils.paths
:members:
:undoc-members:
:show-inheritance:
4 changes: 4 additions & 0 deletions docs/conf.py
@@ -101,3 +101,7 @@

def setup(app) -> None:
app.add_css_file("my_theme.css")


# TODO: to be used with sphinx>=7.1
maximum_signature_line_length = 10
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -1 +1 @@
sphinx-press-theme==0.8.0
sphinx-press-theme>=0.8.0
6 changes: 3 additions & 3 deletions docs/spider_max.rst
@@ -75,7 +75,7 @@ Here are 2 examples with the 5 candidates generated by the beam search algorithm,
(Audio file id "jid4t-FzUn0" from AudioCaps testing subset)


Even with very similar candidates, the SPIDEr scores vary drastically. To address this issue, we proposed the SPIDEr-max metric, which takes the maximum value over several candidates for the same audio. SPIDEr-max demonstrates that SPIDEr can exceed state-of-the-art scores on AudioCaps and Clotho, and even [human scores on AudioCaps](https://hal.archives-ouvertes.fr/hal-03810396).
Even with very similar candidates, the SPIDEr scores vary drastically. To address this issue, we proposed the SPIDEr-max metric, which takes the maximum value over several candidates for the same audio. SPIDEr-max demonstrates that SPIDEr can exceed state-of-the-art scores on AudioCaps and Clotho, and even `human scores on AudioCaps <https://dcase.community/documents/workshop2022/proceedings/DCASE2022Workshop_Labbe_46.pdf>`_.

How?
#####
@@ -95,6 +95,6 @@ This usage is very similar to other captioning metrics, with the main difference
corpus_scores, sents_scores = spider_max(mult_candidates, mult_references)
print(corpus_scores)
# {"spider": tensor(0.1), ...}
# {"spider_max": tensor(0.1), ...}
print(sents_scores)
# {"spider": tensor([0.9, ...]), ...}
# {"spider_max": tensor([0.9, ...]), ...}
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -33,7 +33,7 @@ dynamic = ["version"]
[project.urls]
Homepage = "https://pypi.org/project/aac-metrics/"
Documentation = "https://aac-metrics.readthedocs.io/"
Repository = "https://github.com//Labbeti/aac-metrics.git"
Repository = "https://github.com/Labbeti/aac-metrics.git"
Changelog = "https://github.com/Labbeti/aac-metrics/blob/main/CHANGELOG.md"

[project.scripts]
@@ -50,6 +50,7 @@ dev = [
"scikit-image==0.19.2",
"matplotlib==3.5.2",
"torchmetrics>=0.10",
"transformers<4.31.0",
]

[tool.setuptools.packages.find]
1 change: 1 addition & 0 deletions requirements.txt
@@ -5,3 +5,4 @@ numpy>=1.21.2
pyyaml>=6.0
tqdm>=4.64.0
sentence-transformers>=2.2.2
transformers<4.31.0
21 changes: 18 additions & 3 deletions src/aac_metrics/__init__.py
@@ -10,32 +10,47 @@
__license__ = "MIT"
__maintainer__ = "Etienne Labbé (Labbeti)"
__status__ = "Development"
__version__ = "0.4.3"
__version__ = "0.4.4"


from .classes.base import AACMetric
from .classes.bleu import BLEU
from .classes.cider_d import CIDErD
from .classes.evaluate import AACEvaluate, _get_metric_factory_classes
from .classes.evaluate import DCASE2023Evaluate, _get_metric_factory_classes
from .classes.fense import FENSE
from .classes.meteor import METEOR
from .classes.rouge_l import ROUGEL
from .classes.spice import SPICE
from .classes.spider import SPIDEr
from .functional.evaluate import dcase2023_evaluate, evaluate
from .utils.paths import (
get_default_cache_path,
get_default_java_path,
get_default_tmp_path,
set_default_cache_path,
set_default_java_path,
set_default_tmp_path,
)


__all__ = [
"BLEU",
"CIDErD",
"AACEvaluate",
"DCASE2023Evaluate",
"FENSE",
"METEOR",
"ROUGEL",
"SPICE",
"SPIDEr",
"dcase2023_evaluate",
"evaluate",
"get_default_cache_path",
"get_default_java_path",
"get_default_tmp_path",
"set_default_cache_path",
"set_default_java_path",
"set_default_tmp_path",
"load_metric",
]


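As a quick illustration of the new path helpers exported above, here is a minimal sketch; the default values shown in the comments and the override paths are assumptions for illustration, not documented behavior:

```python
from aac_metrics import (
    get_default_cache_path,
    get_default_tmp_path,
    set_default_cache_path,
    set_default_tmp_path,
)

# Inspect the global paths used for external code, models and temporary files.
print(get_default_cache_path())  # assumption: something like "~/.cache"
print(get_default_tmp_path())    # assumption: something like "/tmp"

# Override them once per process (illustrative paths), presumably instead of
# passing cache/tmp paths to each download or metric call.
set_default_cache_path("/data/aac-metrics/cache")
set_default_tmp_path("/data/aac-metrics/tmp")
```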
4 changes: 2 additions & 2 deletions src/aac_metrics/classes/__init__.py
@@ -3,7 +3,7 @@

from .bleu import BLEU
from .cider_d import CIDErD
from .evaluate import Evaluate, AACEvaluate
from .evaluate import DCASE2023Evaluate, Evaluate
from .fense import FENSE
from .fluerr import FluErr
from .meteor import METEOR
@@ -18,7 +18,7 @@
__all__ = [
"BLEU",
"CIDErD",
"AACEvaluate",
"DCASE2023Evaluate",
"Evaluate",
"FENSE",
"FluErr",
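For reference, a hedged usage sketch of the renamed class. It assumes the external dependencies (Java, downloaded models) are already set up, and the exact output keys are an assumption:

```python
from aac_metrics import DCASE2023Evaluate

candidates = ["a man is speaking", "rain falls on a roof"]
mult_references = [
    ["a man speaks", "someone is talking"],
    ["heavy rain is falling on a roof", "it is raining hard"],
]

# DCASE2023Evaluate replaces the former AACEvaluate and uses the
# "dcase2023" metric set instead of the "all" metric set.
evaluate = DCASE2023Evaluate()
corpus_scores, sents_scores = evaluate(candidates, mult_references)
print(corpus_scores)  # assumption: keys like {"meteor": tensor(...), ...}
```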
18 changes: 12 additions & 6 deletions src/aac_metrics/classes/base.py
@@ -1,13 +1,15 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from typing import Any, Optional
from typing import Any, Generic, Optional, TypeVar

from torch import nn

OutType = TypeVar("OutType")

class AACMetric(nn.Module):
"""Base Metric module used when torchmetrics is not installed."""

class AACMetric(nn.Module, Generic[OutType]):
"""Base Metric module for AAC metrics. Similar to torchmetrics.Metric."""

# Global values
full_state_update: Optional[bool] = False
@@ -23,10 +25,10 @@ def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)

# Public methods
def compute(self) -> Any:
return None
def compute(self) -> OutType:
return None # type: ignore

def forward(self, *args: Any, **kwargs: Any) -> Any:
def forward(self, *args: Any, **kwargs: Any) -> OutType:
self.update(*args, **kwargs)
output = self.compute()
self.reset()
Expand All @@ -37,3 +39,7 @@ def reset(self) -> None:

def update(self, *args, **kwargs) -> None:
pass

# Magic methods
def __call__(self, *args: Any, **kwds: Any) -> OutType:
return super().__call__(*args, **kwds)
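To show what the new `Generic[OutType]` parameter buys, here is a hypothetical subclass (not part of the package) whose `__call__` now carries a precise return type:

```python
import torch
from torch import Tensor

from aac_metrics.classes.base import AACMetric


class CandidateCounter(AACMetric[Tensor]):
    """Hypothetical metric that counts how many candidates it has seen."""

    def __init__(self) -> None:
        super().__init__()
        self._count = 0

    def update(self, candidates: list[str], *args, **kwargs) -> None:
        self._count += len(candidates)

    def compute(self) -> Tensor:
        return torch.as_tensor(self._count)

    def reset(self) -> None:
        self._count = 0


counter = CandidateCounter()
# forward() runs update -> compute -> reset; type checkers see `total: Tensor`.
total = counter(["a dog barks", "birds chirp"])
print(total)  # tensor(2)
```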
46 changes: 45 additions & 1 deletion src/aac_metrics/classes/bleu.py
@@ -13,7 +13,7 @@
)


class BLEU(AACMetric):
class BLEU(AACMetric[Union[tuple[dict[str, Tensor], dict[str, Tensor]], Tensor]]):
"""BiLingual Evaluation Understudy metric class.
- Paper: https://www.aclweb.org/anthology/P02-1040.pdf
@@ -85,3 +85,47 @@ def update(
self._cooked_cands,
self._cooked_mrefs,
)


class BLEU1(BLEU):
def __init__(
self,
return_all_scores: bool = True,
option: str = "closest",
verbose: int = 0,
tokenizer: Callable[[str], list[str]] = str.split,
) -> None:
super().__init__(return_all_scores, 1, option, verbose, tokenizer)


class BLEU2(BLEU):
def __init__(
self,
return_all_scores: bool = True,
option: str = "closest",
verbose: int = 0,
tokenizer: Callable[[str], list[str]] = str.split,
) -> None:
super().__init__(return_all_scores, 2, option, verbose, tokenizer)


class BLEU3(BLEU):
def __init__(
self,
return_all_scores: bool = True,
option: str = "closest",
verbose: int = 0,
tokenizer: Callable[[str], list[str]] = str.split,
) -> None:
super().__init__(return_all_scores, 3, option, verbose, tokenizer)


class BLEU4(BLEU):
def __init__(
self,
return_all_scores: bool = True,
option: str = "closest",
verbose: int = 0,
tokenizer: Callable[[str], list[str]] = str.split,
) -> None:
super().__init__(return_all_scores, 4, option, verbose, tokenizer)
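A small usage sketch for the new per-order classes; the output key name below is an assumption (following the usual `bleu_n` naming):

```python
from aac_metrics.classes.bleu import BLEU1, BLEU4

candidates = ["a man is speaking"]
mult_references = [["a man speaks", "someone is talking"]]

bleu1 = BLEU1()  # same as BLEU(n=1), per the constructors above
bleu4 = BLEU4()  # same as BLEU(n=4)

corpus_scores, sents_scores = bleu1(candidates, mult_references)
print(corpus_scores)  # assumption: {"bleu_1": tensor(...)}

corpus_scores_4, _ = bleu4(candidates, mult_references)
```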
4 changes: 2 additions & 2 deletions src/aac_metrics/classes/cider_d.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from typing import Callable, Union
from typing import Any, Callable, Union

from torch import Tensor

@@ -12,7 +12,7 @@
)


class CIDErD(AACMetric):
class CIDErD(AACMetric[Union[tuple[dict[str, Tensor], dict[str, Any]], Tensor]]):
"""Consensus-based Image Description Evaluation metric class.
- Paper: https://arxiv.org/pdf/1411.5726.pdf