From e221382dffabacf77dd697a7395f9f3fc41733f2 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 11 Jan 2024 18:38:36 +0100 Subject: [PATCH] - Migrating to pyproject.toml - fixing some display bugs in docstrings - some tweaks and additions in README.md --- .github/workflows/test-pytest.yml | 2 +- MANIFEST.in | 3 -- README.md | 16 +++++++- docs/source/quickstart.rst | 4 +- pygamma_agreement/cst.py | 2 + pygamma_agreement/dissimilarity.py | 14 ++++--- pygamma_agreement/sampler.py | 1 + pyproject.toml | 63 ++++++++++++++++++++++++++++++ 8 files changed, 92 insertions(+), 13 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml diff --git a/.github/workflows/test-pytest.yml b/.github/workflows/test-pytest.yml index e5b0c74..6b81523 100644 --- a/.github/workflows/test-pytest.yml +++ b/.github/workflows/test-pytest.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index bce7e17..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include LICENSE -include README.* -include requirements.txt diff --git a/README.md b/README.md index bbe6536..a418466 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,8 @@ Optionally, to allow `pygamma-agreement` to display visual representations of our API's objects in Jupyter Notebooks, [Matplotlib](https://matplotlib.org/>) is needed. -pygamma-agreement is a Python 3 package and is currently tested for Python 3.7, 3.8, 3.9 and 3.10. -pygamma-agreement does not work with Python 2.7. +`pygamma-agreement` is a Python 3 package and is currently tested for Python 3.7 to 3.12. +`pygamma-agreement` does not work with Python 2.7. 
## Installation @@ -69,6 +69,18 @@ After that, tests are ready to be launched (they take ~2m on a recent laptop) : $ pytest tests/ +## Documentation + +The documentation for this package has been written using Sphinx. To build the documentation locally, run: + + $ pip install "pygamma-agreement[docs]" + $ cd docs/ + $ make html + +After that, you can view the documentation by running + + $ firefox build/html/index.html + ## Submitting and issue or contributing Please read `CONTRIBUTING.md` before submitting and issue or writing some contribution diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 480e436..c39f6fa 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -22,8 +22,8 @@ tuple of 3 information: * Segment end (at 7.2s) Obviously, our annotators sometimes disagree on who might be talking, -or when exactly each person's speech turn is starting or ending. Luckily, the Gamma -inter-annotator agreement enables us to measure that. +or when exactly each person's speech turn is starting or ending. The Gamma +inter-annotator agreement enables us to obtain a measure of that disagreement. We'll first load the annotation into ``pygamma-agreement``'s base data structure, the ``Continuum``, made to store this kind of annotated data. diff --git a/pygamma_agreement/cst.py b/pygamma_agreement/cst.py index 380c8b8..f9a7ed6 100644 --- a/pygamma_agreement/cst.py +++ b/pygamma_agreement/cst.py @@ -144,6 +144,7 @@ def category_shuffle(self, continuum: Continuum, """ Shuffles the categories of the annotations in the given continuum using the process described in section 3.3.5 of https://hal.archives-ouvertes.fr/hal-00769639/. + Parameters ---------- overlapping_fun: @@ -151,6 +152,7 @@ def category_shuffle(self, continuum: Continuum, (the lower the distance between categories, the higher the chance one will be changed into the other). 
prevalence: specify whether or not to consider the proportion of presence of each category in the reference. + """ category_weights = self._reference_continuum.category_weights # matrix "A" diff --git a/pygamma_agreement/dissimilarity.py b/pygamma_agreement/dissimilarity.py index 54b2482..dda80ad 100644 --- a/pygamma_agreement/dissimilarity.py +++ b/pygamma_agreement/dissimilarity.py @@ -73,8 +73,8 @@ def __init__(self, categories: Optional[SortedSet] = None, delta_empty: float = @abc.abstractmethod def compile_d_mat(self) -> Callable[[np.ndarray, np.ndarray], float]: """ - Must set self.d_mat to the the cfunc (decorated with @dissimilarity_dec) function that corresponds to the - unit-to-unit, (in arrays form) disorder given by the dissimilarity. + Must set self.d_mat to the cfunc (decorated with @dissimilarity_dec) function that corresponds to the + unit-to-unit (in arrays form) disorder given by the dissimilarity. """ raise NotImplemented() @@ -283,9 +283,11 @@ class PositionalSporadicDissimilarity(AbstractDissimilarity): """ Positional-sporadic dissimilarity. Takes only the position of annotations into account. This distance is : + * 0 when segments are equal - * < delta_empty when segments completely overlap :math:`A \cup B = A` or :math:`B`) + * < delta_empty when segments completely overlap (:math:`A \cup B = A` or :math:`B`) * > delta_empty when segments are separated (:math:`A \cap B = \emptyset`) + """ def __init__(self, delta_empty: float = 1.0): super().__init__(delta_empty=delta_empty) @@ -415,7 +417,7 @@ def cat_dissim_func(str1: str, str2: str) -> float: class OrdinalCategoricalDissimilarity(PrecomputedCategoricalDissimilarity): """ Categorical dissimilarity where each label is given a position on the real axis, and the disorder between - categories of positions 'a' and 'b' being |a - b|/m * delta_empty with m the maximum position. If not provided, + categories of positions 'a' and 'b' being \|a - b\|/m * delta_empty with m the maximum position. 
If not provided, positions are 0, 1, 2... """ def __init__(self, labels: Iterable[str], p: Iterable[float] = None, delta_empty=1.0): @@ -453,7 +455,7 @@ def __init__(self, labels: Iterable[str], p: Iterable[float] = None, delta_empty class NumericalCategoricalDissimilarity(OrdinalCategoricalDissimilarity): """ Categorical dissimilarity made for numerical categories (i.e a category is a float or int literal). - The disorder between categories 'a' and 'b' being |a - b|/m * delta_empty with m the maximum category. + The disorder between categories 'a' and 'b' being \|a - b\|/m * delta_empty with m the maximum category. """ def __init__(self, labels: Iterable[str], delta_empty: float = 1.0): try: @@ -468,6 +470,7 @@ class CombinedCategoricalDissimilarity(AbstractDissimilarity): """ This dissimilarity takes both positioning and categorizing of annotations into account. Combined categorical dissimilarity constructor. + Parameters ---------- delta_empty : optional, float @@ -479,6 +482,7 @@ class CombinedCategoricalDissimilarity(AbstractDissimilarity): coefficient weighting the categorical dissimilarity value. Defaults to 1. cat_dissim : optional, CategoricalDissimilarity Categorical-only dissimilarity to be used. If not set, defaults to the absolute categorical dissimilarity. + """ def __init__(self, alpha: float = 1.0, diff --git a/pygamma_agreement/sampler.py b/pygamma_agreement/sampler.py index fed6baa..04a375a 100644 --- a/pygamma_agreement/sampler.py +++ b/pygamma_agreement/sampler.py @@ -198,6 +198,7 @@ class StatisticalContinuumSampler(AbstractContinuumSampler): - The number of annotations per annotator - The gap between two of an annotator's annotations - The duration of the annotations' segments + The sample is thus created by computing normal distributions using these parameters. It also requires the probability of occurence of each annotations category. 
You can either initalize sampling with diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5204655 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,63 @@ +[project] +name = "pygamma-agreement" +readme = "README.md" +version = "0.5.7" +description = 'Inter-annotator agreement measure and alignment written in python' + +authors = [ + { name = "Hadrien Titeux", email = "hadrien.titeux@ens.psl.eu" }, + { name = "Rachid Riad", email = "rachid.riad@ens.psl.eu" }, + { name = "Léopold Favre", email = "favreleopold@gmail.com" }, +] +maintainers = [ + { name = "Hadrien Titeux", email = "hadrien.titeux@ens.psl.eu" }, +] + + +license = { text = "MIT" } +requires-python = ">=3.7" +keywords = [] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Topic :: Scientific/Engineering", + "Topic :: Text Processing :: Linguistic", + "Topic :: Multimedia :: Sound/Audio :: Speech", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dynamic = ["dependencies"] + +[project.urls] +Documentation = "https://pygamma-agreement.readthedocs.io/en/latest/" +Homepage = "https://github.com/bootphon/pygamma-agreement" +Repository = "https://github.com/bootphon/pygamma-agreement" + + +[tool.setuptools.dynamic] +dependencies = { file = ["requirements.txt"] } + +[project.optional-dependencies] +notebook = ["matplotlib"] +CBC = ["cylp"] +testing = ["pytest", "cylp"] +docs = ["sphinx", "sphinx_rtd_theme"] + +[build-system] +requires = ["setuptools>=61", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project.scripts] +pygamma-agreement = "pygamma_agreement.cli_apps:pygamma_cmd" + +[tool.setuptools.packages.find] 
+where = ["."] +include = ["pygamma_agreement*"] +exclude = ["docs*", "tests*"] +