Merge pull request #76 from sp-nitech/drc
Add drc
takenori-y committed Jul 3, 2024
2 parents 87195b9 + 4803739 commit 4471f7d
Showing 8 changed files with 334 additions and 0 deletions.
56 changes: 56 additions & 0 deletions diffsptk/functional.py
@@ -364,6 +364,62 @@ def dfs(x, b=None, a=None):
return nn.InfiniteImpulseResponseDigitalFilter._func(x, b=b, a=a)


def drc(
x,
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain=0,
abs_max=1,
):
"""Apply dynamic range compression.
Parameters
----------
x : Tensor [shape=(..., T)]
Input signal.
threshold : float <= 0
Threshold in dB.
ratio : float > 1
Input/output ratio.
attack_time : float > 0
Attack time in msec.
release_time : float > 0
Release time in msec.
sample_rate : int >= 1
Sample rate in Hz.
makeup_gain : float >= 0
Make-up gain in dB.
abs_max : float > 0
Absolute maximum value of input.

Returns
-------
out : Tensor [shape=(..., T)]
Compressed signal.
"""
return nn.DynamicRangeCompression._func(
x,
threshold=threshold,
ratio=ratio,
attack_time=attack_time,
release_time=release_time,
sample_rate=sample_rate,
makeup_gain=makeup_gain,
abs_max=abs_max,
)
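# A quick usage sketch of the new functional wrapper (illustrative values only;
# the positional order follows the signature above):
#   >>> x = torch.randn(16000)
#   >>> y = diffsptk.functional.drc(x, -20, 4, 10, 100, 16000)
#   >>> y.shape
#   torch.Size([16000])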


def entropy(p, out_format="nat"):
"""Calculate entropy.
5 changes: 5 additions & 0 deletions diffsptk/misc/utils.py
@@ -82,6 +82,11 @@ def to(x, dtype=None):
return x.to(dtype=dtype)


def to_2d(x):
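# Collapse all leading dimensions into a single batch axis, e.g. (2, 3, 100) -> (6, 100);
# to_3d below keeps a singleton channel axis instead.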
y = x.view(-1, x.size(-1))
return y


def to_3d(x):
y = x.view(-1, 1, x.size(-1))
return y
2 changes: 2 additions & 0 deletions diffsptk/modules/__init__.py
@@ -19,6 +19,8 @@
from .df2 import SecondOrderDigitalFilter
from .dfs import InfiniteImpulseResponseDigitalFilter
from .dfs import InfiniteImpulseResponseDigitalFilter as IIR
from .drc import DynamicRangeCompression
from .drc import DynamicRangeCompression as DRC
from .entropy import Entropy
from .excite import ExcitationGeneration
from .fbank import MelFilterBankAnalysis
183 changes: 183 additions & 0 deletions diffsptk/modules/drc.py
@@ -0,0 +1,183 @@
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #

import numpy as np
import torch
from torch import nn
import torchcomp

from ..misc.utils import to_2d


class DynamicRangeCompression(nn.Module):
"""See `this page <https://sp-nitech.github.io/sptk/latest/main/drc.html>`_
for details.

Parameters
----------
threshold : float <= 0
Threshold in dB.
ratio : float > 1
Input/output ratio.
attack_time : float > 0
Attack time in msec.
release_time : float > 0
Release time in msec.
sample_rate : int >= 1
Sample rate in Hz.
makeup_gain : float >= 0
Make-up gain in dB.
abs_max : float > 0
Absolute maximum value of input.
learnable : bool
Whether to make the DRC parameters learnable.

References
----------
.. [1] C.-Y. Yu et al., "Differentiable all-pole filters for time-varying audio
systems," *Proceedings of DAFx*, 2024.
"""

def __init__(
self,
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain=0,
abs_max=1,
learnable=False,
):
super().__init__()

assert threshold <= 0
assert 1 < ratio
assert 0 < attack_time
assert 0 < release_time
assert 1 <= sample_rate
assert 0 <= makeup_gain
assert 0 < abs_max

self.abs_max = abs_max
params = self._precompute(
threshold, ratio, attack_time, release_time, sample_rate, makeup_gain
)
if learnable:
self.params = nn.Parameter(params)
else:
self.register_buffer("params", params)

def forward(self, x):
"""Perform dynamic range compression.
Parameters
----------
x : Tensor [shape=(..., T)]
Input signal.

Returns
-------
out : Tensor [shape=(..., T)]
Compressed signal.

Examples
--------
>>> x = torch.randn(16000)
>>> x.abs().max()
tensor(4.2224)
>>> drc = diffsptk.DynamicRangeCompression(-20, 4, 10, 100, 16000)
>>> y = drc(x)
>>> y.abs().max()
tensor(2.5779)
"""
return self._forward(x, self.abs_max, self.params)

@staticmethod
def _forward(x, abs_max, params):
eps = 1e-10

y = to_2d(x)
y_abs = y.abs() / abs_max + eps

g = torchcomp.compexp_gain(
y_abs,
params[0],
params[1],
-1000,  # Expander threshold (far below any input level, so expansion never engages)
eps,  # Expander ratio (irrelevant since the expander threshold is never reached)
params[2],
params[3],
)
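# Rough intuition (not the torchcomp internals): above the threshold the static
# compression curve has slope 1/ratio in the dB domain, e.g. with threshold=-20 dB
# and ratio=4 an input at -10 dB maps to -20 + (-10 - (-20)) / 4 = -17.5 dB,
# a gain of about -7.5 dB; the attack/release coefficients only smooth this gain over time.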

makeup_gain = params[-1]
y = y * g * makeup_gain
y = y.view_as(x)
return y

@staticmethod
def _func(
x,
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain,
abs_max,
):
params = DynamicRangeCompression._precompute(
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain,
dtype=x.dtype,
device=x.device,
)
return DynamicRangeCompression._forward(x, abs_max, params)

@staticmethod
def _precompute(
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain,
dtype=None,
device=None,
):
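# ln(9) (~2.2, rounded to one decimal place) scales the attack/release times
# before they are converted to smoothing coefficients by torchcomp.ms2coef below.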
c = round(np.log(9), 1)
attack_time = (
torchcomp.ms2coef(torch.tensor(attack_time * c), sample_rate).cpu().numpy()
)
release_time = (
torchcomp.ms2coef(torch.tensor(release_time * c), sample_rate).cpu().numpy()
)
makeup_gain = 10 ** (makeup_gain / 20)  # Convert make-up gain from dB to linear amplitude.
params = np.array([threshold, ratio, attack_time, release_time, makeup_gain])
return torch.tensor(params, dtype=dtype, device=device)
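Because the compression parameters are packed into a single tensor and registered as an nn.Parameter when learnable=True, they can be optimized end to end. A minimal sketch, with the optimizer and loss chosen purely for illustration:

>>> drc = diffsptk.DRC(-20, 4, 10, 100, 16000, learnable=True)
>>> optimizer = torch.optim.SGD(drc.parameters(), lr=1e-3)
>>> x = torch.randn(1, 16000)
>>> loss = drc(x).abs().mean()
>>> loss.backward()
>>> optimizer.step()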
6 changes: 6 additions & 0 deletions diffsptk/modules/poledf.py
@@ -37,6 +37,12 @@ class AllPoleDigitalFilter(nn.Module):
ignore_gain : bool
If True, perform filtering without gain.

References
----------
.. [1] C.-Y. Yu et al., "Differentiable time-varying linear prediction in the
context of end-to-end analysis-by-synthesis," *Proceedings of Interspeech*,
2024.
"""

def __init__(self, filter_order, frame_period, ignore_gain=False):
11 changes: 11 additions & 0 deletions docs/modules/drc.rst
@@ -0,0 +1,11 @@
.. _drc:

drc
---

.. autoclass:: diffsptk.DRC

.. autoclass:: diffsptk.DynamicRangeCompression
:members:

.. autofunction:: diffsptk.functional.drc
1 change: 1 addition & 0 deletions pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
"torchaudio >= 0.11.0",
"torchcrepe >= 0.0.21",
"torchlpc >= 0.2.0",
"torchcomp >= 0.1.1",
"vector-quantize-pytorch >= 0.8.0, <= 1.12.12",
]
dynamic = ["version"]
70 changes: 70 additions & 0 deletions tests/test_drc.py
@@ -0,0 +1,70 @@
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #

import numpy as np
import pytest

import diffsptk
import tests.utils as U


@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("module", [False, True])
def test_compatibility(
device,
module,
threshold=-40,
ratio=2,
attack_time=50,
release_time=20,
sr=16000,
gain=0,
T=20,
):
drc = U.choice(
module,
diffsptk.DRC,
diffsptk.functional.drc,
{},
{
"threshold": threshold,
"ratio": ratio,
"attack_time": attack_time,
"release_time": release_time,
"sample_rate": sr,
"makeup_gain": gain,
},
)

U.check_compatibility(
device,
drc,
[],
"x2x +sd tools/SPTK/asset/data.short | sopr -d 32768",
(
f"drc -v 1 -t {threshold} -r {ratio} -A {attack_time} -R {release_time} "
f"-s {sr // 1000} -m {gain} -d 0"
),
[],
eq=lambda a, b: np.corrcoef(a, b)[0, 1] > 0.99,
)

U.check_differentiability(device, drc, [T])


def test_learnable(T=20):
drc = diffsptk.DRC(-20, 2, 50, 50, 16000, learnable=True)
U.check_learnable(drc, (T,))
