def drc(
    x,
    threshold,
    ratio,
    attack_time,
    release_time,
    sample_rate,
    makeup_gain=0,
    abs_max=1,
):
    """Compress the dynamic range of a signal.

    This is a thin functional wrapper that forwards every argument to
    ``nn.DynamicRangeCompression._func``.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        Input signal.

    threshold : float <= 0
        Threshold in dB.

    ratio : float > 1
        Input/output ratio.

    attack_time : float > 0
        Attack time in msec.

    release_time : float > 0
        Release time in msec.

    sample_rate : int >= 1
        Sample rate in Hz.

    makeup_gain : float >= 0
        Make-up gain in dB.

    abs_max : float > 0
        Absolute maximum value of input.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        Compressed signal.

    """
    compressor_settings = {
        "threshold": threshold,
        "ratio": ratio,
        "attack_time": attack_time,
        "release_time": release_time,
        "sample_rate": sample_rate,
        "makeup_gain": makeup_gain,
        "abs_max": abs_max,
    }
    return nn.DynamicRangeCompression._func(x, **compressor_settings)
def to_2d(x):
    """Flatten all leading dimensions of a tensor into one.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        Input tensor.

    Returns
    -------
    out : Tensor [shape=(B, T)]
        Two-dimensional tensor, where B is the product of the leading
        dimensions of the input (1 for a one-dimensional input).

    """
    # reshape() returns a view whenever view() would have succeeded, and falls
    # back to a copy for non-contiguous inputs (e.g., transposed tensors)
    # instead of raising a RuntimeError.
    y = x.reshape(-1, x.size(-1))
    return y
class DynamicRangeCompression(nn.Module):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/drc.html>`_
    for details.

    Parameters
    ----------
    threshold : float <= 0
        Threshold in dB.

    ratio : float > 1
        Input/output ratio.

    attack_time : float > 0
        Attack time in msec.

    release_time : float > 0
        Release time in msec.

    sample_rate : int >= 1
        Sample rate in Hz.

    makeup_gain : float >= 0
        Make-up gain in dB.

    abs_max : float > 0
        Absolute maximum value of input.

    learnable : bool
        Whether to make the DRC parameters learnable.

    References
    ----------
    .. [1] C.-Y. Yu et al., "Differentiable all-pole filters for time-varying audio
           systems," *Proceedings of DAFx*, 2024.

    """

    def __init__(
        self,
        threshold,
        ratio,
        attack_time,
        release_time,
        sample_rate,
        makeup_gain=0,
        abs_max=1,
        learnable=False,
    ):
        super().__init__()

        assert threshold <= 0
        assert 1 < ratio
        assert 0 < attack_time
        assert 0 < release_time
        assert 1 <= sample_rate
        assert 0 <= makeup_gain
        assert 0 < abs_max

        self.abs_max = abs_max
        params = self._precompute(
            threshold, ratio, attack_time, release_time, sample_rate, makeup_gain
        )
        # The packed parameters are either trainable or a plain buffer that
        # follows the module across devices/dtypes.
        if learnable:
            self.params = nn.Parameter(params)
        else:
            self.register_buffer("params", params)

    def forward(self, x):
        """Perform dynamic range compression.

        Parameters
        ----------
        x : Tensor [shape=(..., T)]
            Input signal.

        Returns
        -------
        out : Tensor [shape=(..., T)]
            Compressed signal.

        Examples
        --------
        >>> x = torch.randn(16000)
        >>> x.abs().max()
        tensor(4.2224)
        >>> drc = diffsptk.DynamicRangeCompression(-20, 4, 10, 100, 16000)
        >>> y = drc(x)
        >>> y.abs().max()
        tensor(2.5779)

        """
        return self._forward(x, self.abs_max, self.params)

    @staticmethod
    def _forward(x, abs_max, params):
        """Apply the compressor gain described by ``params`` to ``x``.

        Parameters
        ----------
        x : Tensor [shape=(..., T)]
            Input signal.

        abs_max : float > 0
            Absolute maximum value of input.

        params : Tensor [shape=(5,)]
            Packed parameters as returned by ``_precompute``: threshold, ratio,
            attack coefficient, release coefficient, and linear make-up gain.

        Returns
        -------
        out : Tensor [shape=(..., T)]
            Compressed signal.

        """
        # Offset added to the normalized magnitude so the level is never exactly
        # zero; the same constant is reused as the expander ratio.
        eps = 1e-10

        y = to_2d(x)
        y_abs = y.abs() / abs_max + eps

        # NOTE(review): the expander arguments look chosen so that the expander
        # stage never engages -- confirm against the torchcomp documentation.
        g = torchcomp.compexp_gain(
            y_abs,
            params[0],
            params[1],
            -1000,  # Expander threshold
            eps,  # Expander ratio
            params[2],
            params[3],
        )

        makeup_gain = params[-1]
        y = y * g * makeup_gain
        y = y.view_as(x)
        return y

    @staticmethod
    def _func(
        x,
        threshold,
        ratio,
        attack_time,
        release_time,
        sample_rate,
        makeup_gain,
        abs_max,
    ):
        """Functional entry point: build the parameters and compress ``x``.

        The parameters are created with the dtype and device of ``x``.
        """
        params = DynamicRangeCompression._precompute(
            threshold,
            ratio,
            attack_time,
            release_time,
            sample_rate,
            makeup_gain,
            dtype=x.dtype,
            device=x.device,
        )
        return DynamicRangeCompression._forward(x, abs_max, params)

    @staticmethod
    def _precompute(
        threshold,
        ratio,
        attack_time,
        release_time,
        sample_rate,
        makeup_gain,
        dtype=None,
        device=None,
    ):
        """Pack the compressor settings into a single tensor.

        Parameters
        ----------
        threshold : float <= 0
            Threshold in dB.

        ratio : float > 1
            Input/output ratio.

        attack_time : float > 0
            Attack time in msec.

        release_time : float > 0
            Release time in msec.

        sample_rate : int >= 1
            Sample rate in Hz.

        makeup_gain : float >= 0
            Make-up gain in dB.

        dtype : torch.dtype or None
            Data type of the result. If None, the PyTorch default floating
            point type is used.

        device : torch.device or None
            Device of the result.

        Returns
        -------
        out : Tensor [shape=(5,)]
            Threshold, ratio, attack coefficient, release coefficient, and
            linear make-up gain, in this order.

        """
        if dtype is None:
            # NumPy packs the values as float64 and torch.tensor() would keep
            # that type; fall back to the default dtype so that the module
            # parameters match ordinary (float32) inputs.
            dtype = torch.get_default_dtype()

        # Scale factor (ln 9 rounded to 2.2) applied to both times before they
        # are converted to smoothing coefficients.
        # NOTE(review): presumably maps the given times onto the time-constant
        # convention expected by torchcomp.ms2coef -- confirm.
        c = round(np.log(9), 1)
        attack_time = (
            torchcomp.ms2coef(torch.tensor(attack_time * c), sample_rate).cpu().numpy()
        )
        release_time = (
            torchcomp.ms2coef(torch.tensor(release_time * c), sample_rate).cpu().numpy()
        )
        # Convert the make-up gain from dB to a linear amplitude factor.
        makeup_gain = 10 ** (makeup_gain / 20)
        params = np.array([threshold, ratio, attack_time, release_time, makeup_gain])
        return torch.tensor(params, dtype=dtype, device=device)
Yu et al., "Differentiable time-varying linear prediction in the + context of end-to-end analysis-by-synthesis," *Proceedings of Interspeech*, + 2024. + """ def __init__(self, filter_order, frame_period, ignore_gain=False): diff --git a/docs/modules/drc.rst b/docs/modules/drc.rst new file mode 100644 index 0000000..cb79523 --- /dev/null +++ b/docs/modules/drc.rst @@ -0,0 +1,11 @@ +.. _drc: + +drc +--- + +.. autoclass:: diffsptk.DRC + +.. autoclass:: diffsptk.DynamicRangeCompression + :members: + +.. autofunction:: diffsptk.functional.drc diff --git a/pyproject.toml b/pyproject.toml index 52d5867..775239a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "torchaudio >= 0.11.0", "torchcrepe >= 0.0.21", "torchlpc >= 0.2.0", + "torchcomp >= 0.1.1", "vector-quantize-pytorch >= 0.8.0, <= 1.12.12", ] dynamic = ["version"] diff --git a/tests/test_drc.py b/tests/test_drc.py new file mode 100644 index 0000000..ffbed56 --- /dev/null +++ b/tests/test_drc.py @@ -0,0 +1,70 @@ +# ------------------------------------------------------------------------ # +# Copyright 2022 SPTK Working Group # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("module", [False, True])
def test_compatibility(
    device,
    module,
    threshold=-40,
    ratio=2,
    attack_time=50,
    release_time=20,
    sr=16000,
    gain=0,
    T=20,
):
    """Check DRC against the ``drc`` command line and check differentiability.

    Both the module (``diffsptk.DRC``) and the functional
    (``diffsptk.functional.drc``) variants are exercised via ``module``.
    Outputs are accepted when their correlation coefficient with the
    reference exceeds 0.99.
    """
    drc_settings = {
        "threshold": threshold,
        "ratio": ratio,
        "attack_time": attack_time,
        "release_time": release_time,
        "sample_rate": sr,
        "makeup_gain": gain,
    }
    drc = U.choice(module, diffsptk.DRC, diffsptk.functional.drc, {}, drc_settings)

    # Command producing the input signal and command producing the reference.
    source_cmd = "x2x +sd tools/SPTK/asset/data.short | sopr -d 32768"
    reference_cmd = (
        f"drc -v 1 -t {threshold} -r {ratio} -A {attack_time} -R {release_time} "
        f"-s {sr // 1000} -m {gain} -d 0"
    )

    def strongly_correlated(a, b):
        return np.corrcoef(a, b)[0, 1] > 0.99

    U.check_compatibility(
        device,
        drc,
        [],
        source_cmd,
        reference_cmd,
        [],
        eq=strongly_correlated,
    )

    U.check_differentiability(device, drc, [T])


def test_learnable(T=20):
    """Check a DRC module built with ``learnable=True`` via ``U.check_learnable``."""
    learnable_drc = diffsptk.DRC(-20, 2, 50, 50, 16000, learnable=True)
    U.check_learnable(learnable_drc, (T,))