Merge pull request #76 from sp-nitech/drc
Add drc
takenori-y committed Jul 3, 2024
2 parents 87195b9 + 4803739 commit 4471f7d
Showing 8 changed files with 334 additions and 0 deletions.
56 changes: 56 additions & 0 deletions diffsptk/functional.py
@@ -364,6 +364,62 @@ def dfs(x, b=None, a=None):
return nn.InfiniteImpulseResponseDigitalFilter._func(x, b=b, a=a)


def drc(
x,
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain=0,
abs_max=1,
):
"""Apply dynamic range compression.
Parameters
----------
x : Tensor [shape=(..., T)]
Input signal.
threshold : float <= 0
Threshold in dB.
ratio : float > 1
Input/output ratio.
attack_time : float > 0
Attack time in msec.
release_time : float > 0
Release time in msec.
sample_rate : int >= 1
Sample rate in Hz.
makeup_gain : float >= 0
Make-up gain in dB.
abs_max : float > 0
Absolute maximum value of input.

Returns
-------
out : Tensor [shape=(..., T)]
Compressed signal.
"""
return nn.DynamicRangeCompression._func(
x,
threshold=threshold,
ratio=ratio,
attack_time=attack_time,
release_time=release_time,
sample_rate=sample_rate,
makeup_gain=makeup_gain,
abs_max=abs_max,
)
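# A quick usage sketch of the new functional wrapper (illustrative values only;
# the positional order follows the signature above):
#   >>> x = torch.randn(16000)
#   >>> y = diffsptk.functional.drc(x, -20, 4, 10, 100, 16000)
#   >>> y.shape
#   torch.Size([16000])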


def entropy(p, out_format="nat"):
"""Calculate entropy.
5 changes: 5 additions & 0 deletions diffsptk/misc/utils.py
@@ -82,6 +82,11 @@ def to(x, dtype=None):
return x.to(dtype=dtype)


def to_2d(x):
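# Collapse all leading dimensions into a single batch axis, e.g. (2, 3, 100) -> (6, 100);
# to_3d below keeps a singleton channel axis instead.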
y = x.view(-1, x.size(-1))
return y


def to_3d(x):
y = x.view(-1, 1, x.size(-1))
return y
2 changes: 2 additions & 0 deletions diffsptk/modules/__init__.py
@@ -19,6 +19,8 @@
from .df2 import SecondOrderDigitalFilter
from .dfs import InfiniteImpulseResponseDigitalFilter
from .dfs import InfiniteImpulseResponseDigitalFilter as IIR
from .drc import DynamicRangeCompression
from .drc import DynamicRangeCompression as DRC
from .entropy import Entropy
from .excite import ExcitationGeneration
from .fbank import MelFilterBankAnalysis
183 changes: 183 additions & 0 deletions diffsptk/modules/drc.py
@@ -0,0 +1,183 @@
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #

import numpy as np
import torch
from torch import nn
import torchcomp

from ..misc.utils import to_2d


class DynamicRangeCompression(nn.Module):
"""See `this page <https://sp-nitech.github.io/sptk/latest/main/drc.html>`_
for details.

Parameters
----------
threshold : float <= 0
Threshold in dB.
ratio : float > 1
Input/output ratio.
attack_time : float > 0
Attack time in msec.
release_time : float > 0
Release time in msec.
sample_rate : int >= 1
Sample rate in Hz.
makeup_gain : float >= 0
Make-up gain in dB.
abs_max : float > 0
Absolute maximum value of input.
learnable : bool
Whether to make the DRC parameters learnable.

References
----------
.. [1] C.-Y. Yu et al., "Differentiable all-pole filters for time-varying audio
systems," *Proceedings of DAFx*, 2024.
"""

def __init__(
self,
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain=0,
abs_max=1,
learnable=False,
):
super().__init__()

assert threshold <= 0
assert 1 < ratio
assert 0 < attack_time
assert 0 < release_time
assert 1 <= sample_rate
assert 0 <= makeup_gain
assert 0 < abs_max

self.abs_max = abs_max
params = self._precompute(
threshold, ratio, attack_time, release_time, sample_rate, makeup_gain
)
if learnable:
self.params = nn.Parameter(params)
else:
self.register_buffer("params", params)

def forward(self, x):
"""Perform dynamic range compression.
Parameters
----------
x : Tensor [shape=(..., T)]
Input signal.

Returns
-------
out : Tensor [shape=(..., T)]
Compressed signal.

Examples
--------
>>> x = torch.randn(16000)
>>> x.abs().max()
tensor(4.2224)
>>> drc = diffsptk.DynamicRangeCompression(-20, 4, 10, 100, 16000)
>>> y = drc(x)
>>> y.abs().max()
tensor(2.5779)
"""
return self._forward(x, self.abs_max, self.params)

@staticmethod
def _forward(x, abs_max, params):
eps = 1e-10

y = to_2d(x)
y_abs = y.abs() / abs_max + eps

g = torchcomp.compexp_gain(
y_abs,
params[0],
params[1],
-1000,  # Expander threshold (far below any input level, so expansion never engages)
eps,  # Expander ratio (irrelevant since the expander threshold is never reached)
params[2],
params[3],
)
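# Rough intuition (not the torchcomp internals): above the threshold the static
# compression curve has slope 1/ratio in the dB domain, e.g. with threshold=-20 dB
# and ratio=4 an input at -10 dB maps to -20 + (-10 - (-20)) / 4 = -17.5 dB,
# a gain of about -7.5 dB; the attack/release coefficients only smooth this gain over time.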

makeup_gain = params[-1]
y = y * g * makeup_gain
y = y.view_as(x)
return y

@staticmethod
def _func(
x,
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain,
abs_max,
):
params = DynamicRangeCompression._precompute(
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain,
dtype=x.dtype,
device=x.device,
)
return DynamicRangeCompression._forward(x, abs_max, params)

@staticmethod
def _precompute(
threshold,
ratio,
attack_time,
release_time,
sample_rate,
makeup_gain,
dtype=None,
device=None,
):
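# ln(9) (~2.2, rounded to one decimal place) scales the attack/release times
# before they are converted to smoothing coefficients by torchcomp.ms2coef below.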
c = round(np.log(9), 1)
attack_time = (
torchcomp.ms2coef(torch.tensor(attack_time * c), sample_rate).cpu().numpy()
)
release_time = (
torchcomp.ms2coef(torch.tensor(release_time * c), sample_rate).cpu().numpy()
)
makeup_gain = 10 ** (makeup_gain / 20)  # Convert make-up gain from dB to linear amplitude.
params = np.array([threshold, ratio, attack_time, release_time, makeup_gain])
return torch.tensor(params, dtype=dtype, device=device)
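Because the compression parameters are packed into a single tensor and registered as an nn.Parameter when learnable=True, they can be optimized end to end. A minimal sketch, with the optimizer and loss chosen purely for illustration:

>>> drc = diffsptk.DRC(-20, 4, 10, 100, 16000, learnable=True)
>>> optimizer = torch.optim.SGD(drc.parameters(), lr=1e-3)
>>> x = torch.randn(1, 16000)
>>> loss = drc(x).abs().mean()
>>> loss.backward()
>>> optimizer.step()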
6 changes: 6 additions & 0 deletions diffsptk/modules/poledf.py
@@ -37,6 +37,12 @@ class AllPoleDigitalFilter(nn.Module):
ignore_gain : bool
If True, perform filtering without gain.

References
----------
.. [1] C.-Y. Yu et al., "Differentiable time-varying linear prediction in the
context of end-to-end analysis-by-synthesis," *Proceedings of Interspeech*,
2024.
"""

def __init__(self, filter_order, frame_period, ignore_gain=False):
11 changes: 11 additions & 0 deletions docs/modules/drc.rst
@@ -0,0 +1,11 @@
.. _drc:

drc
---

.. autoclass:: diffsptk.DRC

.. autoclass:: diffsptk.DynamicRangeCompression
:members:

.. autofunction:: diffsptk.functional.drc
1 change: 1 addition & 0 deletions pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
"torchaudio >= 0.11.0",
"torchcrepe >= 0.0.21",
"torchlpc >= 0.2.0",
"torchcomp >= 0.1.1",
"vector-quantize-pytorch >= 0.8.0, <= 1.12.12",
]
dynamic = ["version"]
70 changes: 70 additions & 0 deletions tests/test_drc.py
@@ -0,0 +1,70 @@
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #

import numpy as np
import pytest

import diffsptk
import tests.utils as U


@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("module", [False, True])
def test_compatibility(
device,
module,
threshold=-40,
ratio=2,
attack_time=50,
release_time=20,
sr=16000,
gain=0,
T=20,
):
drc = U.choice(
module,
diffsptk.DRC,
diffsptk.functional.drc,
{},
{
"threshold": threshold,
"ratio": ratio,
"attack_time": attack_time,
"release_time": release_time,
"sample_rate": sr,
"makeup_gain": gain,
},
)

U.check_compatibility(
device,
drc,
[],
"x2x +sd tools/SPTK/asset/data.short | sopr -d 32768",
(
f"drc -v 1 -t {threshold} -r {ratio} -A {attack_time} -R {release_time} "
f"-s {sr // 1000} -m {gain} -d 0"
),
[],
eq=lambda a, b: np.corrcoef(a, b)[0, 1] > 0.99,
)

U.check_differentiability(device, drc, [T])


def test_learnable(T=20):
drc = diffsptk.DRC(-20, 2, 50, 50, 16000, learnable=True)
U.check_learnable(drc, (T,))
