Skip to content

Tests for preprocessing #177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions analytics/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,45 @@ def lj_trainer(lj_speech_tacotron2_file):
trainer = Tacotron2Trainer(hparams, rank=0, world_size=1)

return trainer


from scipy.io.wavfile import read


@pytest.fixture()
def raw_audio_files(raw_audio_file_paths):
    """Read every raw audio fixture with ``scipy.io.wavfile.read``.

    Returns a list holding one ``read`` result per path, in input order.
    """
    loaded = []
    for wav_path in raw_audio_file_paths:
        loaded.append(read(wav_path))
    return loaded


@pytest.fixture()
def resampled_normalized_audio_files(resampled_normalized_audio_file_paths):
    """Read every resampled/normalized audio fixture with ``scipy.io.wavfile.read``.

    Returns a list holding one ``read`` result per path, in input order.
    """
    return list(map(read, resampled_normalized_audio_file_paths))


@pytest.fixture()
def radtts_spectrograms(radtts_spectrogram_paths):
    """Load each stored RADTTS spectrogram fixture with ``torch.load``.

    Returns a list of the loaded objects, in input order.
    """
    spectrograms = []
    for spectrogram_path in radtts_spectrogram_paths:
        spectrograms.append(torch.load(spectrogram_path))
    return spectrograms


@pytest.fixture()
def pyin_f0s(pyin_f0_paths):
    """Load each stored pYIN f0 fixture with ``torch.load``.

    Returns a list of the loaded objects, in input order.
    """
    return [torch.load(f0_path) for f0_path in pyin_f0_paths]


@pytest.fixture()
def audio_embeddings(audio_embeddings_paths):
    """Load each stored audio-embedding fixture with ``torch.load``.

    Returns a list of the loaded objects, in input order.
    """
    return list(map(torch.load, audio_embeddings_paths))
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
26 changes: 26 additions & 0 deletions analytics/tests/tests/test_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# TODO (Sam): use get to replace get_mels
from uberduck_ml_dev.data.get import get_mels
from uberduck_ml_dev.data.data import RADTTS_DEFAULTS as data_config
import os
import torch


class TestGetMels:
    """Regression tests that compare computed mel spectrograms to stored targets."""

    def test_compute_mels_radtts(
        self,
        resampled_normalized_path_list,
        spectrogram_path_list,
        target_spectrogram_path_list,
    ):
        """Compute mels for the fixture audio and compare them to the targets.

        Args:
            resampled_normalized_path_list: paths of the input audio files.
            spectrogram_path_list: paths the computed spectrograms are written to.
            target_spectrogram_path_list: paths of the expected spectrograms.
        """
        # ``get`` is not imported in this module, whereas ``get_mels`` and
        # ``data_config`` are.
        # NOTE(review): argument order is presumably
        # (source paths, data config, target paths) -- confirm against get_mels.
        get_mels(resampled_normalized_path_list, data_config, spectrogram_path_list)

        # zip() stops at the shorter input, so make a length mismatch explicit
        # instead of silently skipping comparisons.
        assert len(spectrogram_path_list) == len(target_spectrogram_path_list)

        for sp, tsp in zip(spectrogram_path_list, target_spectrogram_path_list):
            assert os.path.exists(sp)
            # ``==`` on tensors is elementwise and cannot be used directly in an
            # assert for multi-element tensors; torch.equal returns one bool.
            assert torch.equal(torch.load(sp), torch.load(tsp))

    def test_compute_mels_diffsinger(
        self,
        resampled_normalized_path_list,
        spectrogram_path_list,
        target_spectrogram_path_list,
    ):
        """Placeholder for the DiffSinger mel comparison (not implemented yet)."""
        # Local stdlib import: raising SkipTest reports the test as skipped
        # rather than letting an empty body pass vacuously.
        import unittest

        # TODO (Sam): implement once DiffSinger mel targets are available.
        raise unittest.SkipTest("DiffSinger mel computation test not implemented")
138 changes: 138 additions & 0 deletions tutorials/preprocessing_fixtures_082923.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "25641f60",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import os\n",
"from uberduck_ml_dev.data.get import get_mels, get_pitches\n",
"from uberduck_ml_dev.data.data import RADTTS_DEFAULTS as data_config"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "37e90cae",
"metadata": {},
"outputs": [],
"source": [
"target_folder = '/usr/src/app/uberduck_ml_dev/analytics/tests/fixtures/ljtest/processed/'\n",
"source_folder = '/usr/src/app/uberduck_ml_dev/analytics/tests/fixtures/ljtest/wavs/'\n",
"paths = os.listdir(source_folder)\n",
"\n",
"local_path_list = [os.path.join(source_folder, path) for path in paths]\n",
"folder_path_list = [os.path.join(target_folder, path.split('.wav')[0]) for path in paths]\n",
"resampled_normalized_path_list = [os.path.join(folder_path, 'resampled_normalized.pt') for folder_path in folder_path_list]\n",
"spectrogram_path_list = [os.path.join(target_folder, folder_path, 'spectrogram.pt') for folder_path in folder_path_list]\n",
"\n",
"for folder_path in folder_path_list:\n",
" os.makedirs(folder_path, exist_ok = True)\n",
"\n",
"def load_resampled_floatnorm_audio(source_path):\n",
" rate, data = read(source_path)\n",
" if len(data.shape) > 1:\n",
" rez_data = data[:,0] / np.abs(data[:,0]).max()\n",
" else:\n",
" rez_data = data / np.abs(data).max()\n",
" output = librosa.resample(rez_data, orig_sr = rate, target_sr = 22050)\n",
" return output\n",
"\n",
"integer_normalize_audio = lambda x : np.asarray((x / np.abs(x).max()) * (MAX_WAV_VALUE - 1), dtype = np.int16)\n",
"def save_audio(data, filename, rate = 22050):\n",
" write(filename, rate, data) # must be in this order\n",
" \n",
"def resample_normalize(source_path, target_path):\n",
" \n",
" resample_floatnorm_mono_audio = load_resampled_floatnorm_audio(source_path)\n",
" resample_intnorm_audio = integer_normalize_audio(resample_floatnorm_mono_audio)\n",
" save_audio(resample_intnorm_audio, target_path)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "adaa4f87",
"metadata": {},
"outputs": [],
"source": [
"from scipy.io.wavfile import read\n",
"\n",
"from uberduck_ml_dev.data.get import get\n",
"import librosa\n",
"import numpy as np\n",
"from scipy.io.wavfile import write\n",
"\n",
"MAX_WAV_VALUE = 32768\n",
"sr = 22050\n",
"get(resample_normalize,\n",
" local_path_list,\n",
" resampled_normalized_path_list,\n",
" True)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "57326aec",
"metadata": {},
"outputs": [],
"source": [
"get_mels(resampled_normalized_path_list, data_config, spectrogram_path_list)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "a65b6b73",
"metadata": {},
"outputs": [],
"source": [
"get_pitches(resampled_normalized_path_list, data_config, folder_path_list, method = 'radtts')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0fa0adb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
3 changes: 3 additions & 0 deletions uberduck_ml_dev/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,10 @@ def _get_data(
# ) # undoes normalization
# data = (data * MAX_WAV_VALUE) / (np.abs(data).max() * 2)
# else:

rate, data = read(audiopath)
if data.dtype == np.int16:
data = data / np.abs(data.max())
if self.method == "radtts":
pitch = get_f0_pvoiced(
data,
Expand Down
12 changes: 2 additions & 10 deletions uberduck_ml_dev/data/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
import os

from ..data.data import DataMel, DataPitch, DataEmbedding
from ..data.data import DataMel, DataPitch
from ..data.collate import CollateBlank
from ..data.processor import Processor

Expand All @@ -18,9 +18,6 @@ def get_parallel_torch(data):
pass


from typing import Callable, List


# TODO (Sam): use get_parallel_torch to reduce boilerplate.
# NOTE (Sam): assumes data is in a directory structure like:
# /tmp/{uuid}/resampled_normalized.wav
Expand Down Expand Up @@ -101,16 +98,11 @@ def get_hubert_embeddings(


def get(
processing_function,
saving_function,
loading_function,
function_,
source_paths,
target_paths,
recompute,
):
function_ = lambda source_path, target_path: saving_function(
processing_function(loading_function(source_path)), target_path
)
processor = Processor(
function_=function_,
source_paths=source_paths,
Expand Down
2 changes: 1 addition & 1 deletion uberduck_ml_dev/data/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(

def _get_data(self, source_path, target_path):
# NOTE (Sam): we need caching to debug training issues in dev and for speed!
# NOTE (Sam): won't catch issues with recomputation using different parameters but name name
# NOTE (Sam): won't catch issues with recomputation using different parameters but same name
# TODO (Sam): add hashing
if self.recompute or not os.path.exists(target_path):
self.function_(source_path, target_path)
Expand Down
2 changes: 2 additions & 0 deletions uberduck_ml_dev/models/hifigan.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ def __init__(
self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3
)

print(use_noise_convs, "asdfasdfasdf")
if use_noise_convs:
self.noise_convs = nn.ModuleList()
self.m_source = SourceModuleHnNSF(
Expand Down