uberduck-ai · sjkoelle · Aug 29, 2023 · Aug 29, 2023 · Aug 29, 2023 · Aug 29, 2023
diff --git a/analytics/tests/conftest.py b/analytics/tests/conftest.py
@@ -100,3 +100,45 @@ def lj_trainer(lj_speech_tacotron2_file):
     trainer = Tacotron2Trainer(hparams, rank=0, world_size=1)
 
     return trainer
+
+
+from scipy.io.wavfile import read
+
+
+@pytest.fixture()
+def raw_audio_files(raw_audio_file_paths):
+    """Call ``read`` on every path in ``raw_audio_file_paths``, keeping order."""
+    return [read(wav_path) for wav_path in raw_audio_file_paths]
+
+
+@pytest.fixture()
+def resampled_normalized_audio_files(resampled_normalized_audio_file_paths):
+    """Call ``read`` on every resampled/normalized audio path.
+
+    Returns a list with one ``read`` result per path, in input order.
+    """
+    return list(map(read, resampled_normalized_audio_file_paths))
+
+
+@pytest.fixture()
+def radtts_spectrograms(radtts_spectrogram_paths):
+    """Load every path in ``radtts_spectrogram_paths`` with ``torch.load``.
+
+    Returns a list with one loaded object per path, in input order.
+    """
+    return [torch.load(spec_path) for spec_path in radtts_spectrogram_paths]
+
+
+@pytest.fixture()
+def pyin_f0s(pyin_f0_paths):
+    """Load every path in ``pyin_f0_paths`` with ``torch.load``, keeping order."""
+    return list(map(torch.load, pyin_f0_paths))
+
+
+@pytest.fixture()
+def audio_embeddings(audio_embeddings_paths):
+    """Load every path in ``audio_embeddings_paths`` with ``torch.load``.
+
+    Returns a list with one loaded object per path, in input order.
+    """
+    return [torch.load(emb_path) for emb_path in audio_embeddings_paths]
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0001/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0001/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0001/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0001/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0001/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0001/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0002/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0002/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0002/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0002/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0002/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0002/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0003/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0003/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0003/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0003/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0003/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0003/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0004/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0004/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0004/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0004/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0004/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0004/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0005/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0005/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0005/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0005/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0005/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0005/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0006/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0006/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0006/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0006/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0006/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0006/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0007/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0007/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0007/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0007/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0007/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0007/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0008/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0008/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0008/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0008/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0008/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0008/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0009/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0009/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0009/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0009/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0009/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0009/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0010/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0010/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0010/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0010/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0010/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0010/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0011/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0011/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0011/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0011/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0011/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0011/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0012/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0012/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0012/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0012/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0012/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0012/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0013/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0013/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0013/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0013/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0013/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0013/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0014/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0014/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0014/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0014/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0014/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0014/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0015/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0015/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0015/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0015/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0015/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0015/spectrogram.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0016/f0.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0016/f0.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0016/resampled_normalized.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0016/resampled_normalized.pt
diff --git a/analytics/tests/fixtures/ljtest/processed/LJ001-0016/spectrogram.pt b/analytics/tests/fixtures/ljtest/processed/LJ001-0016/spectrogram.pt
diff --git a/analytics/tests/tests/test_preprocess.py b/analytics/tests/tests/test_preprocess.py
@@ -0,0 +1,26 @@
+# TODO (Sam): use get to replace get_mels
+from uberduck_ml_dev.data.get import get_mels
+from uberduck_ml_dev.data.data import RADTTS_DEFAULTS as data_config
+import os
+import torch
+
+
+class TestGetMels:
+    def test_compute_mels_radtts(
+        self,
+        resampled_normalized_path_list,
+        spectrogram_path_list,
+        target_spectrogram_path_list,
+    ):
+        """Mels written by get_mels must exist and equal the stored targets."""
+        # Argument order mirrors the get_mels call in the fixtures notebook.
+        get_mels(resampled_normalized_path_list, data_config, spectrogram_path_list)
+        for sp, tsp in zip(spectrogram_path_list, target_spectrogram_path_list):
+            assert os.path.exists(sp)
+            assert torch.equal(torch.load(sp), torch.load(tsp))
+
+    def test_compute_mels_diffsinger(self, resampled_normalized_path_list,
+        spectrogram_path_list,
+        target_spectrogram_path_list):
+        import pytest  # local import: this module has no top-level pytest import
+        pytest.skip("diffsinger mel computation test is not implemented yet")
diff --git a/tutorials/preprocessing_fixtures_082923.ipynb b/tutorials/preprocessing_fixtures_082923.ipynb
@@ -0,0 +1,138 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "25641f60",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import os\n",
+    "from uberduck_ml_dev.data.get import get_mels, get_pitches\n",
+    "from uberduck_ml_dev.data.data import RADTTS_DEFAULTS as data_config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "37e90cae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_folder = '/usr/src/app/uberduck_ml_dev/analytics/tests/fixtures/ljtest/processed/'\n",
+    "source_folder = '/usr/src/app/uberduck_ml_dev/analytics/tests/fixtures/ljtest/wavs/'\n",
+    "paths = os.listdir(source_folder)\n",
+    "\n",
+    "local_path_list = [os.path.join(source_folder, path) for path in paths]\n",
+    "folder_path_list = [os.path.join(target_folder, path.split('.wav')[0]) for path in paths]\n",
+    "resampled_normalized_path_list = [os.path.join(folder_path, 'resampled_normalized.pt') for folder_path in folder_path_list]\n",
+    "spectrogram_path_list = [os.path.join(target_folder, folder_path, 'spectrogram.pt') for folder_path in folder_path_list]\n",
+    "\n",
+    "for folder_path in folder_path_list:\n",
+    "    os.makedirs(folder_path, exist_ok = True)\n",
+    "\n",
+    "def load_resampled_floatnorm_audio(source_path):\n",
+    "    rate, data = read(source_path)\n",
+    "    if len(data.shape) > 1:\n",
+    "        rez_data = data[:,0] / np.abs(data[:,0]).max()\n",
+    "    else:\n",
+    "        rez_data = data / np.abs(data).max()\n",
+    "    output = librosa.resample(rez_data, orig_sr = rate, target_sr = 22050)\n",
+    "    return output\n",
+    "\n",
+    "integer_normalize_audio = lambda x : np.asarray((x / np.abs(x).max()) * (MAX_WAV_VALUE - 1), dtype = np.int16)\n",
+    "def save_audio(data, filename, rate = 22050):\n",
+    "    write(filename, rate, data) # must be in this order\n",
+    "    \n",
+    "def resample_normalize(source_path, target_path):\n",
+    "    \n",
+    "    resample_floatnorm_mono_audio = load_resampled_floatnorm_audio(source_path)\n",
+    "    resample_intnorm_audio = integer_normalize_audio(resample_floatnorm_mono_audio)\n",
+    "    save_audio(resample_intnorm_audio, target_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "adaa4f87",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.io.wavfile import read\n",
+    "\n",
+    "from uberduck_ml_dev.data.get import get\n",
+    "import librosa\n",
+    "import numpy as np\n",
+    "from scipy.io.wavfile import write\n",
+    "\n",
+    "MAX_WAV_VALUE = 32768\n",
+    "sr = 22050\n",
+    "get(resample_normalize,\n",
+    "    local_path_list,\n",
+    "    resampled_normalized_path_list,\n",
+    "    True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "57326aec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_mels(resampled_normalized_path_list, data_config, spectrogram_path_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "a65b6b73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_pitches(resampled_normalized_path_list, data_config, folder_path_list, method = 'radtts')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0fa0adb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/uberduck_ml_dev/data/data.py b/uberduck_ml_dev/data/data.py
@@ -926,7 +926,11 @@ def _get_data(
             #     )  # undoes normalization
             #     data = (data * MAX_WAV_VALUE) / (np.abs(data).max() * 2)
             # else:
+
             rate, data = read(audiopath)
+            if data.dtype == np.int16:
+                # Scale by the peak magnitude; cast first because abs(-32768) overflows in int16.
+                data = data / np.abs(data.astype(np.float64)).max()
             if self.method == "radtts":
                 pitch = get_f0_pvoiced(
                     data,

diff --git a/uberduck_ml_dev/data/get.py b/uberduck_ml_dev/data/get.py
@@ -5,7 +5,7 @@
 import torch
 import os
 
-from ..data.data import DataMel, DataPitch, DataEmbedding
+from ..data.data import DataMel, DataPitch
 from ..data.collate import CollateBlank
 from ..data.processor import Processor
 
@@ -18,9 +18,6 @@ def get_parallel_torch(data):
         pass
 
 
-from typing import Callable, List
-
-
 # TODO (Sam): use get_parallel_torch to reduce boilerplate.
 # NOTE (Sam): assumes data is in a directory structure like:
 # /tmp/{uuid}/resampled_normalized.wav
@@ -101,16 +98,11 @@ def get_hubert_embeddings(
 
 
 def get(
-    processing_function,
-    saving_function,
-    loading_function,
+    function_,
     source_paths,
     target_paths,
     recompute,
 ):
-    function_ = lambda source_path, target_path: saving_function(
-        processing_function(loading_function(source_path)), target_path
-    )
     processor = Processor(
         function_=function_,
         source_paths=source_paths,

diff --git a/uberduck_ml_dev/data/processor.py b/uberduck_ml_dev/data/processor.py
@@ -20,7 +20,7 @@ def __init__(
 
     def _get_data(self, source_path, target_path):
         # NOTE (Sam): we need caching to debug training issues in dev and for speed!
-        # NOTE (Sam): won't catch issues with recomputation using different parameters but name name
+        # NOTE (Sam): won't catch issues with recomputation using different parameters but same name
         # TODO (Sam): add hashing
         if self.recompute or not os.path.exists(target_path):
             self.function_(source_path, target_path)

diff --git a/uberduck_ml_dev/models/hifigan.py b/uberduck_ml_dev/models/hifigan.py
@@ -119,6 +119,7 @@ def __init__(
             self.conv_pre = Conv1d(
                 initial_channel, upsample_initial_channel, 7, 1, padding=3
             )
+
         if use_noise_convs:
             self.noise_convs = nn.ModuleList()
             self.m_source = SourceModuleHnNSF(