Merge pull request #25 from ktonal/develop

v0.2.3
ktonal · Jun 16, 2021 · 606cabe · 606cabe
2 parents b8d6713 + e24b1cf
commit 606cabe
Show file tree

Hide file tree

Showing 12 changed files with 216 additions and 23 deletions.
diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml
@@ -19,8 +19,11 @@ jobs:
         python -m pip install --quiet --upgrade pip
         pip install --quiet -r requirements.txt
         pip install --quiet hatch
+        pip list | grep torch
 
     - name: Test
+      env:
+        CUDA_VISIBLE_DEVICES: ""
       run: hatch test -nd
 
     - name: Build dist and Publish on TestPypi

diff --git a/mimikit/__init__.py b/mimikit/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.2.2'
+__version__ = '0.2.3'
 
 from . import audios
 from . import connectors

diff --git a/mimikit/data/datamodule.py b/mimikit/data/datamodule.py
@@ -8,7 +8,8 @@
 from typing import Iterable, Optional, Callable
 import re
 from random import randint
-from torch._six import container_abcs, string_classes
+from torch._six import string_classes
+import collections
 
 from . import Database
 
@@ -116,13 +117,12 @@ def __call__(self, feat_data, item):
         sliced = super(AsFramedSlice, self).__call__(feat_data, item)
         if self.as_strided:
             if isinstance(sliced, np.ndarray):
-                itemsize = sliced.dtype.itemsize
-                as_strided = lambda arr: np_as_strided(arr,
-                                                       shape=(self.length, self.frame_size),
-                                                       strides=(itemsize, itemsize))
+                as_strided = lambda tensor: torch.as_strided(torch.from_numpy(tensor),
+                                                             size=(self.length-self.frame_size+1, self.frame_size),
+                                                             stride=(1, 1))
             else:
                 as_strided = lambda tensor: torch.as_strided(tensor,
-                                                             size=(self.length, self.frame_size),
+                                                             size=(self.length-self.frame_size+1, self.frame_size),
                                                              stride=(1, 1))
 
             with torch.no_grad():
@@ -157,11 +157,11 @@ def process_batch(batch, test=lambda x: False, func=lambda x: x):
     elem_type = type(batch)
     if test(batch):
         return func(batch)
-    elif isinstance(batch, container_abcs.Mapping):
+    elif isinstance(batch, collections.abc.Mapping):
         return {key: process_batch(batch[key], test, func) for key in batch}
     elif isinstance(batch, tuple) and hasattr(batch, '_fields'):  # namedtuple
         return elem_type(*(process_batch(d, test, func) for d in batch))
-    elif isinstance(batch, container_abcs.Sequence) and not isinstance(batch, string_classes):
+    elif isinstance(batch, collections.abc.Sequence) and not isinstance(batch, string_classes):
         return [process_batch(d, test, func) for d in batch]
     else:
         return batch

diff --git a/mimikit/models/freqnet.py b/mimikit/models/freqnet.py
@@ -110,7 +110,7 @@ def demo():
 
     # DATA
 
-    # list of files or directories to use as data
+    # list of files or directories to use as data ("./" is the cwd of the notebook)
     sources = ['./data']
     # audio sample rate
     sr = 22050
@@ -166,6 +166,8 @@ def demo():
     db_path = 'freqnet-demo.h5'
     print("collecting data...")
     db = mmk.Database.create(db_path, sources, schema)
+    if not len(db.fft.files):
+        raise ValueError("Empty db. No audio files were found")
     print("successfully created the db.")
 
     """### create network and train"""

diff --git a/mimikit/models/parts/sequence_model.py b/mimikit/models/parts/sequence_model.py
@@ -108,7 +108,7 @@ def on_epoch_end(self, trainer: pl.Trainer, model: SequenceModel):
             if self.plot_audios:
                 plt.figure(figsize=(20, 2))
                 plt.plot(y)
-                plt.show()
+                plt.show(block=False)
             if self.play_audios:
                 audio(y, sr=sr, hop_length=hop_length)
 

diff --git a/mimikit/models/s2s_lstm.py b/mimikit/models/s2s_lstm.py
@@ -77,7 +77,7 @@ def demo():
 
     # DATA
 
-    # list of files or directories to use as data
+    # list of files or directories to use as data ("./" is the cwd of the notebook)
     sources = ['./data']
     # audio sample rate
     sr = 22050
@@ -128,6 +128,8 @@ def demo():
     db_path = 's2s-demo.h5'
     print("collecting data...")
     db = mmk.Database.create(db_path, sources, schema)
+    if not len(db.fft.files):
+        raise ValueError("Empty db. No audio files were found")
     print("successfully created the db.")
 
     """### create network and train"""

diff --git a/mimikit/models/sample_rnn.py b/mimikit/models/sample_rnn.py
@@ -38,7 +38,7 @@ def batch_signature(self, stage='fit'):
                 Input('qx', AsFramedSlice(shift, batch_seq_len, frame_size=fs,
                                           as_strided=False)))
         inputs.append(
-            Input('qx', AsFramedSlice(shifts[-1], batch_seq_len, frame_size=frame_sizes[-1],
+            Input('qx', AsFramedSlice(shifts[-1], batch_seq_len+frame_sizes[-1]-1, frame_size=frame_sizes[-1],
                                       as_strided=True)))
         targets = Target('qx', AsSlice(shift=frame_sizes[0], length=batch_seq_len))
         if stage in ('fit', 'train', 'val'):
@@ -108,7 +108,7 @@ def demo():
 
     # DATA
 
-    # list of files or directories to use as data
+    # list of files or directories to use as data ("./" is the cwd of the notebook)
     sources = ['./data']
     # audio sample rate
     sr = 16000
@@ -149,7 +149,7 @@ def demo():
     n_steps = 15 * sr
     # the sampling temperature changes outputs a lot!
     # roughly : prefer values close to 1. & hot -> noisy ; cold -> silence
-    temperature = torch.tensor([.9, .999, 1.25]).unsqueeze(1).to('cuda')
+    temperature = torch.tensor([.9, .999, 1.25]).unsqueeze(1)
 
     assert temperature.size(0) == n_examples, "number of values in temperature must be equal to n_examples"
     print("arguments are ok!")
@@ -160,6 +160,8 @@ def demo():
     db_path = 'sample-rnn-demo.h5'
     print("collecting data...")
     db = mmk.Database.create(db_path, sources, schema)
+    if not len(db.qx.files):
+        raise ValueError("Empty db. No audio files were found...")
     print("successfully created the db.")
 
     """### create network and train"""
@@ -186,7 +188,7 @@ def demo():
                               n_steps=n_steps,
                               play_audios=True,
                               plot_audios=True,
-                              temperature=temperature)
+                              temperature=temperature.to('cuda') if torch.cuda.is_available() else temperature)
 
     trainer = mmk.get_trainer(root_dir=None,
                               max_epochs=max_epochs,

diff --git a/mimikit/models/wavenet.py b/mimikit/models/wavenet.py
@@ -1,5 +1,7 @@
 import dataclasses as dtc
 import torch.nn as nn
+from itertools import accumulate
+import operator
 import torch
 
 from ..abstract.features import SegmentLabels, FilesLabels
@@ -27,9 +29,11 @@ def schema(cls, sr=22050, emphasis=0., q_levels=256,
 
         schema = {"qx": MuLawSignal(sr=sr, emphasis=emphasis, q_levels=q_levels)}
         if segment_labels:
-            schema.update({
-                'loc': SegmentLabels(input_key='qx')
-            })
+            # not yet supported...
+            pass
+            # schema.update({
+            #     'loc': SegmentLabels(input_key='qx')
+            # })
         if files_labels:
             schema.update({
                 'glob': FilesLabels(input_key='qx')
@@ -48,7 +52,7 @@ def dependant_hp(cls, db):
         )
 
     def batch_signature(self, stage='fit'):
-        inpt = Input('qx', AsSlice(shift=0, length=self.batch_seq_length))
+        inpt = [Input('qx', AsSlice(shift=0, length=self.batch_seq_length))]
         trgt = Target('qx', AsSlice(shift=self.shift,
                                     length=self.output_shape((-1, self.batch_seq_length, -1))[1]))
         # where are we conditioned?
@@ -92,3 +96,133 @@ def encode_inputs(self, inputs: torch.Tensor):
 
     def decode_outputs(self, outputs: torch.Tensor):
         return self.feature.decode(outputs)
+
+    @staticmethod
+    def rf(n_layers, kernel_size):
+        """Return the size, in samples, of the receptive field of a network.
+
+        Parameters
+        ----------
+        n_layers : sequence of int
+            number of layers of each block, e.g. ``(2, 3, 2)``.
+        kernel_size : int or tuple/list of int
+            either one kernel size shared by all the layers or one kernel size
+            per layer, in which case there must be ``sum(n_layers)`` of them.
+
+        Returns
+        -------
+        int
+            ``sum(dilation * kernel_size) - n_blocks + 1`` where the sum runs
+            over the last layer of each block.
+
+        Raises
+        ------
+        AssertionError
+            if per-layer kernel sizes are given and their number differs
+            from the total number of layers.
+        """
+        n_total = sum(n_layers)
+        # lists are accepted as well as tuples for per-layer kernel sizes
+        if isinstance(kernel_size, (tuple, list)):
+            assert n_total == len(kernel_size), "total number of layers and of kernel sizes must match"
+            k_iter = list(kernel_size)
+            # the dilation of a layer is the product of the kernel sizes of all the layers before it
+            # NOTE(review): unlike in the scalar branch below, dilations keep growing
+            # across block boundaries here - confirm this matches the network.
+            dilations = list(accumulate([1, *k_iter], operator.mul))
+        else:
+            k_iter = [kernel_size] * n_total
+            # the dilation restarts at 1 at the beginning of each block
+            dilations = [kernel_size ** i
+                         for block in n_layers for i in range(block)]
+        # accumulate(n_layers) yields the (1-based) position of the last layer of each block
+        seq = [dilations[i - 1] * k_iter[i - 1] for i in accumulate(n_layers)]
+        return sum(seq) - len(seq) + 1
+
+
+# Demo script for the WaveNet model: collects audio files into a db, builds a network,
+# trains it and periodically generates audio from random prompts.
+# NOTE(review): the triple-quoted strings below look like section titles for a
+# generated notebook, and tests/test_models.py patches the text of this function
+# with regular expressions - keep the assignments and keyword arguments on single
+# lines and avoid reformatting them.
+def demo():
+    """### import and arguments"""
+    import mimikit as mmk
+    import torch
+
+    # DATA
+
+    # list of files or directories to use as data ("./" is the cwd of the notebook)
+    sources = ['./data']
+    # audio sample rate
+    sr = 16000
+    # number of quantization levels (256 -> 8-bit)
+    q_levels = 256
+
+    # NETWORK
+
+    # the number of layers determines 'how much past' is used to predict the next future step
+    # here you can make blocks of layers by specifying a tuple of integers, e.g. (2, 3, 2)
+    n_layers = (3,)
+    # kernel_size is the size of the convolution. You can specify a single int for the whole
+    # network or one size per layer
+    # (with a tuple, there must be exactly as many kernel sizes as there are layers in total)
+    kernel_size = (16, 8, 2)
+    # how many parameters per convolution layer
+    gate_dim = 256
+    # next arg can take 3 values : -1 -> input & output are summed at the end of the input,
+    # 1 -> at the beginning, 0 -> they are not summed
+    accum_outputs = 0
+    # the next 2 args can take integers or None. Integers add skips and/or residuals layers of this size.
+    # None adds no layers
+    skip_dim = None
+    residuals_dim = None
+
+    # OPTIMIZATION
+
+    # how many epochs should we train for
+    max_epochs = 50
+    # how many examples are used per training step
+    batch_size = 16
+    # the learning rate
+    max_lr = 5e-4
+    # betas control how fast the network changes its 'learning course'.
+    # generally, betas should be close but smaller than 1. and be balanced with the batch_size :
+    # the smaller the batch, the higher the betas 'could be'.
+    betas = (0.9, 0.93)
+    # one wavenet epoch can be very long, so as to monitor the net's progress,
+    # we limit the number of batches per epoch
+    limit_train_batches = 1000
+
+    # MONITORING
+
+    # how often should the network generate during training
+    every_n_epochs = 4
+    # how many examples from random prompts should be generated
+    n_examples = 3
+    # how many steps (1 step = 1 sample) should be generated
+    n_steps = 5 * sr
+    # the sampling temperature changes outputs a lot!
+    # roughly : prefer values close to 1. & hot -> noisy ; cold -> silence
+    temperature = torch.tensor([.9, .999, 1.25]).unsqueeze(1)
+
+    assert temperature.size(0) == n_examples, "number of values in temperature must be equal to n_examples"
+    rf = mmk.WaveNet.rf(n_layers, kernel_size)
+    print("arguments are ok! The network will have a receptive field of size :", rf, "samples")
+
+    """### create the data"""
+    schema = mmk.WaveNet.schema(sr, 0., q_levels)
+
+    db_path = 'wavenet-demo.h5'
+    print("collecting data...")
+    db = mmk.Database.create(db_path, sources, schema)
+    # fail early, with an explicit message, when the sources contained no audio
+    if not len(db.qx.files):
+        raise ValueError("Empty db. No audio files were found...")
+    print("successfully created the db.")
+
+    """### create network and train"""
+    net = mmk.WaveNet(
+        **mmk.WaveNet.dependant_hp(db),
+        kernel_size=kernel_size,
+        gate_dim=gate_dim,
+        accum_outputs=accum_outputs,
+        residuals_dim=residuals_dim,
+        skip_dim=skip_dim,
+        n_layers=n_layers,
+        batch_size=batch_size,
+        # training sequences are longer than the receptive field :
+        # twice as long for small fields (<= 128), a quarter longer otherwise
+        batch_seq_length=rf * 2 if rf <= 128 else rf + rf // 4,
+        max_lr=max_lr,
+        betas=betas,
+        div_factor=5,
+    )
+    print(net.hparams)
+
+    dm = mmk.DataModule(net, db,
+                        splits=tuple(),
+                        in_mem_data=True)
+
+    # the temperatures are moved to the gpu only if there is one
+    cb = mmk.GenerateCallback(every_n_epochs, indices=[None] * n_examples,
+                              n_steps=n_steps,
+                              play_audios=True,
+                              plot_audios=True,
+                              temperature=temperature.to('cuda') if torch.cuda.is_available() else temperature)
+
+    trainer = mmk.get_trainer(root_dir=None,
+                              max_epochs=max_epochs,
+                              callbacks=[cb],
+                              limit_train_batches=limit_train_batches,
+                              checkpoint_callback=False)
+    print("here we go!")
+    trainer.fit(net, datamodule=dm)
+
+    """----------------------------"""
+
+
+# lets the demo be started by executing this module directly as a script
+if __name__ == '__main__':
+    demo()
diff --git a/mimikit/networks/wavenet.py b/mimikit/networks/wavenet.py
@@ -254,10 +254,13 @@ def predict_(outpt, temp):
         if temp is None:
             return nn.Softmax(dim=-1)(outpt).argmax(dim=-1, keepdims=True)
         else:
-            return torch.multinomial(nn.Softmax(dim=-1)(outpt / temp), 1)
+            return torch.multinomial(nn.Softmax(dim=-1)(outpt / temp.to(outpt)), 1)
 
     def generate_(self, prompt, n_steps, temperature=0.5, benchmark=False):
-        return self.generate_slow(prompt, n_steps, temperature)
+        if self.receptive_field <= 64:
+            return self.generate_slow(prompt, n_steps, temperature)
+        # prompt is a list but generate fast only accepts one tensor prompt...
+        return self.generate_fast(prompt[0], n_steps, temperature)
 
     def generate_slow(self, prompt, n_steps, temperature=0.5):
 

diff --git a/requirements.txt b/requirements.txt
@@ -4,7 +4,7 @@ librosa==0.8
 torchaudio>=0.8.0
 h5py==2.10.0
 tables>=3.6
-torch>=1.6.0
+torch>=1.6.0,<1.9.0
 pytorch-lightning==1.2.4
 tqdm==4.42.1
 matplotlib

diff --git a/setup.py b/setup.py
@@ -58,6 +58,7 @@
             'samplernn=mimikit.models.sample_rnn:demo',
             'freqnet=mimikit.models.freqnet:demo',
             'seq2seq=mimikit.models.s2s_lstm:demo',
+            'wavenet=mimikit.models.wavenet:demo',
         ]}
 
 }

diff --git a/tests/test_models.py b/tests/test_models.py
@@ -0,0 +1,46 @@
+import pytest
+from inspect import getsource
+import re
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import soundfile
+
+
+from mimikit.models.freqnet import demo as fnet
+from mimikit.models.sample_rnn import demo as srnn
+from mimikit.models.s2s_lstm import demo as s2s
+from mimikit.models.wavenet import demo as wnet
+
+
+@pytest.fixture
+def example_root(tmp_path):
+    """Build a temporary ``models/data`` tree holding one wav file of random noise.
+
+    Returns the path of the ``models`` directory as a string.
+    """
+    sample_rate = 22050
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+    data_dir = models_dir / "data"
+    data_dir.mkdir()
+    # we need at least 8 sec of audio... (10 sec of noise centered around 0 are written)
+    noise = np.random.rand(sample_rate * 10) - .5
+    wav_path = data_dir / "example.wav"
+    soundfile.write(str(wav_path), noise, sample_rate, 'PCM_24', format="WAV")
+    return str(models_dir)
+
+
+@pytest.mark.parametrize("model", [fnet, srnn, s2s, wnet])
+def test_models(example_root, monkeypatch, model):
+    """Smoke-test a demo: shrink its settings by patching its source, then run it.
+
+    The source of ``model`` is rewritten so that the demo reads its data from
+    ``example_root``, trains for one short epoch and generates only a few steps.
+    The test passes if the patched demo runs without raising an exception.
+    """
+    # make the demos take their cpu code path, even on machines with a gpu
+    monkeypatch.setattr(torch.cuda, "is_available", lambda: False)
+
+    db_file = f"{example_root}/data.h5"
+    # Paths are embedded with ``repr`` so that they remain valid string literals
+    # whatever characters they contain (backslashes, quotes...).
+    # The order matters : the original ``limit_train_batches`` argument has to be
+    # removed before a new one is added next to ``max_epochs``.
+    patches = [
+        (r"db_path = '.*\.h5'\n", f"db_path = {db_file!r}\n"),
+        (r"sources =.*\n", f"sources = [{example_root!r}]\n"),
+        (r"every_n_epochs =.*\n", "every_n_epochs=1\n"),
+        (r"n_steps =.*\n", "n_steps = 10\n"),
+        (r"limit_train_batches=.*\n", ""),
+        (r"max_epochs=.*,\n", "max_epochs=1,limit_train_batches=10,\n"),
+        (r"root_dir=.*\n", f"root_dir ={example_root!r},\n"),
+    ]
+    src = getsource(model)
+    for pattern, replacement in patches:
+        # a callable replacement is inserted verbatim : re.sub does not process
+        # backslash escapes or group references in its return value
+        src = re.sub(pattern, lambda _, text=replacement: text, src)
+
+    # Execute the patched source in an explicit namespace : reading names created
+    # by ``exec`` back through ``locals()`` does not work inside a function on
+    # Python 3.13+ (PEP 667). The namespace starts as a copy of the module globals
+    # so that the demo resolves names exactly as it did before.
+    namespace = dict(globals())
+    exec(src, namespace)
+    try:
+        # we only need that the demo runs without raising exceptions
+        namespace["demo"]()
+    finally:
+        # never leak figures into the next parametrized run
+        plt.close('all')