Skip to content

Commit

Permalink
Merge pull request #25 from ktonal/develop
Browse files Browse the repository at this point in the history
v0.2.3
  • Loading branch information
antoinedaurat authored Jun 16, 2021
2 parents b8d6713 + e24b1cf commit 606cabe
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 23 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ jobs:
python -m pip install --quiet --upgrade pip
pip install --quiet -r requirements.txt
pip install --quiet hatch
pip list | grep torch
- name: Test
env:
CUDA_VISIBLE_DEVICES: ""
run: hatch test -nd

- name: Build dist and Publish on TestPypi
Expand Down
2 changes: 1 addition & 1 deletion mimikit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.2.2'
__version__ = '0.2.3'

from . import audios
from . import connectors
Expand Down
16 changes: 8 additions & 8 deletions mimikit/data/datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from typing import Iterable, Optional, Callable
import re
from random import randint
from torch._six import container_abcs, string_classes
from torch._six import string_classes
import collections

from . import Database

Expand Down Expand Up @@ -116,13 +117,12 @@ def __call__(self, feat_data, item):
sliced = super(AsFramedSlice, self).__call__(feat_data, item)
if self.as_strided:
if isinstance(sliced, np.ndarray):
itemsize = sliced.dtype.itemsize
as_strided = lambda arr: np_as_strided(arr,
shape=(self.length, self.frame_size),
strides=(itemsize, itemsize))
as_strided = lambda tensor: torch.as_strided(torch.from_numpy(tensor),
size=(self.length-self.frame_size+1, self.frame_size),
stride=(1, 1))
else:
as_strided = lambda tensor: torch.as_strided(tensor,
size=(self.length, self.frame_size),
size=(self.length-self.frame_size+1, self.frame_size),
stride=(1, 1))

with torch.no_grad():
Expand Down Expand Up @@ -157,11 +157,11 @@ def process_batch(batch, test=lambda x: False, func=lambda x: x):
elem_type = type(batch)
if test(batch):
return func(batch)
elif isinstance(batch, container_abcs.Mapping):
elif isinstance(batch, collections.abc.Mapping):
return {key: process_batch(batch[key], test, func) for key in batch}
elif isinstance(batch, tuple) and hasattr(batch, '_fields'): # namedtuple
return elem_type(*(process_batch(d, test, func) for d in batch))
elif isinstance(batch, container_abcs.Sequence) and not isinstance(batch, string_classes):
elif isinstance(batch, collections.abc.Sequence) and not isinstance(batch, string_classes):
return [process_batch(d, test, func) for d in batch]
else:
return batch
Expand Down
4 changes: 3 additions & 1 deletion mimikit/models/freqnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def demo():

# DATA

# list of files or directories to use as data
# list of files or directories to use as data ("./" is the cwd of the notebook)
sources = ['./data']
# audio sample rate
sr = 22050
Expand Down Expand Up @@ -166,6 +166,8 @@ def demo():
db_path = 'freqnet-demo.h5'
print("collecting data...")
db = mmk.Database.create(db_path, sources, schema)
if not len(db.fft.files):
raise ValueError("Empty db. No audio files were found")
print("successfully created the db.")

"""### create network and train"""
Expand Down
2 changes: 1 addition & 1 deletion mimikit/models/parts/sequence_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def on_epoch_end(self, trainer: pl.Trainer, model: SequenceModel):
if self.plot_audios:
plt.figure(figsize=(20, 2))
plt.plot(y)
plt.show()
plt.show(block=False)
if self.play_audios:
audio(y, sr=sr, hop_length=hop_length)

Expand Down
4 changes: 3 additions & 1 deletion mimikit/models/s2s_lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def demo():

# DATA

# list of files or directories to use as data
# list of files or directories to use as data ("./" is the cwd of the notebook)
sources = ['./data']
# audio sample rate
sr = 22050
Expand Down Expand Up @@ -128,6 +128,8 @@ def demo():
db_path = 's2s-demo.h5'
print("collecting data...")
db = mmk.Database.create(db_path, sources, schema)
if not len(db.fft.files):
raise ValueError("Empty db. No audio files were found")
print("successfully created the db.")

"""### create network and train"""
Expand Down
10 changes: 6 additions & 4 deletions mimikit/models/sample_rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def batch_signature(self, stage='fit'):
Input('qx', AsFramedSlice(shift, batch_seq_len, frame_size=fs,
as_strided=False)))
inputs.append(
Input('qx', AsFramedSlice(shifts[-1], batch_seq_len, frame_size=frame_sizes[-1],
Input('qx', AsFramedSlice(shifts[-1], batch_seq_len+frame_sizes[-1]-1, frame_size=frame_sizes[-1],
as_strided=True)))
targets = Target('qx', AsSlice(shift=frame_sizes[0], length=batch_seq_len))
if stage in ('fit', 'train', 'val'):
Expand Down Expand Up @@ -108,7 +108,7 @@ def demo():

# DATA

# list of files or directories to use as data
# list of files or directories to use as data ("./" is the cwd of the notebook)
sources = ['./data']
# audio sample rate
sr = 16000
Expand Down Expand Up @@ -149,7 +149,7 @@ def demo():
n_steps = 15 * sr
# the sampling temperature changes outputs a lot!
# roughly : prefer values close to 1. & hot -> noisy ; cold -> silence
temperature = torch.tensor([.9, .999, 1.25]).unsqueeze(1).to('cuda')
temperature = torch.tensor([.9, .999, 1.25]).unsqueeze(1)

assert temperature.size(0) == n_examples, "number of values in temperature must be equal to n_examples"
print("arguments are ok!")
Expand All @@ -160,6 +160,8 @@ def demo():
db_path = 'sample-rnn-demo.h5'
print("collecting data...")
db = mmk.Database.create(db_path, sources, schema)
if not len(db.qx.files):
raise ValueError("Empty db. No audio files were found...")
print("successfully created the db.")

"""### create network and train"""
Expand All @@ -186,7 +188,7 @@ def demo():
n_steps=n_steps,
play_audios=True,
plot_audios=True,
temperature=temperature)
temperature=temperature.to('cuda') if torch.cuda.is_available() else temperature)

trainer = mmk.get_trainer(root_dir=None,
max_epochs=max_epochs,
Expand Down
142 changes: 138 additions & 4 deletions mimikit/models/wavenet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import dataclasses as dtc
import torch.nn as nn
from itertools import accumulate
import operator
import torch

from ..abstract.features import SegmentLabels, FilesLabels
Expand Down Expand Up @@ -27,9 +29,11 @@ def schema(cls, sr=22050, emphasis=0., q_levels=256,

schema = {"qx": MuLawSignal(sr=sr, emphasis=emphasis, q_levels=q_levels)}
if segment_labels:
schema.update({
'loc': SegmentLabels(input_key='qx')
})
# not yet supported...
pass
# schema.update({
# 'loc': SegmentLabels(input_key='qx')
# })
if files_labels:
schema.update({
'glob': FilesLabels(input_key='qx')
Expand All @@ -48,7 +52,7 @@ def dependant_hp(cls, db):
)

def batch_signature(self, stage='fit'):
inpt = Input('qx', AsSlice(shift=0, length=self.batch_seq_length))
inpt = [Input('qx', AsSlice(shift=0, length=self.batch_seq_length))]
trgt = Target('qx', AsSlice(shift=self.shift,
length=self.output_shape((-1, self.batch_seq_length, -1))[1]))
# where are we conditioned?
Expand Down Expand Up @@ -92,3 +96,133 @@ def encode_inputs(self, inputs: torch.Tensor):

def decode_outputs(self, outputs: torch.Tensor):
return self.feature.decode(outputs)

@staticmethod
def rf(n_layers, kernel_size):
if isinstance(kernel_size, tuple):
assert sum(n_layers) == len(kernel_size), "total number of layers and of kernel sizes must match"
k_iter = kernel_size
dilations = list(accumulate([1, *kernel_size], operator.mul))
else:
# reverse_dilation_order leads to the connectivity of the FFTNet
k_iter = [kernel_size] * sum(n_layers)
dilations = [kernel_size ** (i)
for block in n_layers for i in range(block)]
seq = list(dilations[i-1] * k_iter[i-1] for i in accumulate(n_layers))
return sum(seq) - len(seq) + 1


# Runnable WaveNet example: gathers the user-facing arguments, builds a database
# from the audio sources, then creates a network and trains it.
# NOTE(review): the bare triple-quoted strings inside the function look like section
# markers (presumably turned into notebook cells - confirm). tests/test_models.py
# patches several assignments and keyword arguments of this function by matching its
# source text with regular expressions, so the spelling of the statements must stay as is.
def demo():
    """### import and arguments"""
    import mimikit as mmk
    import torch

    # DATA

    # list of files or directories to use as data ("./" is the cwd of the notebook)
    sources = ['./data']
    # audio sample rate
    sr = 16000
    # number of quantization levels (256 -> 8-bit)
    q_levels = 256

    # NETWORK

    # the number of layers determines 'how much past' is used to predict the next future step
    # here you can make blocks of layers by specifying a tuple of integers, e.g. (2, 3, 2)
    n_layers = (3,)
    # kernel_size is the size of the convolution. You can specify a single int for the whole
    # network or one size per layer (a tuple with as many entries as there are layers in total)
    kernel_size = (16, 8, 2)
    # how many parameters per convolution layer
    gate_dim = 256
    # next arg can take 3 values : -1 -> input & output are summed at the end of the input,
    # 1 -> at the beginning, 0 -> they are not summed
    accum_outputs = 0
    # the next 2 args can take integers or None. Integers add skip and/or residual layers of this size.
    # None adds no layers
    skip_dim = None
    residuals_dim = None

    # OPTIMIZATION

    # how many epochs should we train for
    max_epochs = 50
    # how many examples are used per training step
    batch_size = 16
    # the learning rate
    max_lr = 5e-4
    # betas control how fast the network changes its 'learning course'.
    # generally, betas should be close to, but smaller than, 1. and be balanced with the batch_size :
    # the smaller the batch, the higher the betas 'could be'.
    betas = (0.9, 0.93)
    # one wavenet epoch can be very long, so, in order to monitor the net's progress,
    # we limit the number of batches per epoch
    limit_train_batches = 1000

    # MONITORING

    # how often should the network generate during training
    every_n_epochs = 4
    # how many examples from random prompts should be generated
    n_examples = 3
    # how many steps (1 step = 1 sample) should be generated
    n_steps = 5 * sr
    # the sampling temperature changes outputs a lot!
    # roughly : prefer values close to 1. & hot -> noisy ; cold -> silence
    # (one temperature per generated example, shaped as a column by unsqueeze(1))
    temperature = torch.tensor([.9, .999, 1.25]).unsqueeze(1)

    assert temperature.size(0) == n_examples, "number of values in temperature must be equal to n_examples"
    # receptive field implied by the layers and kernel sizes chosen above
    rf = mmk.WaveNet.rf(n_layers, kernel_size)
    print("arguments are ok! The network will have a receptive field of size :", rf, "samples")

    """### create the data"""
    # the second positional argument of WaveNet.schema is `emphasis` (0. here)
    schema = mmk.WaveNet.schema(sr, 0., q_levels)

    db_path = 'wavenet-demo.h5'
    print("collecting data...")
    db = mmk.Database.create(db_path, sources, schema)
    # fail early, with an explicit message, when the db contains no file
    if not len(db.qx.files):
        raise ValueError("Empty db. No audio files were found...")
    print("successfully created the db.")

    """### create network and train"""
    net = mmk.WaveNet(
        **mmk.WaveNet.dependant_hp(db),
        kernel_size=kernel_size,
        gate_dim=gate_dim,
        accum_outputs=accum_outputs,
        residuals_dim=residuals_dim,
        skip_dim=skip_dim,
        n_layers=n_layers,
        batch_size=batch_size,
        # twice the receptive field when it is small (<= 128 samples), 1.25 times otherwise
        batch_seq_length=rf * 2 if rf <= 128 else rf + rf // 4,
        max_lr=max_lr,
        betas=betas,
        div_factor=5,
    )
    print(net.hparams)

    # NOTE(review): the empty `splits` presumably means that all the data is used
    # for training - confirm against DataModule
    dm = mmk.DataModule(net, db,
                        splits=tuple(),
                        in_mem_data=True)

    # the temperatures are moved to the GPU only if one is available
    cb = mmk.GenerateCallback(every_n_epochs, indices=[None] * n_examples,
                              n_steps=n_steps,
                              play_audios=True,
                              plot_audios=True,
                              temperature=temperature.to('cuda') if torch.cuda.is_available() else temperature)

    trainer = mmk.get_trainer(root_dir=None,
                              max_epochs=max_epochs,
                              callbacks=[cb],
                              limit_train_batches=limit_train_batches,
                              checkpoint_callback=False)
    print("here we go!")
    trainer.fit(net, datamodule=dm)

    """----------------------------"""


# run the demo only when this module is executed as a script, not when it is imported
if __name__ == '__main__':
    demo()
7 changes: 5 additions & 2 deletions mimikit/networks/wavenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,13 @@ def predict_(outpt, temp):
if temp is None:
return nn.Softmax(dim=-1)(outpt).argmax(dim=-1, keepdims=True)
else:
return torch.multinomial(nn.Softmax(dim=-1)(outpt / temp), 1)
return torch.multinomial(nn.Softmax(dim=-1)(outpt / temp.to(outpt)), 1)

def generate_(self, prompt, n_steps, temperature=0.5, benchmark=False):
return self.generate_slow(prompt, n_steps, temperature)
if self.receptive_field <= 64:
return self.generate_slow(prompt, n_steps, temperature)
# prompt is a list but generate fast only accepts one tensor prompt...
return self.generate_fast(prompt[0], n_steps, temperature)

def generate_slow(self, prompt, n_steps, temperature=0.5):

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ librosa==0.8
torchaudio>=0.8.0
h5py==2.10.0
tables>=3.6
torch>=1.6.0
torch>=1.6.0,<1.9.0
pytorch-lightning==1.2.4
tqdm==4.42.1
matplotlib
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
'samplernn=mimikit.models.sample_rnn:demo',
'freqnet=mimikit.models.freqnet:demo',
'seq2seq=mimikit.models.s2s_lstm:demo',
'wavenet=mimikit.models.wavenet:demo',
]}

}
Expand Down
46 changes: 46 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest
from inspect import getsource
import re
import torch
import numpy as np
import matplotlib.pyplot as plt
import soundfile


from mimikit.models.freqnet import demo as fnet
from mimikit.models.sample_rnn import demo as srnn
from mimikit.models.s2s_lstm import demo as s2s
from mimikit.models.wavenet import demo as wnet


@pytest.fixture
def example_root(tmp_path):
    """Create ``models/data/example.wav`` under ``tmp_path`` and return the ``models`` directory as a str."""
    sample_rate = 22050
    models_dir = tmp_path / "models"
    data_dir = models_dir / "data"
    for folder in (models_dir, data_dir):
        folder.mkdir()
    # we need at least 8 sec of audio: write 10 sec of uniform noise in [-0.5, 0.5)
    noise = np.random.rand(sample_rate * 10) - .5
    wav_path = data_dir / "example.wav"
    soundfile.write(str(wav_path), noise, sample_rate, 'PCM_24', format="WAV")
    return str(models_dir)


@pytest.mark.parametrize("model", [fnet, srnn, s2s, wnet])
def test_models(example_root, monkeypatch, model):
    """Smoke-test a model's demo: it only has to run without raising.

    The source of the demo is patched textually so that it reads its data from
    and writes its outputs to ``example_root``, trains for a single epoch of
    10 batches, generates after every epoch and generates only 10 steps.
    The patched source is then compiled and executed on the CPU.

    :param example_root: directory (str) containing the example audio file.
    :param monkeypatch: pytest fixture, used to hide CUDA from the demo.
    :param model: the ``demo`` function under test.
    """
    if torch.cuda.is_available():
        monkeypatch.setattr(torch.cuda, "is_available", lambda: False)
    src = getsource(model)

    # Paths are injected with repr() so that backslashes or quotes in the temp
    # directory still produce valid string literals in the patched source.
    db_path = f"{example_root}/data.h5"
    substitutions = [
        (r"db_path = '.*\.h5'\n", f"db_path = {db_path!r}\n"),
        (r"sources =.*\n", f"sources = [{example_root!r}]\n"),
        (r"every_n_epochs =.*\n", "every_n_epochs=1\n"),
        (r"n_steps =.*\n", "n_steps = 10\n"),
        # the original limit is dropped first, then re-added next to max_epochs
        (r"limit_train_batches=.*\n", ""),
        (r"max_epochs=.*,\n", "max_epochs=1,limit_train_batches=10,\n"),
        (r"root_dir=.*\n", f"root_dir ={example_root!r},\n"),
    ]
    for pattern, replacement in substitutions:
        # a function replacement is inserted verbatim: re.sub does not process
        # backslash escapes in it as it would for a replacement string
        src = re.sub(pattern, lambda _match, text=replacement: text, src)

    # exec() into an explicit namespace: relying on exec() to mutate the
    # function's locals() breaks on Python 3.13+, where locals() returns
    # independent snapshots. The namespace starts as a copy of the module
    # globals so the patched demo resolves names as before.
    # The executed code is the project's own source, not external input.
    namespace = dict(globals())
    exec(src, namespace)
    namespace["demo"]()
    plt.close('all')
    # we only need that the demo runs without raising exceptions

0 comments on commit 606cabe

Please sign in to comment.