-
Notifications
You must be signed in to change notification settings - Fork 187
/
live_edit.py
427 lines (376 loc) · 12.1 KB
/
live_edit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
import pygame
import random, math
import numpy as np
import cv2
import pyaudio
import midi
import wave
#User constants
device = "cpu"                  #Selects the "./<device>.theanorc" config file
dir_name = 'History/'           #Directory holding the trained model.h5
sub_dir_name = 'e2000/'         #Sub-directory holding the latent statistics (.npy)
sample_rate = 48000             #Audio output rate (samples per second)
note_dt = 2000                  #Num Samples between consecutive note columns
note_duration = 20000           #Num Samples a note is kept before it is dropped
note_decay = 5.0 / sample_rate  #Exponential decay rate of a note, per sample
num_params = 120                #Length of the editable parameter vector
num_measures = 16               #Measures per song
num_sigmas = 5.0                #Sliders span [-num_sigmas, +num_sigmas]
note_thresh = 32                #Cell values >= this count as a played note
use_pca = True                  #Sliders act on PCA components instead of raw latents
is_ae = True                    #'O' key encodes a sample song instead of a song index

#Colors
background_color = (210, 210, 210)
edge_color = (60, 60, 60)
slider_colors = [(90, 20, 20), (90, 90, 20), (20, 90, 20), (20, 90, 90), (20, 20, 90), (90, 20, 90)]

#Note grid: one note_w x note_h cell per measure, notes_cols cells per row
note_w = 96
note_h = 96
note_pad = 2
notes_cols = 8
#Floor division keeps the row count an int (it is used as a loop bound and
#in array shapes) even when '/' means true division
notes_rows = num_measures // notes_cols

#Parameter sliders
slider_num = min(40, num_params)
slider_h = 200
slider_pad = 5
tick_pad = 4

#Control bars (0: note threshold, 1: note spacing, 2: volume)
control_w = 210
control_h = 30
control_pad = 5
control_num = 3
control_colors = [(255,0,0), (0,255,0), (0,0,255)]
control_inits = [0.75, 0.5, 0.5]

#Derived constants
notes_w = notes_cols * (note_w + note_pad*2)
notes_h = notes_rows * (note_h + note_pad*2)
sliders_w = notes_w
sliders_h = slider_h + slider_pad*2
controls_w = control_w * control_num
controls_h = control_h
window_w = notes_w
window_h = notes_h + sliders_h + controls_h
#Pixel sizes and offsets must stay integral: they are used as indices and
#passed to pygame drawing calls
slider_w = (window_w - slider_pad*2) // slider_num
#Layout from top to bottom: sliders, note grid, control bars
notes_x = 0
notes_y = sliders_h
sliders_x = slider_pad
sliders_y = slider_pad
controls_x = (window_w - controls_w) // 2
controls_y = notes_h + sliders_h
#Global variables

#Mouse state: mouse_pressed is 0 when idle, 1 while a slider is grabbed and
#2 while a control bar is grabbed
prev_mouse_pos = None
mouse_pressed = 0
cur_slider_ix = 0
cur_control_ix = 0

#Synth settings (volume is the peak amplitude, instrument picks the waveform)
volume = 3000
instrument = 0

#Song state: slider parameters, decoded note grid and control bar values
needs_update = True
cur_params = np.zeros(num_params, dtype=np.float32)
cur_notes = np.zeros((num_measures, note_h, note_w), dtype=np.uint8)
cur_controls = np.array(control_inits, dtype=np.float32)

#Setup audio stream
audio = pyaudio.PyAudio()
audio_notes = []        #(start sample, frequency) of every note still sounding
audio_time = 0          #Samples generated since the song (re)started
note_time = 0           #Index of the next note column to schedule
note_time_dt = 0        #Sample position of that next note column
audio_reset = False     #Set to True to restart playback from the beginning
audio_pause = False     #While True the stream callback outputs silence
def audio_callback(in_data, frame_count, time_info, status):
    """Synthesize the next `frame_count` samples of the current song.

    Used as the PyAudio stream callback, and also called directly by the
    main loop (with `status=None`) to render the song to a file.

    Args:
        in_data: Unused (the stream is opened for output only).
        frame_count: Number of samples to generate.
        time_info: Unused.
        status: Status flags supplied by PyAudio. Passing None bypasses the
            pause check, so a direct call still renders while paused.

    Returns:
        A `(bytes, pyaudio.paContinue)` tuple holding `frame_count` signed
        16-bit mono samples.
    """
    global audio_time
    global audio_notes
    global audio_reset
    global note_time
    global note_time_dt

    #Check if needs restart
    if audio_reset:
        audio_notes = []
        audio_time = 0
        note_time = 0
        note_time_dt = 0
        audio_reset = False

    #Check if paused
    if audio_pause and status is not None:
        #Silence must use the same 16-bit sample format as regular output
        silence = np.zeros((frame_count,), dtype=np.int16)
        return (silence.tobytes(), pyaudio.paContinue)

    #Find and add any notes in this time window
    cur_dt = note_dt
    while note_time_dt < audio_time + frame_count:
        measure_ix = note_time // note_h
        if measure_ix >= num_measures:
            break
        note_ix = note_time % note_h
        notes = np.where(cur_notes[measure_ix, note_ix] >= note_thresh)[0]
        for note in notes:
            #One semitone per index above a 38.89 Hz base, expressed in
            #half-cycles per sample (every waveform below has period 2 in x*f)
            freq = 2 * 38.89 * pow(2.0, note / 12.0) / sample_rate
            audio_notes.append((note_time_dt, freq))
        note_time += 1
        note_time_dt += cur_dt

    #Generate the tones
    data = np.zeros((frame_count,), dtype=np.float32)
    for t, f in audio_notes:
        #Sample offsets relative to the note start; offsets before the start
        #are clamped to 0 and muted below
        x = np.arange(audio_time - t, audio_time + frame_count - t)
        x = np.maximum(x, 0)
        if instrument == 0:
            w = np.sign(1 - np.mod(x * f, 2)) #Square
        elif instrument == 1:
            w = np.mod(x * f - 1, 2) - 1 #Sawtooth
        elif instrument == 2:
            w = 2*np.abs(np.mod(x * f - 0.5, 2) - 1) - 1 #Triangle
        else:
            #Sine (instrument 3); also the fallback for an unknown instrument
            #so that `w` is never unbound or reused from the previous note
            w = np.sin(x * f * math.pi)
        w[x == 0] = 0
        w *= volume * np.exp(-x*note_decay)
        data += w
    data = np.clip(data, -32000, 32000).astype(np.int16)

    #Remove notes that are too old
    audio_time += frame_count
    audio_notes = [(t, f) for t, f in audio_notes if audio_time < t + note_duration]

    #Reset if loop occurs
    if note_time // note_h >= num_measures:
        audio_time = 0
        note_time = 0
        note_time_dt = 0
        audio_notes = []

    #Return the sound clip
    return (data.tobytes(), pyaudio.paContinue)
#Keras
print("Loading Keras...")
import os
#Backend settings are exported before theano/keras get imported below
os.environ['THEANORC'] = "./%s.theanorc" % device
os.environ['KERAS_BACKEND'] = "theano"
import theano
print("Theano Version: " + theano.__version__)
import keras
print("Keras Version: " + keras.__version__)
from keras.models import Model, Sequential, load_model
from keras.layers import Dense, Activation, Dropout, Flatten, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose, ZeroPadding2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.noise import GaussianNoise
from keras.layers.local import LocallyConnected2D
from keras.optimizers import Adam, RMSprop, SGD
from keras.regularizers import l2
from keras.losses import binary_crossentropy
from keras.layers.advanced_activations import ELU
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
from keras import backend as K
K.set_image_data_format('channels_first')

print("Loading Encoder...")
model = load_model(dir_name + 'model.h5')
#enc maps the input of the 'encoder' layer (plus the learning-phase flag) to
#the output of the model's last layer
enc_inputs = [model.get_layer('encoder').input, K.learning_phase()]
enc_outputs = [model.layers[-1].output]
enc = K.function(enc_inputs, enc_outputs)
#enc_model maps the model input to the output of the 'pre_encoder' layer
enc_model = Model(inputs=model.input, outputs=model.get_layer('pre_encoder').output)

print("Loading Statistics...")
stats_dir = dir_name + sub_dir_name
means = np.load(stats_dir + 'means.npy')
evals = np.load(stats_dir + 'evals.npy')
evecs = np.load(stats_dir + 'evecs.npy')
stds = np.load(stats_dir + 'stds.npy')

print("Loading Songs...")
y_samples = np.load('samples.npy')
y_lengths = np.load('lengths.npy')
#Open a window
pygame.init()
pygame.font.init()
window_size = (window_w, window_h)
screen = pygame.display.set_mode(window_size)
#Sub-surface covering only the note grid, so the grid image can be blitted
#into it directly
notes_rect = (notes_x, notes_y, notes_w, notes_h)
notes_surface = screen.subsurface(notes_rect)
pygame.display.set_caption('MusicEdit')
font = pygame.font.SysFont("monospace", 15)

#Start the audio stream
#Mono output with 2-byte samples; audio_callback supplies the data
sample_format = audio.get_format_from_width(2)
audio_stream = audio.open(
    format=sample_format,
    channels=1,
    rate=sample_rate,
    output=True,
    stream_callback=audio_callback)
audio_stream.start_stream()
def update_mouse_click(mouse_pos):
    """Work out which slider or control bar (if any) a click landed on.

    Sets `mouse_pressed` to 1 and `cur_slider_ix` when a slider was hit, or
    to 2 and `cur_control_ix` when a control bar was hit. Clicks anywhere
    else leave the state untouched.

    Args:
        mouse_pos: (x, y) window coordinates of the click.
    """
    global cur_slider_ix
    global cur_control_ix
    global mouse_pressed

    #Sliders. Only the strip actually covered by sliders is clickable: the
    #slider area is wider than slider_w * slider_num, and a click in the
    #leftover margin would otherwise select an index past the last slider
    x = mouse_pos[0] - sliders_x
    y = mouse_pos[1] - sliders_y
    if 0 <= x < slider_w * slider_num and 0 <= y < sliders_h:
        cur_slider_ix = x // slider_w
        mouse_pressed = 1

    #Control bars
    x = mouse_pos[0] - controls_x
    y = mouse_pos[1] - controls_y
    if 0 <= x < controls_w and 0 <= y < controls_h:
        cur_control_ix = x // control_w
        mouse_pressed = 2
def apply_controls():
    """Derive the synth settings from the three control bar values.

    Control 0 sets `note_thresh` (a higher value gives a lower threshold),
    control 1 sets `note_dt` (a higher value gives fewer samples per note
    column) and control 2 sets `volume`.
    """
    global note_thresh, note_dt, volume

    thresh_level = cur_controls[0]
    speed_level = cur_controls[1]
    volume_level = cur_controls[2]

    note_thresh = (1.0 - thresh_level) * 200 + 10
    note_dt = (1.0 - speed_level) * 1800 + 200
    volume = volume_level * 6000
def update_mouse_move(mouse_pos):
    """Drag the currently grabbed slider or control bar to the mouse position.

    Positions outside the widget's range are ignored rather than clamped.

    Args:
        mouse_pos: (x, y) window coordinates of the mouse.
    """
    global needs_update

    if mouse_pressed == 1:
        #Slider: vertical position maps linearly onto [-num_sigmas, num_sigmas]
        offset_y = mouse_pos[1] - sliders_y
        if 0 <= offset_y <= slider_h:
            fraction = float(offset_y) / slider_h
            cur_params[cur_slider_ix] = (fraction - 0.5) * (num_sigmas * 2)
            needs_update = True
    elif mouse_pressed == 2:
        #Control bar: horizontal position inside the padding maps onto [0, 1]
        bar_left = controls_x + cur_control_ix*control_w
        offset_x = mouse_pos[0] - bar_left
        if control_pad <= offset_x <= control_w - control_pad:
            usable_w = control_w - control_pad*2
            cur_controls[cur_control_ix] = float(offset_x - control_pad) / usable_w
            apply_controls()
def draw_controls():
    """Draw every control bar: a colored fill for its value plus a black outline."""
    bar_y = controls_y + control_pad
    bar_w = control_w - control_pad*2
    bar_h = control_h - control_pad*2
    for i in range(control_num):
        bar_x = controls_x + i * control_w + control_pad
        filled_w = int(bar_w*cur_controls[i])
        pygame.draw.rect(screen, control_colors[i], (bar_x, bar_y, filled_w, bar_h))
        pygame.draw.rect(screen, (0,0,0), (bar_x, bar_y, bar_w, bar_h), 1)
def draw_sliders():
    """Draw each slider: a vertical track, one tick per sigma and the knob."""
    tick_count = int(num_sigmas * 2 + 1)
    top = sliders_y
    bottom = top + slider_h
    for i in range(slider_num):
        color = slider_colors[i % len(slider_colors)]
        left = sliders_x + i * slider_w
        center_x = left + slider_w // 2

        #Track
        pygame.draw.line(screen, color, (center_x, top), (center_x, bottom))

        #Ticks; the one at zero sigma is drawn in black
        tick_x1 = left + tick_pad
        tick_x2 = left + slider_w - tick_pad
        for j in range(tick_count):
            tick_y = int(top + slider_h/2.0 + (j-num_sigmas)*slider_h/(num_sigmas*2.0))
            tick_color = (0,0,0) if j - num_sigmas == 0 else color
            pygame.draw.line(screen, tick_color, (tick_x1, tick_y), (tick_x2, tick_y))

        #Knob at the current parameter value
        knob_y = top + int((cur_params[i] / (num_sigmas * 2) + 0.5) * slider_h)
        pygame.draw.circle(screen, color, (center_x, knob_y), (slider_w - tick_pad)//2)
def notes_to_img(notes):
    """Render the measures as one RGB image laid out on the note grid.

    The red channel shows each cell's value scaled so that `note_thresh`
    maps to 255; green and blue are 255 only where the cell reaches
    `note_thresh`. Padding between measures is left at gray level 64.

    Args:
        notes: Array indexed by measure; each entry is one measure's 2-D grid.

    Returns:
        A uint8 array of shape (notes_w, notes_h, 3).
    """
    cell_w = note_w + note_pad*2
    cell_h = note_h + note_pad*2
    img = np.full((3, notes_h, notes_w), 64, dtype=np.uint8)
    for ix in range(notes_rows * notes_cols):
        row, col = divmod(ix, notes_cols)
        left = note_pad + col*cell_w
        top = note_pad + row*cell_h
        right = left + note_w
        bottom = top + note_h
        measure = np.rot90(notes[ix])
        played_only = np.where(measure >= note_thresh, 255, 0)
        img[0, top:bottom, left:right] = np.minimum(measure * (255.0 / note_thresh), 255.0)
        img[1, top:bottom, left:right] = played_only
        img[2, top:bottom, left:right] = played_only
    #Reorder the axes from (channel, y, x) to (x, y, channel)
    return np.transpose(img, (2, 1, 0))
def draw_notes():
    """Blit the note grid and draw the yellow playback cursor on top of it."""
    pygame.surfarray.blit_array(notes_surface, notes_to_img(cur_notes))

    #Locate the measure being played and the column inside it
    measure_ix, note_ix = divmod(note_time, note_h)
    grid_row, grid_col = divmod(measure_ix, notes_cols)
    cursor_x = notes_x + note_pad + grid_col * (note_w + note_pad*2) + note_ix
    cursor_y = notes_y + note_pad + grid_row * (note_h + note_pad*2)
    pygame.draw.rect(screen, (255,255,0), (cursor_x, cursor_y, 4, note_h), 0)
#Main loop
def _latent_to_params(latent):
    """Project an `enc_model` output into slider space.

    Inverse of the slider-to-latent mapping applied in the main loop when
    `needs_update` is set.
    """
    if use_pca:
        return np.dot(latent - means, evecs.T) / evals
    return (latent - means) / stds

#Keys that pick the waveform, and keys that randomize the sliders (the value
#is the standard deviation of the random draw)
instrument_keys = {pygame.K_1: 0, pygame.K_2: 1, pygame.K_3: 2, pygame.K_4: 3}
random_keys = {pygame.K_r: 1.0, pygame.K_e: 2.0}

running = True
rand_ix = 0
cur_len = 0
apply_controls()
try:
    while running:
        #Process events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if pygame.mouse.get_pressed()[0]:
                    #Left button: grab a slider / control bar and move it
                    prev_mouse_pos = pygame.mouse.get_pos()
                    update_mouse_click(prev_mouse_pos)
                    update_mouse_move(prev_mouse_pos)
                elif pygame.mouse.get_pressed()[2]:
                    #Right button: reset every slider to zero
                    cur_params = np.zeros((num_params,), dtype=np.float32)
                    needs_update = True
            elif event.type == pygame.MOUSEBUTTONUP:
                mouse_pressed = 0
                prev_mouse_pos = None
            elif event.type == pygame.MOUSEMOTION and mouse_pressed > 0:
                update_mouse_move(pygame.mouse.get_pos())
            elif event.type == pygame.KEYDOWN:
                if event.key in random_keys:
                    #R / E: random song (E draws from a wider distribution)
                    sigma = random_keys[event.key]
                    cur_params = np.clip(np.random.normal(0.0, sigma, (num_params,)), -num_sigmas, num_sigmas)
                    needs_update = True
                    audio_reset = True
                if event.key == pygame.K_o:
                    #O: load the next example song
                    if is_ae and (rand_ix >= len(y_lengths) or cur_len + num_measures > len(y_samples)):
                        #Ran past the last example song: start over instead
                        #of indexing out of range
                        rand_ix = 0
                        cur_len = 0
                    print("RandIx: " + str(rand_ix))
                    if is_ae:
                        example_song = y_samples[cur_len:cur_len + num_measures]
                        cur_notes = example_song * 255
                        x = enc_model.predict(np.expand_dims(example_song, 0), batch_size=1)[0]
                        #y_lengths[i] is used as the offset from song i to
                        #song i + 1 inside y_samples
                        cur_len += y_lengths[rand_ix]
                        rand_ix += 1
                    else:
                        #Keep rand_ix a plain int; only the model input is an array
                        song_ix = np.array([rand_ix], dtype=np.int64)
                        x = enc_model.predict(song_ix, batch_size=1)[0]
                        rand_ix = (rand_ix + 1) % model.layers[0].input_dim
                    cur_params = _latent_to_params(x)
                    needs_update = True
                    audio_reset = True
                if event.key == pygame.K_g:
                    #G: export the song to live.mid and live.wav. Live playback
                    #is paused while audio_callback is driven by hand (a None
                    #status bypasses the pause) until the song wraps around
                    audio_pause = True
                    audio_reset = True
                    try:
                        midi.samples_to_midi(cur_notes, 'live.mid', 16, note_thresh)
                        chunks = []
                        while True:
                            chunks.append(audio_callback(None, 1024, None, None)[0])
                            if audio_time == 0:
                                break
                        wave_output = wave.open('live.wav', 'w')
                        try:
                            wave_output.setparams((1, 2, sample_rate, 0, 'NONE', 'not compressed'))
                            wave_output.writeframes(b''.join(chunks))
                        finally:
                            wave_output.close()
                    finally:
                        #Never leave playback muted, even if the export failed
                        audio_pause = False
                if event.key == pygame.K_ESCAPE:
                    running = False
                    break
                if event.key == pygame.K_SPACE:
                    audio_pause = not audio_pause
                if event.key == pygame.K_TAB:
                    audio_reset = True
                if event.key in instrument_keys:
                    instrument = instrument_keys[event.key]
                if event.key == pygame.K_c:
                    #C: re-encode the notes as currently played. Uses the same
                    #">= note_thresh" test as playback and display
                    y = np.expand_dims(np.where(cur_notes >= note_thresh, 1, 0), 0)
                    x = enc_model.predict(y)[0]
                    cur_params = _latent_to_params(x)
                    needs_update = True

        #Check if we need an update
        if needs_update:
            if use_pca:
                x = means + np.dot(cur_params * evals, evecs)
            else:
                x = means + stds * cur_params
            x = np.expand_dims(x, axis=0)
            y = enc([x, 0])[0][0]
            cur_notes = (y * 255.0).astype(np.uint8)
            needs_update = False

        #Draw to the screen
        screen.fill(background_color)
        draw_notes()
        draw_sliders()
        draw_controls()

        #Flip the screen buffer
        pygame.display.flip()
        pygame.time.wait(10)
finally:
    #Close the audio stream, even when the loop exits through an exception
    audio_stream.stop_stream()
    audio_stream.close()
    audio.terminate()