#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ProsoDeep - a Python prosody analysis system based on the modelling paradigm of
the Superposition of Functional Contours (SFC) prosody model [1] and comprising
the following prosody models:
- Superposition of Functional Contours (SFC) model; its original Python
implementation was known as PySFC [2]
- Weighted Superposition of Functional Contours (WSFC) model [3]
- Variational Prosody Model (VPM) [4]
- Variational Recurrent Prosody Model (VRPM) [5]
[1] Bailly, Gérard, and Bleicke Holm, "SFC: a trainable prosodic model,"
    Speech Communication 46, no. 3 (2005): 348-364.
[2] Gerazov, Branislav, and Gérard Bailly, "PySFC – A System for Prosody
    Analysis based on the Superposition of Functional Contours Prosody Model,"
    in Speech Prosody, Poznan, Poland, 13-16 June 2018.
[3] Gerazov, Branislav, Gérard Bailly, and Yi Xu, "A Weighted Superposition of
    Functional Contours model for modelling contextual prominence of elementary
    prosodic contours," in Proceedings of Interspeech, Hyderabad, India,
    2-7 September 2018.
[4] Gerazov, Branislav, Gérard Bailly, Omar Mohammed, Yi Xu, and Philip N. Garner,
    "A Variational Prosody Model for the decomposition and synthesis of speech
    prosody," ArXiv e-prints, 22 June 2018. https://arxiv.org/abs/1806.08685
[5] Gerazov, Branislav, Gérard Bailly, Omar Mohammed, Yi Xu, and Philip N. Garner,
    "Embedding Context-Dependent Variations of Prosodic Contours using Variational
    Encoding for Decomposing the Structure of Speech Prosody," Workshop on Prosody
    and Meaning: Information Structure and Beyond, Aix-en-Provence, France,
    8 November 2018.
@authors:
Branislav Gerazov Oct 2017
Copyright 2019 by GIPSA-lab, Grenoble INP, Grenoble, France.
See the file LICENSE for the licence associated with this software.
"""
import pandas as pd
from matplotlib import pyplot as plt
import pickle
from datetime import datetime
import logging
import os
import shutil
import sys
import re
import argparse
from prosodeep import (prosodeep_params, prosodeep_corpus, prosodeep_learn,
prosodeep_dsp, prosodeep_eval, prosodeep_plot)
start_time = datetime.now() # start stopwatch
print()
#%% matplotlib backend - switch to 'agg' to run headless, e.g. over ssh on a GPU server
# plt.switch_backend('agg')  # for headless runs over ssh
plt.switch_backend('Qt5Agg')
#%% parse input arguments
parser = argparse.ArgumentParser()
parser = prosodeep_params.create_parser(parser)
args = parser.parse_args()
#% init system/model parameters
params = prosodeep_params.Params(args)
#% mkdirs
if os.path.isdir(params.save_path):
if params.remove_folders: # delete them
shutil.rmtree(params.save_path, ignore_errors=False)
os.makedirs(params.save_path, exist_ok=True)
if params.plot_f0s:
os.makedirs(params.save_path+'/f0s', exist_ok=True)
if params.plot_contours:
os.makedirs(params.save_path+'/all_f0', exist_ok=True)
if params.plot_duration:
os.makedirs(params.save_path+'/all_dur', exist_ok=True)
if params.plot_expansion:
os.makedirs(params.save_path+'/all_exp', exist_ok=True)
if params.plot_eval:
os.makedirs(params.save_path+'/eval_f0', exist_ok=True)
#% logger setup
logging.basicConfig(filename=params.save_path+'/prosodeep.log', filemode='w',
format='%(asctime)s %(name)-12s: %(levelname)-8s: %(message)s',
datefmt='%H:%M:%S',
level=logging.INFO)
# define a handler which writes INFO messages or higher to sys.stderr
console = logging.StreamHandler()
console.setLevel(logging.INFO)
# set a format which is simpler for console use
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s: %(message)s')
console.setFormatter(formatter) # tell the handler to use this format
logging.getLogger('').addHandler(console) # add the handler to the root logger
#%% start logging
# log all command line arguments passed to the script
logging.info('Command line arguments')
if args is not None and not args.ignore:
settings = ''
for arg in args.__dict__:
try:
settings += f'{arg}={getattr(params, arg)}, '
        except AttributeError:  # skip arguments missing from the params object
            pass
logging.info(settings)
# add all params
logging.info('All params')
settings = ''
for param in params.__dict__:
try:
settings += f'{param}={getattr(params, param)}, '
    except AttributeError:  # skip attributes that cannot be read
        pass
logging.info(settings)
if params.use_cuda:
logging.info('Using GPU')
else:
logging.info('Using CPU')
#%% load corpus
pkl_corpus_name = params.pkl_path + params.corpus_name + '.pkl'
if params.load_corpus and os.path.isfile(pkl_corpus_name):
logging.info('Found corpus ' + params.corpus_name + '.pkl')
logging.info('Loading corpus ...')
with open(pkl_corpus_name, 'rb') as f:
data = pickle.load(f)
fpro_stats, corpus, f0_data, utterances, phone_set, phone_cnts, _ = data
f0_ref, isochrony_clock, isochrony_gravity, disol, stats = fpro_stats
    # add context columns if they aren't already there
if not all([s in corpus.columns for s in params.context_columns]):
corpus = prosodeep_corpus.add_context(corpus, params)
with open(pkl_corpus_name, 'wb') as f:
fpro_stats = f0_ref, isochrony_clock, isochrony_gravity, disol, stats
# to avoid bad headers
data = (fpro_stats, corpus, f0_data, utterances, phone_set,
phone_cnts, params)
pickle.dump(data, f, -1) # last version
#
#%% rebuild corpus
else:
logging.info('Building corpus ' + params.corpus_name + '.pkl')
fpro_stats, corpus, f0_data, utterances, phone_set, phone_cnts = \
prosodeep_corpus.build_corpus(params)
f0_ref, isochrony_clock, isochrony_gravity, disol, stats = fpro_stats
if not params.do_all_phrases:
corpus = prosodeep_corpus.remove_phrase_types(corpus, params)
corpus = prosodeep_corpus.downcast_corpus(corpus, params.columns)
# add context one-hot vectors
if (params.vae and 'context' in params.vae_input) or (
params.use_strength and params.strength_method == 'context'
) or 'rnn_vae' in params.model_type:
corpus = prosodeep_corpus.add_context(corpus, params)
# add contour counts
corpus = prosodeep_corpus.add_contour_generator_count(corpus, params)
with open(params.pkl_path + params.corpus_name + '.pkl', 'wb') as f:
fpro_stats = f0_ref, isochrony_clock, isochrony_gravity, disol, stats
# to avoid bad headers
data = (fpro_stats, corpus, f0_data, utterances, phone_set, phone_cnts,
params)
pickle.dump(data, f, -1) # last version
os.system('spd-say "Building the corpus is done."')
#%% load processed corpus
pkl_process_corp_name = params.pkl_path + params.processed_corpus_name + '.pkl'
if params.load_processed_corpus and \
os.path.isfile(pkl_process_corp_name):
logging.info('Loading processed corpus '+ pkl_process_corp_name)
with open(pkl_process_corp_name, 'rb') as f:
data = pickle.load(f)
if any(x in params.model_type for x in ['deep', 'baseline']):
(corpus, f0_data, fpro_stats, utterances, dict_files, dict_models,
dict_losses, dict_scope_counts, params) = data
else:
(corpus, f0_data, fpro_stats, utterances, dict_files,
dict_contour_generators,
dict_losses, dict_scope_counts, params) = data
if params.use_test_set:
corpus_test = corpus['corpus_test']
corpus = corpus['corpus']
else:
logging.info('Processing corpus ...')
    #%% if not processing all phrase types, remove the unnecessary ones
if not params.do_all_phrases:
corpus = corpus[corpus.phrasetype.isin(params.phrase_types)]
# fix and add columns to the corpus
corpus = prosodeep_corpus.scale_and_expand_corpus(corpus, params)
# split corpus into train and test
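    # (a fixed random_state makes the split reproducible across runs, and the
    # optional stratification keeps label proportions similar in both subsets)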
if params.use_test_set:
train_ind, test_ind = prosodeep_corpus.split_corpus(
corpus, params.test_size, stratify=params.test_stratify,
random_state=42)
corpus, corpus_test = corpus.iloc[train_ind], corpus.iloc[test_ind]
else:
corpus_test = None
# get the scope counts
logging.info('Counting scopes ...')
    dict_scope_counts = {}
### TODO - get rid of loop for phrase_list
for phrase_type in params.phrase_list:
dict_scope_counts[phrase_type] = prosodeep_corpus.contour_scope_count(
corpus, phrase_type=phrase_type, max_scope=40)
#%% phrase loop - if not bunched!
# init dictionaries per phrase type
dict_contour_generators = {}
dict_models = {}
dict_losses = {}
dict_files = {}
if params.use_pretrained_models: # preload models
if 'anbysyn' in params.model_type:
with open(params.pretrained_models, 'rb') as f:
data = pickle.load(f)
_, _, _, _, _, pretrained_models, _, _, _ = data
if params.database == 'liu':
pretrained_models = pretrained_models['all'] # all for chen -> liu
else:
pretrained_models = pretrained_models['all'] # DC can be the reference
else: # deep models
with open(params.pretrained_models, 'rb') as f:
data = pickle.load(f)
_, _, _, _, _, pretrained_models, _, _, _ = data
pretrained_models = pretrained_models['all'] # all for chen -> liu
contour_generators = pretrained_models.contour_generators
else:
contour_generators = None
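    # per-phrase-type training loop: each phrase type in params.phrase_list gets
    # its own set of contour generators (or deep model), masks, files and losses,
    # stored in the dict_* dictionaries keyed by phrase type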
for phrase_type in params.phrase_list:
logging.info('='*42)
logging.info('Training for phrase {} from {} ...'.format(
phrase_type, params.phrase_types))
# init contour generators
logging.info('Initialising contour generators and masks ...')
contour_generators = {}
contour_keys = []
for function_type in params.phrase_types + params.function_types:
if function_type in dict_scope_counts[phrase_type].keys():
contour_keys.append(function_type)
if 'anbysyn' in params.model_type:
for contour_type in contour_keys:
if (params.use_pretrained_models
and contour_type in pretrained_models.keys()):
if params.database == 'liu':
# for liu only keep tones
if contour_type not in params.tones: # + ['WB']:
contour_generators[contour_type] = \
prosodeep_learn.construct_contour_generator(
contour_type, params)
continue
# copy only the contour generator layers
contour_generator = prosodeep_learn.construct_contour_generator(
contour_type, params)
contour_generator_pre = pretrained_models[contour_type]
for l in ['hidden0', 'out_contour']:
layer = getattr(
contour_generator_pre.contour_generator, l)
# freeze if necessary
if params.freeze:
for p in layer.parameters():
p.requires_grad = False
setattr(contour_generator.contour_generator, l, layer)
contour_generators[contour_type] = contour_generator
else:
contour_generator = pretrained_models[contour_type]
contour_generator.reg_strengths = params.reg_strengths
contour_generator.reg_strengths_mean = params.reg_strengths_mean
contour_generator.reset_optimizer()
if params.freeze:
for l in ['hidden0', 'out_contour']:
layer = getattr(contour_generator.contour_generator, l)
for p in layer.parameters():
p.requires_grad = False
contour_generators[contour_type] = contour_generator
else:
contour_generators[contour_type] = \
prosodeep_learn.construct_contour_generator(
contour_type, params)
    # save them in the dictionary
dict_contour_generators[phrase_type] = contour_generators
# create masks
(files, mask_all_files, mask_file_dict,
mask_contours, n_units_dict, mask_unit_dict
) = prosodeep_corpus.create_masks(
corpus, contour_keys, params,
phrase_type=phrase_type)
dict_files[phrase_type] = files
start_train = datetime.now()
#%% normalise input ramps
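    # note: the test set is scaled with the min/max computed on the training set,
    # so both sets share the same feature ranges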
if params.normalisation_type == 'minmax':
corpus, feats_min, feats_max = prosodeep_dsp.normalise_min_max(
corpus, params)
corpus_test, _, _ = prosodeep_dsp.normalise_min_max(
corpus_test, params, feats_min=feats_min, feats_max=feats_max)
params.feat_min_train = feats_min
params.feat_max_train = feats_max
#%% model training
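    # three training paths: classic SFC analysis-by-synthesis training of the
    # contour generators, recurrent models for model types containing 'rnn', and
    # jointly trained feed-forward models for the deep/baseline variants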
if params.model_type == 'anbysyn':
(corpus,
dict_contour_generators[phrase_type],
dict_losses[phrase_type],
losses_DC) = prosodeep_learn.analysis_by_synthesis(
corpus, mask_all_files,
mask_file_dict, mask_contours,
n_units_dict, mask_unit_dict,
contour_keys,
contour_generators, params)
else:
if 'rnn' in params.model_type:
(corpus, model,
batch_losses,
epoch_cnt, epoch_losses) = prosodeep_learn.train_rnn_model(
corpus,
contour_generators=dict_contour_generators[phrase_type],
params=params)
elif any(x in params.model_type
for x in ['baseline', 'deep', 'deep_vae']):
(corpus, model,
batch_losses,
epoch_cnt, epoch_losses) = prosodeep_learn.train_model(
corpus,
contour_generators=dict_contour_generators[phrase_type],
params=params)
dict_losses[phrase_type] = (batch_losses, epoch_cnt, epoch_losses)
dict_models[phrase_type] = model
#%% save results
corpus.loc[:, 'f01':] = corpus.loc[:, 'f01':].apply(pd.to_numeric,
downcast='float')
if params.save_processed_data and not params.use_test_set:
with open(params.pkl_path + params.processed_corpus_name+'.pkl', 'wb') as f:
corpus_dict = corpus
if any(x in params.model_type for x in ['deep', 'baseline']):
data = (corpus_dict, f0_data, fpro_stats, utterances, dict_files,
dict_models,
dict_losses, dict_scope_counts, params)
else:
data = (corpus_dict, f0_data, fpro_stats, utterances, dict_files,
dict_contour_generators,
dict_losses, dict_scope_counts, params)
pickle.dump(data, f, -1) # last version
end_time = datetime.now()
dif_time = end_time - start_train
logging.info('='*42)
logging.info('Finished training in {}'.format(dif_time))
# os.system('espeak "Training complete."')
#%% make a DataFrame from utterances
# db_utterances = pd.DataFrame(data=list(utterances.values()),
# index=utterances.keys(), columns=['utterance'])
#db_utterances["length"] = db_utterances.utterance.apply(lambda x: len(x.split()))
#%% plot contours
colors = prosodeep_plot.init_colors(params)
if params.plot_contours:
logging.info('='*42)
logging.info('='*42)
logging.info('Plotting final iterations ...')
for phrase_type, files in dict_files.items():
if params.plot_n_files is not None:
l = files[:params.plot_n_files]
elif params.plot_last_n_files is not None:
l = files[-params.plot_last_n_files:]
else:
l = files
if params.database == 'morlec' and len(params.phrase_types) > 1:
# plot last from all phrasetypes
m = []
for file in l:
                nr = re.match(r'(.*)_(\d*).*', file).groups()[1]
for phrase in params.phrase_types:
m.append(phrase+'_'+nr+'.TextGrid')
l = m
if params.database == 'liu': # plot 1st iteration from all sentences
m = []
for file in l:
                if re.match(r'F1_1.*1\.TextGrid', file):
m.append(file)
l = m[:6] # reduce plotting
for file in l:
if 'baseline' not in params.model_type: # synthesise individual contours
if 'rnn' in params.model_type:
corpus = prosodeep_learn.synthesise_rnn_contours(
corpus, dict_models[phrase_type], file, params)
corpus_test = prosodeep_learn.synthesise_rnn_contours(
corpus_test, dict_models[phrase_type], file, params)
elif 'deep' in params.model_type:
corpus = prosodeep_learn.synthesise_deep_contours(
corpus, dict_models[phrase_type], file, params)
#
logging.info('Plotting f0 and dur for file {} ...'.format(file))
prosodeep_plot.plot_contours(params.save_path+'/'+phrase_type+'_f0/',
file, utterances,
corpus,
# corpus_test,
colors, params, plot_contour='f0',
show_plot=params.show_plot)
if params.plot_duration:
prosodeep_plot.plot_contours(
params.save_path+'/'+phrase_type+'_dur/',
file, utterances,
corpus, colors, params,
plot_contour='dur',
show_plot=params.show_plot)
#%% plot expansion
if params.plot_expansion:
logging.info('Plotting expansions ...')
if 'deep' in params.model_type: # no expansion for baseline
for phrase_type in params.phrase_list:
prosodeep_plot.plot_expansion(params.save_path+'/'+phrase_type+'_exp/',
dict_models[phrase_type].contour_generators,
colors, dict_scope_counts[phrase_type], params,
show_plot=params.show_plot)
elif 'anbysyn' in params.model_type:
for phrase_type, contour_generators in dict_contour_generators.items():
            scope_counts = dict_scope_counts[phrase_type]
            prosodeep_plot.plot_expansion(params.save_path+'/'+phrase_type+'_exp/',
                                          contour_generators,
                                          colors, scope_counts,
                                          params, show_plot=params.show_plot)
#%% plot losses
logging.info('='*42)
logging.info('Plotting losses ...')
if any(x in params.model_type for x in ['deep', 'baseline']):
for phrase_type, losses in dict_losses.items():
batch_losses, epoch_cnt, epoch_losses = losses
prosodeep_plot.plot_batch_losses(params.save_path, batch_losses, losses,
epoch_cnt, epoch_losses,
log_scale=False, show_plot=params.show_plot)
else:
for phrase_type, losses in dict_losses.items():
prosodeep_plot.plot_losses(params.save_path, phrase_type, losses,
log_scale=True,
show_plot=params.show_plot)
#%% final losses
if 'deep' not in params.model_type:
logging.info('Plotting final losses ...')
prosodeep_plot.plot_final_losses(dict_losses, params,
show_plot=params.show_plot)
#%% evaluate performance
logging.info('Evaluating reconstruction performance ...')
if params.use_test_set:
# synthesise test corpus data!
if 'anbysyn' in params.model_type: # anbysyn synthesise contours
corpus_test = prosodeep_learn.synthesise_anbysyn_testset(
corpus_test, contour_generators, params)
elif 'rnn' in params.model_type:
for phrase_type, files in dict_files.items():
corpus_test = prosodeep_learn.synthesise_rnn_testset(
corpus_test,
dict_models[phrase_type],
params)
elif any(x in params.model_type for x in ['deep', 'baseline']):
for phrase_type, files in dict_files.items():
corpus_test = prosodeep_learn.synthesise_deep_testset(
corpus_test,
dict_models[phrase_type],
params)
# save processed data
if params.save_processed_data:
with open(params.pkl_path + params.processed_corpus_name+'.pkl', 'wb') as f:
corpus_dict = {'corpus':corpus, 'corpus_test':corpus_test}
if any(x in params.model_type for x in ['deep', 'baseline']):
data = (corpus_dict, f0_data, fpro_stats, utterances, dict_files,
dict_models,
dict_losses, dict_scope_counts, params)
else:
data = (corpus_dict, f0_data, fpro_stats, utterances, dict_files,
dict_contour_generators,
dict_losses, dict_scope_counts, params)
pickle.dump(data, f, -1) # last version
else:
corpus_test = corpus
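    # without a held-out test set, reconstruction performance is evaluated on
    # the training corpus itself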
eval_pkl_name = params.pkl_path + params.processed_corpus_name + '_eval.pkl'
if params.load_eval_corpus and os.path.isfile(eval_pkl_name):
with open(eval_pkl_name, 'rb') as f:
data = pickle.load(f)
corpus_eval, corpus_eval_sum = data
else:
corpus_eval = None
corpus_eval_sum = None
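    # sweep over every combination of evaluation reference, unit, segment and
    # weighting, accumulating per-file results in corpus_eval and summary
    # statistics in corpus_eval_sum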
for eval_ref in params.eval_refs:
for eval_unit in params.eval_units:
for eval_segment in params.eval_segments:
for eval_weight in params.eval_weights:
corpus_eval = prosodeep_eval.eval_performance(
corpus_test, f0_data, params, eval_unit, eval_ref,
eval_weight, eval_segment, corpus_eval=corpus_eval)
                # drop NaN rows so only successfully evaluated files remain:
corpus_eval.dropna(inplace=True)
mask_row = prosodeep_eval.get_mask(
corpus_eval, eval_unit, eval_ref,
eval_weight, eval_segment)
print(corpus_eval.loc[mask_row].describe())
sys.stdout.flush()
corpus_eval_sum = prosodeep_eval.eval_sum(
corpus_eval, eval_unit, eval_ref,
eval_weight, eval_segment, params,
corpus_eval_sum=corpus_eval_sum)
if params.save_eval_data:
with open(eval_pkl_name, 'wb') as f:
data = corpus_eval, corpus_eval_sum
pickle.dump(data, f, -1) # last version
spreadname = '{}/{}_evaluation_stats.xls'.format(
params.save_path, params.processed_corpus_name)
writer = pd.ExcelWriter(spreadname)
mask_row = corpus_eval_sum.measure == 'wrmse'
corpus_eval_sum.loc[mask_row].to_excel(writer, 'wrmse')
mask_row = corpus_eval_sum.measure == 'wcorr'
corpus_eval_sum.loc[mask_row].to_excel(writer, 'wcorr')
writer.save()
logging.info('Evaluation data saved in {}.'.format(spreadname))
#%% plot eval data
logging.info('Plotting performance statistics ...')
if params.plot_eval:
for eval_ref in params.eval_refs:
for eval_unit in params.eval_units:
for eval_segment in params.eval_segments:
for eval_weight in params.eval_weights:
combination = '{}_{}_{}_{}'.format(
eval_segment, eval_ref,
eval_unit, eval_weight)
mask_row = prosodeep_eval.get_mask(
corpus_eval, eval_unit, eval_ref,
eval_weight, eval_segment)
prosodeep_plot.plot_histograms(
corpus_eval[mask_row].wrmse,
corpus_eval[mask_row].wrmse.mean(),
corpus_eval[mask_row].wrmse.median(),
params.save_path,
plot_type='rmse_'+combination,
show_plot=params.show_plot)
prosodeep_plot.plot_histograms(
corpus_eval[mask_row].wcorr,
corpus_eval[mask_row].wcorr.mean(),
corpus_eval[mask_row].wcorr.median(),
params.save_path,
plot_type='corr_'+combination,
show_plot=params.show_plot)
#%% wrap up
end_time = datetime.now()
dif_time = end_time - start_time
logging.info('='*42)
prompt = 'Finished in {}'.format(dif_time)
logging.info(prompt)
logging.info('='*42)
print()
print()
# os.system('espeak "Finished boss!"')
#%% shut down
#import time
#time.sleep(30*60)
#os.system('/sbin/poweroff')