Skip to content

Commit

Permalink
点検系をいろいろ更新
Browse files Browse the repository at this point in the history
  • Loading branch information
oatsu-gh committed Aug 29, 2021
1 parent d514231 commit e598493
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 62 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
hydra-core
hydra-core<1.1
joblib
nnmnkwii >= 0.0.23
nnsvs
Expand Down
2 changes: 1 addition & 1 deletion synthesis/nnsvs_gen_override.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def gen_waveform(labels,
aperiodicity.astype(np.float64),
sample_rate, frame_period)

# 音量を小さくする
# 音量を小さくする(音割れ防止)
# TODO: ここのかける定数をいい感じにする
spectrogram *= 0.000000001
sp = pyworld.code_spectral_envelope(spectrogram, sample_rate, 60)
Expand Down
12 changes: 12 additions & 0 deletions synthesis/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
hydra-core<1.1
joblib
nnmnkwii>=0.0.23
nnsvs
numpy>=1.20
omegaconf
scipy
torch==1.7.1
tqdm
utaupy>=1.10
pyyaml
scikit-learn<0.24.0
29 changes: 29 additions & 0 deletions train/conf/train/duration/train/myconfig_mdn_2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# @package _group_

out_dir: exp
nepochs: 50
checkpoint_epoch_interval: 50

stream_wise_loss: false
use_detect_anomaly: true

optim:
optimizer:
name: Adam
params:
lr: 0.001
betas: [0.9, 0.999]
weight_decay: 0.0
lr_scheduler:
name: StepLR
params:
step_size: 2
gamma: 0.5

resume:
checkpoint:
load_optimizer: false

cudnn:
benchmark: false
deterministic: true
62 changes: 62 additions & 0 deletions train/log2csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#! /usr/bin/env python3
# coding: utf-8
# Copyright (c) 2020 oatsu
"""
nnsvsの学習時のログファイル train.log を読み取り、CSVに変換する。
グラフ生成はしたいがとりあえずExcelでつくる。
"""
from glob import glob
# from datetime import datetime
# from pprint import pprint
from os.path import basename, splitext

from tqdm import tqdm


def read_log(path_log):
    """
    Read a training log file and extract the per-epoch loss values.

    A line containing '[train_no_dev] [Epoch ' contributes to the training
    list, and a line containing '[dev] [Epoch ' contributes to the dev list.
    The loss is taken from the 8th whitespace-separated field of the line.

    Args:
        path_log: Path of the log file to read.

    Returns:
        A tuple (loss_train_no_dev, loss_dev). Both are lists of strings,
        kept exactly as they appear in the log, in file order.
    """
    # Index of the whitespace-separated field that holds the loss value.
    loss_field_index = 7
    loss_train_no_dev = []
    loss_dev = []
    with open(path_log, 'r') as fl:
        # Iterate the file directly instead of loading every line at once.
        for line in fl:
            if '[train_no_dev] [Epoch ' in line:
                target = loss_train_no_dev
            elif '[dev] [Epoch ' in line:
                target = loss_dev
            else:
                continue
            fields = line.split()
            # A matching line may be cut short (e.g. the final line of an
            # interrupted run). Skip it instead of raising IndexError, so
            # one damaged log does not abort the conversion of the others.
            if len(fields) > loss_field_index:
                target.append(fields[loss_field_index])
    return loss_train_no_dev, loss_dev


def generate_csv(loss_train_no_dev, loss_dev, path_csv_out):
    """
    Write per-epoch loss values to a CSV file.

    The file starts with the header line
    'epoch, loss(train_no_dev), loss(dev)' followed by one row per epoch
    (epoch numbers start at 1). No trailing newline is written.

    Args:
        loss_train_no_dev (list): Training loss values as strings.
        loss_dev (list): Dev loss values as strings.
        path_csv_out: Path of the CSV file to write (overwritten if present).

    If the two lists differ in length, the missing cells are left empty
    instead of dropping the extra epochs.
    """
    from itertools import zip_longest

    # Cover every epoch present in either list; plain zip() would silently
    # truncate to the shorter one.
    n_epochs = max(len(loss_train_no_dev), len(loss_dev))
    epoch_numbers = (str(i + 1) for i in range(n_epochs))
    rows = zip_longest(epoch_numbers, loss_train_no_dev, loss_dev, fillvalue='')
    # Build the output text
    s_csv = 'epoch, loss(train_no_dev), loss(dev)\n'
    s_csv += '\n'.join(','.join(row) for row in rows)
    with open(path_csv_out, 'w') as fc:
        fc.write(s_csv)


def main():
    """
    Convert every *.log file under a user-supplied directory to CSV.

    The directory is read from standard input (surrounding double quotes
    are stripped) and searched recursively. Each log produces one CSV named
    '<log file stem>_<index>.csv'. The name is a bare file name, so it is
    created relative to the current working directory.
    """
    path_log_dir = input('path_log_dir: ').strip('"')
    # glob() returns paths in arbitrary order; sort them so the index
    # embedded in each output file name is the same on every run.
    list_path_log = sorted(glob(f'{path_log_dir}/**/*.log', recursive=True))
    for i, path_log in enumerate(tqdm(list_path_log)):
        loss_train_no_dev, loss_dev = read_log(path_log)
        # The index keeps logs with the same base name from overwriting
        # each other's CSV.
        path_csv_out = f'{splitext(basename(path_log))[0]}_{i}.csv'
        generate_csv(loss_train_no_dev, loss_dev, path_csv_out)


if __name__ == '__main__':
    # Run the conversion only when executed as a script.
    main()
    # Block until the user presses Enter once processing has finished
    # (presumably to keep a console window open - confirm).
    input('おわり')
4 changes: 4 additions & 0 deletions train/stage0/assert_wav_is_longer_than_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,12 @@ def main(path_config_yaml):
# DBに同梱されていたLABファイルを丸める
wav_dir_in = join(out_dir, 'wav')
full_align_dir_in = join(out_dir, 'full_align_round')
full_score_dir_in = join(out_dir, 'full_score_round')
# 点検する
print('Comparing length of LAB and WAV')
compare_wav_files_and_lab_files(wav_dir_in, full_align_dir_in)
print('Comparing length of score and WAV')
compare_wav_files_and_lab_files(wav_dir_in, full_score_dir_in)


if __name__ == '__main__':
Expand Down
27 changes: 15 additions & 12 deletions train/stage0/compare_mono_align_and_mono_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
from natsort import natsorted
from tqdm import tqdm

THRESHOLD = 250
VOWELS = ('a', 'i', 'u', 'e', 'o', 'A', 'I', 'U', 'E', 'O', 'N')
VOWELS = {'a', 'i', 'u', 'e', 'o', 'A', 'I', 'U', 'E', 'O', 'N'}


def phoneme_is_ok(path_mono_align_lab, path_mono_score_lab):
Expand All @@ -31,10 +30,7 @@ def phoneme_is_ok(path_mono_align_lab, path_mono_score_lab):
"""
mono_align_label = up.label.load(path_mono_align_lab)
mono_score_label = up.label.load(path_mono_score_lab)
# assert len(mono_label) == len(sinsy_mono), \
# 'DB同梱のラベル({}, {})と楽譜から生成したラベル({}, {})の音素数が一致しません。'.format(
# len(mono_label), path_mono_label, len(sinsy_mono), path_sinsy_mono
# )
# 全音素記号が一致したらTrueを返す
for mono_align_phoneme, mono_score_phoneme in zip(mono_align_label, mono_score_label):
if mono_align_phoneme.symbol != mono_score_phoneme.symbol:
error_message = '\n'.join([
Expand All @@ -44,7 +40,14 @@ def phoneme_is_ok(path_mono_align_lab, path_mono_score_lab):
])
logging.error(error_message)
return False
# 全音素記号が一致したらTrueを返す
if len(mono_align_label) != len(mono_score_label):
error_message = '\n'.join([
f'DB同梱のラベルと楽譜から生成したラベルの音素数が一致しません。({basename(path_mono_align_lab)})',
f' DB同梱ラベルの音素数 : {len(mono_align_label)}\t({path_mono_align_lab})',
f' 楽譜からのラベルの音素数: {len(mono_score_label)}\t({path_mono_score_lab})'
])
logging.error(error_message)
return False
return True


Expand Down Expand Up @@ -109,8 +112,7 @@ def offet_is_ok(path_mono_align_lab,
最初の音素の長さを比較して、閾値以上ずれていたらエラーを返す。
threshold_ms の目安: 300ms-600ms (5sigma-10sigma)
"""
k = {'strict': 5, 'medium': 6, 'lenient': 7}[mode]
# TODO: medianとか使うようにする
k = {'strict': 5, 'medium': 6, 'lenient': 7}.get(mode, 6)
# 単位換算して100nsにする
upper_threshold = mean_100ns + k * stdev_100ns
lower_threshold = mean_100ns - k * stdev_100ns
Expand Down Expand Up @@ -150,7 +152,7 @@ def vowel_durations_are_ok(path_mono_align_lab,
- ふつう: 5sigma
- 厳しめ: 4sigma
"""
k = {'strict': 4, 'medium': 5, 'lenient': 6}[mode]
k = {'strict': 4, 'medium': 5, 'lenient': 6}.get(mode, 6)
# 単位換算して100nsにする
upper_threshold = mean_100ns + k * stdev_100ns
lower_threshold = mean_100ns - k * stdev_100ns
Expand Down Expand Up @@ -190,6 +192,7 @@ def main(path_config_yaml):
out_dir = config['out_dir']
mono_align_files = natsorted(glob(f'{out_dir}/mono_align_round/*.lab'))
mono_score_files = natsorted(glob(f'{out_dir}/mono_score_round/*.lab'))
duration_check_mode = config['stage0']['vowel_duration_check']

# mono_align_labの最初の音素が時刻0から始まるようにする。
print('Overwriting mono-align-LAB so that it starts with zero.')
Expand All @@ -212,7 +215,7 @@ def main(path_config_yaml):
print('Checking first pau duration')
for path_mono_align, path_mono_score in zip(tqdm(mono_align_files), mono_score_files):
if not offet_is_ok(path_mono_align, path_mono_score,
mean_100ns, stdev_100ns, mode='medium'):
mean_100ns, stdev_100ns, mode=duration_check_mode):
invalid_basenames.append(basename(path_mono_align))
if len(invalid_basenames) > 0:
raise Exception('DBから生成したラベルと楽譜から生成したラベルに不整合があります。'
Expand All @@ -222,7 +225,7 @@ def main(path_config_yaml):
print('Comparing mono-align-LAB durations and mono-score-LAB durations')
for path_mono_align, path_mono_score in zip(tqdm(mono_align_files), mono_score_files):
vowel_durations_are_ok(path_mono_align, path_mono_score,
mean_100ns, stdev_100ns, mode='strict')
mean_100ns, stdev_100ns, mode=duration_check_mode)


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion train/stage0/copy_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def main(path_config_yaml):
with open(path_config_yaml, 'r') as fy:
config = yaml.load(fy, Loader=yaml.FullLoader)
# 歌唱DBのパスを取得
db_root = expanduser(config['db_root']).strip('"')
db_root = expanduser(config['stage0']['db_root']).strip('"')
# ファイルのコピー先を取得
out_dir = config['out_dir'].strip('"')

Expand Down
82 changes: 35 additions & 47 deletions train/stage0/segment_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,36 @@
from utaupy.label import Label


def all_phonemes_are_rest(label) -> bool:
def all_phonemes_are_rest(label: Union[Label, HTSFullLabel]) -> bool:
"""
フルラベル中に休符しかないかどうか判定
フルラベルまたはモノラベル中に休符しかないかどうか判定
"""
rests = ('pau', 'sil')
# モノラベルのとき
if isinstance(label, Label):
for phoneme in label:
if phoneme.symbol not in rests:
return False
return True
# フルラベルのとき
if isinstance(label, HTSFullLabel):
for oneline in label:
if oneline.phoneme.identity not in rests:
return False
return True
# フルラベルでもモノラベルでもないとき
raise ValueError("Argument 'label' must be Label object or HTSFullLabel object.")
rests = set(['pau', 'sil'])
# 全部の音素が休符であるか否か
result = all(phoneme.symbol in rests for phoneme in label)
return result

# def all_phonemes_are_rest_old(label: Union[Label, HTSFullLabel]) -> bool:
# """
# フルラベル中に休符しかないかどうか判定(旧実装)
# """
# rests = set(['pau', 'sil'])
# # モノラベルのとき
# if isinstance(label, Label):
# for phoneme in label:
# if phoneme.symbol not in rests:
# return False
# return True
# # フルラベルのとき
# if isinstance(label, HTSFullLabel):
# for oneline in label:
# if oneline.phoneme.identity not in rests:
# return False
# return True
# # フルラベルでもモノラベルでもないとき
# raise ValueError("Argument 'label' must be Label object or HTSFullLabel object.")




def split_mono_label_short(label: Label) -> List[Label]:
Expand Down Expand Up @@ -188,7 +199,7 @@ def split_full_label_long(full_label: HTSFullLabel) -> list:
return result


def split_label(label: Union[Label, HTSFullLabel], mode: str, middle_frequency: int = 3
def split_label(label: Union[Label, HTSFullLabel], mode: str, middle_frequency: int
) -> List[Union[Label, HTSFullLabel]]:
"""
ラベルを分割してリストにして返す。フルラベルとモノラベルを自動で使い分ける。
Expand All @@ -214,29 +225,6 @@ def split_label(label: Union[Label, HTSFullLabel], mode: str, middle_frequency:
return result


def test_full(mode):
    """
    Split one full-context label file and write out the pieces.

    The input path is read from standard input. Each segment is written
    to the input path with '.lab' replaced by '_split_<6-digit index>.lab'.
    """
    path_in = input('path_in: ')
    segments = split_label(up.hts.load(path_in), mode=mode)
    i = 0
    for segment in segments:
        destination = path_in.replace('.lab', f'_split_{str(i).zfill(6)}.lab')
        segment.write(destination, strict_sinsy_style=False)
        i += 1


def test_mono():
    """
    Split one mono label file and write out the pieces.

    The input path is read from standard input. Each segment is written
    to the input path with '.lab' replaced by '_split_<6-digit index>.lab'.
    """
    path_in = input('path_in: ')
    mono_label_in = up.label.load(path_in)
    segments = split_mono_label_long(mono_label_in)
    i = 0
    for segment in segments:
        destination = path_in.replace('.lab', f'_split_{str(i).zfill(6)}.lab')
        segment.write(destination)
        i += 1


def remove_zensou_and_kousou(path_lab):
"""
長すぎてGPUメモリを食いつぶすような音素を除去(前奏、間奏、後奏とか)
Expand All @@ -253,8 +241,8 @@ def main(path_config_yaml):
with open(path_config_yaml, 'r') as fy:
config = yaml.load(fy, Loader=yaml.FullLoader)
out_dir = config['out_dir']
mode = config['segmentation_mode']
middle_frequency = config['middle_frequency']
mode = config['stage0']['segmentation_mode']
middle_frequency = config['stage0']['middle_frequency']

full_score_round_files = natsorted(glob(f'{out_dir}/full_score_round/*.lab'))
mono_score_round_files = natsorted(glob(f'{out_dir}/mono_score_round/*.lab'))
Expand All @@ -270,23 +258,23 @@ def main(path_config_yaml):
for path in tqdm(full_score_round_files):
songname = splitext(basename(path))[0]
label = up.hts.load(path)
for idx, segment in enumerate(split_label(label, mode)):
for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
path_out = f'{out_dir}/full_score_round_seg/{songname}_seg{idx}.lab'
segment.write(path_out, strict_sinsy_style=False)

print('Segmenting full_align_round label files')
for path in tqdm(full_align_round_files):
songname = splitext(basename(path))[0]
label = up.hts.load(path)
for idx, segment in enumerate(split_label(label, mode)):
for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
path_out = f'{out_dir}/full_align_round_seg/{songname}_seg{idx}.lab'
segment.write(path_out, strict_sinsy_style=False)

print('Segmenting mono_score_round label files')
for path in tqdm(mono_score_round_files):
songname = splitext(basename(path))[0]
label = up.label.load(path)
for idx, segment in enumerate(split_label(label, mode)):
for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
path_out = f'{out_dir}/mono_score_round_seg/{songname}_seg{idx}.lab'
segment.write(path_out)

Expand All @@ -295,7 +283,7 @@ def main(path_config_yaml):
for path in tqdm(mono_align_round_files):
songname = splitext(basename(path))[0]
label = up.label.load(path)
for idx, segment in enumerate(split_label(label, mode)):
for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
path_out = f'{out_dir}/mono_align_round_seg/{songname}_seg{idx}.lab'
segment.write(path_out)

Expand Down

0 comments on commit e598493

Please sign in to comment.