点検系をいろいろ更新

oatsu-gh · Aug 29, 2021 · e598493 · e598493
1 parent d514231
commit e598493
Show file tree

Hide file tree

Showing 9 changed files with 160 additions and 62 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-hydra-core
+hydra-core<1.1
 joblib
 nnmnkwii >= 0.0.23
 nnsvs

diff --git a/synthesis/nnsvs_gen_override.py b/synthesis/nnsvs_gen_override.py
@@ -109,7 +109,7 @@ def gen_waveform(labels,
                                             aperiodicity.astype(np.float64),
                                             sample_rate, frame_period)
 
-    # 音量を小さくする
+    # 音量を小さくする(音割れ防止)
     # TODO: ここのかける定数をいい感じにする
     spectrogram *= 0.000000001
     sp = pyworld.code_spectral_envelope(spectrogram, sample_rate, 60)

diff --git a/synthesis/requirements.txt b/synthesis/requirements.txt
@@ -0,0 +1,12 @@
+hydra-core<1.1
+joblib
+nnmnkwii>=0.0.23
+nnsvs
+numpy>=1.20
+omegaconf
+scipy
+torch==1.7.1
+tqdm
+utaupy>=1.10
+pyyaml
+scikit-learn<0.24.0
diff --git a/train/conf/train/duration/train/myconfig_mdn_2.yaml b/train/conf/train/duration/train/myconfig_mdn_2.yaml
@@ -0,0 +1,29 @@
+# @package _group_
+
+out_dir:                   exp
+nepochs:                   50
+checkpoint_epoch_interval: 50  # same value as nepochs
+
+stream_wise_loss:          false
+use_detect_anomaly:        true
+
+optim:
+  optimizer:
+    name:                  Adam
+    params:
+      lr:                  0.001
+      betas:               [0.9, 0.999]
+      weight_decay:        0.0
+  lr_scheduler:
+    name:                  StepLR  # NOTE(review): presumably torch.optim.lr_scheduler.StepLR - confirm how the trainer resolves this name
+    params:
+      step_size:           2
+      gamma:               0.5  # NOTE(review): if the scheduler is stepped once per epoch, lr is halved every 2 epochs (25 times over 50 epochs) - confirm this is intended
+
+resume:
+  checkpoint:  # left empty, i.e. null
+  load_optimizer:          false
+
+cudnn:
+  benchmark:               false
+  deterministic:           true
diff --git a/train/log2csv.py b/train/log2csv.py
@@ -0,0 +1,62 @@
+#! /usr/bin/env python3
+# coding: utf-8
+# Copyright (c) 2020 oatsu
+"""
+nnsvsの学習時のログファイル train.log を読み取り、CSVに変換する。
+グラフ生成はしたいがとりあえずExcelでつくる。
+"""
+from glob import glob
+# from datetime import datetime
+# from pprint import pprint
+from os.path import basename, splitext
+
+from tqdm import tqdm
+
+
+def read_log(path_log):
+    """
+    Read the log file at path_log and return the loss values extracted from it.
+    """
+    # Read every line of the file
+    with open(path_log, 'r') as fl:  # NOTE(review): no encoding given, so the platform default is used
+        lines = fl.readlines()
+    # Lists that collect the loss values (kept as strings, not converted to float)
+    loss_train_no_dev = []
+    loss_dev = []
+    # Pick up the loss from each matching line: the 8th whitespace-separated field
+    for line in lines:
+        if '[train_no_dev] [Epoch ' in line:
+            loss_train_no_dev.append(line.split()[7])
+        elif '[dev] [Epoch ' in line:
+            loss_dev.append(line.split()[7])
+    # Return the pair (train_no_dev losses, dev losses)
+    return loss_train_no_dev, loss_dev
+
+
+def generate_csv(loss_train_no_dev, loss_dev, path_csv_out):
+    """
+    Write the losses to path_csv_out as CSV rows of: epoch number (from 1), train_no_dev loss, dev loss.
+    loss_train_no_dev (list) and loss_dev (list) must hold strings, because each row is built with str.join.
+    """
+    epoch_number_list = (str(i+1) for i in range(len(loss_train_no_dev)))
+    l_data = list(zip(epoch_number_list, loss_train_no_dev, loss_dev))  # zip() stops at the shorter list
+    # Build the output text: header line, then one comma-joined line per row (no trailing newline)
+    s_csv = 'epoch, loss(train_no_dev), loss(dev)\n'
+    s_csv += '\n'.join([','.join(v) for v in l_data])
+    with open(path_csv_out, 'w') as fc:
+        fc.write(s_csv)
+
+
+def main():  # Ask for a directory, then convert every *.log found under it (recursively) to a CSV file.
+    path_log_dir = input('path_log_dir: ').strip('"')  # drop double quotes around the entered path
+    list_path_log = glob(f'{path_log_dir}/**/*.log', recursive=True)
+    for i, path_log in enumerate(tqdm(list_path_log)):
+        loss_train_no_dev, loss_dev = read_log(path_log)
+        # datetime_now = datetime.now().strftime("%Y%m%d_%H%M%S")
+        path_csv_out = f'{splitext(basename(path_log))[0]}_{i}.csv'  # bare file name, so it is written to the current directory; _{i} keeps same-named logs apart
+        generate_csv(loss_train_no_dev, loss_dev, path_csv_out)
+
+
+if __name__ == '__main__':
+    main()
+    input('おわり')  # waits for Enter before exiting; the prompt text means "done"
diff --git a/train/stage0/assert_wav_is_longer_than_lab.py b/train/stage0/assert_wav_is_longer_than_lab.py
@@ -61,8 +61,12 @@ def main(path_config_yaml):
     # DBに同梱されていたLABファイルを丸める
     wav_dir_in = join(out_dir, 'wav')
     full_align_dir_in = join(out_dir, 'full_align_round')
+    full_score_dir_in = join(out_dir, 'full_score_round')
     # 点検する
+    print('Comparing length of LAB and WAV')
     compare_wav_files_and_lab_files(wav_dir_in, full_align_dir_in)
+    print('Comparing length of score and WAV')
+    compare_wav_files_and_lab_files(wav_dir_in, full_score_dir_in)
 
 
 if __name__ == '__main__':

diff --git a/train/stage0/compare_mono_align_and_mono_score.py b/train/stage0/compare_mono_align_and_mono_score.py
@@ -21,8 +21,7 @@
 from natsort import natsorted
 from tqdm import tqdm
 
-THRESHOLD = 250
-VOWELS = ('a', 'i', 'u', 'e', 'o', 'A', 'I', 'U', 'E', 'O', 'N')
+VOWELS = {'a', 'i', 'u', 'e', 'o', 'A', 'I', 'U', 'E', 'O', 'N'}
 
 
 def phoneme_is_ok(path_mono_align_lab, path_mono_score_lab):
@@ -31,10 +30,7 @@ def phoneme_is_ok(path_mono_align_lab, path_mono_score_lab):
     """
     mono_align_label = up.label.load(path_mono_align_lab)
     mono_score_label = up.label.load(path_mono_score_lab)
-    # assert len(mono_label) == len(sinsy_mono), \
-    #     'DB同梱のラベル({}, {})と楽譜から生成したラベル({}, {})の音素数が一致しません。'.format(
-    #     len(mono_label), path_mono_label, len(sinsy_mono), path_sinsy_mono
-    # )
+    # 全音素記号が一致したらTrueを返す
     for mono_align_phoneme, mono_score_phoneme in zip(mono_align_label, mono_score_label):
         if mono_align_phoneme.symbol != mono_score_phoneme.symbol:
             error_message = '\n'.join([
@@ -44,7 +40,14 @@ def phoneme_is_ok(path_mono_align_lab, path_mono_score_lab):
             ])
             logging.error(error_message)
             return False
-    # 全音素記号が一致したらTrueを返す
+    if len(mono_align_label) != len(mono_score_label):
+        error_message = '\n'.join([
+            f'DB同梱のラベルと楽譜から生成したラベルの音素数が一致しません。({basename(path_mono_align_lab)})',
+            f'  DB同梱ラベルの音素数    : {len(mono_align_label)}\t({path_mono_align_lab})',
+            f'  楽譜からのラベルの音素数: {len(mono_score_label)}\t({path_mono_score_lab})'
+        ])
+        logging.error(error_message)
+        return False
     return True
 
 
@@ -109,8 +112,7 @@ def offet_is_ok(path_mono_align_lab,
     最初の音素の長さを比較して、閾値以上ずれていたらエラーを返す。
     threshold_ms の目安: 300ms-600ms (5sigma-10sigma)
     """
-    k = {'strict': 5, 'medium': 6, 'lenient': 7}[mode]
-    # TODO: medianとか使うようにする
+    k = {'strict': 5, 'medium': 6, 'lenient': 7}.get(mode, 6)
     # 単位換算して100nsにする
     upper_threshold = mean_100ns + k * stdev_100ns
     lower_threshold = mean_100ns - k * stdev_100ns
@@ -150,7 +152,7 @@ def vowel_durations_are_ok(path_mono_align_lab,
     - ふつう: 5sigma
     - 厳しめ: 4sigma
     """
-    k = {'strict': 4, 'medium': 5, 'lenient': 6}[mode]
+    k = {'strict': 4, 'medium': 5, 'lenient': 6}.get(mode, 6)
     # 単位換算して100nsにする
     upper_threshold = mean_100ns + k * stdev_100ns
     lower_threshold = mean_100ns - k * stdev_100ns
@@ -190,6 +192,7 @@ def main(path_config_yaml):
     out_dir = config['out_dir']
     mono_align_files = natsorted(glob(f'{out_dir}/mono_align_round/*.lab'))
     mono_score_files = natsorted(glob(f'{out_dir}/mono_score_round/*.lab'))
+    duration_check_mode = config['stage0']['vowel_duration_check']
 
     # mono_align_labの最初の音素が時刻0から始まるようにする。
     print('Overwriting mono-align-LAB so that it starts with zero.')
@@ -212,7 +215,7 @@ def main(path_config_yaml):
     print('Checking first pau duration')
     for path_mono_align, path_mono_score in zip(tqdm(mono_align_files), mono_score_files):
         if not offet_is_ok(path_mono_align, path_mono_score,
-                           mean_100ns, stdev_100ns, mode='medium'):
+                           mean_100ns, stdev_100ns, mode=duration_check_mode):
             invalid_basenames.append(basename(path_mono_align))
     if len(invalid_basenames) > 0:
         raise Exception('DBから生成したラベルと楽譜から生成したラベルに不整合があります。'
@@ -222,7 +225,7 @@ def main(path_config_yaml):
     print('Comparing mono-align-LAB durations and mono-score-LAB durations')
     for path_mono_align, path_mono_score in zip(tqdm(mono_align_files), mono_score_files):
         vowel_durations_are_ok(path_mono_align, path_mono_score,
-                               mean_100ns, stdev_100ns, mode='strict')
+                               mean_100ns, stdev_100ns, mode=duration_check_mode)
 
 
 if __name__ == '__main__':

diff --git a/train/stage0/copy_files.py b/train/stage0/copy_files.py
@@ -44,7 +44,7 @@ def main(path_config_yaml):
     with open(path_config_yaml, 'r') as fy:
         config = yaml.load(fy, Loader=yaml.FullLoader)
     # 歌唱DBのパスを取得
-    db_root = expanduser(config['db_root']).strip('"')
+    db_root = expanduser(config['stage0']['db_root']).strip('"')
     # ファイルのコピー先を取得
     out_dir = config['out_dir'].strip('"')
 

diff --git a/train/stage0/segment_lab.py b/train/stage0/segment_lab.py
@@ -18,25 +18,36 @@
 from utaupy.label import Label
 
 
-def all_phonemes_are_rest(label) -> bool:
+def all_phonemes_are_rest(label: Union[Label, HTSFullLabel]) -> bool:
     """
-    フルラベル中に休符しかないかどうか判定
+    フルラベルまたはモノラベル中に休符しかないかどうか判定
     """
-    rests = ('pau', 'sil')
-    # モノラベルのとき
-    if isinstance(label, Label):
-        for phoneme in label:
-            if phoneme.symbol not in rests:
-                return False
-        return True
-    # フルラベルのとき
-    if isinstance(label, HTSFullLabel):
-        for oneline in label:
-            if oneline.phoneme.identity not in rests:
-                return False
-        return True
-    # フルラベルでもモノラベルでもないとき
-    raise ValueError("Argument 'label' must be Label object or HTSFullLabel object.")
+    rests = set(['pau', 'sil'])
+    # 全部の音素が休符であるか否か
+    result = all(phoneme.symbol in rests for phoneme in label)
+    return result
+
+# def all_phonemes_are_rest_old(label: Union[Label, HTSFullLabel]) -> bool:
+#     """
+#     フルラベル中に休符しかないかどうか判定(旧実装)
+#     """
+#     rests = set(['pau', 'sil'])
+#     # モノラベルのとき
+#     if isinstance(label, Label):
+#         for phoneme in label:
+#             if phoneme.symbol not in rests:
+#                 return False
+#         return True
+#     # フルラベルのとき
+#     if isinstance(label, HTSFullLabel):
+#         for oneline in label:
+#             if oneline.phoneme.identity not in rests:
+#                 return False
+#         return True
+#     # フルラベルでもモノラベルでもないとき
+#     raise ValueError("Argument 'label' must be Label object or HTSFullLabel object.")
+
+
 
 
 def split_mono_label_short(label: Label) -> List[Label]:
@@ -188,7 +199,7 @@ def split_full_label_long(full_label: HTSFullLabel) -> list:
     return result
 
 
-def split_label(label: Union[Label, HTSFullLabel], mode: str, middle_frequency: int = 3
+def split_label(label: Union[Label, HTSFullLabel], mode: str, middle_frequency: int
                 ) -> List[Union[Label, HTSFullLabel]]:
     """
     ラベルを分割してリストにして返す。フルラベルとモノラベルを自動で使い分ける。
@@ -214,29 +225,6 @@ def split_label(label: Union[Label, HTSFullLabel], mode: str, middle_frequency:
     return result
 
 
-def test_full(mode):
-    """
-    単独のフルラベルを休符で分割する。
-    """
-    path_in = input('path_in: ')
-    label = up.hts.load(path_in)
-    split_result = split_label(label, mode=mode)
-    for i, full_label in enumerate(split_result):
-        path_out = path_in.replace('.lab', f'_split_{str(i).zfill(6)}.lab')
-        full_label.write(path_out, strict_sinsy_style=False)
-
-
-def test_mono():
-    """
-    単独のモノラベルを休符で分割する。
-    """
-    path_in = input('path_in: ')
-    split_result = split_mono_label_long(up.label.load(path_in))
-    for i, mono_label in enumerate(split_result):
-        path_out = path_in.replace('.lab', f'_split_{str(i).zfill(6)}.lab')
-        mono_label.write(path_out)
-
-
 def remove_zensou_and_kousou(path_lab):
     """
     長すぎてGPUメモリを食いつぶすような音素を除去(前奏、間奏、後奏とか)
@@ -253,8 +241,8 @@ def main(path_config_yaml):
     with open(path_config_yaml, 'r') as fy:
         config = yaml.load(fy, Loader=yaml.FullLoader)
     out_dir = config['out_dir']
-    mode = config['segmentation_mode']
-    middle_frequency = config['middle_frequency']
+    mode = config['stage0']['segmentation_mode']
+    middle_frequency = config['stage0']['middle_frequency']
 
     full_score_round_files = natsorted(glob(f'{out_dir}/full_score_round/*.lab'))
     mono_score_round_files = natsorted(glob(f'{out_dir}/mono_score_round/*.lab'))
@@ -270,23 +258,23 @@ def main(path_config_yaml):
     for path in tqdm(full_score_round_files):
         songname = splitext(basename(path))[0]
         label = up.hts.load(path)
-        for idx, segment in enumerate(split_label(label, mode)):
+        for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
             path_out = f'{out_dir}/full_score_round_seg/{songname}_seg{idx}.lab'
             segment.write(path_out, strict_sinsy_style=False)
 
     print('Segmenting full_align_round label files')
     for path in tqdm(full_align_round_files):
         songname = splitext(basename(path))[0]
         label = up.hts.load(path)
-        for idx, segment in enumerate(split_label(label, mode)):
+        for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
             path_out = f'{out_dir}/full_align_round_seg/{songname}_seg{idx}.lab'
             segment.write(path_out, strict_sinsy_style=False)
 
     print('Segmenting mono_score_round label files')
     for path in tqdm(mono_score_round_files):
         songname = splitext(basename(path))[0]
         label = up.label.load(path)
-        for idx, segment in enumerate(split_label(label, mode)):
+        for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
             path_out = f'{out_dir}/mono_score_round_seg/{songname}_seg{idx}.lab'
             segment.write(path_out)
 
@@ -295,7 +283,7 @@ def main(path_config_yaml):
     for path in tqdm(mono_align_round_files):
         songname = splitext(basename(path))[0]
         label = up.label.load(path)
-        for idx, segment in enumerate(split_label(label, mode)):
+        for idx, segment in enumerate(split_label(label, mode, middle_frequency)):
             path_out = f'{out_dir}/mono_align_round_seg/{songname}_seg{idx}.lab'
             segment.write(path_out)