Commit
Merge pull request #101 from r9y9/svs
[Frontend] [hts]: fixes for singing voice synthesis
r9y9 authored Apr 26, 2020
2 parents 39ac339 + 357966b commit 1cfaeca
Showing 8 changed files with 190 additions and 29 deletions.
2 changes: 1 addition & 1 deletion appveyor.yml
@@ -26,7 +26,7 @@ install:
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a
- "conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy cython nose pytorch -c pytorch"
- "conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy cython nose pytorch=1.4 -c pytorch"
- activate test-environment
- pip install scikit-learn==0.20.0

6 changes: 4 additions & 2 deletions docs/changelog.rst
@@ -3,7 +3,8 @@ Change log

v0.0.21 <2020-xx-xx>

- `#99`_: FIx future warning from sklearn
- `#99`_: Fix future warning from sklearn
- `#101`_: [hts][frontend] various fixes for singing voice synthesis. Our frontend now supports MIDI number extraction. HTSLabelFile supports list and slice indexing.

--------------------

@@ -198,4 +199,5 @@ v0.0.1 <2017-08-14>
.. _#91: https://github.com/r9y9/nnmnkwii/issues/91
.. _#95: https://github.com/r9y9/nnmnkwii/issues/95
.. _#98: https://github.com/r9y9/nnmnkwii/pull/98
.. _#99: https://github.com/r9y9/nnmnkwii/issues/99
.. _#99: https://github.com/r9y9/nnmnkwii/issues/99
.. _#101: https://github.com/r9y9/nnmnkwii/pull/101
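The changelog entry above summarizes the user-facing effect: with a singing-voice question set, continuous questions that capture absolute pitch contexts now come out as MIDI note numbers. A minimal end-to-end sketch under assumed inputs (the label and question file names here are hypothetical; `linguistic_features` itself is unchanged by this commit, and the new `HTSLabelFile` indexing is illustrated after the `nnmnkwii/io/hts.py` diff below):

```python
from nnmnkwii.io import hts
from nnmnkwii.frontend import merlin as fe

# Hypothetical singing-voice question set and full-context label file.
binary_dict, continuous_dict = hts.load_question_set("questions_svs.hed")
labels = hts.load("song_001.lab")

# Continuous questions that capture note names (e.g. "E3") now yield
# MIDI numbers in the resulting linguistic feature matrix.
features = fe.linguistic_features(labels, binary_dict, continuous_dict)
```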
21 changes: 21 additions & 0 deletions nnmnkwii/frontend/__init__.py
@@ -1 +1,22 @@
from __future__ import division, print_function, absolute_import

NOTE_MAPPING = {
'A0': 21, 'Bb0': 22, 'B0': 23, 'C1': 24, 'Db1': 25, 'D1': 26,
'Eb1': 27, 'E1': 28, 'F1': 29, 'Gb1': 30, 'G1': 31, 'Ab1': 32,
'A1': 33, 'Bb1': 34, 'B1': 35, 'C2': 36, 'Db2': 37, 'D2': 38,
'Eb2': 39, 'E2': 40, 'F2': 41, 'Gb2': 42, 'G2': 43, 'Ab2': 44,
'A2': 45, 'Bb2': 46, 'B2': 47, 'C3': 48, 'Db3': 49, 'D3': 50,
'Eb3': 51, 'E3': 52, 'F3': 53, 'Gb3': 54, 'G3': 55, 'Ab3': 56,
'A3': 57, 'Bb3': 58, 'B3': 59, 'C4': 60, 'Db4': 61, 'D4': 62,
'Eb4': 63, 'E4': 64, 'F4': 65, 'Gb4': 66, 'G4': 67, 'Ab4': 68,
'A4': 69, 'Bb4': 70, 'B4': 71, 'C5': 72, 'Db5': 73, 'D5': 74,
'Eb5': 75, 'E5': 76, 'F5': 77, 'Gb5': 78, 'G5': 79, 'Ab5': 80,
'A5': 81, 'Bb5': 82, 'B5': 83, 'C6': 84, 'Db6': 85, 'D6': 86,
'Eb6': 87, 'E6': 88, 'F6': 89, 'Gb6': 90, 'G6': 91, 'Ab6': 92,
'A6': 93, 'Bb6': 94, 'B6': 95, 'C7': 96, 'Db7': 97, 'D7': 98,
'Eb7': 99, 'E7': 100, 'F7': 101, 'Gb7': 102, 'G7': 103, 'Ab7': 104,
'A7': 105, 'Bb7': 106, 'B7': 107, 'C8': 108, 'Db8': 109, 'D8': 110,
'Eb8': 111, 'E8': 112, 'F8': 113, 'Gb8': 114, 'G8': 115, 'Ab8': 116,
'A8': 117, 'Bb8': 118, 'B8': 119, 'C9': 120, 'Db9': 121, 'D9': 122,
'Eb9': 123, 'E9': 124, 'F9': 125, 'Gb9': 126, 'G9': 127,
}
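The new `NOTE_MAPPING` table follows standard MIDI numbering (A0 = 21 up to G9 = 127, with flats spelled `b`). A small sketch of how it can be used directly; the reverse-lookup dict is not part of the commit and is shown only for illustration:

```python
from nnmnkwii.frontend import NOTE_MAPPING

# Note name -> MIDI note number
assert NOTE_MAPPING["C4"] == 60   # middle C
assert NOTE_MAPPING["A4"] == 69   # concert A (440 Hz)

# Hypothetical reverse lookup: MIDI number -> note name. Only flat spellings
# ("Db4" rather than "C#4") are recoverable, since that is how the keys above
# are written.
MIDI_TO_NOTE = {v: k for k, v in NOTE_MAPPING.items()}
print(MIDI_TO_NOTE[60])  # -> "C4"
```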
8 changes: 5 additions & 3 deletions nnmnkwii/frontend/merlin.py
@@ -45,6 +45,7 @@
import numpy as np

from nnmnkwii.io import hts
from nnmnkwii.frontend import NOTE_MAPPING


def get_frame_feature_size(subphone_features="full"):
@@ -155,6 +156,8 @@ def pattern_matching_continous_position(continuous_dict, label):
ms = current_compiled.search(label)
if ms is not None:
continuous_value = ms.group(1)
if continuous_value in NOTE_MAPPING:
continuous_value = NOTE_MAPPING[continuous_value]

lab_continuous_vector[0, i] = continuous_value

@@ -236,7 +239,6 @@ def load_labels_with_phone_alignment(hts_labels,
raise ValueError(
"Combination of subphone_features and add_frame_features is not supported: {}, {}".format(
subphone_features, add_frame_features))

label_feature_matrix[label_feature_index:label_feature_index +
frame_number, ] = current_block_binary_array
label_feature_index = label_feature_index + frame_number
@@ -587,7 +589,7 @@ def extract_dur_from_state_alignment_labels(hts_labels,
else:
pass

# dur_feature_matrix = dur_feature_matrix[0:dur_feature_index, ]
dur_feature_matrix = dur_feature_matrix[0:dur_feature_index, ]
return dur_feature_matrix


@@ -634,7 +636,7 @@ def extract_dur_from_phone_alignment_labels(hts_labels,
else:
assert False

# dur_feature_matrix = dur_feature_matrix[0:dur_feature_index]
dur_feature_matrix = dur_feature_matrix[0:dur_feature_index]
return dur_feature_matrix


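With the `pattern_matching_continous_position` change above, a continuous-feature question whose captured group is a note name (for example `E3`) is converted to its MIDI number before being written into `lab_continuous_vector`; plain numeric captures pass through unchanged. A rough sketch of that branch in isolation, using a hypothetical question regex and context label (real usage goes through `load_question_set` and `linguistic_features`):

```python
import re
from nnmnkwii.frontend import NOTE_MAPPING

# Hypothetical continuous question capturing an absolute pitch such as "E3".
note_question = re.compile(r"/E:([A-Z][b]?[0-9]+)\]")
label = "xx^xx-i+xx=xx/E:E3]"  # made-up singing-voice context label

ms = note_question.search(label)
if ms is not None:
    value = ms.group(1)
    # Mirrors the new branch: note names map to MIDI numbers.
    if value in NOTE_MAPPING:
        value = NOTE_MAPPING[value]
    print(value)  # -> 52 for "E3"
```

The same file also uncomments the trimming of `dur_feature_matrix`, so both duration extraction functions now return a matrix cut to the number of rows actually written.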
75 changes: 52 additions & 23 deletions nnmnkwii/io/hts.py
@@ -42,8 +42,7 @@

import numpy as np
import re

# TODO: consider two label alignmetn format
from copy import copy


class HTSLabelFile(object):
@@ -96,13 +95,29 @@ def __init__(self, frame_shift_in_micro_sec=50000):
self.start_times = []
self.end_times = []
self.contexts = []
frame_shift_in_micro_sec = frame_shift_in_micro_sec
self.frame_shift_in_micro_sec = frame_shift_in_micro_sec

def __len__(self):
return len(self.start_times)

def __getitem__(self, idx):
return self.start_times[idx], self.end_times[idx], self.contexts[idx]
if isinstance(idx, slice):
# yes, this is inefficient and there is probably a better way,
# but this is okay for now
current, stop, _ = idx.indices(len(self))
obj = copy(self)
obj.start_times = obj.start_times[current:stop]
obj.end_times = obj.end_times[current:stop]
obj.contexts = obj.contexts[current:stop]
return obj
elif isinstance(idx, list):
obj = copy(self)
obj.start_times = list(np.asarray(obj.start_times)[idx])
obj.end_times = list(np.asarray(obj.end_times)[idx])
obj.contexts = list(np.asarray(obj.contexts)[idx])
return obj
else:
return self.start_times[idx], self.end_times[idx], self.contexts[idx]

def __str__(self):
ret = ""
@@ -115,11 +130,18 @@ def __str__(self):
def __repr__(self):
return str(self)

def append(self, label):
def round_(self):
s = self.frame_shift_in_micro_sec
self.start_times = list(np.round(np.asarray(self.start_times) / s).astype(np.int) * s)
self.end_times = list(np.round(np.asarray(self.end_times) / s).astype(np.int) * s)
return self

def append(self, label, strict=True):
"""Append a single alignment label
Args:
label (tuple): tuple of (start_time, end_time, context).
strict (bool): strict mode.
Returns:
self
@@ -132,14 +154,15 @@ def append(self, label):
start_time = int(start_time)
end_time = int(end_time)

if start_time >= end_time:
raise ValueError(
"end_time ({}) must be larger than start_time ({}).".format(
end_time, start_time))
if len(self.end_times) > 0 and start_time != self.end_times[-1]:
raise ValueError(
"start_time ({}) must be equal to the last end_time ({}).".format(
start_time, self.end_times[-1]))
if strict:
if start_time >= end_time:
raise ValueError(
"end_time ({}) must be larger than start_time ({}).".format(
end_time, start_time))
if len(self.end_times) > 0 and start_time != self.end_times[-1]:
raise ValueError(
"start_time ({}) must be equal to the last end_time ({}).".format(
start_time, self.end_times[-1]))

self.start_times.append(start_time)
self.end_times.append(end_time)
@@ -152,14 +175,14 @@ def set_durations(self, durations, frame_shift_in_micro_sec=50000):
TODO:
this should be refactored
"""
offset = self.start_times[0]

# Unwrap state-axis
end_times = np.cumsum(
end_times = offset + np.cumsum(
durations.reshape(-1, 1) * frame_shift_in_micro_sec).astype(np.int)
if len(end_times) != len(self.end_times):
raise RuntimeError("Unexpected input, maybe")
# Assuming first label starts with time `0`
# Is this really true? probably no
start_times = np.hstack((0, end_times[:-1])).astype(np.int)
start_times = np.hstack((offset, end_times[:-1])).astype(np.int)
self.start_times, self.end_times = start_times, end_times

def load(self, path=None, lines=None):
Expand Down Expand Up @@ -302,7 +325,7 @@ def load(path=None, lines=None):
return labels.load(path, lines)


def wildcards2regex(question, convert_number_pattern=False):
def wildcards2regex(question, convert_number_pattern=False, convert_note_pattern=True):
"""subphone_features
Convert HTK-style question into regular expression for searching labels.
If convert_number_pattern, keep the following sequences unescaped for
Expand All @@ -329,17 +352,25 @@ def wildcards2regex(question, convert_number_pattern=False):
question = question.replace('\\(\\\\d\\+\\)', '(\d+)')
question = question.replace(
'\\(\\[\\\\d\\\\\\.\\]\\+\\)', '([\d\.]+)')
if convert_note_pattern:
question = question.replace(
'\\(\\[A\\-Z\\]\\[b\\]\\?\\[0\\-9\\]\\+\\)', '([A-Z][b]?[0-9]+)')
question = question.replace('\\(\\\\NOTE\\)', '([A-Z][b]?[0-9]+)')
return question


def load_question_set(qs_file_name):
def load_question_set(qs_file_name, append_hat_for_LL=True):
"""Load HTS-style question and convert it to binary/continuous feature
extraction regexes.
This code was taken from Merlin.
Args:
qs_file_name (str): Input HTS-style question file path
append_hat_for_LL (bool): Append ^ for LL regex search.
Note that the left-most context is assumed to be phoneme identity
before the previous phoneme (i.e. LL-xx). This parameter should be False
for the HTS-demo_NIT-SONG070-F001 demo.
Returns:
(binary_dict, continuous_dict): Binary/continuous feature extraction
@@ -356,8 +387,7 @@ def load_question_set(qs_file_name):
continuous_qs_index = 0
binary_dict = {}
continuous_dict = {}
# I guess `LL` means Left-left, but it doesn't seem to be docmented
# anywhere

LL = re.compile(re.escape('LL-'))

for line in lines:
@@ -383,10 +413,9 @@
continuous_qs_index = continuous_qs_index + 1
elif temp_list[0] == 'QS':
re_list = []
# import ipdb; ipdb.set_trace()
for temp_question in question_list:
processed_question = wildcards2regex(temp_question)
if LL.search(question_key) and processed_question[0] != '^':
if append_hat_for_LL and LL.search(question_key) and processed_question[0] != '^':
processed_question = '^' + processed_question
re_list.append(re.compile(processed_question))

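Taken together, the `HTSLabelFile` changes above add slice and list indexing, an optional non-strict `append`, frame-shift rounding via `round_()`, and a `set_durations` that preserves the original start offset instead of assuming the first label starts at time 0. A small sketch of the new surface, built from made-up in-memory labels rather than a real `.lab` file:

```python
from nnmnkwii.io import hts

labels = hts.HTSLabelFile()
# strict=True (the default) requires increasing, contiguous segments.
labels.append((0, 1000000, "sil"))
labels.append((1000000, 2000000, "a"))
# strict=False skips those checks (handy for rough, zero-length, or overlapping input).
labels.append((2000000, 2000000, "pau"), strict=False)

# New: slicing returns another HTSLabelFile ...
print(labels[:2])
# ... and list indexing picks arbitrary rows.
print(labels[[0, 2]])

# New: snap start/end times to the frame shift (50000 by default).
labels.round_()

# For question sets whose left-most context is not "phoneme before the previous
# phoneme" (e.g. the HTS-demo_NIT-SONG070-F001 demo), the new flag applies;
# the question file name here is hypothetical:
# binary_dict, continuous_dict = hts.load_question_set(
#     "nit_song070_questions.hed", append_hat_for_LL=False)
```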

