Skip to content

Commit f0acefa

Browse files
authored
Add files via upload
1 parent e93c13e commit f0acefa

File tree

1 file changed

+8
-51
lines changed

1 file changed

+8
-51
lines changed

data/ljspeech.py

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,34 @@
1-
from concurrent.futures import ProcessPoolExecutor
2-
from functools import partial
31
import numpy as np
42
import os
5-
import audio
3+
import Audio
64

75

8-
def build_from_path(in_dir, out_dir, num_workers=16, tqdm=lambda x: x):
9-
'''Preprocesses the LJ Speech dataset from a given input path into a given output directory.
10-
11-
Args:
12-
in_dir: The directory where you have downloaded the LJ Speech dataset
13-
out_dir: The directory to write the output into
14-
num_workers: Optional number of worker processes to parallelize across
15-
tqdm: You can optionally pass tqdm to get a nice progress bar
16-
17-
Returns:
18-
A list of tuples describing the training examples. This should be written to train.txt
19-
'''
20-
21-
# We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and you
22-
# can omit it and just call _process_utterance on each input if you want.
23-
24-
executor = ProcessPoolExecutor(max_workers=num_workers)
25-
futures = []
6+
def build_from_path(in_dir, out_dir):
    '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Reads metadata.csv (pipe-separated: id|raw text|normalized text),
    computes and saves a mel spectrogram for each wav via
    _process_utterance, and collects the per-utterance return values.

    Args:
        in_dir: Directory containing the LJ Speech dataset
                (metadata.csv plus a wavs/ subdirectory).
        out_dir: Directory to write the mel spectrogram .npy files into.

    Returns:
        A list with one entry per utterance (whatever _process_utterance
        returns — the normalized text); typically written to train.txt.
    '''
    out = []
    index = 1

    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            # Skip blank or truncated lines: a bare newline yields [''],
            # and parts[2] below would raise IndexError.
            if len(parts) < 3:
                continue
            wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
            text = parts[2]
            out.append(_process_utterance(out_dir, index, wav_path, text))

            # Lightweight progress report every 100 utterances.
            if index % 100 == 0:
                print("Done %d" % index)
            index += 1

    return out
4122

4223

4324
def _process_utterance(out_dir, index, wav_path, text):
    '''Preprocesses a single utterance audio/text pair.

    Computes a mel-scale spectrogram for the given wav file and writes it
    to disk; returns the text so the caller can accumulate train.txt rows.

    Args:
        out_dir: Directory to write the spectrogram file into.
        index: Numeric index used in the spectrogram filename.
        wav_path: Path to the audio file containing the speech input.
        text: The text spoken in the input audio file.

    Returns:
        The (unmodified) text for this utterance.
    '''
    # Compute a mel-scale spectrogram from the wav.
    # NOTE(review): assumes Audio.tools.get_mel returns a torch tensor
    # (hence .numpy()) with frames on axis 1 — confirm against Audio.tools.
    mel_spectrogram = Audio.tools.get_mel(wav_path).numpy().astype(np.float32)

    # Write the spectrogram to disk, transposed so frames come first.
    mel_filename = 'ljspeech-mel-%05d.npy' % index
    np.save(os.path.join(out_dir, mel_filename),
            mel_spectrogram.T, allow_pickle=False)

    return text

0 commit comments

Comments
 (0)