-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathnvidia_preprocessing.py
55 lines (48 loc) · 2.2 KB
/
nvidia_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import glob
import tqdm
import torch
import argparse
import numpy as np
from utils.stft import TacotronSTFT
from utils.util import read_wav_np
from dataset.audio_processing import pitch
from utils.hparams import HParam
def main(args, hp):
    """Extract mel, energy and pitch features for every wav under a directory.

    Every ``*.wav`` found recursively below ``args.data_path`` is converted
    into three ``.npy`` files that share the wav's base name (extension
    removed) and are written to the ``mels``, ``energy`` and ``pitch``
    sub-directories of ``hp.data.data_dir``. The sub-directories are created
    if they do not exist.

    Args:
        args: Parsed command-line namespace; only ``args.data_path`` is read.
        hp: Hyper-parameter object; ``hp.audio.*`` configures the STFT and
            ``hp.data.data_dir`` is the output root.
    """
    stft = TacotronSTFT(filter_length=hp.audio.n_fft,
                        hop_length=hp.audio.hop_length,
                        win_length=hp.audio.win_length,
                        n_mel_channels=hp.audio.n_mels,
                        sampling_rate=hp.audio.sample_rate,
                        mel_fmin=hp.audio.fmin,
                        mel_fmax=hp.audio.fmax)

    wav_files = glob.glob(os.path.join(args.data_path, '**', '*.wav'), recursive=True)

    mel_path = os.path.join(hp.data.data_dir, 'mels')
    energy_path = os.path.join(hp.data.data_dir, 'energy')
    pitch_path = os.path.join(hp.data.data_dir, 'pitch')
    for out_dir in (mel_path, energy_path, pitch_path):
        os.makedirs(out_dir, exist_ok=True)

    print("Sample Rate : ", hp.audio.sample_rate)

    for wavpath in tqdm.tqdm(wav_files, desc='preprocess wav to mel'):
        # The sample rate returned by the reader is not needed here.
        _, wav = read_wav_np(wavpath, hp.audio.sample_rate)

        p = pitch(wav, hp)  # [T, ] T = Number of frames
        wav = torch.from_numpy(wav).unsqueeze(0)
        mel, mag = stft.mel_spectrogram(wav)  # mel [1, 80, T] mag [1, num_mag, T]
        mel = mel.squeeze(0)  # [num_mel, T]
        mag = mag.squeeze(0)  # [num_mag, T]
        e = torch.norm(mag, dim=0)  # [T, ] norm over the magnitude bins of each frame
        # Drop any pitch frames beyond the mel length. This only truncates:
        # a pitch track shorter than the mel is saved as-is.
        p = p[:mel.shape[1]]

        # Strip only the final extension. Splitting on the first "." would
        # turn "spk.001.wav" and "spk.002.wav" into the same id "spk" and the
        # later file would silently overwrite the earlier one's features.
        # NOTE: outputs are still keyed by base name alone, so equally named
        # wavs in different sub-directories share one output file.
        utt_id = os.path.splitext(os.path.basename(wavpath))[0]
        npy_name = utt_id + '.npy'

        np.save(os.path.join(mel_path, npy_name), mel.numpy(), allow_pickle=False)
        np.save(os.path.join(energy_path, npy_name), e.numpy(), allow_pickle=False)
        np.save(os.path.join(pitch_path, npy_name), p, allow_pickle=False)
if __name__ == '__main__':
    # Script entry point: both options are mandatory; the config file is
    # loaded into an HParam object before feature extraction starts.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-d', '--data_path',
        type=str,
        required=True,
        help="root directory of wav files",
    )
    cli.add_argument(
        '-c', '--config',
        type=str,
        required=True,
        help="yaml file for configuration",
    )

    args = cli.parse_args()
    hp = HParam(args.config)
    main(args, hp)