-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Client improvement & GPT-SoVITS support
1. [Core] Add GPT-SoVITS model training&inference 2. [Core] Simplify DatasetCreator to make it more readable and support GPT-SoVITS 3. [Core] Split 'OutputDir' param into params as 'OutputRoot'&'OutputDirName' 4. [GUI] Simplify all tools' output param options and allow user to manage output root directory in settings page 5. [GUI] Fix blank border issue (under windows10 system) for WindowBase 6. [GUI] Add rect-monitoring signal and mask effect for WindowBase and LineEditBase 7. [GUI] Support loading dict type info from manifest and appending local files' folder name for model management
- Loading branch information
1 parent
9538c54
commit c7bb295
Showing
47 changed files
with
17,103 additions
and
7,554 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import os, sys, shutil | ||
from typing import Union, Optional | ||
from glob import glob | ||
from pathlib import Path | ||
|
||
from .utils.Creating_Directories import create_directories | ||
from .utils.Convert_SRT_to_CSV import change_encoding, convert_srt_to_csv | ||
from .utils.Split_Audio import split_files | ||
from .utils.Create_DS_CSV import create_DS_csv | ||
from .utils.Merge_CSV import merge_csv | ||
from .utils.Merge_Transcripts_and_Files import merge_transcripts_and_wav_files | ||
from .utils.Clean import clean_unwanted_characters | ||
from .utils.Create_Dataset_Loading_Script import Transcript_Writer | ||
|
||
|
||
class Dataset_Creating:
    '''
    Create a TTS training dataset from SRT transcripts and speaker-labelled audio.

    Steps:
    1. Convert SRT to CSV
    2. Reorganize CSV content
    3. Split and downsample WAV
    '''
    def __init__(self,
        SRT_Dir: str,
        AudioSpeakersData_Path: str,
        DataFormat: str = 'PATH|NAME|LANG|TEXT',
        Output_Root: str = "./",
        Output_DirName: str = "",
        FileList_Name: str = 'FileList'
    ):
        '''
        SRT_Dir: directory containing the .srt transcript files.
        AudioSpeakersData_Path: either a directory tree of .wav files (speaker
            name taken from the parent folder) or a text file of
            'path|speaker' lines.
        DataFormat: output line format; Chinese keywords are translated to
            their canonical English tokens.
        Output_Root / Output_DirName: joined to form the split-audio output dir.
        FileList_Name: base name (without extension) of the final file list.
        '''
        self.SRT_Dir = SRT_Dir
        self.WAV_Dir_Split = Path(Output_Root).joinpath(Output_DirName).as_posix()

        # Scan the audio/speaker data exactly once (the source may be a
        # directory tree or a 'path|speaker' text file) and derive both the
        # input wav list and the split-audio -> speaker mapping from it.
        def Iter_AudioSpeakerPairs():
            Data_Path = Path(AudioSpeakersData_Path)
            if Data_Path.is_dir():
                for SubPath in glob(str(Data_Path.joinpath('**', '*.wav')), recursive = True):
                    # Speaker is the name of the folder containing the wav.
                    yield Path(SubPath).as_posix(), Path(SubPath).parent.name
            if Data_Path.is_file():
                with open(file = AudioSpeakersData_Path, mode = 'r', encoding = 'utf-8') as AudioSpeakersData:
                    for AudioSpeakerLine in AudioSpeakersData:
                        Fields = AudioSpeakerLine.split('|')
                        yield Fields[0], Fields[1].strip()

        self.WAV_Paths_Input = []
        self.AudioSpeakers = {}
        for Source_Audio, Speaker in Iter_AudioSpeakerPairs():
            self.WAV_Paths_Input.append(Source_Audio)
            # The mapping is keyed by the path the sliced audio will have
            # inside the split directory, not by the source path.
            Split_Audio = Path(self.WAV_Dir_Split).joinpath(Path(Source_Audio).name).as_posix()
            self.AudioSpeakers[Split_Audio] = Speaker

        # Translate Chinese format keywords to their canonical English tokens.
        self.DataFormat = DataFormat.replace('路径', 'PATH').replace('人名', 'NAME').replace('语言', 'LANG').replace('文本', 'TEXT')
        self.FileList_Path = Path(self.WAV_Dir_Split).joinpath(FileList_Name).as_posix() + ".txt"

    def CallingFunctions(self):
        '''
        Run the whole dataset-creation pipeline and remove temporary files.

        Exits the process if the SRT directory contains no .srt files.
        '''
        # Hoist the glob: the set of srt files does not change while running.
        SRT_Paths = glob(os.path.join(self.SRT_Dir, '*.srt'))
        SRT_Counter = len(SRT_Paths)

        if SRT_Counter == 0:
            print('!!! Please add srt_file(s) to %s-folder' %self.SRT_Dir)
            sys.exit()

        # Create directories
        CSV_Dir_Prepared = './Temp/ready_for_merging'
        CSV_Dir_Merged = './Temp/merged_csv'
        CSV_Dir_Final = './Temp/final_csv'
        create_directories(self.WAV_Dir_Split, CSV_Dir_Prepared, CSV_Dir_Merged, CSV_Dir_Final)

        # Changing encoding from utf-8 to utf-8-sig
        print('Encoding srt_file(s) to utf-8...')
        for SRT in SRT_Paths:
            change_encoding(SRT)
        print('Encoding of %s-file(s) changed' %SRT_Counter)
        print('---------------------------------------------------------------------')

        # Extracting information from srt-files to csv
        print('Extracting information from srt_file(s) to csv_files')
        for File in SRT_Paths:
            convert_srt_to_csv(File, CSV_Dir_Prepared)
        print('%s-file(s) converted and saved as csv-files to ./csv' %SRT_Counter)
        print('---------------------------------------------------------------------')

        # Slice audio according to start- and end-times in the transcript csvs
        print('Slicing audio according to start- and end-times of transcript_csvs...')
        split_files(CSV_Dir_Prepared, self.WAV_Paths_Input, self.WAV_Dir_Split)
        WAV_Counter = len(glob(os.path.join(self.WAV_Dir_Split, '*.wav')))
        print('Slicing complete. {} files in dir {}'.format(WAV_Counter, self.WAV_Dir_Split))
        print('---------------------------------------------------------------------')

        # Create a csv of filepaths and sizes of the split audio
        create_DS_csv(self.WAV_Dir_Split, CSV_Dir_Merged)
        print('DS_csv with Filepaths - and sizes created.')
        print('---------------------------------------------------------------------')

        # Join all separate csv files
        merge_csv(CSV_Dir_Prepared, CSV_Dir_Merged)
        print('Merged csv with all transcriptions created.')
        print('---------------------------------------------------------------------')

        # Merge the transcription csv with the file csv (paths and sizes)
        CSV_Name_Final = 'DS_training_final.csv'
        merge_transcripts_and_wav_files(CSV_Dir_Merged, CSV_Dir_Final, CSV_Name_Final)
        print('Final DS csv generated.')
        print('---------------------------------------------------------------------')

        # Clean the data of unwanted characters
        CSV_Path_Final_Cleaned = clean_unwanted_characters(CSV_Dir_Final, CSV_Name_Final)
        print('Unwanted characters cleaned.')
        print('---------------------------------------------------------------------')

        # Write the transcript file list used for model training
        Transcript_Writer(self.AudioSpeakers, self.DataFormat, CSV_Path_Final_Cleaned, self.WAV_Dir_Split, self.FileList_Path)
        print('Transcript written.')
        print('---------------------------------------------------------------------')

        # Remove the temporary folders
        for Temp_Dir in [CSV_Dir_Prepared, CSV_Dir_Merged, CSV_Dir_Final]:
            shutil.rmtree(Temp_Dir, ignore_errors = True)
        print('Temp files removed.')
        print('********************************************** FINISHED ************************************************')

        print(f'Final processed audio is in {self.WAV_Dir_Split}')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import os | ||
import pandas as pd | ||
#from unidecode import unidecode | ||
|
||
|
||
def Read_CSV(CSV_Path):
    '''
    Read a CSV file into a DataFrame.

    Returns None when the file exists but contains no data (an empty csv is
    expected when a transcript produced no rows); any other failure is a real
    error and propagates instead of being silently swallowed.
    '''
    try:
        return pd.read_csv(CSV_Path, engine = 'python')
    except pd.errors.EmptyDataError:
        return None
|
||
|
||
def clean_unwanted_characters(CSV_Dir_Final, CSV_Name_Final):
    '''
    Clean the transcripts of the final DS csv and save the result next to it.

    Strips srt font tags, drops empty rows and removes whitespace from the
    'transcript' column, then writes '<name>_cleaned.csv' into CSV_Dir_Final
    and returns the path of the cleaned file.
    '''
    DF_DS_Final = Read_CSV(os.path.join(CSV_Dir_Final, CSV_Name_Final))

    # Some srt files contain font colour tags; strip them all from the
    # transcripts (one loop instead of five near-identical replace calls).
    for Font_Tag in (
        '<font color=#91FFFF>',
        '<font color=#72FD59>',
        '<font color=#E8E858>',
        '<font color=#FFFFFF>',
        '</font>',
    ):
        DF_DS_Final['transcript'] = DF_DS_Final['transcript'].replace(Font_Tag, '', regex=True)

    DF_DS_Final = DF_DS_Final.dropna()

    # NOTE(review): r'\s+' -> '' removes ALL whitespace, including the spaces
    # between words, not only line feeds as the original comment suggested —
    # presumably intended for Chinese text; confirm for space-delimited
    # languages. (Raw string used so '\s' is a regex class, not an escape.)
    DF_DS_Final['transcript'] = DF_DS_Final['transcript'].replace(r'\s+', '', regex = True)
    DF_DS_Final['transcript'] = DF_DS_Final['transcript'].str.strip()

    # Save cleaned file
    CSV_Name_Final_Cleaned = CSV_Name_Final[:-4]  # drop the '.csv' suffix
    CSV_Path_Final_Cleaned = os.path.join(CSV_Dir_Final, (CSV_Name_Final_Cleaned + '_cleaned.csv'))
    DF_DS_Final.to_csv(CSV_Path_Final_Cleaned, header = True, index = False, encoding = 'utf-8')

    print('Length of ds_final: {}'.format(len(DF_DS_Final)))
    print('Final Files cleaned of unwanted characters')

    return CSV_Path_Final_Cleaned
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import pandas as pd | ||
import os | ||
import io | ||
import re | ||
import numpy as np | ||
|
||
|
||
def change_encoding(SRT_Path):
    '''
    Re-save the file as utf-8-sig so umlauts (e.g. ä, ö, ü) are preserved.
    '''
    # Read the whole file as utf-8, then write it back with a BOM.
    with io.open(SRT_Path, mode = 'r', encoding = 'utf-8') as source:
        content = source.read()
    with io.open(SRT_Path, mode = 'w', encoding = 'utf-8-sig') as target:
        target.write(content)
|
||
|
||
def convert_srt_to_csv(
    SRT_Path,
    CSV_Dir
):
    '''
    Extract start time, end time and subtitle text from an SRT file and save
    them to '<basename>.csv' in CSV_Dir. In preparation for audio splitting,
    an 'id' column is built from the file name plus a running number; the
    start/end times are converted to integer milliseconds.
    '''
    with open(SRT_Path, 'r', encoding = 'utf-8-sig') as h:
        Sub = h.readlines()  # all lines of the srt file

    Re_Pattern = r'[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}'
    Timestamp = re.compile(Re_Pattern)

    # Timestamp lines, e.g. '00:00:01,000 --> 00:00:02,500'
    Times = [line for line in Sub if Timestamp.search(line)]
    Start_Times = [time.split('-->')[0].strip() for time in Times]
    End_Times = [time.split('-->')[1].strip() for time in Times]

    # Group the text lines belonging to each timestamp. The numeric index
    # line preceding a timestamp ends up as the last element of the previous
    # group, so it is popped off before a new group is started.
    Lines = [[]]
    for Sentence in Sub:
        if Timestamp.match(Sentence):
            if Lines[-1]:  # guard: an srt without index lines must not raise IndexError
                Lines[-1].pop()
            Lines.append([])
        else:
            Lines[-1].append(Sentence)
    Lines = Lines[1:]  # drop the leading placeholder group

    Column_Names = ['id', 'start_times', 'end_times', 'transcript']
    DF_Text = pd.DataFrame(columns = Column_Names)

    DF_Text['start_times'] = Start_Times
    DF_Text['end_times'] = End_Times
    DF_Text['transcript'] = [" ".join(i).strip() for i in Lines]

    # Unique id per subtitle: '<filename>_<n>'
    ID_Extension = os.path.basename(SRT_Path).replace('.srt', '_')
    DF_Text['id'] = [ID_Extension + str(i) for i in range(len(DF_Text))]

    file_extension = ID_Extension[:-1]  # file name without the trailing '_'

    def convert_to_ms(time):
        '''Convert a 'HH:MM:SS,mmm' string to total integer milliseconds.'''
        h_ms = int(time[:2]) * 3600000
        m_ms = int(time[3:5]) * 60000
        s_ms = int(time[6:8]) * 1000
        ms = int(time[9:12])
        return h_ms + m_ms + s_ms + ms

    # convert_to_ms already returns int, so no extra int() pass is needed.
    DF_Text['start_times'] = DF_Text['start_times'].apply(convert_to_ms)
    DF_Text['end_times'] = DF_Text['end_times'].apply(convert_to_ms)

    DF_Text.to_csv(os.path.join(CSV_Dir, (file_extension + '.csv')), index = False, header = True, encoding = 'utf-8-sig')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import pandas as pd | ||
import os | ||
from glob import glob | ||
import wave | ||
import contextlib | ||
|
||
|
||
def create_DS_csv(
    WAV_Dir_Extract,
    CSV_Dir
):
    '''
    Create 'Filepath_Filesize.csv' in CSV_Dir listing absolute path, size in
    bytes and duration in seconds of every .wav file in WAV_Dir_Extract, in
    preparation for the final DS training csv.
    '''
    print(f'Extracting filepath and -size for every .wav file in {WAV_Dir_Extract}')

    # Collect one record per wav and build the DataFrame once, instead of
    # re-assigning a one-row frame and pd.concat-ing inside the loop.
    Rows = []
    for entry in glob(os.path.join(WAV_Dir_Extract, '*.wav')):
        with contextlib.closing(wave.open(entry, 'rb')) as f:
            duration = f.getnframes() / float(f.getframerate())
        Rows.append({
            'wav_filename': os.path.abspath(entry),
            'wav_filesize': os.path.getsize(entry),
            'duration': duration,
        })

    Data = pd.DataFrame(Rows, columns = ['wav_filename', 'wav_filesize', 'duration'])
    Data.to_csv(os.path.join(CSV_Dir, 'Filepath_Filesize.csv'), header = True, index = False, encoding = 'utf-8-sig')
Oops, something went wrong.