Skip to content

Commit 6579229

Browse files
committed
更新拼音参数和几条文件路径,声学模型文件跟之前版本不再兼容,需要重新训练
1 parent 07c9b36 commit 6579229

File tree

6 files changed: +17 -17 lines changed

6 files changed: +17 -17 lines changed

SpeechModel24.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,9 @@ class ModelSpeech(): # 语音模型类
2828
def __init__(self, datapath):
2929
'''
3030
初始化
31-
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
31+
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
3232
'''
33-
MS_OUTPUT_SIZE = 1422
33+
MS_OUTPUT_SIZE = 1424
3434
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
3535
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
3636
self.label_max_string_length = 64

SpeechModel25.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,9 @@ class ModelSpeech(): # 语音模型类
2828
def __init__(self, datapath):
2929
'''
3030
初始化
31-
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
31+
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
3232
'''
33-
MS_OUTPUT_SIZE = 1422
33+
MS_OUTPUT_SIZE = 1424
3434
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
3535
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
3636
self.label_max_string_length = 64

SpeechModel251.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,9 @@ class ModelSpeech(): # 语音模型类
3232
def __init__(self, datapath):
3333
'''
3434
初始化
35-
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
35+
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
3636
'''
37-
MS_OUTPUT_SIZE = 1422
37+
MS_OUTPUT_SIZE = 1424
3838
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
3939
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
4040
self.label_max_string_length = 64

SpeechModel251_p.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,9 @@ class ModelSpeech(): # 语音模型类
3232
def __init__(self, datapath):
3333
'''
3434
初始化
35-
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
35+
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
3636
'''
37-
MS_OUTPUT_SIZE = 1422
37+
MS_OUTPUT_SIZE = 1424
3838
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
3939
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
4040
self.label_max_string_length = 64

SpeechModel26.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,9 @@ class ModelSpeech(): # 语音模型类
2929
def __init__(self, datapath):
3030
'''
3131
初始化
32-
默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块
32+
默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块
3333
'''
34-
MS_OUTPUT_SIZE = 1422
34+
MS_OUTPUT_SIZE = 1424
3535
self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小
3636
#self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch
3737
self.label_max_string_length = 64

test.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,15 @@
88
import platform as plat
99

1010
from SpeechModel251 import ModelSpeech
11-
from LanguageModel import ModelLanguage
11+
from LanguageModel2 import ModelLanguage
1212
from keras import backend as K
1313

1414
datapath = ''
1515
modelpath = 'model_speech'
1616

1717
system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断
1818
if(system_type == 'Windows'):
19-
datapath = 'E:\\语音数据集'
19+
datapath = 'D:\\语音数据集'
2020
modelpath = modelpath + '\\'
2121
elif(system_type == 'Linux'):
2222
datapath = 'dataset'
@@ -29,14 +29,14 @@
2929
ms = ModelSpeech(datapath)
3030

3131
#ms.LoadModel(modelpath + 'm22_2\\0\\speech_model22_e_0_step_257000.model')
32-
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_117000.model')
32+
ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_12000.model')
3333

3434
#ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True)
3535
r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
36-
#r = ms.RecognizeSpeech_FromFile('E:\语音数据集\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
37-
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
38-
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV')
39-
#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
36+
#r = ms.RecognizeSpeech_FromFile('D:\语音数据集\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
37+
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
38+
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\A11_167.WAV')
39+
#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\D4_750.wav')
4040

4141
K.clear_session()
4242

0 commit comments

Comments
 (0)