add Data annotation function #78

CheshireCC · Mar 5, 2024 · 08a7a5f · 08a7a5f
1 parent 6cf5513
commit 08a7a5f
Show file tree

Hide file tree

Showing 5 changed files with 40 additions and 16 deletions.
diff --git a/fasterWhisperGUIConfig.json b/fasterWhisperGUIConfig.json
@@ -6,8 +6,8 @@
         "tracks": 1
     },
     "model_param": {
-        "localModel": false,
-        "onlineModel": true,
+        "localModel": true,
+        "onlineModel": false,
         "model_path": "",
         "modelName": 0,
         "use_v3_model": false,
@@ -39,7 +39,7 @@
     },
     "Transcription_param": {
         "aggregate_contents": true,
-        "language": 1,
+        "language": 3,
         "task": false,
         "beam_size": "5",
         "best_of": "5",
@@ -67,11 +67,11 @@
         "tabMovable": false,
         "tabScrollable": false,
         "tabShadowEnabled": false,
-        "tabMaxWidth": 367,
+        "tabMaxWidth": 253,
         "closeDisplayMode": 0,
         "whisperXMinSpeaker": 3,
         "whisperXMaxSpeaker": 3,
-        "outputFormat": 2,
+        "outputFormat": 0,
         "outputEncoding": 1
     }
 }
diff --git a/faster_whisper_GUI/config.py b/faster_whisper_GUI/config.py
@@ -4,7 +4,9 @@
 Language_without_space = ["ja","zh","ko","yue"]
 Language_dict = {
                 "en": "english",
-                "zh": "chinese",
+                "zht": "Traditional Chinese",
+                "zhs": "Simplified Chinese ",
+                "yue": "cantonese",
                 "de": "german",
                 "es": "spanish",
                 "ru": "russian",
@@ -102,7 +104,6 @@
                 "ba": "bashkir",
                 "jw": "javanese",
                 "su": "sundanese",
-                "yue": "cantonese"
             }
 
 Preciese_list = ['int8',

diff --git a/faster_whisper_GUI/mainWindows.py b/faster_whisper_GUI/mainWindows.py
@@ -620,6 +620,9 @@ def getParamTranscribe(self) -> dict:
         language = self.page_transcribes.combox_language.currentText().split("-")[0]
         if language == "Auto":
             language = None
+        if language in ["zht","zhs"]:
+            language = "zh"
+
         Transcribe_params["language"] = language
 
         task = self.page_transcribes.switchButton_Translate_to_English.isChecked()
@@ -1331,8 +1334,10 @@ def outputAudioPartWithSpeaker(self):
         # self.page_output.outputAudioPartWithSpeakerButton.setEnabled(False)
         self.setPageOutButtonStatus()
 
+        language = self.page_transcribes.combox_language.currentText().split("-")[0]
+
         output_path = self.page_output.outputGroupWidget.LineEdit_output_dir.text()
-        self.splitAudioFileWithSpeakerWorker = SplitAudioFileWithSpeakersWorker(self.current_result,output_path, self)
+        self.splitAudioFileWithSpeakerWorker = SplitAudioFileWithSpeakersWorker(self.current_result,output_path,language ,self)
         self.splitAudioFileWithSpeakerWorker.result_signal.connect(self.splitAudioFileWithSpeakerWorkerFinished)
         self.splitAudioFileWithSpeakerWorker.current_task_signal.connect(lambda file: self.setStateTool(self.tr("分割音频"), self.tr("处理文件：") + file, False))
         self.splitAudioFileWithSpeakerWorker.start()

diff --git a/faster_whisper_GUI/split_audio.py b/faster_whisper_GUI/split_audio.py
@@ -10,10 +10,11 @@ class SplitAudioFileWithSpeakersWorker(QThread):
     result_signal = Signal(str)
     current_task_signal = Signal(str)
 
-    def __init__(self, segments_path_info_list:list, output_path, parent=None):
+    def __init__(self, segments_path_info_list:list, output_path, language="", parent=None):
         super().__init__(parent)
         self.segments_path_info_list = segments_path_info_list
         self.output_path = output_path
+        self.language = language
 
         # 检查输出目录
         if output_path and not os.path.exists(self.output_path):
@@ -39,8 +40,8 @@ def getOutPutFileName(self, output_path:str, start_time:str, end_time:str, speak
         if not(speaker is None) and speaker != "":
             fileName = os.path.join(output_path, f"{speaker}_{start_time.replace(':','_')}_{end_time.replace(':','_')}.wav")
         else:
-            fileName = os.path.join(output_path, f"UnClassedSpeaker_{start_time.replace(':','_')}_{end_time.replace(':','_')}.wav")
-        return fileName
+            fileName = os.path.join(output_path, f"UnKnownSpeaker{start_time.replace(':','_')}_{end_time.replace(':','_')}.wav")
+        return fileName.replace('\\','/')
 
 
     def run(self):
@@ -58,31 +59,48 @@ def run(self):
             else:
                 output_path = self.output_path
             output_path = os.path.join(output_path, ".".join(file.split('.')[:-1]))
+            output_path = output_path.replace("\\","/")
 
             # print(output_path)
             # 检查输出路径
             if not os.path.exists(output_path):
                 os.makedirs(output_path)
 
+            # 数据标注文件
+            list_file = open(f"{output_path + '/' + '00_list.csv'}","w",encoding="utf8")
+            # Format (comma-separated columns): vocal_path,speaker_name,language,text
+            list_file.write("vocal_path,speaker_name,language,text\n")
+
             for segment in segments:
                 # if not segment.speaker : continue
 
-                start_time = secondsToHMS(segment.start)
-                end_time = secondsToHMS(segment.end)
+                start_time = secondsToHMS(segment.start).replace(',','.')
+                end_time = secondsToHMS(segment.end).replace(',','.')
                 speaker = segment.speaker
 
-                commandLine = self.creatCommandLine(start_time.replace(',','.'),end_time.replace(',','.'),path,output_path,speaker)
+                if speaker is None or speaker == "":
+                    speaker = "UnKnownSpeaker"
+
+                commandLine = self.creatCommandLine(start_time,end_time,path,output_path,speaker)
 
                 # print(commandLine)
                 temp_process = subprocess.Popen(commandLine, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding="utf-8", text=True,
                                                 creationflags=subprocess.CREATE_NO_WINDOW)
                 temp_process.wait()
 
+                # 获取并整理文件名
+                output_fileName = self.getOutPutFileName(output_path, start_time, end_time, speaker)
+                output_fileName = output_fileName.replace('\\','/')
+
+                # 输出标注信息
+                list_file.write(f"{output_fileName},{speaker},{self.language},{segment.text}\n")
+
+        list_file.close()
         # 完成后发送结果信号
         result = "over"
         self.result_signal.emit(result)
         self.stop()
 
     def stop(self):
         self.is_running = False
-        self.quit()
+        self.quit()
diff --git a/faster_whisper_GUI/tranccribePageNavigationInterface.py b/faster_whisper_GUI/tranccribePageNavigationInterface.py
@@ -117,7 +117,7 @@ def setupUI(self):
         self.combox_language = EditableComboBox()
         self.combox_language.addItem("Auto")
         for key, value in self.LANGUAGES_DICT.items():
-            self.combox_language.addItem(f"{key}-{value.capitalize()}")
+            self.combox_language.addItem(f"{key}-{value.title()}")
 
         self.combox_language.setCurrentIndex(0)
         completer_language = QCompleter([item.text for item in self.combox_language.items])