telepace · cubxxw · Nov 1, 2024 · Nov 1, 2024 · Dec 17, 2024 · Dec 17, 2024
@@ -1,27 +1,33 @@
 # MinIO 配置
-VOICEFLOW_MINIO_ENDPOINT='localhost:9000'      # MinIO 服务地址
-VOICEFLOW_MINIO_ACCESS_KEY='minioadmin'        # MinIO 访问密钥
-VOICEFLOW_MINIO_SECRET_KEY='minioadmin'        # MinIO 密钥
+VOICEFLOW_MINIO_ENDPOINT='s3.api..cc'      # MinIO 服务地址
+VOICEFLOW_MINIO_ACCESS_KEY=''        # MinIO 访问密钥
+VOICEFLOW_MINIO_SECRET_KEY=''        # MinIO 密钥
 
 # Azure 配置
-VOICEFLOW_AZURE_STT_KEY='your_azure_stt_key'   # Azure 语音转文本密钥
-VOICEFLOW_AZURE_TTS_KEY='your_azure_tts_key'   # Azure 文本转语音密钥
-VOICEFLOW_AZURE_REGION='eastus'                # Azure 服务区域
+VOICEFLOW_AZURE_STT_KEY=''          # Azure STT 密钥
+VOICEFLOW_AZURE_TTS_KEY=''          # Azure TTS 密钥
+SPEECH_KEY=''                        # Azure 语音密钥
+VOICEFLOW_AZURE_REGION='japaneast'  # Azure 区域
+
+# AWS 配置
+VOICEFLOW_AWS_SECRET_ACCESS_KEY=''   # AWS 秘密访问密钥
+VOICEFLOW_AWS_ACCESS_KEY_ID=''       # AWS 访问密钥 ID
 
 # Google 配置
-VOICEFLOW_GOOGLE_STT_KEY='your_google_stt_key' # Google 语音转文本密钥
-VOICEFLOW_GOOGLE_TTS_KEY='your_google_tts_key' # Google 文本转语音密钥
+VOICEFLOW_GOOGLE_STT_KEY=''          # Google STT 密钥
+VOICEFLOW_GOOGLE_TTS_KEY=''          # Google TTS 密钥
 
 # OpenAI 配置
-VOICEFLOW_OPENAI_API_KEY='your_openai_api_key' # OpenAI API 密钥
+VOICEFLOW_OPENAI_API_KEY=''          # OpenAI API 密钥
+VOICEFLOW_OPENAI_BASE_URL=''         # OpenAI 基础 URL
 
 # AssemblyAI 配置
-VOICEFLOW_ASSEMBLYAI_API_KEY='your_assemblyai_api_key' # AssemblyAI API 密钥
-
-# 语音服务端口配置
-VOICEFLOW_SERVER_PORT=80                    # VoiceFlow 服务端口, 默认是 80
+VOICEFLOW_ASSEMBLYAI_API_KEY=''      # AssemblyAI API 密钥
 
 # VOLCENGINE 配置
-VOICEFLOW_VOLCENGINE_ACCESS_KEY=''
-VOICEFLOW_VOLCENGINE_APP_KEY=''
-VOICEFLOW_VOLCENGINE_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmode
+VOICEFLOW_VOLCENGINE_ACCESS_KEY=''    # VOLCENGINE 访问密钥
+VOICEFLOW_VOLCENGINE_APP_KEY=''       # VOLCENGINE 应用密钥
+VOICEFLOW_VOLCENGINE_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmodel'  # VOLCENGINE WebSocket URL
+
+# 语音服务端口配置
+VOICEFLOW_SERVER_PORT=18080           # 语音服务端口
@@ -0,0 +1,27 @@
+// cmd/voiceflow/realtime.go
+package main
+
+import (
+	"fmt"
+	"github.com/spf13/cobra"
+	"github.com/telepace/voiceflow/pkg/voiceprocessor"
+)
+
+// realtimeCmd defines the "realtime" subcommand; cobra calls runRealtime when
+// the subcommand is executed.
+var realtimeCmd = &cobra.Command{
+	Use:   "realtime",
+	Short: "在终端中实时监听语音并翻译",
+	RunE:  runRealtime,
+}
+
+// init registers realtimeCmd as a subcommand of rootCmd.
+func init() {
+	rootCmd.AddCommand(realtimeCmd)
+}
+
+// runRealtime is the RunE handler for the "realtime" subcommand. It prints a
+// startup banner to stdout and then calls voiceprocessor.StartRealtime,
+// returning that call's error unchanged (nil on success). The cmd and args
+// parameters are not used.
+func runRealtime(cmd *cobra.Command, args []string) error {
+	fmt.Println("启动实时语音监听...")
+	if startErr := voiceprocessor.StartRealtime(); startErr != nil {
+		return startErr
+	}
+	return nil
+}
@@ -1,18 +1,21 @@
-// root.go
+// cmd/voiceflow/root.go
 package main
 
 import (
 	"context"
 	"embed"
 	"fmt"
-	"github.com/joho/godotenv"
-	"github.com/telepace/voiceflow/pkg/config"
 	"io/fs"
+	"io/ioutil"
 	"net/http"
 	"os"
 	"strings"
 	"time"
 
+	"github.com/joho/godotenv"
+	"github.com/telepace/voiceflow/pkg/config"
+	"github.com/telepace/voiceflow/pkg/sttservice"
+
 	"github.com/spf13/cobra"
 	"github.com/spf13/viper"
 
@@ -64,9 +67,20 @@ var rootCmd = &cobra.Command{
 	RunE:  run,
 }
 
+// transcribeCmd defines the "transcribe" subcommand; cobra calls runTranscribe when it is executed.
+var transcribeCmd = &cobra.Command{
+	Use:   "transcribe",
+	Short: "Transcribe an audio file using STT service",
+	Long:  `Transcribe an audio file by specifying its path and using the configured STT service.`,
+	RunE:  runTranscribe,
+}
+
 func run(cmd *cobra.Command, args []string) error {
 	ctx := context.Background()
 
+	if err := ensureDirectories(); err != nil {
+		logger.Fatalf("Failed to ensure directories: %v", err)
+	}
 	// Load configuration
 	cfg, err := config.GetConfig()
 	if err != nil {
@@ -102,7 +116,7 @@ func run(cmd *cobra.Command, args []string) error {
 	// Set up HTTP server
 	mux := http.NewServeMux()
 	if err := setupFileServers(mux); err != nil {
-		return fmt.Errorf("failed to setup file servers: %w", err)
+		logger.Fatalf("Failed to setup file servers: %v", err)
 	}
 
 	// Initialize WebSocket server
@@ -158,6 +172,8 @@ func Execute() {
 	}
 }
 
+// transcribeFile receives the value of the transcribe subcommand's --file/-f flag.
+var transcribeFile string
+
 func init() {
 	cobra.OnInitialize(initConfig)
 
@@ -180,14 +196,22 @@ func init() {
 
 	// 绑定到 viper
 	viper.BindPFlags(rootCmd.PersistentFlags())
+
+	// 配置 transcribe 子命令的标志
+	transcribeCmd.Flags().StringVarP(&transcribeFile, "file", "f", "", "Path to the audio file to transcribe")
+	transcribeCmd.MarkFlagRequired("file") // 标记为必需
+
+	// 将 transcribe 子命令添加到 rootCmd
+	rootCmd.AddCommand(transcribeCmd)
 }
 
 func initConfig() {
 	// 加载 .env 文件
 	if err := godotenv.Load(); err != nil {
 		logger.Warn("No .env file found or failed to load, proceeding without it")
 	} else {
-		logger.Info(".env file loaded")
+		envPath, _ := os.Getwd()
+		logger.Info(fmt.Sprintf(".env file loaded from: %s/.env", envPath))
 	}
 
 	if cfgFile != "" {
@@ -228,8 +252,72 @@ func setDefaults() {
 	viper.SetDefault("logging.compress", true)
 	viper.SetDefault("logging.report_caller", true)
 
+	// AWS 默认配置
+	viper.SetDefault("aws.region", "us-east-2")
+
 	// 其他服务配置...
 	viper.SetDefault("web.port", 18090)
 	viper.SetDefault("minio.enabled", true)
 	viper.SetDefault("minio.endpoint", "localhost:9000")
 }
+
+// runTranscribe is the RunE handler for the "transcribe" subcommand.
+//
+// It ensures the working directories exist, loads the configuration,
+// initializes the logger and the backend services, reads the audio file named
+// by the --file flag (transcribeFile), passes its bytes to
+// sttservice.Recognize and prints the resulting transcript to stdout.
+//
+// Every failure is reported by returning a wrapped error, so cobra decides how
+// the process exits. The cmd and args parameters are not used.
+func runTranscribe(cmd *cobra.Command, args []string) error {
+	ctx := context.Background()
+
+	// Return the error rather than calling logger.Fatalf: logger.Init has not
+	// run yet at this point in the handler, and exiting here would bypass the
+	// error-return path every other step below uses.
+	if err := ensureDirectories(); err != nil {
+		return fmt.Errorf("failed to ensure directories: %w", err)
+	}
+
+	cfg, err := config.GetConfig()
+	if err != nil {
+		return fmt.Errorf("failed to get config: %w", err)
+	}
+
+	// Build the logger configuration from the loaded settings.
+	logCfg := logger.Config{
+		Level:        cfg.Logging.Level,
+		Format:       cfg.Logging.Format,
+		Filename:     cfg.Logging.Filename,
+		MaxSize:      cfg.Logging.MaxSize,
+		MaxBackups:   cfg.Logging.MaxBackups,
+		MaxAge:       cfg.Logging.MaxAge,
+		Compress:     cfg.Logging.Compress,
+		ReportCaller: cfg.Logging.ReportCaller,
+	}
+
+	// The instance ID is derived from the current Unix time in seconds.
+	fields := logger.StandardFields{
+		ServiceID:  "voiceflow",
+		InstanceID: fmt.Sprintf("instance-%d", time.Now().Unix()),
+	}
+
+	if err := logger.Init(logCfg, fields); err != nil {
+		return fmt.Errorf("failed to initialize logger: %w", err)
+	}
+
+	// Record the start-up state.
+	// NOTE(review): %+v prints every field of cfg; if the config struct carries
+	// provider API keys they end up in the log — confirm and redact if so.
+	logger.InfoContextf(ctx, "Starting VoiceFlow transcribe command with config: %+v", cfg)
+
+	// Initialize the backend services before calling the STT service.
+	serverpkg.InitServices()
+
+	// Load the whole audio file into memory.
+	audioData, err := ioutil.ReadFile(transcribeFile)
+	if err != nil {
+		logger.Errorf("Failed to read audio file: %v", err)
+		return fmt.Errorf("failed to read audio file: %w", err)
+	}
+
+	// Run speech-to-text on the audio bytes.
+	transcript, err := sttservice.Recognize(audioData)
+	if err != nil {
+		logger.Errorf("STT Recognize error: %v", err)
+		return fmt.Errorf("STT Recognize error: %w", err)
+	}
+
+	// Print the transcript for the user.
+	fmt.Printf("Transcript:\n%s\n", transcript)
+
+	return nil
+}
@@ -0,0 +1,29 @@
+// cmd/voiceflow/transcribe.go
+package main
+
+//import (
+//	"fmt"
+//	"github.com/spf13/cobra"
+//	"github.com/telepace/voiceflow/pkg/voiceprocessor"
+//)
+//
+//var transcribeCmd = &cobra.Command{
+//	Use:   "transcribe [音频文件路径]",
+//	Short: "转录并翻译指定的音频文件",
+//	Args:  cobra.ExactArgs(1),
+//	RunE:  runTranscribe,
+//}
+//
+//func init() {
+//	rootCmd.AddCommand(transcribeCmd)
+//}
+//
+//func runTranscribe(cmd *cobra.Command, args []string) error {
+//	audioFile := args[0]
+//	fmt.Printf("正在转录音频文件：%s\n", audioFile)
+//	err := voiceprocessor.TranscribeFile(audioFile)
+//	if err != nil {
+//		return err
+//	}
+//	return nil
+//}
@@ -1,3 +1,5 @@
+// cmd/voiceflow/voiceflow.go
+
 package main
 
 func main() {

@@ -10,7 +10,11 @@ ws.onopen = () => {
 
 ws.onmessage = (event) => {
     const data = JSON.parse(event.data);
-    if (data.text) {
+    if (data.partial_text) {
+        // 显示部分转录文本
+        updatePartialMessage('你', data.partial_text);
+    } else if (data.text) {
+        // 显示最终转录文本
         appendMessage('助手', data.text);
     } else if (data.audio_url) {
         appendAudioMessage('助手', data.audio_url);
@@ -58,32 +62,50 @@ function startRecording() {
             recordVoiceBtn.classList.add('recording');
 
             mediaRecorder = new MediaRecorder(stream);
-            mediaRecorder.start();
+
+            // 设置 timeslice 控制音频数据可用的频率（例如每250毫秒）
+            const timeslice = 250; // 时间，单位为毫秒
+
+            mediaRecorder.start(timeslice);
 
             mediaRecorder.ondataavailable = e => {
-                audioChunks.push(e.data);
+                if (e.data && e.data.size > 0) {
+                    // 将每个音频块实时发送到后端
+                    sendAudioChunk(e.data);
+                }
             };
 
             mediaRecorder.onstop = () => {
-                const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
-                audioChunks = [];
-                sendAudioMessage(audioBlob);
                 isRecording = false;
                 recordVoiceBtn.classList.remove('recording');
 
                 // 停止所有音频轨道，释放麦克风
                 mediaStream.getTracks().forEach(track => track.stop());
                 mediaStream = null;
+
+                // 可选：向后端发送结束信号
+                ws.send(JSON.stringify({ end: true }));
             };
         })
         .catch(err => {
             console.error('麦克风访问错误:', err);
         });
 }
 
+function sendAudioChunk(audioBlob) {
+    // 将音频 blob 转换为 ArrayBuffer
+    const reader = new FileReader();
+    reader.onload = () => {
+        // 将音频块发送到后端
+        ws.send(reader.result);
+    };
+    reader.readAsArrayBuffer(audioBlob);
+}
+
 function stopRecording() {
     if (mediaRecorder && isRecording) {
         mediaRecorder.stop();
+        ws.send(JSON.stringify({ end: true }));
     }
 }
 
@@ -141,7 +163,54 @@ function sendAudioMessage(audioBlob) {
     reader.readAsArrayBuffer(audioBlob);
 }
 
+// Element currently showing the partial transcript; unset or null when none is displayed.
+let partialMessageDiv;
+
+function updatePartialMessage(user, text) {
+    if (!partialMessageDiv) {
+        partialMessageDiv = document.createElement('div');
+        partialMessageDiv.classList.add('message');
+
+        const userSpan = document.createElement('span');
+        userSpan.classList.add('user');
+        userSpan.textContent = `${user}: `;
+
+        const textSpan = document.createElement('span');
+        textSpan.classList.add('partial-text');
+
+        partialMessageDiv.appendChild(userSpan);
+        partialMessageDiv.appendChild(textSpan);
+        chatWindow.appendChild(partialMessageDiv);
+    }
+
+    const textSpan = partialMessageDiv.querySelector('.partial-text');
+    textSpan.textContent = text;
+    chatWindow.scrollTop = chatWindow.scrollHeight;
+}
+
+// 当录音结束时，清除部分消息
+function clearPartialMessage() {
+    if (partialMessageDiv) {
+        partialMessageDiv.remove();
+        partialMessageDiv = null;
+    }
+}
+
+// 修改录音停止的函数，添加清除部分消息的逻辑
+function stopRecording() {
+    if (mediaRecorder && isRecording) {
+        mediaRecorder.stop();
+        clearPartialMessage();
+    }
+}
+
+
+// 当最终文本到达时，替换部分转录文本
 function appendMessage(user, text) {
+    if (partialMessageDiv) {
+        partialMessageDiv.remove();
+        partialMessageDiv = null;
+    }
+    // 继续现有代码，添加消息
     const messageDiv = document.createElement('div');
     messageDiv.classList.add('message');