Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add voice code #13

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 22 additions & 16 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,27 +1,33 @@
# MinIO 配置
VOICEFLOW_MINIO_ENDPOINT='localhost:9000' # MinIO 服务地址
VOICEFLOW_MINIO_ACCESS_KEY='minioadmin' # MinIO 访问密钥
VOICEFLOW_MINIO_SECRET_KEY='minioadmin' # MinIO 密钥
VOICEFLOW_MINIO_ENDPOINT='s3.api..cc' # MinIO 服务地址
VOICEFLOW_MINIO_ACCESS_KEY='' # MinIO 访问密钥
VOICEFLOW_MINIO_SECRET_KEY='' # MinIO 密钥

# Azure 配置
VOICEFLOW_AZURE_STT_KEY='your_azure_stt_key' # Azure 语音转文本密钥
VOICEFLOW_AZURE_TTS_KEY='your_azure_tts_key' # Azure 文本转语音密钥
VOICEFLOW_AZURE_REGION='eastus' # Azure 服务区域
VOICEFLOW_AZURE_STT_KEY='' # Azure STT 密钥
VOICEFLOW_AZURE_TTS_KEY='' # Azure TTS 密钥
SPEECH_KEY='' # Azure 语音密钥
VOICEFLOW_AZURE_REGION='japaneast' # Azure 区域

# AWS 配置
VOICEFLOW_AWS_SECRET_ACCESS_KEY='' # AWS 秘密访问密钥
VOICEFLOW_AWS_ACCESS_KEY_ID='' # AWS 访问密钥 ID

# Google 配置
VOICEFLOW_GOOGLE_STT_KEY='your_google_stt_key' # Google 语音转文本密钥
VOICEFLOW_GOOGLE_TTS_KEY='your_google_tts_key' # Google 文本转语音密钥
VOICEFLOW_GOOGLE_STT_KEY='' # Google STT 密钥
VOICEFLOW_GOOGLE_TTS_KEY='' # Google TTS 密钥

# OpenAI 配置
VOICEFLOW_OPENAI_API_KEY='your_openai_api_key' # OpenAI API 密钥
VOICEFLOW_OPENAI_API_KEY='' # OpenAI API 密钥
VOICEFLOW_OPENAI_BASE_URL='' # OpenAI 基础 URL

# AssemblyAI 配置
VOICEFLOW_ASSEMBLYAI_API_KEY='your_assemblyai_api_key' # AssemblyAI API 密钥

# 语音服务端口配置
VOICEFLOW_SERVER_PORT=80 # VoiceFlow 服务端口, 默认是 80
VOICEFLOW_ASSEMBLYAI_API_KEY='' # AssemblyAI API 密钥

# VOLCENGINE 配置
VOICEFLOW_VOLCENGINE_ACCESS_KEY=''
VOICEFLOW_VOLCENGINE_APP_KEY=''
VOICEFLOW_VOLCENGINE_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmode
VOICEFLOW_VOLCENGINE_ACCESS_KEY='' # VOLCENGINE 访问密钥
VOICEFLOW_VOLCENGINE_APP_KEY='' # VOLCENGINE 应用密钥
VOICEFLOW_VOLCENGINE_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmodel' # VOLCENGINE WebSocket URL

# 语音服务端口配置
VOICEFLOW_SERVER_PORT=18080 # 语音服务端口
27 changes: 27 additions & 0 deletions cmd/voiceflow/realtime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// cmd/voiceflow/realtime.go
package main

import (
"fmt"
"github.com/spf13/cobra"
"github.com/telepace/voiceflow/pkg/voiceprocessor"
)

// realtimeCmd is the "realtime" subcommand. Its work is done by runRealtime,
// which is wired in through RunE so that errors are returned to cobra.
// The Short text is Chinese for "listen to speech in real time in the
// terminal and translate it".
var realtimeCmd = &cobra.Command{
Use: "realtime",
Short: "在终端中实时监听语音并翻译",
RunE: runRealtime,
}

// init registers realtimeCmd as a subcommand of rootCmd at package load time.
func init() {
rootCmd.AddCommand(realtimeCmd)
}

// runRealtime is the RunE handler of realtimeCmd. It prints a startup banner
// to stdout and then calls voiceprocessor.StartRealtime, passing any error it
// reports back to the caller unchanged. cmd and args are not used.
func runRealtime(cmd *cobra.Command, args []string) error {
	fmt.Println("启动实时语音监听...")

	if startErr := voiceprocessor.StartRealtime(); startErr != nil {
		return startErr
	}
	return nil
}
98 changes: 93 additions & 5 deletions cmd/voiceflow/root.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
// root.go
// cmd/voiceflow/root.go
package main

import (
"context"
"embed"
"fmt"
"github.com/joho/godotenv"
"github.com/telepace/voiceflow/pkg/config"
"io/fs"
"io/ioutil"
"net/http"
"os"
"strings"
"time"

"github.com/joho/godotenv"
"github.com/telepace/voiceflow/pkg/config"
"github.com/telepace/voiceflow/pkg/sttservice"

"github.com/spf13/cobra"
"github.com/spf13/viper"

Expand Down Expand Up @@ -64,9 +67,20 @@ var rootCmd = &cobra.Command{
RunE: run,
}

// transcribeCmd is the "transcribe" subcommand. Its work is done by
// runTranscribe, which is wired in through RunE so that errors are returned
// to cobra.
var transcribeCmd = &cobra.Command{
Use: "transcribe",
Short: "Transcribe an audio file using STT service",
Long: `Transcribe an audio file by specifying its path and using the configured STT service.`,
RunE: runTranscribe,
}

func run(cmd *cobra.Command, args []string) error {
ctx := context.Background()

if err := ensureDirectories(); err != nil {
logger.Fatalf("Failed to ensure directories: %v", err)
}
// Load configuration
cfg, err := config.GetConfig()
if err != nil {
Expand Down Expand Up @@ -102,7 +116,7 @@ func run(cmd *cobra.Command, args []string) error {
// Set up HTTP server
mux := http.NewServeMux()
if err := setupFileServers(mux); err != nil {
return fmt.Errorf("failed to setup file servers: %w", err)
logger.Fatalf("Failed to setup file servers: %v", err)
}

// Initialize WebSocket server
Expand Down Expand Up @@ -158,6 +172,8 @@ func Execute() {
}
}

// transcribeFile receives the value of the transcribe command's --file/-f
// flag: the path of the audio file that runTranscribe reads.
var transcribeFile string

func init() {
cobra.OnInitialize(initConfig)

Expand All @@ -180,14 +196,22 @@ func init() {

// 绑定到 viper
viper.BindPFlags(rootCmd.PersistentFlags())

// 配置 transcribe 子命令的标志
transcribeCmd.Flags().StringVarP(&transcribeFile, "file", "f", "", "Path to the audio file to transcribe")
transcribeCmd.MarkFlagRequired("file") // 标记为必需

// 将 transcribe 子命令添加到 rootCmd
rootCmd.AddCommand(transcribeCmd)
}

func initConfig() {
// 加载 .env 文件
if err := godotenv.Load(); err != nil {
logger.Warn("No .env file found or failed to load, proceeding without it")
} else {
logger.Info(".env file loaded")
envPath, _ := os.Getwd()
logger.Info(fmt.Sprintf(".env file loaded from: %s/.env", envPath))
}

if cfgFile != "" {
Expand Down Expand Up @@ -228,8 +252,72 @@ func setDefaults() {
viper.SetDefault("logging.compress", true)
viper.SetDefault("logging.report_caller", true)

// AWS 默认配置
viper.SetDefault("aws.region", "us-east-2")

// 其他服务配置...
viper.SetDefault("web.port", 18090)
viper.SetDefault("minio.enabled", true)
viper.SetDefault("minio.endpoint", "localhost:9000")
}

// runTranscribe is the RunE handler of the transcribe subcommand.
//
// It ensures the working directories exist, loads the configuration,
// initializes the logger and the backend services, reads the audio file named
// by the --file flag (transcribeFile), passes its bytes to
// sttservice.Recognize and prints the resulting transcript to stdout.
//
// Every failure is returned to the caller as a wrapped error; the handler
// never terminates the process itself. cmd and args are not used.
func runTranscribe(cmd *cobra.Command, args []string) error {
	ctx := context.Background()

	// Return the error instead of calling logger.Fatalf: this is a RunE
	// handler, so the caller is expected to report failures, and every other
	// failure path in this function already returns an error.
	if err := ensureDirectories(); err != nil {
		return fmt.Errorf("failed to ensure directories: %w", err)
	}

	cfg, err := config.GetConfig()
	if err != nil {
		return fmt.Errorf("failed to get config: %w", err)
	}

	// Build the logger configuration from the loaded settings.
	logCfg := logger.Config{
		Level:        cfg.Logging.Level,
		Format:       cfg.Logging.Format,
		Filename:     cfg.Logging.Filename,
		MaxSize:      cfg.Logging.MaxSize,
		MaxBackups:   cfg.Logging.MaxBackups,
		MaxAge:       cfg.Logging.MaxAge,
		Compress:     cfg.Logging.Compress,
		ReportCaller: cfg.Logging.ReportCaller,
	}

	// The instance ID is derived from the start time in Unix seconds.
	fields := logger.StandardFields{
		ServiceID:  "voiceflow",
		InstanceID: fmt.Sprintf("instance-%d", time.Now().Unix()),
	}

	if err := logger.Init(logCfg, fields); err != nil {
		return fmt.Errorf("failed to initialize logger: %w", err)
	}

	// NOTE(review): this writes the whole config value to the log; if the
	// config carries API keys or secrets they will end up there — confirm.
	logger.InfoContextf(ctx, "Starting VoiceFlow transcribe command with config: %+v", cfg)

	// Initialize the backend services before calling the STT service.
	serverpkg.InitServices()

	// NOTE(review): ioutil.ReadFile is deprecated in favor of os.ReadFile.
	// It is kept because switching may leave the file's io/ioutil import
	// unused; change both together.
	audioData, err := ioutil.ReadFile(transcribeFile)
	if err != nil {
		logger.Errorf("Failed to read audio file: %v", err)
		return fmt.Errorf("failed to read audio file: %w", err)
	}

	// Run speech-to-text on the raw file contents.
	transcript, err := sttservice.Recognize(audioData)
	if err != nil {
		logger.Errorf("STT Recognize error: %v", err)
		return fmt.Errorf("STT Recognize error: %w", err)
	}

	// Print the transcript for the user.
	fmt.Printf("Transcript:\n%s\n", transcript)

	return nil
}
29 changes: 29 additions & 0 deletions cmd/voiceflow/transcribe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// cmd/voiceflow/transcribe.go
package main

//import (
// "fmt"
// "github.com/spf13/cobra"
// "github.com/telepace/voiceflow/pkg/voiceprocessor"
//)
//
//var transcribeCmd = &cobra.Command{
// Use: "transcribe [音频文件路径]",
// Short: "转录并翻译指定的音频文件",
// Args: cobra.ExactArgs(1),
// RunE: runTranscribe,
//}
//
//func init() {
// rootCmd.AddCommand(transcribeCmd)
//}
//
//func runTranscribe(cmd *cobra.Command, args []string) error {
// audioFile := args[0]
// fmt.Printf("正在转录音频文件:%s\n", audioFile)
// err := voiceprocessor.TranscribeFile(audioFile)
// if err != nil {
// return err
// }
// return nil
//}
2 changes: 2 additions & 0 deletions cmd/voiceflow/voiceflow.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// cmd/voiceflow/voiceflow.go

package main

func main() {
Expand Down
81 changes: 75 additions & 6 deletions cmd/voiceflow/web/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ ws.onopen = () => {

ws.onmessage = (event) => {
const data = JSON.parse(event.data);
if (data.text) {
if (data.partial_text) {
// 显示部分转录文本
updatePartialMessage('你', data.partial_text);
} else if (data.text) {
// 显示最终转录文本
appendMessage('助手', data.text);
} else if (data.audio_url) {
appendAudioMessage('助手', data.audio_url);
Expand Down Expand Up @@ -58,32 +62,50 @@ function startRecording() {
recordVoiceBtn.classList.add('recording');

mediaRecorder = new MediaRecorder(stream);
mediaRecorder.start();

// 设置 timeslice 控制音频数据可用的频率(例如每250毫秒)
const timeslice = 250; // 时间,单位为毫秒

mediaRecorder.start(timeslice);

mediaRecorder.ondataavailable = e => {
audioChunks.push(e.data);
if (e.data && e.data.size > 0) {
// 将每个音频块实时发送到后端
sendAudioChunk(e.data);
}
};

mediaRecorder.onstop = () => {
const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
audioChunks = [];
sendAudioMessage(audioBlob);
isRecording = false;
recordVoiceBtn.classList.remove('recording');

// 停止所有音频轨道,释放麦克风
mediaStream.getTracks().forEach(track => track.stop());
mediaStream = null;

// 可选:向后端发送结束信号
ws.send(JSON.stringify({ end: true }));
};
})
.catch(err => {
console.error('麦克风访问错误:', err);
});
}

/**
 * Sends one recorded audio chunk to the backend over the WebSocket.
 *
 * The Blob is handed to ws.send() directly, which transmits it as a binary
 * frame and queues it in call order. Converting it through a FileReader first
 * made the send asynchronous, so the last chunk of a recording could be sent
 * after a control message (such as the `{ end: true }` marker) that was
 * issued later but synchronously.
 *
 * @param {Blob} audioBlob - Audio chunk to transmit.
 */
function sendAudioChunk(audioBlob) {
    ws.send(audioBlob);
}

/**
 * Stops an in-progress recording and removes the partial-transcript bubble.
 *
 * The end-of-stream marker is deliberately not sent from here:
 * mediaRecorder.stop() only schedules the final dataavailable/stop events, so
 * sending `{ end: true }` at this point would place it ahead of the last audio
 * chunk. The recorder's onstop handler is the place that sends it.
 *
 * NOTE(review): this file declares stopRecording a second time further down;
 * in a classic script the later declaration wins. Keep the two in sync or
 * remove one of them.
 */
function stopRecording() {
    if (mediaRecorder && isRecording) {
        mediaRecorder.stop();
        clearPartialMessage();
    }
}

Expand Down Expand Up @@ -141,7 +163,54 @@ function sendAudioMessage(audioBlob) {
reader.readAsArrayBuffer(audioBlob);
}

// Chat bubble currently showing an in-progress (partial) transcript, or
// undefined/null when no such bubble is on screen.
let partialMessageDiv;

/**
 * Shows or refreshes the partial-transcript bubble in the chat window.
 *
 * On the first call a bubble (a `.message` div holding a `.user` label and a
 * `.partial-text` span) is created and appended to chatWindow. Later calls
 * reuse it and only replace the text. The chat window is scrolled to the
 * bottom every time.
 *
 * @param {string} user - Speaker label; used only when the bubble is created.
 * @param {string} text - Partial transcript to display.
 */
function updatePartialMessage(user, text) {
    if (!partialMessageDiv) {
        partialMessageDiv = document.createElement('div');
        partialMessageDiv.classList.add('message');

        const [label, body] = ['user', 'partial-text'].map((cssClass) => {
            const span = document.createElement('span');
            span.classList.add(cssClass);
            return span;
        });
        label.textContent = `${user}: `;

        partialMessageDiv.appendChild(label);
        partialMessageDiv.appendChild(body);
        chatWindow.appendChild(partialMessageDiv);
    }

    partialMessageDiv.querySelector('.partial-text').textContent = text;
    chatWindow.scrollTop = chatWindow.scrollHeight;
}

/**
 * Removes the partial-transcript bubble from the page, if one exists, and
 * resets partialMessageDiv to null. Does nothing when no bubble is shown.
 */
function clearPartialMessage() {
    if (!partialMessageDiv) {
        return;
    }
    partialMessageDiv.remove();
    partialMessageDiv = null;
}

/**
 * Stops an in-progress recording and removes the partial-transcript bubble.
 * Does nothing when there is no recorder or no recording is active.
 */
function stopRecording() {
    if (!mediaRecorder || !isRecording) {
        return;
    }
    mediaRecorder.stop();
    clearPartialMessage();
}


// 当最终文本到达时,替换部分转录文本
function appendMessage(user, text) {
if (partialMessageDiv) {
partialMessageDiv.remove();
partialMessageDiv = null;
}
// 继续现有代码,添加消息
const messageDiv = document.createElement('div');
messageDiv.classList.add('message');

Expand Down
Loading
Loading