From 7b2c9a1f65f4796350f26fec6f7cc468008b8ffd Mon Sep 17 00:00:00 2001 From: Ethan Liu Date: Fri, 30 Jun 2023 18:25:42 +0800 Subject: [PATCH] feat: support tts auto play --- CHANGE_LOG.md | 3 ++ CHANGE_LOG.zh_CN.md | 3 ++ package.json | 5 ++-- pnpm-lock.yaml | 30 ++++++++++++++++++++ src/app/api/azure/route.ts | 5 +++- src/app/api/openai/route.ts | 7 +++-- src/components/ttsSetting/index.tsx | 43 +++++++++++++++++++++++++++-- src/hooks/useChatGPT.tsx | 28 +++++++++++++++++++ src/hooks/useTTS/useTTS.ts | 34 +++++++++++++++++++---- src/locales/en.json | 2 ++ src/locales/zh-CN.json | 2 ++ 11 files changed, 148 insertions(+), 14 deletions(-) diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 47af887..d1b9b6a 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -8,17 +8,20 @@ - Fixed bug of missing bufferutil and utf-8-validate after importing azure tts locally - Fixed issue of mistakenly referencing client code in the service side +- Fix bug for Azure TTS play and pause controls ### Add - Use the language model previously selected by the user when creating a new conversation. - Add global initialization loading +- Azure TTS adds an automatic playback option (Premium) ### Changed - Refactor scrolling list module, optimize performance - Remove unnecessary features to improve performance. - Optimize UI details and unify global icon styles. Replace react-icons with MingCute Icon. 
+- Optimize OpenAI/Azure API to return error messages in a more user-friendly format ## v0.7.1 diff --git a/CHANGE_LOG.zh_CN.md b/CHANGE_LOG.zh_CN.md index 021b337..31dbcf8 100644 --- a/CHANGE_LOG.zh_CN.md +++ b/CHANGE_LOG.zh_CN.md @@ -8,17 +8,20 @@ - 修复本地引入 azure tts 后,缺失 bufferutil 和 utf-8-validate 的 bug - 修复在 service 端错误引用了 client 代码的问题 +- 修复 azure tts 播放和暂停控制的 bug ### 新增 - 新建会话时,沿用用户之前选择的语言模型 - 添加初始化全局 loading +- Azure TTS 新增自动播放选项(Premium) ### 调整 - 重构滚动列表模块,优化性能 - 删减部分多余功能,提升性能 - 优化 UI 细节,统一全局图标样式。将 react-icons 替换为 MingCute Icon +- 优化 OpenAI/Azure 接口返回错误格式 ## v0.7.1 diff --git a/package.json b/package.json index 60c197b..3bc2b22 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "l-gpt", - "version": "0.7.1", + "version": "0.7.2", "private": true, "scripts": { "dev": "next dev", @@ -25,6 +25,7 @@ "@radix-ui/react-progress": "1.0.3", "@radix-ui/react-select": "1.2.2", "@radix-ui/react-slider": "1.1.2", + "@radix-ui/react-switch": "^1.0.3", "@radix-ui/react-tabs": "1.0.4", "@radix-ui/react-toast": "1.1.4", "@react-email/components": "0.0.7", @@ -44,7 +45,7 @@ "eslint-config-next": "13.4.7", "file-saver": "2.0.5", "framer-motion": "10.12.17", - "gpt-tokens": "1.0.9", + "gpt-tokens": "1.0.10", "js-tiktoken": "1.0.7", "l-hooks": "0.4.6", "math-random": "2.0.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a5e5373..1ca0a4f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -41,6 +41,9 @@ dependencies: '@radix-ui/react-slider': specifier: 1.1.2 version: 1.1.2(@types/react-dom@18.2.6)(@types/react@18.2.14)(react-dom@18.2.0)(react@18.2.0) + '@radix-ui/react-switch': + specifier: ^1.0.3 + version: 1.0.3(@types/react-dom@18.2.6)(@types/react@18.2.14)(react-dom@18.2.0)(react@18.2.0) '@radix-ui/react-tabs': specifier: 1.0.4 version: 1.0.4(@types/react-dom@18.2.6)(@types/react@18.2.14)(react-dom@18.2.0)(react@18.2.0) @@ -2556,6 +2559,33 @@ packages: react: 18.2.0 dev: false + 
/@radix-ui/react-switch@1.0.3(@types/react-dom@18.2.6)(@types/react@18.2.14)(react-dom@18.2.0)(react@18.2.0): + resolution: {integrity: sha512-mxm87F88HyHztsI7N+ZUmEoARGkC22YVW5CaC+Byc+HRpuvCrOBPTAnXgf+tZ/7i0Sg/eOePGdMhUKhPaQEqow==} + peerDependencies: + '@types/react': '*' + '@types/react-dom': '*' + react: ^16.8 || ^17.0 || ^18.0 + react-dom: ^16.8 || ^17.0 || ^18.0 + peerDependenciesMeta: + '@types/react': + optional: true + '@types/react-dom': + optional: true + dependencies: + '@babel/runtime': 7.21.5 + '@radix-ui/primitive': 1.0.1 + '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.2.14)(react@18.2.0) + '@radix-ui/react-context': 1.0.1(@types/react@18.2.14)(react@18.2.0) + '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.2.6)(@types/react@18.2.14)(react-dom@18.2.0)(react@18.2.0) + '@radix-ui/react-use-controllable-state': 1.0.1(@types/react@18.2.14)(react@18.2.0) + '@radix-ui/react-use-previous': 1.0.1(@types/react@18.2.14)(react@18.2.0) + '@radix-ui/react-use-size': 1.0.1(@types/react@18.2.14)(react@18.2.0) + '@types/react': 18.2.14 + '@types/react-dom': 18.2.6 + react: 18.2.0 + react-dom: 18.2.0(react@18.2.0) + dev: false + /@radix-ui/react-tabs@1.0.4(@types/react-dom@18.2.6)(@types/react@18.2.14)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-egZfYY/+wRNCflXNHx+dePvnz9FbmssDTJBtgRfDY7e8SE5oIo3Py2eCB1ckAbh1Q7cQ/6yJZThJ++sgbxibog==} peerDependencies: diff --git a/src/app/api/azure/route.ts b/src/app/api/azure/route.ts index 5351859..f40811f 100644 --- a/src/app/api/azure/route.ts +++ b/src/app/api/azure/route.ts @@ -202,6 +202,9 @@ export async function POST(request: Request) { return new Response(readable, response); } catch (error: any) { console.log(error, "azure error"); - return new Response("Error", { status: 500 }); + return NextResponse.json( + { error: { message: error?.message || "Error" } }, + { status: 500 } + ); } } diff --git a/src/app/api/openai/route.ts b/src/app/api/openai/route.ts index 5cdcf78..682f422 100644 --- 
a/src/app/api/openai/route.ts +++ b/src/app/api/openai/route.ts @@ -205,8 +205,11 @@ export async function POST(request: Request) { ); return new Response(readable, response); - } catch (error) { + } catch (error: any) { console.log(error, "openai error"); - return new Response("Error", { status: 500 }); + return NextResponse.json( + { error: { message: error?.message || "Error" } }, + { status: 500 } + ); } } diff --git a/src/components/ttsSetting/index.tsx b/src/components/ttsSetting/index.tsx index 1796df3..5497d55 100644 --- a/src/components/ttsSetting/index.tsx +++ b/src/components/ttsSetting/index.tsx @@ -2,6 +2,7 @@ import React from "react"; import { useTranslations } from "next-intl"; +import * as Switch from "@radix-ui/react-switch"; import { cn } from "@/lib"; import Icon from "@/components/icon"; import { @@ -32,8 +33,16 @@ const mapRate = (rate: TTSRate) => { const TTS: React.FC = () => { const [open, setOpen] = useTTSOpen(); - const { voice, voices, rate, updateVoice, updateVoices, updateRate } = - useTTS(); + const { + voice, + voices, + rate, + autoPlay, + updateVoice, + updateVoices, + updateRate, + updateAutoPlay, + } = useTTS(); const [loading, setLoading] = React.useState(false); const t = useTranslations("tts"); @@ -123,13 +132,41 @@ const TTS: React.FC = () => {
+
+
+ {t("auto-play")} + + + +
+ updateAutoPlay(checked ? "1" : "0")} + className={cn( + "w-12 h-6 rounded-full relative outline-none cursor-pointer transition-colors", + "data-[state=unchecked]:bg-neutral-200/80 data-[state=checked]:bg-sky-400" + )} + > + + +
); }; diff --git a/src/hooks/useChatGPT.tsx b/src/hooks/useChatGPT.tsx index 9cb4141..f518928 100644 --- a/src/hooks/useChatGPT.tsx +++ b/src/hooks/useChatGPT.tsx @@ -15,6 +15,7 @@ import { useRecharge, useStreamDecoder, useScrollToBottom, + useTTS, BASE_PROMPT, } from "@/hooks"; import type { ChatItem, ChannelListItem } from "@/hooks"; @@ -29,6 +30,7 @@ export const useChatGPT = () => { const [, setRechargeOpen] = useRecharge(); const { decoder } = useStreamDecoder(); const { scrollToBottom } = useScrollToBottom(); + const { speak, autoPlay } = useTTS(); const { openai, azure } = useLLM(); const LLMOptions = React.useMemo(() => [openai, azure], [openai, azure]); @@ -237,6 +239,32 @@ export const useChatGPT = () => { findCh = list.find((item) => item.channel_id === channel_id); if (!findCh) return channel; + const findLast = findCh.chat_list.at(-1); + + if (autoPlay !== "0" && findLast?.role === "assistant") { + const findTTSActive = findCh.chat_list.find( + (item) => item.tts_loading + ); + if (findTTSActive) findTTSActive.tts_loading = false; + + findLast.tts_loading = true; + speak(findLast.content, () => { + setChannel((channel) => { + const { list } = channel; + const findCh = list.find( + (item) => item.channel_id === channel_id + ); + if (!findCh) return channel; + const findChat = findCh.chat_list.find( + (val) => val.id === findLast.id + ); + if (!findChat) return channel; + findChat.tts_loading = false; + return channel; + }); + }); + } + // end channel loading findCh.channel_loading = false; diff --git a/src/hooks/useTTS/useTTS.ts b/src/hooks/useTTS/useTTS.ts index 5709931..38425fb 100644 --- a/src/hooks/useTTS/useTTS.ts +++ b/src/hooks/useTTS/useTTS.ts @@ -13,10 +13,15 @@ interface ITTS { voice: string; voices: any[]; rate: TTSRate; + player: SpeakerAudioDestination | null; + // 0:false, 1:true + autoPlay: string; updateVoice: (voice: string) => void; updateVoices: (voices: string[]) => void; updateRate: (rate: TTSRate) => void; + updateAutoPlay: 
(autoPlay: string) => void; + updatePlayer: (player: SpeakerAudioDestination | null) => void; } const key = process.env.NEXT_PUBLIC_AZURE_TTS_KEY || ""; @@ -26,6 +31,8 @@ const useStore = create((set) => ({ voice: "zh-CN-XiaoxiaoNeural", voices: [], rate: "medium", + autoPlay: "0", + player: null, updateVoice: (voice: string) => { localStorage.setItem("voice", voice); @@ -39,26 +46,35 @@ const useStore = create((set) => ({ localStorage.setItem("voiceRate", rate); set({ rate }); }, + updateAutoPlay: (autoPlay: string) => { + localStorage.setItem("autoPlay", autoPlay); + set({ autoPlay }); + }, + updatePlayer: (player: SpeakerAudioDestination | null) => { + set({ player }); + }, })); export const useTTS = () => { const configRef = React.useRef(null); - const playerRef = React.useRef(null); const synthRef = React.useRef(null); - const { voice, voices, rate } = useStore(); + const { voice, voices, rate, autoPlay, player } = useStore(); const updateVoice = useStore((state) => state.updateVoice); const updateVoices = useStore((state) => state.updateVoices); const updateRate = useStore((state) => state.updateRate); + const updateAutoPlay = useStore((state) => state.updateAutoPlay); + const updatePlayer = useStore((state) => state.updatePlayer); const speak = (content: string, cb?: () => void) => { return new Promise((resolve, reject) => { try { pause(); - playerRef.current = new SpeakerAudioDestination(); + const player = new SpeakerAudioDestination(); + updatePlayer(player); - const audioConfig = AudioConfig.fromSpeakerOutput(playerRef.current); + const audioConfig = AudioConfig.fromSpeakerOutput(player); if (!configRef.current) { configRef.current = SpeechConfig.fromSubscription(key, region); } @@ -83,7 +99,7 @@ export const useTTS = () => { synthRef.current?.close(); }); - playerRef.current.onAudioEnd = () => { + player.onAudioEnd = () => { cb?.(); }; } catch (error) { @@ -92,12 +108,15 @@ export const useTTS = () => { }); }; - const pause = () => 
playerRef.current?.pause(); + const pause = React.useCallback(() => { + player?.pause(); + }, [player]); React.useEffect(() => { const localVoice = localStorage.getItem("voice") || "zh-CN-XiaoxiaoNeural"; const localVoices = localStorage.getItem("voices"); const localRate = localStorage.getItem("voiceRate") || "medium"; + const localAutoPlay = localStorage.getItem("autoPlay") || "0"; try { updateVoices(localVoices ? JSON.parse(localVoices) : []); @@ -106,6 +125,7 @@ export const useTTS = () => { } updateVoice(localVoice); updateRate(localRate as TTSRate); + updateAutoPlay(localAutoPlay); }, []); return { @@ -114,8 +134,10 @@ export const useTTS = () => { voice, voices, rate, + autoPlay, updateVoice, updateVoices, updateRate, + updateAutoPlay, }; }; diff --git a/src/locales/en.json b/src/locales/en.json index 13a6039..bf92fbf 100644 --- a/src/locales/en.json +++ b/src/locales/en.json @@ -265,6 +265,8 @@ "what-is-token": "What is a Token?" }, "tts": { + "auto-play": "Auto Play", + "auto-play-tip": "Play the audio automatically after each reply", "azure-tts": "Azure TTS", "fast": "fast", "medium": "medium", diff --git a/src/locales/zh-CN.json b/src/locales/zh-CN.json index ea1a798..be8f04e 100644 --- a/src/locales/zh-CN.json +++ b/src/locales/zh-CN.json @@ -265,6 +265,8 @@ "what-is-token": "什么是 Token?" }, "tts": { + "auto-play": "自动播放", + "auto-play-tip": "在每次回复后自动播放音频", "azure-tts": "Azure 文本转语音", "fast": "较快", "medium": "适中",