feat: support TTS auto play

LTopx · Jun 30, 2023 · 7b2c9a1 · 7b2c9a1
1 parent 3024150
commit 7b2c9a1
Show file tree

Hide file tree

Showing 11 changed files with 148 additions and 14 deletions.
diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
@@ -8,17 +8,20 @@
 
 - Fixed bug of missing bufferutil and utf-8-validate after importing azure tts locally
 - Fixed issue of mistakenly referencing client code in the service side
+- Fixed bug in Azure TTS play and pause controls
 
 ### Add
 
 - Use the language model previously selected by the user when creating a new conversation.
 - Add global initialization loading
+- Add an automatic playback option for Azure TTS (Premium)
 
 ### Changed
 
 - Refactor scrolling list module, optimize performance
 - Remove unnecessary features to improve performance.
 - Optimize UI details and unify global icon styles. Replace react-icons with MingCute Icon.
+- Optimize OpenAI/Azure API to return error messages in a more user-friendly format
 
 ## v0.7.1
 

diff --git a/CHANGE_LOG.zh_CN.md b/CHANGE_LOG.zh_CN.md
@@ -8,17 +8,20 @@
 
 - 修复本地引入 azure tts 后，缺失 bufferutil 和 utf-8-validate 的 bug
 - 修复在 service 端错误引用了 client 代码的问题
+- 修复 azure tts 播放和暂停控制的 bug
 
 ### 新增
 
 - 新建会话时，沿用用户之前选择的语言模型
 - 添加初始化全局 loading
+- Azure TTS 新增自动播放选项（Premium）
 
 ### 调整
 
 - 重构滚动列表模块，优化性能
 - 删减部分多余功能，提升性能
 - 优化 UI 细节，统一全局图标样式。将 react-icons 替换为 MingCute Icon
+- 优化 OpenAI/Azure 接口返回错误格式
 
 ## v0.7.1
 

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "l-gpt",
-  "version": "0.7.1",
+  "version": "0.7.2",
   "private": true,
   "scripts": {
     "dev": "next dev",
@@ -25,6 +25,7 @@
     "@radix-ui/react-progress": "1.0.3",
     "@radix-ui/react-select": "1.2.2",
     "@radix-ui/react-slider": "1.1.2",
+    "@radix-ui/react-switch": "^1.0.3",
     "@radix-ui/react-tabs": "1.0.4",
     "@radix-ui/react-toast": "1.1.4",
     "@react-email/components": "0.0.7",
@@ -44,7 +45,7 @@
     "eslint-config-next": "13.4.7",
     "file-saver": "2.0.5",
     "framer-motion": "10.12.17",
-    "gpt-tokens": "1.0.9",
+    "gpt-tokens": "1.0.10",
     "js-tiktoken": "1.0.7",
     "l-hooks": "0.4.6",
     "math-random": "2.0.1",

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/src/app/api/azure/route.ts b/src/app/api/azure/route.ts
@@ -202,6 +202,9 @@ export async function POST(request: Request) {
     return new Response(readable, response);
   } catch (error: any) {
     console.log(error, "azure error");
-    return new Response("Error", { status: 500 });
+    return NextResponse.json(
+      { error: { message: error?.message || "Error" } },
+      { status: 500 }
+    );
   }
 }
diff --git a/src/app/api/openai/route.ts b/src/app/api/openai/route.ts
@@ -205,8 +205,11 @@ export async function POST(request: Request) {
     );
 
     return new Response(readable, response);
-  } catch (error) {
+  } catch (error: any) {
     console.log(error, "openai error");
-    return new Response("Error", { status: 500 });
+    return NextResponse.json(
+      { error: { message: error?.message || "Error" } },
+      { status: 500 }
+    );
   }
 }
diff --git a/src/components/ttsSetting/index.tsx b/src/components/ttsSetting/index.tsx
@@ -2,6 +2,7 @@
 
 import React from "react";
 import { useTranslations } from "next-intl";
+import * as Switch from "@radix-ui/react-switch";
 import { cn } from "@/lib";
 import Icon from "@/components/icon";
 import {
@@ -32,8 +33,16 @@ const mapRate = (rate: TTSRate) => {
 
 const TTS: React.FC = () => {
   const [open, setOpen] = useTTSOpen();
-  const { voice, voices, rate, updateVoice, updateVoices, updateRate } =
-    useTTS();
+  const {
+    voice,
+    voices,
+    rate,
+    autoPlay,
+    updateVoice,
+    updateVoices,
+    updateRate,
+    updateAutoPlay,
+  } = useTTS();
   const [loading, setLoading] = React.useState(false);
 
   const t = useTranslations("tts");
@@ -123,13 +132,41 @@ const TTS: React.FC = () => {
       </div>
       <div>
         <Slider
-          className="flex-1"
+          className="flex-1 px-1"
           max={1}
           step={0.25}
           defaultValue={mapRate(rate)}
           onChange={onChangeRate}
         />
       </div>
+      <div
+        className={cn(
+          "flex items-center justify-between py-2 px-1 border-b",
+          "border-slate-100 dark:border-neutral-500/60"
+        )}
+      >
+        <div className="flex text-sm gap-2 items-center">
+          {t("auto-play")}
+          <Tooltip title={t("auto-play-tip")}>
+            <Icon icon="question_line" size={18} />
+          </Tooltip>
+        </div>
+        <Switch.Root
+          defaultChecked={autoPlay === "0" ? false : true}
+          onCheckedChange={(checked) => updateAutoPlay(checked ? "1" : "0")}
+          className={cn(
+            "w-12 h-6 rounded-full relative outline-none cursor-pointer transition-colors",
+            "data-[state=unchecked]:bg-neutral-200/80 data-[state=checked]:bg-sky-400"
+          )}
+        >
+          <Switch.Thumb
+            className={cn(
+              "block w-4 h-4 bg-white rounded-full transition-all",
+              "translate-x-1 data-[state=checked]:translate-x-7"
+            )}
+          />
+        </Switch.Root>
+      </div>
     </Modal>
   );
 };

diff --git a/src/hooks/useChatGPT.tsx b/src/hooks/useChatGPT.tsx
@@ -15,6 +15,7 @@ import {
   useRecharge,
   useStreamDecoder,
   useScrollToBottom,
+  useTTS,
   BASE_PROMPT,
 } from "@/hooks";
 import type { ChatItem, ChannelListItem } from "@/hooks";
@@ -29,6 +30,7 @@ export const useChatGPT = () => {
   const [, setRechargeOpen] = useRecharge();
   const { decoder } = useStreamDecoder();
   const { scrollToBottom } = useScrollToBottom();
+  const { speak, autoPlay } = useTTS();
   const { openai, azure } = useLLM();
   const LLMOptions = React.useMemo(() => [openai, azure], [openai, azure]);
 
@@ -237,6 +239,32 @@ export const useChatGPT = () => {
           findCh = list.find((item) => item.channel_id === channel_id);
           if (!findCh) return channel;
 
+          const findLast = findCh.chat_list.at(-1);
+
+          if (autoPlay !== "0" && findLast?.role === "assistant") {
+            const findTTSActive = findCh.chat_list.find(
+              (item) => item.tts_loading
+            );
+            if (findTTSActive) findTTSActive.tts_loading = false;
+
+            findLast.tts_loading = true;
+            speak(findLast.content, () => {
+              setChannel((channel) => {
+                const { list } = channel;
+                const findCh = list.find(
+                  (item) => item.channel_id === channel_id
+                );
+                if (!findCh) return channel;
+                const findChat = findCh.chat_list.find(
+                  (val) => val.id === findLast.id
+                );
+                if (!findChat) return channel;
+                findChat.tts_loading = false;
+                return channel;
+              });
+            });
+          }
+
           // end channel loading
           findCh.channel_loading = false;
 

diff --git a/src/hooks/useTTS/useTTS.ts b/src/hooks/useTTS/useTTS.ts
@@ -13,10 +13,15 @@ interface ITTS {
   voice: string;
   voices: any[];
   rate: TTSRate;
+  player: SpeakerAudioDestination | null;
+  // 0:false, 1:true
+  autoPlay: string;
 
   updateVoice: (voice: string) => void;
   updateVoices: (voices: string[]) => void;
   updateRate: (rate: TTSRate) => void;
+  updateAutoPlay: (autoPlay: string) => void;
+  updatePlayer: (player: SpeakerAudioDestination | null) => void;
 }
 
 const key = process.env.NEXT_PUBLIC_AZURE_TTS_KEY || "";
@@ -26,6 +31,8 @@ const useStore = create<ITTS>((set) => ({
   voice: "zh-CN-XiaoxiaoNeural",
   voices: [],
   rate: "medium",
+  autoPlay: "0",
+  player: null,
 
   updateVoice: (voice: string) => {
     localStorage.setItem("voice", voice);
@@ -39,26 +46,35 @@ const useStore = create<ITTS>((set) => ({
     localStorage.setItem("voiceRate", rate);
     set({ rate });
   },
+  updateAutoPlay: (autoPlay: string) => {
+    localStorage.setItem("autoPlay", autoPlay);
+    set({ autoPlay });
+  },
+  updatePlayer: (player: SpeakerAudioDestination | null) => {
+    set({ player });
+  },
 }));
 
 export const useTTS = () => {
   const configRef = React.useRef<SpeechConfig | null>(null);
-  const playerRef = React.useRef<SpeakerAudioDestination | null>(null);
   const synthRef = React.useRef<SpeechSynthesizer | null>(null);
 
-  const { voice, voices, rate } = useStore();
+  const { voice, voices, rate, autoPlay, player } = useStore();
 
   const updateVoice = useStore((state) => state.updateVoice);
   const updateVoices = useStore((state) => state.updateVoices);
   const updateRate = useStore((state) => state.updateRate);
+  const updateAutoPlay = useStore((state) => state.updateAutoPlay);
+  const updatePlayer = useStore((state) => state.updatePlayer);
 
   const speak = (content: string, cb?: () => void) => {
     return new Promise((resolve, reject) => {
       try {
         pause();
-        playerRef.current = new SpeakerAudioDestination();
+        const player = new SpeakerAudioDestination();
+        updatePlayer(player);
 
-        const audioConfig = AudioConfig.fromSpeakerOutput(playerRef.current);
+        const audioConfig = AudioConfig.fromSpeakerOutput(player);
         if (!configRef.current) {
           configRef.current = SpeechConfig.fromSubscription(key, region);
         }
@@ -83,7 +99,7 @@ export const useTTS = () => {
           synthRef.current?.close();
         });
 
-        playerRef.current.onAudioEnd = () => {
+        player.onAudioEnd = () => {
           cb?.();
         };
       } catch (error) {
@@ -92,12 +108,15 @@ export const useTTS = () => {
     });
   };
 
-  const pause = () => playerRef.current?.pause();
+  const pause = React.useCallback(() => {
+    player?.pause();
+  }, [player]);
 
   React.useEffect(() => {
     const localVoice = localStorage.getItem("voice") || "zh-CN-XiaoxiaoNeural";
     const localVoices = localStorage.getItem("voices");
     const localRate = localStorage.getItem("voiceRate") || "medium";
+    const localAutoPlay = localStorage.getItem("autoPlay") || "0";
 
     try {
       updateVoices(localVoices ? JSON.parse(localVoices) : []);
@@ -106,6 +125,7 @@ export const useTTS = () => {
     }
     updateVoice(localVoice);
     updateRate(localRate as TTSRate);
+    updateAutoPlay(localAutoPlay);
   }, []);
 
   return {
@@ -114,8 +134,10 @@ export const useTTS = () => {
     voice,
     voices,
     rate,
+    autoPlay,
     updateVoice,
     updateVoices,
     updateRate,
+    updateAutoPlay,
   };
 };
diff --git a/src/locales/en.json b/src/locales/en.json
@@ -265,6 +265,8 @@
     "what-is-token": "What is a Token?"
   },
   "tts": {
+    "auto-play": "Auto Play",
+    "auto-play-tip": "Play the audio automatically after each reply",
     "azure-tts": "Azure TTS",
     "fast": "fast",
     "medium": "medium",

diff --git a/src/locales/zh-CN.json b/src/locales/zh-CN.json
@@ -265,6 +265,8 @@
     "what-is-token": "什么是 Token?"
   },
   "tts": {
+    "auto-play": "自动播放",
+    "auto-play-tip": "在每次回复后自动播放音频",
     "azure-tts": "Azure 文本转语音",
     "fast": "较快",
     "medium": "适中",