Skip to content

Commit f957989

Browse files
authored
Added Support for Kokoro
Kokoro TTS Support
2 parents 3e8155b + 491b616 commit f957989

File tree

10 files changed

+218
-8
lines changed

10 files changed

+218
-8
lines changed

docs/SUMMARY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
* [Using Coqui Local](./guides/using-coqui.md)
3636
* [Using Piper ](./guides/using-piper.md)
3737
* [Using Alltalk TTS](./guides/using-alltalk.md)
38+
* [Using Kokoro TTS](./guides/using-kokoro.md)
3839
* [Using RVC](./guides/using-rvc.md)
3940

4041

docs/guides/using-kokoro.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
---
2+
title: Using Kokoro
3+
order: 16
4+
---
5+
6+
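Kokoro is a text-to-speech model; see the [Kokoro TTS GitHub repository](https://github.com/hexgrad/kokoro) for the model itself. This guide sets up a local Kokoro TTS server (cloned from a separate server repository below) that the app connects to.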
7+
8+
## Setting Up Kokoro TTS Server
9+
10+
### Clone the Repository
11+
```bash
12+
git clone https://github.com/flukexp/kokoro-tts.git
13+
cd kokoro-tts
14+
```
15+
16+
### Create a Virtual Environment
17+
```bash
18+
python -m venv venv
19+
source venv/bin/activate # On Windows, use `venv\Scripts\activate`
20+
```
21+
22+
### Install Dependencies
23+
```bash
24+
pip install -r requirements.txt
25+
```
26+
27+
## Running the Server
28+
29+
### Start the FastAPI Server
30+
```bash
31+
python server.py
32+
```
33+
34+
## Make sure Kokoro is enabled for TTS
35+
36+
```text
37+
Settings -> Text-to-Speech -> TTS Backend -> Kokoro
38+
```
39+
40+
## Set the voice
41+
42+
```text
43+
Settings -> Text-to-Speech -> Kokoro -> Voice
44+
```
45+
46+
## Using Kokoro with OpenAI TTS
47+
Alternatively, you can use Kokoro through the OpenAI TTS backend: select OpenAI as the TTS backend, then configure it with your Kokoro endpoint and voice.
48+
49+
### Notes
50+
- Kokoro TTS can be used as a local text-to-speech backend in your application.
51+
- If you want to explore more models or functionalities, refer to the official [Kokoro TTS GitHub repository](https://github.com/hexgrad/kokoro).

src/components/settings.tsx

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ import { AmicaLifePage } from "./settings/AmicaLifePage";
7272
import { useVrmStoreContext } from "@/features/vrmStore/vrmStoreContext";
7373
import { OpenRouterSettings } from "./settings/OpenRouterSettingsPage";
7474
import { ExternalAPIPage } from "./settings/ExternalAPIPage";
75+
import { KokoroSettingsPage } from "./settings/KokoroSettingsPage";
7576

7677

7778
export const Settings = ({
@@ -135,6 +136,9 @@ export const Settings = ({
135136

136137
const [localXTTSUrl, setLocalXTTSUrl] = useState(config("localXTTS_url"));
137138

139+
const [kokoroUrl, setKokoroUrl] = useState(config("kokoro_url"));
140+
const [kokoroVoice, setKokoroVoice] = useState(config("kokoro_voice"));
141+
138142
const [visionBackend, setVisionBackend] = useState(config("vision_backend"));
139143
const [visionLlamaCppUrl, setVisionLlamaCppUrl] = useState(config("vision_llamacpp_url"));
140144
const [visionOllamaUrl, setVisionOllamaUrl] = useState(config("vision_ollama_url"));
@@ -284,6 +288,7 @@ export const Settings = ({
284288
rvcUrl,rvcEnabled,rvcModelName,rvcIndexPath,rvcF0upKey,rvcF0Method,rvcIndexRate,rvcFilterRadius,,rvcResampleSr,rvcRmsMixRate,rvcProtect,
285289
coquiLocalUrl,coquiLocalVoiceId,
286290
localXTTSUrl,
291+
kokoroUrl, kokoroVoice,
287292
visionBackend,
288293
visionLlamaCppUrl,
289294
visionOllamaUrl, visionOllamaModel,
@@ -368,7 +373,7 @@ export const Settings = ({
368373

369374
case 'tts':
370375
return <MenuPage
371-
keys={["tts_backend", "elevenlabs_settings", "speecht5_settings", "coquiLocal_settings", "openai_tts_settings", "piper_settings", "localXTTS_settings", "rvc_settings"]}
376+
keys={["tts_backend", "elevenlabs_settings", "speecht5_settings", "coquiLocal_settings", "openai_tts_settings", "piper_settings", "localXTTS_settings", "kokoro_settings", "rvc_settings"]}
372377
menuClick={handleMenuClick} />;
373378

374379
case 'stt':
@@ -583,6 +588,15 @@ export const Settings = ({
583588
setSettingsUpdated={setSettingsUpdated}
584589
/>
585590

591+
case 'kokoro_settings':
592+
return <KokoroSettingsPage
593+
kokoroUrl={kokoroUrl}
594+
kokoroVoice={kokoroVoice}
595+
setKokoroVoice={setKokoroVoice}
596+
setKokoroUrl={setKokoroUrl}
597+
setSettingsUpdated={setSettingsUpdated}
598+
/>
599+
586600
case 'rvc_settings':
587601
return <RVCSettingsPage
588602
rvcUrl={rvcUrl}

src/components/settings/CoquiLocalSettingsPage.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export function CoquiLocalSettingsPage({
6161
<li className="py-4">
6262
<FormRow label={t("Voice ID")}>
6363
<select
64+
className="mt-2 block w-full rounded-md border-0 py-1.5 pl-3 pr-10 text-gray-900 ring-1 ring-inset ring-gray-300 focus:ring-2 focus:ring-indigo-600 sm:text-sm sm:leading-6"
6465
value={coquiLocalVoiceId}
6566
onChange={(event: React.ChangeEvent<any>) => {
6667
event.preventDefault();
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import React, { useState, useEffect } from 'react';
2+
import { useTranslation } from 'react-i18next';
3+
import { BasicPage, FormRow, NotUsingAlert } from './common';
4+
import { TextInput } from "@/components/textInput";
5+
import { config, updateConfig } from "@/utils/config";
6+
import { kokoroVoiceList } from '@/features/kokoro/kokoro';
7+
8+
/**
 * Settings page for the Kokoro TTS backend.
 *
 * Renders a text input for the Kokoro server URL and a dropdown of voices.
 * The voice list is requested once, when the page mounts, via
 * `kokoroVoiceList()`. Every change is written to the parent state setter,
 * persisted with `updateConfig`, and flagged through `setSettingsUpdated`.
 *
 * @param kokoroUrl - Current Kokoro server URL shown in the URL field.
 * @param kokoroVoice - Currently selected voice id.
 * @param setKokoroUrl - Parent state setter for the URL.
 * @param setKokoroVoice - Parent state setter for the voice id.
 * @param setSettingsUpdated - Called with `true` after any change is stored.
 */
export function KokoroSettingsPage({
  kokoroUrl,
  kokoroVoice,
  setKokoroUrl,
  setKokoroVoice,
  setSettingsUpdated,
}: {
  kokoroUrl: string;
  kokoroVoice: string;
  setKokoroUrl: (key: string) => void;
  setSettingsUpdated: (updated: boolean) => void;
  setKokoroVoice: (key: string) => void;
}) {
  const { t } = useTranslation();
  const [voiceList, setVoiceList] = useState<{ key: string; label: string }[]>([]);

  useEffect(() => {
    // Set by the cleanup below so a response that arrives after the page has
    // unmounted does not try to update state.
    let cancelled = false;

    async function fetchVoiceList() {
      try {
        const data = await kokoroVoiceList();
        if (cancelled) {
          return;
        }
        // Only accept a real array; anything else leaves the list untouched.
        if (data && Array.isArray(data.voices)) {
          const formattedVoices = data.voices.map((voice: string) => ({
            key: voice,
            label: voice,
          }));
          setVoiceList(formattedVoices);
        }
      } catch (error) {
        console.error("Error fetching kokoro voice list:", error);
      }
    }
    fetchVoiceList();

    return () => {
      cancelled = true;
    };
    // NOTE(review): the list is fetched only on mount, so editing the URL does
    // not refresh it until the page is reopened — confirm this is intended.
  }, []);

  // A controlled <select> whose value matches no <option> displays the first
  // option instead. Keep the configured voice selectable even when the server
  // is unreachable or does not list it, so the UI reflects the stored value.
  const voiceOptions =
    kokoroVoice && !voiceList.some((voice) => voice.key === kokoroVoice)
      ? [{ key: kokoroVoice, label: kokoroVoice }, ...voiceList]
      : voiceList;

  return (
    <BasicPage
      title={t("Kokoro") + " "+ t("Settings")}
      description={t("kokoro_desc", "Configure Kokoro")}
    >
      { config("tts_backend") !== "kokoro" && (
        <NotUsingAlert>
          {t("not_using_alert", "You are not currently using {{name}} as your {{what}} backend. These settings will not be used.", {name: t("Kokoro"), what: t("TTS")})}
        </NotUsingAlert>
      ) }
      <ul role="list" className="divide-y divide-gray-100 max-w-xs">
        <li className="py-4">
          <FormRow label={t("URL")}>
            <TextInput
              value={kokoroUrl}
              onChange={(event: React.ChangeEvent<any>) => {
                setKokoroUrl(event.target.value);
                updateConfig("kokoro_url", event.target.value);
                setSettingsUpdated(true);
              }}
            />
          </FormRow>
        </li>
        <li className="py-4">
          <FormRow label={t("Voices")}>
            <select
              className="mt-2 block w-full rounded-md border-0 py-1.5 pl-3 pr-10 text-gray-900 ring-1 ring-inset ring-gray-300 focus:ring-2 focus:ring-indigo-600 sm:text-sm sm:leading-6"
              value={kokoroVoice}
              onChange={(event: React.ChangeEvent<HTMLSelectElement>) => {
                event.preventDefault();
                setKokoroVoice(event.target.value);
                updateConfig("kokoro_voice", event.target.value);
                setSettingsUpdated(true);
              }}
            >
              {voiceOptions.map((voice) => (
                <option key={voice.key} value={voice.key}>{t(voice.label)}</option>
              ))}
            </select>
          </FormRow>
        </li>
      </ul>
    </BasicPage>
  );
}

src/components/settings/TTSBackendPage.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const ttsEngines = [
1111
{key: "localXTTS", label: "Alltalk TTS"}, // Our local TTS endpoint (XTTS based)
1212
{key: "piper", label: "Piper"},
1313
{key: "coquiLocal", label: "Coqui Local"},
14+
{key: "kokoro", label: "Kokoro"},
1415
];
1516

1617
function idToTitle(id: string): string {
@@ -57,7 +58,7 @@ export function TTSBackendPage({
5758
</select>
5859
</FormRow>
5960
</li>
60-
{ ["elevenlabs", "speecht5", "openai_tts", "piper", "coquiLocal", "localXTTS"].includes(ttsBackend) && (
61+
{ ["elevenlabs", "speecht5", "openai_tts", "piper", "coquiLocal", "localXTTS", "kokoro"].includes(ttsBackend) && (
6162
<li className="py-4">
6263
<FormRow label={`${t("Configure")} ${t(idToTitle(ttsBackend))}`}>
6364
<button

src/components/settings/common.tsx

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,15 +171,16 @@ export function getIconFromPage(page: string): JSX.Element {
171171
case 'elevenlabs_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
172172
case 'speecht5_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
173173
case 'openai_tts_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
174-
case 'piper_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
175-
case 'rvc_settings': return <CogIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
174+
case 'piper_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
175+
case 'rvc_settings': return <CogIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
176176
case 'coquiLocal_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
177-
case 'localXTTS_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
177+
case 'localXTTS_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
178+
case 'kokoro_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
178179

179-
case 'stt_backend': return <PencilSquareIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
180-
case 'stt_wake_word': return <MoonIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
180+
case 'stt_backend': return <PencilSquareIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
181+
case 'stt_wake_word': return <MoonIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
181182
case 'whisper_openai_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
182-
case 'whispercpp_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
183+
case 'whispercpp_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
183184

184185
case 'vision_backend': return <EyeDropperIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
185186
case 'vision_llamacpp_settings': return <AdjustmentsHorizontalIcon className="h-5 w-5 flex-none text-gray-800" aria-hidden="true" />;
@@ -231,6 +232,7 @@ function getLabelFromPage(page: string): string {
231232
case 'rvc_settings': return t('RVC');
232233
case 'coquiLocal_settings': return t('Coqui Local');
233234
case 'localXTTS_settings': return t('Alltalk');
235+
case 'kokoro_settings': return t('Kokoro');
234236

235237
case 'vision_backend': return t('Vision Backend');
236238
case 'vision_llamacpp_settings': return t('LLama.cpp');

src/features/chat/chat.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import { getOpenRouterChatResponseStream } from './openRouterChat';
4343
import { handleUserInput } from '../externalAPI/externalAPI';
4444
import { loadVRMAnimation } from '@/lib/VRMAnimation/loadVRMAnimation';
4545
import isDev from '@/utils/isDev';
46+
import { kokoro } from "../kokoro/kokoro";
4647

4748
type Speak = {
4849
audioBuffer: ArrayBuffer | null;
@@ -663,6 +664,10 @@ export class Chat {
663664
const voice = await coquiLocal(talk.message);
664665
return voice.audio;
665666
}
667+
case "kokoro": {
668+
const voice = await kokoro(talk.message);
669+
return voice.audio;
670+
}
666671
}
667672
} catch (e: any) {
668673
console.error(e.toString());

src/features/kokoro/kokoro.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import { config } from '@/utils/config';
2+
3+
/**
 * Synthesizes speech for `message` using the configured Kokoro TTS server.
 *
 * Sends a JSON POST (`{ text, voice }`) to `<kokoro_url>/tts`, where the URL
 * and voice come from `config("kokoro_url")` and `config("kokoro_voice")`,
 * and returns the raw response body.
 *
 * @param message - Text to be spoken.
 * @returns An object whose `audio` field holds the response bytes.
 * @throws Error with message "Kokoro TTS API Error" when the request fails or
 *   the server answers with a non-2xx status. The underlying cause is logged.
 */
export async function kokoro(
  message: string,
): Promise<{ audio: ArrayBuffer }> {
  try {
    // Strip trailing slashes so a configured "http://host:8080/" does not
    // produce "http://host:8080//tts".
    const baseUrl = config("kokoro_url").replace(/\/+$/, "");
    const res = await fetch(`${baseUrl}/tts`, {
      method: "POST",
      body: JSON.stringify({
        text: message,
        voice: config("kokoro_voice"),
      }),
      headers: {
        "Content-Type": "application/json",
      },
    });
    if (!res.ok) {
      // Put the status in the error so the log line below says what failed.
      throw new Error(`Kokoro TTS request failed with HTTP ${res.status} ${res.statusText}`);
    }

    return { audio: await res.arrayBuffer() };
  } catch (e: unknown) {
    console.error('ERROR', e);
    // Callers see one stable message regardless of the underlying cause.
    throw new Error("Kokoro TTS API Error");
  }
}
29+
30+
/**
 * Fetches the list of voices offered by the configured Kokoro TTS server.
 *
 * Issues a GET to `<kokoro_url>/voices` (URL taken from
 * `config("kokoro_url")`) and expects a JSON object with a `voices` array.
 *
 * @returns An object whose `voices` field lists the voice ids as strings.
 * @throws Error when the request fails, the server answers with a non-2xx
 *   status, or the body has no `voices` array. The error is logged and
 *   rethrown unchanged.
 */
export async function kokoroVoiceList(
): Promise<{ voices: string[] }> {
  try {
    // Strip trailing slashes so "http://host:8080/" does not yield "//voices".
    const baseUrl = config("kokoro_url").replace(/\/+$/, "");
    const response = await fetch(`${baseUrl}/voices`, {
      method: 'GET',
      headers: {
        // The body is parsed as JSON below, so ask for JSON.
        Accept: "application/json",
      },
    });

    // Without this check an HTML/text error page would be fed to the JSON
    // parser and surface as a confusing syntax error.
    if (!response.ok) {
      throw new Error(`Kokoro voice list request failed with HTTP ${response.status} ${response.statusText}`);
    }

    const data: unknown = await response.json();
    const voices = (data as { voices?: unknown } | null)?.voices;
    if (!Array.isArray(voices)) {
      throw new Error("Kokoro voice list response does not contain a 'voices' array");
    }

    // Keep only string entries so the declared return type holds at runtime.
    return { voices: voices.filter((voice): voice is string => typeof voice === "string") };
  } catch (error: unknown) {
    console.error('Error fetching kokoro voice:', error);
    throw error;
  }
}

src/utils/config.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ export const defaults = {
7777
rvc_protect: process.env.NEXT_PUBLIC_RVC_PROTECT ?? '0.33',
7878
coquiLocal_url: process.env.NEXT_PUBLIC_COQUILOCAL_URL ?? 'http://localhost:5002',
7979
coquiLocal_voiceid: process.env.NEXT_PUBLIC_COQUILOCAL_VOICEID ?? 'p240',
80+
kokoro_url: process.env.NEXT_PUBLIC_KOKORO_URL ?? 'http://localhost:8080',
81+
kokoro_voice: process.env.NEXT_PUBLIC_KOKORO_VOICE ?? 'af_bella',
8082
piper_url: process.env.NEXT_PUBLIC_PIPER_URL ?? 'https://i-love-amica.com:5000/tts',
8183
elevenlabs_apikey: process.env.NEXT_PUBLIC_ELEVENLABS_APIKEY ??'',
8284
elevenlabs_voiceid: process.env.NEXT_PUBLIC_ELEVENLABS_VOICEID ?? '21m00Tcm4TlvDq8ikWAM',

0 commit comments

Comments
 (0)