Merge branch 'main' into farfromrefug-main

Conflicts: .gitignore generate/gen_missing_items.ts types.ts utils/parse_args.ts utils/utils.ts
jersou · Oct 7, 2024 · 03f074f · 03f074f
2 parents a04ddda + 7d6e5ed
commit 03f074f
Show file tree

Hide file tree

Showing 14 changed files with 243 additions and 56 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,4 +15,4 @@
 /gui/dist
 /gui/.vite
 node_modules
-vendor
+/.spg-TTS-cache/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+### v0.5.8 / ??.??.??
+
+- feat: cache generated TTS file
+
 ### v0.5.7 / 2024.10.06
 
 - fix: coqui-tts error on windows

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# Studio-Pack-Generator
+# Studio-Pack-Generator (SPG)
 
 This project converts a folder or an RSS URL to a
 [Studio](https://github.com/marian-m12l/studio) pack zip for Lunii devices, see
@@ -245,6 +245,9 @@ Options:
   -u, --gui                          open GUI (on localhost:5555)                             [boolean] [default: false]
       --port                         port of GUI server                                         [number] [default: 5555]
       --config-file                  json config file                                                           [string]
+      --skip-read-tts-cache          disable the TTS cache usage                              [boolean] [default: false]
+      --skip-write-tts-cache         disable the TTS cache write                              [boolean] [default: false]
+      --tts-cache-path               path to the TTS cache                [string] [default: "<SPG dir>/.spg-TTS-cache"]
 ```
 
 Separate options by spaces, ex :
@@ -290,6 +293,11 @@ All key/value are optional, ex:
 }
 ```
 
+## TTS cache
+
+By default, the folder `<studio-pack-generator install dir>/.spg-TTS-cache/` is
+used to keep the generated audio files, so that they can be reused instead of
+being generated again.
+
 ## OpenAI TTS
 
 To use OpenAI TTS, use `--use-open-ai-tts` option, and you must set the API key:
@@ -329,6 +337,10 @@ studio-pack-generator -x -o output/dir  2-full.zip
 Note: it doesn't work well with "menu" nodes or with packs that have no
 "question" stage.
 
+## TTS cache
+
+To speed up the generation and save CPU, the generated TTS audio files are
+cached and reused on later runs.
+
 ## json config file
 
 The parameters can be imported from a json file with :
@@ -373,7 +385,10 @@ File format (all the properties are optionals) :
   "coquiTtsModel": "tts_models/multilingual/multi-dataset/xtts_v2",
   "coquiTtsLanguageIdx": "fr",
   "coquiTtsSpeakerIdx": "Abrahan Mack",
-  "port": 5555
+  "port": 5555,
+  "skipWriteTtsCache": false,
+  "skipReadTtsCache": false,
+  "ttsCachePath": "/tmp/spg-tts-cache"
 }
 ```
 

diff --git a/deno.json b/deno.json
@@ -19,6 +19,7 @@
     "@jersou/clite": "jsr:@jersou/clite@^0.5.0",
     "@libs/xml": "jsr:@libs/xml@^6.0.0",
     "@std/assert": "jsr:@std/assert@^1.0.6",
+    "@std/crypto": "jsr:@std/crypto@^1.0.3",
     "@std/encoding": "jsr:@std/encoding@^1.0.5",
     "@std/fmt": "jsr:@std/fmt@^1.0.2",
     "@std/fs": "jsr:@std/fs@^1.0.4",

diff --git a/deno.lock b/deno.lock
diff --git a/generate/basic_tts.ts b/generate/basic_tts.ts
@@ -7,6 +7,8 @@ import {
   getPico2waveCommand,
 } from "../utils/external_commands.ts";
 import type { ModOptions } from "../types.ts";
+import { cacheTtsFile, useCachedTtsFile } from "./tts_cache.ts";
+import { bgRed } from "@std/fmt/colors";
 
 let hasPico2waveWslCache: undefined | boolean;
 
@@ -40,6 +42,18 @@ export async function generate_audio_basic_tts(
   if (
     Deno.build.os === "windows" && (opt.skipWsl || !(await hasPico2waveWsl()))
   ) {
+    await windows_tts(outputPath, opt, title);
+  } else if (Deno.build.os === "darwin" && !(await hasPico2wave())) {
+    await macos_tts(outputPath, opt, title);
+  } else {
+    await pico2wave_tts(lang, outputPath, opt, title);
+  }
+}
+
+async function windows_tts(outputPath: string, opt: ModOptions, title: string) {
+  const cacheKey = ["windows_tts", title];
+
+  if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
     const audioFormat = "[System.Speech.AudioFormat.SpeechAudioFormatInfo]::" +
       "new(8000,[System.Speech.AudioFormat.AudioBitsPerSample]" +
       "::Sixteen,[System.Speech.AudioFormat.AudioChannel]::Mono)";
@@ -52,18 +66,48 @@ export async function generate_audio_basic_tts(
       `$speak.Speak(" . ${title.replace(/["' ]/g, " ")} . "); ` +
       `$speak.Dispose();`,
     ];
-    await $`PowerShell ${args}`.noThrow();
-  } else if (Deno.build.os === "darwin" && !(await hasPico2wave())) {
+    const res = await $`PowerShell ${args}`.noThrow();
+    if (res.code === 0) {
+      if (!opt.skipWriteTtsCache) {
+        await cacheTtsFile(outputPath, cacheKey);
+      }
+    } else {
+      console.log(bgRed(`windows_tts gen KO for "${title}"`));
+    }
+  }
+}
+
+async function macos_tts(outputPath: string, opt: ModOptions, title: string) {
+  const cacheKey = ["macos_tts", title];
+  if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
     const args = [
       "-o",
       convertPath(outputPath, opt),
       "--file-format",
       "WAVE",
       "--data-format",
       "LEF32@22050",
+      title,
     ];
-    await $`say ${args}`.noThrow();
-  } else {
+    const res = await $`say ${args}`.noThrow();
+    if (res.code === 0) {
+      if (!opt.skipWriteTtsCache) {
+        await cacheTtsFile(outputPath, cacheKey);
+      }
+    } else {
+      console.log(bgRed(`macos_tts gen KO for "${title}"`));
+    }
+  }
+}
+
+async function pico2wave_tts(
+  lang: string,
+  outputPath: string,
+  opt: ModOptions,
+  title: string,
+) {
+  const cacheKey = ["pico2wave_tts", title, lang];
+  if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
     const pico2waveCommand = await getPico2waveCommand();
     const cmd = [
       pico2waveCommand[0],
@@ -74,6 +118,13 @@ export async function generate_audio_basic_tts(
       convertPath(outputPath, opt),
       ` . ${title} . `,
     ];
-    await $`${cmd}`.noThrow();
+    const res = await $`${cmd}`.noThrow();
+    if (res.code === 0) {
+      if (!opt.skipWriteTtsCache) {
+        await cacheTtsFile(outputPath, cacheKey);
+      }
+    } else {
+      console.log(bgRed(`pico2wave_tts gen KO for "${title}"`));
+    }
   }
 }
diff --git a/generate/coqui_tts.ts b/generate/coqui_tts.ts
@@ -0,0 +1,45 @@
+import type { ModOptions } from "../types.ts";
+import { cacheTtsFile, useCachedTtsFile } from "./tts_cache.ts";
+import { getCoquiCommand } from "../utils/external_commands.ts";
+import $ from "@david/dax";
+import { bgRed } from "@std/fmt/colors";
+
+export async function generate_audio_with_coqui(
+  title: string,
+  opt: ModOptions,
+  outputPath: string,
+) {
+  const cacheKey = [
+    "CoquiTts",
+    title,
+    opt.coquiTtsSpeakerIdx,
+    opt.coquiTtsLanguageIdx,
+    opt.coquiTtsModel,
+  ];
+  if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
+    const coquiCommand = await getCoquiCommand();
+    const cmd = [
+      ...coquiCommand,
+      "--text",
+      title,
+      "--model_name",
+      opt.coquiTtsModel,
+      "--out_path",
+      outputPath,
+    ];
+    if (opt.coquiTtsLanguageIdx) {
+      cmd.push("--language_idx", opt.coquiTtsLanguageIdx);
+    }
+    if (opt.coquiTtsSpeakerIdx) {
+      cmd.push("--speaker_idx", opt.coquiTtsSpeakerIdx);
+    }
+    const res = await $`${cmd}`.noThrow(true);
+    if (res.code === 0) {
+      if (!opt.skipWriteTtsCache) {
+        await cacheTtsFile(outputPath, cacheKey);
+      }
+    } else {
+      console.log(bgRed(`Coqui gen KO for "${title}"`));
+    }
+  }
+}
diff --git a/generate/gen_audio.ts b/generate/gen_audio.ts
@@ -1,8 +1,7 @@
 import { generate_audio_basic_tts } from "./basic_tts.ts";
 import { generate_audio_with_openAI } from "./openai_tts.ts";
 import type { ModOptions } from "../types.ts";
-import { getCoquiCommand } from "../utils/external_commands.ts";
-import $ from "@david/dax";
+import { generate_audio_with_coqui } from "./coqui_tts.ts";
 
 export async function generateAudio(
   title: string,
@@ -11,29 +10,10 @@ export async function generateAudio(
   opt: ModOptions,
 ) {
   if (opt.useOpenAiTts) {
-    await generate_audio_with_openAI(
-      title,
-      outputPath.replace(/\.wav/i, ".mp3"),
-      opt,
-    );
+    const output = outputPath.replace(/\.wav/i, ".mp3");
+    await generate_audio_with_openAI(title, output, opt);
   } else if (opt.useCoquiTts) {
-    const coquiCommand = await getCoquiCommand();
-    const cmd = [
-      ...coquiCommand,
-      "--text",
-      title,
-      "--model_name",
-      opt.coquiTtsModel,
-      "--out_path",
-      outputPath,
-    ];
-    if (opt.coquiTtsLanguageIdx) {
-      cmd.push("--language_idx", opt.coquiTtsLanguageIdx);
-    }
-    if (opt.coquiTtsSpeakerIdx) {
-      cmd.push("--speaker_idx", opt.coquiTtsSpeakerIdx);
-    }
-    await $`${cmd}`;
+    await generate_audio_with_coqui(title, opt, outputPath);
   } else {
     await generate_audio_basic_tts(title, outputPath, lang, opt);
   }

diff --git a/generate/openai_tts.ts b/generate/openai_tts.ts
@@ -3,6 +3,7 @@ import { bgRed, blue } from "@std/fmt/colors";
 import $ from "@david/dax";
 
 import type { ModOptions } from "../types.ts";
+import { cacheTtsFile, useCachedTtsFile } from "./tts_cache.ts";
 
 let openAI_client: OpenAI;
 
@@ -11,30 +12,36 @@ export async function generate_audio_with_openAI(
   outputPath: string,
   opt: ModOptions,
 ) {
-  if (!openAI_client) {
-    if (opt?.openAiApiKey) {
-      openAI_client = new OpenAI({ apiKey: opt.openAiApiKey });
-    } else if (Deno.env.has("OPENAI_API_KEY")) {
-      openAI_client = new OpenAI();
+  const cacheKey = ["OpenAiTts", title, opt.openAiVoice, opt.openAiModel];
+  if (opt.skipReadTtsCache || !await useCachedTtsFile(outputPath, cacheKey)) {
+    if (!openAI_client) {
+      if (opt?.openAiApiKey) {
+        openAI_client = new OpenAI({ apiKey: opt.openAiApiKey });
+      } else if (Deno.env.has("OPENAI_API_KEY")) {
+        openAI_client = new OpenAI();
+      } else {
+        const apiKey = await $.prompt({
+          message: "OPENAI_API_KEY :",
+          mask: true,
+        });
+        openAI_client = new OpenAI({ apiKey });
+      }
+    }
+    const result = await openAI_client.audio.speech.create({
+      input: title,
+      response_format: "mp3",
+      model: opt?.openAiModel ?? "tts-1",
+      voice: opt?.openAiVoice ?? "onyx",
+    });
+    if (result.ok) {
+      console.log(blue(`OpenAI gen OK of "${title}" in ${outputPath}`));
+      const file = await Deno.open(outputPath, { create: true, write: true });
+      await result.body!.pipeTo(file.writable);
+      if (!opt.skipWriteTtsCache) {
+        await cacheTtsFile(outputPath, cacheKey);
+      }
     } else {
-      const apiKey = await $.prompt({
-        message: "OPENAI_API_KEY :",
-        mask: true,
-      });
-      openAI_client = new OpenAI({ apiKey });
+      console.log(bgRed(`OpenAI gen KO for "${title}"`), result);
     }
   }
-  const result = await openAI_client.audio.speech.create({
-    input: title,
-    response_format: "mp3",
-    model: opt?.openAiModel ?? "tts-1",
-    voice: opt?.openAiVoice ?? "onyx",
-  });
-  if (result.ok) {
-    console.log(blue(`OpenAI gen OK of "${title}" in ${outputPath}`));
-    const file = await Deno.open(outputPath, { create: true, write: true });
-    await result.body!.pipeTo(file.writable);
-  } else {
-    console.log(bgRed(`OpenAI gen KO of ${title}`), result);
-  }
 }
diff --git a/generate/tts_cache.ts b/generate/tts_cache.ts
@@ -0,0 +1,39 @@
+import { getSpgDirPath } from "../utils/utils.ts";
+import { crypto } from "@std/crypto/crypto";
+import { encodeHex } from "@std/encoding/hex";
+import $ from "@david/dax";
+import { green, yellow } from "@std/fmt/colors";
+
+export function getDefaultTtsPath() {
+  return getSpgDirPath().resolve(".spg-TTS-cache");
+}
+
+export function getCachePath(key: (string | boolean | undefined)[]) {
+  const data = new TextEncoder().encode(JSON.stringify(key));
+  const sum = encodeHex(crypto.subtle.digestSync("MD5", data));
+  return getDefaultTtsPath().join(sum.substring(0, 2)).join(sum);
+}
+
+export async function cacheTtsFile(
+  output: string,
+  key: (string | undefined | boolean)[],
+) {
+  const cachePath = getCachePath(key);
+  await cachePath.resolve("..").mkdir({ recursive: true });
+  await $.path(output).copyFile(cachePath);
+}
+
+export async function useCachedTtsFile(
+  output: string,
+  key: (string | undefined | boolean)[],
+): Promise<boolean> {
+  const cachePath = getCachePath(key);
+  if (await cachePath.exists()) {
+    await cachePath.copyFile(output);
+    console.log(green(`use TTS cached for ${output}`));
+    return true;
+  } else {
+    console.log(yellow(`no TTS cache found for ${output}`));
+    return false;
+  }
+}