rhasspy · luckybit4755 · Jun 5, 2024 · Jun 30, 2024 · Jun 30, 2024 · Aug 22, 2024
diff --git a/src/python_run/README_http.md b/src/python_run/README_http.md
@@ -25,3 +25,15 @@ Using a `POST` request:
 ```sh
 curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
 ```
+
+Using a JSON `POST` request:
+```sh
+curl -qs -H 'Content-Type: application/json' -d '{"text":"Such a nice test", "voice":"en_US-amy-medium"}' -o test.json 'http://localhost:5000'
+```
+
+The JSON response will look like this:
+```json
+{"Content-Type":"audio/wav","text":"Such a nice test","audio":"<base64-encoded WAV data>"}
+```
+
+Note: the "voice" parameter is optional.
diff --git a/src/python_run/piper/http_server.py b/src/python_run/piper/http_server.py
@@ -1,7 +1,10 @@
 #!/usr/bin/env python3
 import argparse
+import base64
 import io
+import json
 import logging
+import nltk
 import wave
 from pathlib import Path
 from typing import Any, Dict
@@ -14,6 +17,24 @@
 _LOGGER = logging.getLogger()
 
 
+def load_voice(args):
+    model_path = Path(args.model)
+    if not model_path.exists():
+        # Load voice info
+        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
+
+        # Resolve aliases for backwards compatibility with old voice names
+        aliases_info: Dict[str, Any] = {}
+        for voice_info in voices_info.values():
+            for voice_alias in voice_info.get("aliases", []):
+                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}
+
+        voices_info.update(aliases_info)
+        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
+        args.model, args.config = find_voice(args.model, args.data_dir)
+    return PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
+
+
 def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
@@ -73,24 +94,12 @@ def main() -> None:
         # Download to first data directory by default
         args.download_dir = args.data_dir[0]
 
-    # Download voice if file doesn't exist
-    model_path = Path(args.model)
-    if not model_path.exists():
-        # Load voice info
-        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
+    # Load the voice, downloading it first if the model file doesn't exist
+    voice = load_voice(args)
+    if not voice:
+        _LOGGER.error("could not load voice")
+        return
 
-        # Resolve aliases for backwards compatibility with old voice names
-        aliases_info: Dict[str, Any] = {}
-        for voice_info in voices_info.values():
-            for voice_alias in voice_info.get("aliases", []):
-                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}
-
-        voices_info.update(aliases_info)
-        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
-        args.model, args.config = find_voice(args.model, args.data_dir)
-
-    # Load voice
-    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
     synthesize_args = {
         "speaker_id": args.speaker,
         "length_scale": args.length_scale,
@@ -104,8 +113,17 @@ def main() -> None:
 
     @app.route("/", methods=["GET", "POST"])
     def app_synthesize() -> bytes:
+        is_json = False
         if request.method == "POST":
             text = request.data.decode("utf-8")
+            if "application/json" == request.headers.get("Content-Type"):
+                is_json = True
+                body = json.loads(text)
+                text = body.get("text")
+                voice_name = body.get("voice")
+                if voice_name:
+                    args.model = voice_name
+                    voice = load_voice(args)
         else:
             text = request.args.get("text", "")
 
@@ -116,9 +134,22 @@ def app_synthesize() -> bytes:
         _LOGGER.debug("Synthesizing text: %s", text)
         with io.BytesIO() as wav_io:
             with wave.open(wav_io, "wb") as wav_file:
-                voice.synthesize(text, wav_file, **synthesize_args)
-
-            return wav_io.getvalue()
+                if len(text) < 133:
+                    voice.synthesize(text, wav_file, **synthesize_args)
+                else:
+                    set_paramaters = True
+                    for sentence in nltk.sent_tokenize(text):
+                        _LOGGER.info(f"read: '{sentence}'")
+                        voice.synthesize(sentence, wav_file, set_paramaters=set_paramaters, **synthesize_args)
+                        set_paramaters = False
+            response = wav_io.getvalue()
+            if is_json:
+                response = {
+                    "Content-Type": "audio/wav",
+                    "audio": base64.b64encode(response).decode("utf-8"),
+                    "text": text,
+                }
+            return response
 
     app.run(host=args.host, port=args.port)
 

diff --git a/src/python_run/piper/voice.py b/src/python_run/piper/voice.py
@@ -90,16 +90,18 @@ def synthesize(
         self,
         text: str,
         wav_file: wave.Wave_write,
+        set_paramaters: Optional[bool] = True,
         speaker_id: Optional[int] = None,
         length_scale: Optional[float] = None,
         noise_scale: Optional[float] = None,
         noise_w: Optional[float] = None,
         sentence_silence: float = 0.0,
     ):
-        """Synthesize WAV audio from text."""
-        wav_file.setframerate(self.config.sample_rate)
-        wav_file.setsampwidth(2)  # 16-bit
-        wav_file.setnchannels(1)  # mono
+        if set_paramaters:
+            """Synthesize WAV audio from text."""
+            wav_file.setframerate(self.config.sample_rate)
+            wav_file.setsampwidth(2)  # 16-bit
+            wav_file.setnchannels(1)  # mono
 
         for audio_bytes in self.synthesize_stream_raw(
             text,

diff --git a/src/python_run/requirements.txt b/src/python_run/requirements.txt
@@ -1,2 +1,3 @@
 piper-phonemize~=1.1.0
 onnxruntime>=1.11.0,<2
+nltk>=3.8.1