Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add json support to http_server #519

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/python_run/README_http.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,15 @@ Using a `POST` request:
```sh
curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
```

Using a JSON `POST` request:
```sh
curl -qs -H 'Content-Type: application/json' -d '{"text":"Such a nice test", "voice":"en_US-amy-medium"}' -o test.json 'http://localhost:5000'
```

The JSON response will look like:
```json
{"Content-Type":"audio/wav","text":"Such a nice test","audio":"<base64-encoded WAV data>"}
```

Note: the "voice" parameter is optional.
71 changes: 51 additions & 20 deletions src/python_run/piper/http_server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#!/usr/bin/env python3
import argparse
import base64
import io
import json
import logging
import nltk
import wave
from pathlib import Path
from typing import Any, Dict
Expand All @@ -14,6 +17,24 @@
_LOGGER = logging.getLogger()


def load_voice(args):
    """Resolve ``args.model`` to a voice and load it with ``PiperVoice.load``.

    If ``args.model`` is not the path of an existing file it is treated as a
    voice name: the voice catalogue is read via ``get_voices``, alias names
    are merged into it, ``ensure_voice_exists`` is called for the requested
    voice, and ``find_voice`` supplies the model and config paths.

    Side effect: in that branch ``args.model`` and ``args.config`` are
    overwritten with the values returned by ``find_voice``. When the model
    file already exists, ``args`` is left untouched.

    Returns the result of ``PiperVoice.load``.
    """
    if not Path(args.model).exists():
        # Voice catalogue, keyed by voice name.
        catalogue = get_voices(args.download_dir, update_voices=args.update_voices)

        # Old voice names stay usable: every alias gets its own entry that
        # copies the real voice's info and is flagged with "_is_alias".
        alias_entries: Dict[str, Any] = {
            alias: {"_is_alias": True, **info}
            for info in catalogue.values()
            for alias in info.get("aliases", [])
        }
        catalogue.update(alias_entries)

        ensure_voice_exists(args.model, args.data_dir, args.download_dir, catalogue)
        args.model, args.config = find_voice(args.model, args.data_dir)

    return PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
Expand Down Expand Up @@ -73,24 +94,12 @@ def main() -> None:
# Download to first data directory by default
args.download_dir = args.data_dir[0]

# Download voice if file doesn't exist
model_path = Path(args.model)
if not model_path.exists():
# Load voice info
voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
# Load voice and download voice if file doesn't exist
voice = load_voice(args)
if not voice:
_LOGGER.error("could not load voice")
return

# Resolve aliases for backwards compatibility with old voice names
aliases_info: Dict[str, Any] = {}
for voice_info in voices_info.values():
for voice_alias in voice_info.get("aliases", []):
aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

voices_info.update(aliases_info)
ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
args.model, args.config = find_voice(args.model, args.data_dir)

# Load voice
voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
synthesize_args = {
"speaker_id": args.speaker,
"length_scale": args.length_scale,
Expand All @@ -104,8 +113,17 @@ def main() -> None:

@app.route("/", methods=["GET", "POST"])
def app_synthesize() -> bytes:
is_json = False
if request.method == "POST":
text = request.data.decode("utf-8")
if "application/json" == request.headers.get("Content-Type"):
is_json = True
body = json.loads(text)
text = body.get("text")
voice_name = body.get("voice")
if voice_name:
args.model = voice_name
voice = load_voice(args)
else:
text = request.args.get("text", "")

Expand All @@ -116,9 +134,22 @@ def app_synthesize() -> bytes:
_LOGGER.debug("Synthesizing text: %s", text)
with io.BytesIO() as wav_io:
with wave.open(wav_io, "wb") as wav_file:
voice.synthesize(text, wav_file, **synthesize_args)

return wav_io.getvalue()
if len(text) < 133:
voice.synthesize(text, wav_file, **synthesize_args)
else:
set_paramaters = True
for sentence in nltk.sent_tokenize(text):
_LOGGER.info(f"read: '{sentence}'")
voice.synthesize(sentence, wav_file, set_paramaters=set_paramaters, **synthesize_args)
set_paramaters = False
response = wav_io.getvalue()
if is_json:
response = {
"Content-Type": "audio/wav",
"audio": base64.b64encode(response).decode("utf-8"),
"text": text,
}
return response

app.run(host=args.host, port=args.port)

Expand Down
10 changes: 6 additions & 4 deletions src/python_run/piper/voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,18 @@ def synthesize(
self,
text: str,
wav_file: wave.Wave_write,
set_paramaters: Optional[bool] = True,
speaker_id: Optional[int] = None,
length_scale: Optional[float] = None,
noise_scale: Optional[float] = None,
noise_w: Optional[float] = None,
sentence_silence: float = 0.0,
):
"""Synthesize WAV audio from text."""
wav_file.setframerate(self.config.sample_rate)
wav_file.setsampwidth(2) # 16-bit
wav_file.setnchannels(1) # mono
if set_paramaters:
"""Synthesize WAV audio from text."""
wav_file.setframerate(self.config.sample_rate)
wav_file.setsampwidth(2) # 16-bit
wav_file.setnchannels(1) # mono

for audio_bytes in self.synthesize_stream_raw(
text,
Expand Down
1 change: 1 addition & 0 deletions src/python_run/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
piper-phonemize~=1.1.0
onnxruntime>=1.11.0,<2
nltk>=3.8.1