From 7dccbfdcd5220c7f7f9a1835ed6a3ced7f372e62 Mon Sep 17 00:00:00 2001 From: kirianguiller Date: Mon, 1 Mar 2021 15:17:15 +0100 Subject: [PATCH] handle multi speaker and gst in Synthetizer class --- TTS/server/server.py | 19 ++++- TTS/server/templates/index.html | 25 +++++- TTS/tts/utils/synthesis.py | 33 ++++---- TTS/utils/generic_utils.py | 23 +++++- TTS/utils/synthesizer.py | 133 +++++++++++++++++++------------- 5 files changed, 150 insertions(+), 83 deletions(-) diff --git a/TTS/server/server.py b/TTS/server/server.py index 2372a57a..2275e825 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -10,6 +10,7 @@ from flask import Flask, render_template, request, send_file from TTS.utils.io import load_config from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer +from TTS.utils.generic_utils import style_wav_uri_to_dict def create_argparser(): @@ -81,13 +82,19 @@ synthesizer = Synthesizer( args.tts_checkpoint, args.tts_config, args.vocoder_checkpoint, args.vocoder_config, args.use_cuda ) +use_speaker_embedding = synthesizer.tts_config.get("use_external_speaker_embedding_file", False) +use_gst = synthesizer.tts_config.get("use_gst", False) app = Flask(__name__) @app.route("/") def index(): - return render_template("index.html", show_details=args.show_details) - + return render_template( + 'index.html', + show_details=args.show_details, + use_speaker_embedding=use_speaker_embedding, + use_gst = use_gst + ) @app.route("/details") def details(): @@ -108,9 +115,13 @@ def details(): @app.route("/api/tts", methods=["GET"]) def tts(): - text = request.args.get("text") + text = request.args.get('text') + speaker_json_key = request.args.get('speaker', "") + style_wav = request.args.get('style-wav', "") + + style_wav = style_wav_uri_to_dict(style_wav) print(" > Model input: {}".format(text)) - wavs = synthesizer.tts(text) + wavs = synthesizer.tts(text, speaker_json_key=speaker_json_key, style_wav=style_wav) out = io.BytesIO() synthesizer.save_wav(wavs, out) return send_file(out, mimetype="audio/wav") diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html index 635db844..87321c61 100644 --- a/TTS/server/templates/index.html +++ b/TTS/server/templates/index.html @@ -60,6 +60,14 @@ + {%if use_speaker_embedding%} + + {%endif%} + + {%if use_gst%} + + {%endif%} +

{%if show_details%} @@ -73,15 +81,24 @@