html formatting, enable multi-speaker model on the server with a dropdown menu to select the speaker

pull/441/head
Eren Gölge 2021-04-22 15:22:36 +02:00
parent f9f3d04d14
commit ad047c8195
2 changed files with 137 additions and 94 deletions

View File

@ -32,12 +32,19 @@ def create_argparser():
"--model_name",
type=str,
default="tts_models/en/ljspeech/tacotron2-DDC",
help="Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
help=
"Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
)
parser.add_argument("--vocoder_name", type=str, default=None, help="name of one of the released vocoder models.")
parser.add_argument("--vocoder_name",
type=str,
default=None,
help="name of one of the released vocoder models.")
# Args for running custom models
parser.add_argument("--config_path", default=None, type=str, help="Path to model config file.")
# Args for running custom models
parser.add_argument("--config_path",
default=None,
type=str,
help="Path to model config file.")
parser.add_argument(
"--model_path",
type=str,
@ -47,15 +54,34 @@ def create_argparser():
parser.add_argument(
"--vocoder_path",
type=str,
help="Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
help=
"Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
default=None,
)
parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
parser.add_argument("--port", type=int, default=5002, help="port to listen on.")
parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
parser.add_argument("--vocoder_config_path",
type=str,
help="Path to vocoder model config file.",
default=None)
parser.add_argument("--speakers_file_path",
type=str,
help="JSON file for multi-speaker model.",
default=None)
parser.add_argument("--port",
type=int,
default=5002,
help="port to listen on.")
parser.add_argument("--use_cuda",
type=convert_boolean,
default=False,
help="true to use CUDA.")
parser.add_argument("--debug",
type=convert_boolean,
default=False,
help="true to enable Flask debug mode.")
parser.add_argument("--show_details",
type=convert_boolean,
default=False,
help="Generate model detail page.")
return parser
@ -83,11 +109,14 @@ if args.list_models:
# CASE2: load pre-trained model paths
if args.model_name is not None and not args.model_path:
model_path, config_path, model_item = manager.download_model(args.model_name)
args.vocoder_name = model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name
model_path, config_path, model_item = manager.download_model(
args.model_name)
args.vocoder_name = model_item[
"default_vocoder"] if args.vocoder_name is None else args.vocoder_name
if args.vocoder_name is not None and not args.vocoder_path:
vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
vocoder_path, vocoder_config_path, _ = manager.download_model(
args.vocoder_name)
# CASE3: set custom model paths
if args.model_path is not None:
@ -100,11 +129,11 @@ if args.vocoder_path is not None:
vocoder_config_path = args.vocoder_config_path
# load models
synthesizer = Synthesizer(
model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, args.use_cuda
)
synthesizer = Synthesizer(model_path, config_path, speakers_file_path,
vocoder_path, vocoder_config_path, args.use_cuda)
use_speaker_embedding = synthesizer.tts_config.get("use_external_speaker_embedding_file", False)
use_speaker_embedding = synthesizer.tts_config.get(
"use_external_speaker_embedding_file", False)
use_gst = synthesizer.tts_config.get("use_gst", False)
app = Flask(__name__)
@ -131,9 +160,11 @@ def style_wav_uri_to_dict(style_wav: str) -> Union[str, dict]:
@app.route("/")
def index():
return render_template(
"index.html", show_details=args.show_details, use_speaker_embedding=use_speaker_embedding, use_gst=use_gst
)
return render_template("index.html",
show_details=args.show_details,
use_speaker_embedding=use_speaker_embedding,
speaker_ids=synthesizer.speaker_manager.speaker_ids,
use_gst=use_gst)
@app.route("/details")
@ -156,8 +187,8 @@ def details():
@app.route("/api/tts", methods=["GET"])
def tts():
text = request.args.get("text")
speaker_idx = request.args.get("speaker", "")
style_wav = request.args.get("style-wav", "")
speaker_idx = request.args.get("speaker_id", "")
style_wav = request.args.get("style_wav", "")
style_wav = style_wav_uri_to_dict(style_wav)
print(" > Model input: {}".format(text))

View File

@ -1,7 +1,7 @@
<!DOCTYPE html>
<html lang="en">
<head>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
@ -12,24 +12,26 @@
<!-- Bootstrap core CSS -->
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous" rel="stylesheet">
integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
rel="stylesheet">
<!-- Custom styles for this template -->
<style>
body {
padding-top: 54px;
}
@media (min-width: 992px) {
body {
padding-top: 56px;
padding-top: 54px;
}
}
@media (min-width: 992px) {
body {
padding-top: 56px;
}
}
</style>
</head>
</head>
<body>
<a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
<body>
<a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
<!-- Navigation -->
<!--
@ -54,78 +56,88 @@
<!-- Page Content -->
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle" width="512"/>
<div class="row">
<div class="col-lg-12 text-center">
<img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle"
width="512" />
<ul class="list-unstyled">
</ul>
{%if use_speaker_embedding%}
<input id="speaker-json-key" placeholder="speaker json key.." size=45 type="text" name="speaker-json-key">
{%endif%}
<ul class="list-unstyled">
</ul>
{%if use_gst%}
<input value='{"0": 0.1}' id="style-wav" placeholder="style wav (dict or path ot wav).." size=45 type="text" name="style-wav">
{%endif%}
{%if use_gst%}
<input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path ot wav).." size=45
type="text" name="style_wav">
{%endif%}
<input id="text" placeholder="Type here..." size=45 type="text" name="text">
<button id="speak-button" name="speak">Speak</button><br/><br/>
{%if show_details%}
<button id="details-button" onclick="location.href = 'details'" name="model-details">Model Details</button><br/><br/>
{%endif%}
<audio id="audio" controls autoplay hidden></audio>
<p id="message"></p>
<input id="text" placeholder="Type here..." size=45 type="text" name="text">
<button id="speak-button" name="speak">Speak</button><br /><br />
{%if use_speaker_embedding%}
Choose a speaker:
<select id="speaker_id" name=speaker_id method="GET" action="/">
{% for speaker_id in speaker_ids %}
<option value="{{speaker_id}}" SELECTED>{{speaker_id}}</option>"
{% endfor %}
</select><br /><br />
{%endif%}
{%if show_details%}
<button id="details-button" onclick="location.href = 'details'" name="model-details">Model
Details</button><br /><br />
{%endif%}
<audio id="audio" controls autoplay hidden></audio>
<p id="message"></p>
</div>
</div>
</div>
</div>
<!-- Bootstrap core JavaScript -->
<script>
function getTextValue(textId) {
const container = q(textId)
if (container) {
function getTextValue(textId) {
const container = q(textId)
if (container) {
return container.value
}
return ""
}
function q(selector) {return document.querySelector(selector)}
q('#text').focus()
function do_tts(e) {
const text = q('#text').value
const speakerJsonKey = getTextValue('#speaker-json-key')
const styleWav = getTextValue('#style-wav')
if (text) {
q('#message').textContent = 'Synthesizing...'
q('#speak-button').disabled = true
q('#audio').hidden = true
synthesize(text, speakerJsonKey, styleWav)
}
e.preventDefault()
return false
return ""
}
function q(selector) { return document.querySelector(selector) }
q('#text').focus()
function do_tts(e) {
const text = q('#text').value
const speaker_id = getTextValue('#speaker_id')
const style_wav = getTextValue('#style_wav')
if (text) {
q('#message').textContent = 'Synthesizing...'
q('#speak-button').disabled = true
q('#audio').hidden = true
synthesize(text, speaker_id, style_wav)
}
q('#speak-button').addEventListener('click', do_tts)
q('#text').addEventListener('keyup', function(e) {
if (e.keyCode == 13) { // enter
e.preventDefault()
return false
}
q('#speak-button').addEventListener('click', do_tts)
q('#text').addEventListener('keyup', function (e) {
if (e.keyCode == 13) { // enter
do_tts(e)
}
})
function synthesize(text, speakerJsonKey="", styleWav="") {
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker=${encodeURIComponent(speakerJsonKey)}&style-wav=${encodeURIComponent(styleWav)}` , {cache: 'no-cache'})
.then(function(res) {
if (!res.ok) throw Error(res.statusText)
return res.blob()
}).then(function(blob) {
q('#message').textContent = ''
q('#speak-button').disabled = false
q('#audio').src = URL.createObjectURL(blob)
q('#audio').hidden = false
}).catch(function(err) {
q('#message').textContent = 'Error: ' + err.message
q('#speak-button').disabled = false
})
}
</script>
})
function synthesize(text, speaker_id = "", style_wav = "") {
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
.then(function (res) {
if (!res.ok) throw Error(res.statusText)
return res.blob()
}).then(function (blob) {
q('#message').textContent = ''
q('#speak-button').disabled = false
q('#audio').src = URL.createObjectURL(blob)
q('#audio').hidden = false
}).catch(function (err) {
q('#message').textContent = 'Error: ' + err.message
q('#speak-button').disabled = false
})
}
</script>
</body>
</body>
</html>
</html>