mirror of https://github.com/coqui-ai/TTS.git
html formatting, enable multi-speaker model on the server with a dropdown menu to select the speaker
parent
f9f3d04d14
commit
ad047c8195
|
@ -32,12 +32,19 @@ def create_argparser():
|
|||
"--model_name",
|
||||
type=str,
|
||||
default="tts_models/en/ljspeech/tacotron2-DDC",
|
||||
help="Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
|
||||
help=
|
||||
"Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
|
||||
)
|
||||
parser.add_argument("--vocoder_name", type=str, default=None, help="name of one of the released vocoder models.")
|
||||
parser.add_argument("--vocoder_name",
|
||||
type=str,
|
||||
default=None,
|
||||
help="name of one of the released vocoder models.")
|
||||
|
||||
# Args for running custom models
|
||||
parser.add_argument("--config_path", default=None, type=str, help="Path to model config file.")
|
||||
# Args for running custom models
|
||||
parser.add_argument("--config_path",
|
||||
default=None,
|
||||
type=str,
|
||||
help="Path to model config file.")
|
||||
parser.add_argument(
|
||||
"--model_path",
|
||||
type=str,
|
||||
|
@ -47,15 +54,34 @@ def create_argparser():
|
|||
parser.add_argument(
|
||||
"--vocoder_path",
|
||||
type=str,
|
||||
help="Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
|
||||
help=
|
||||
"Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
|
||||
parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
|
||||
parser.add_argument("--port", type=int, default=5002, help="port to listen on.")
|
||||
parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
|
||||
parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
|
||||
parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
|
||||
parser.add_argument("--vocoder_config_path",
|
||||
type=str,
|
||||
help="Path to vocoder model config file.",
|
||||
default=None)
|
||||
parser.add_argument("--speakers_file_path",
|
||||
type=str,
|
||||
help="JSON file for multi-speaker model.",
|
||||
default=None)
|
||||
parser.add_argument("--port",
|
||||
type=int,
|
||||
default=5002,
|
||||
help="port to listen on.")
|
||||
parser.add_argument("--use_cuda",
|
||||
type=convert_boolean,
|
||||
default=False,
|
||||
help="true to use CUDA.")
|
||||
parser.add_argument("--debug",
|
||||
type=convert_boolean,
|
||||
default=False,
|
||||
help="true to enable Flask debug mode.")
|
||||
parser.add_argument("--show_details",
|
||||
type=convert_boolean,
|
||||
default=False,
|
||||
help="Generate model detail page.")
|
||||
return parser
|
||||
|
||||
|
||||
|
@ -83,11 +109,14 @@ if args.list_models:
|
|||
|
||||
# CASE2: load pre-trained model paths
|
||||
if args.model_name is not None and not args.model_path:
|
||||
model_path, config_path, model_item = manager.download_model(args.model_name)
|
||||
args.vocoder_name = model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name
|
||||
model_path, config_path, model_item = manager.download_model(
|
||||
args.model_name)
|
||||
args.vocoder_name = model_item[
|
||||
"default_vocoder"] if args.vocoder_name is None else args.vocoder_name
|
||||
|
||||
if args.vocoder_name is not None and not args.vocoder_path:
|
||||
vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
|
||||
vocoder_path, vocoder_config_path, _ = manager.download_model(
|
||||
args.vocoder_name)
|
||||
|
||||
# CASE3: set custom model paths
|
||||
if args.model_path is not None:
|
||||
|
@ -100,11 +129,11 @@ if args.vocoder_path is not None:
|
|||
vocoder_config_path = args.vocoder_config_path
|
||||
|
||||
# load models
|
||||
synthesizer = Synthesizer(
|
||||
model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, args.use_cuda
|
||||
)
|
||||
synthesizer = Synthesizer(model_path, config_path, speakers_file_path,
|
||||
vocoder_path, vocoder_config_path, args.use_cuda)
|
||||
|
||||
use_speaker_embedding = synthesizer.tts_config.get("use_external_speaker_embedding_file", False)
|
||||
use_speaker_embedding = synthesizer.tts_config.get(
|
||||
"use_external_speaker_embedding_file", False)
|
||||
use_gst = synthesizer.tts_config.get("use_gst", False)
|
||||
app = Flask(__name__)
|
||||
|
||||
|
@ -131,9 +160,11 @@ def style_wav_uri_to_dict(style_wav: str) -> Union[str, dict]:
|
|||
|
||||
@app.route("/")
|
||||
def index():
|
||||
return render_template(
|
||||
"index.html", show_details=args.show_details, use_speaker_embedding=use_speaker_embedding, use_gst=use_gst
|
||||
)
|
||||
return render_template("index.html",
|
||||
show_details=args.show_details,
|
||||
use_speaker_embedding=use_speaker_embedding,
|
||||
speaker_ids=synthesizer.speaker_manager.speaker_ids,
|
||||
use_gst=use_gst)
|
||||
|
||||
|
||||
@app.route("/details")
|
||||
|
@ -156,8 +187,8 @@ def details():
|
|||
@app.route("/api/tts", methods=["GET"])
|
||||
def tts():
|
||||
text = request.args.get("text")
|
||||
speaker_idx = request.args.get("speaker", "")
|
||||
style_wav = request.args.get("style-wav", "")
|
||||
speaker_idx = request.args.get("speaker_id", "")
|
||||
style_wav = request.args.get("style_wav", "")
|
||||
|
||||
style_wav = style_wav_uri_to_dict(style_wav)
|
||||
print(" > Model input: {}".format(text))
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
|
@ -12,24 +12,26 @@
|
|||
|
||||
<!-- Bootstrap core CSS -->
|
||||
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
|
||||
integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous" rel="stylesheet">
|
||||
integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
|
||||
rel="stylesheet">
|
||||
|
||||
<!-- Custom styles for this template -->
|
||||
<style>
|
||||
body {
|
||||
padding-top: 54px;
|
||||
}
|
||||
@media (min-width: 992px) {
|
||||
body {
|
||||
padding-top: 56px;
|
||||
padding-top: 54px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 992px) {
|
||||
body {
|
||||
padding-top: 56px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
|
||||
<body>
|
||||
<a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
|
||||
src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
|
||||
|
||||
<!-- Navigation -->
|
||||
<!--
|
||||
|
@ -54,78 +56,88 @@
|
|||
|
||||
<!-- Page Content -->
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col-lg-12 text-center">
|
||||
<img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle" width="512"/>
|
||||
<div class="row">
|
||||
<div class="col-lg-12 text-center">
|
||||
<img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle"
|
||||
width="512" />
|
||||
|
||||
<ul class="list-unstyled">
|
||||
</ul>
|
||||
{%if use_speaker_embedding%}
|
||||
<input id="speaker-json-key" placeholder="speaker json key.." size=45 type="text" name="speaker-json-key">
|
||||
{%endif%}
|
||||
<ul class="list-unstyled">
|
||||
</ul>
|
||||
|
||||
{%if use_gst%}
|
||||
<input value='{"0": 0.1}' id="style-wav" placeholder="style wav (dict or path ot wav).." size=45 type="text" name="style-wav">
|
||||
{%endif%}
|
||||
{%if use_gst%}
|
||||
<input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path ot wav).." size=45
|
||||
type="text" name="style_wav">
|
||||
{%endif%}
|
||||
|
||||
<input id="text" placeholder="Type here..." size=45 type="text" name="text">
|
||||
<button id="speak-button" name="speak">Speak</button><br/><br/>
|
||||
{%if show_details%}
|
||||
<button id="details-button" onclick="location.href = 'details'" name="model-details">Model Details</button><br/><br/>
|
||||
{%endif%}
|
||||
<audio id="audio" controls autoplay hidden></audio>
|
||||
<p id="message"></p>
|
||||
<input id="text" placeholder="Type here..." size=45 type="text" name="text">
|
||||
<button id="speak-button" name="speak">Speak</button><br /><br />
|
||||
|
||||
{%if use_speaker_embedding%}
|
||||
Choose a speaker:
|
||||
<select id="speaker_id" name=speaker_id method="GET" action="/">
|
||||
{% for speaker_id in speaker_ids %}
|
||||
<option value="{{speaker_id}}" SELECTED>{{speaker_id}}</option>
|
||||
{% endfor %}
|
||||
</select><br /><br />
|
||||
{%endif%}
|
||||
|
||||
{%if show_details%}
|
||||
<button id="details-button" onclick="location.href = 'details'" name="model-details">Model
|
||||
Details</button><br /><br />
|
||||
{%endif%}
|
||||
<audio id="audio" controls autoplay hidden></audio>
|
||||
<p id="message"></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Bootstrap core JavaScript -->
|
||||
<script>
|
||||
function getTextValue(textId) {
|
||||
const container = q(textId)
|
||||
if (container) {
|
||||
function getTextValue(textId) {
|
||||
const container = q(textId)
|
||||
if (container) {
|
||||
return container.value
|
||||
}
|
||||
return ""
|
||||
}
|
||||
function q(selector) {return document.querySelector(selector)}
|
||||
q('#text').focus()
|
||||
function do_tts(e) {
|
||||
const text = q('#text').value
|
||||
const speakerJsonKey = getTextValue('#speaker-json-key')
|
||||
const styleWav = getTextValue('#style-wav')
|
||||
if (text) {
|
||||
q('#message').textContent = 'Synthesizing...'
|
||||
q('#speak-button').disabled = true
|
||||
q('#audio').hidden = true
|
||||
synthesize(text, speakerJsonKey, styleWav)
|
||||
}
|
||||
e.preventDefault()
|
||||
return false
|
||||
return ""
|
||||
}
|
||||
function q(selector) { return document.querySelector(selector) }
|
||||
q('#text').focus()
|
||||
function do_tts(e) {
|
||||
const text = q('#text').value
|
||||
const speaker_id = getTextValue('#speaker_id')
|
||||
const style_wav = getTextValue('#style_wav')
|
||||
if (text) {
|
||||
q('#message').textContent = 'Synthesizing...'
|
||||
q('#speak-button').disabled = true
|
||||
q('#audio').hidden = true
|
||||
synthesize(text, speaker_id, style_wav)
|
||||
}
|
||||
q('#speak-button').addEventListener('click', do_tts)
|
||||
q('#text').addEventListener('keyup', function(e) {
|
||||
if (e.keyCode == 13) { // enter
|
||||
e.preventDefault()
|
||||
return false
|
||||
}
|
||||
q('#speak-button').addEventListener('click', do_tts)
|
||||
q('#text').addEventListener('keyup', function (e) {
|
||||
if (e.keyCode == 13) { // enter
|
||||
do_tts(e)
|
||||
}
|
||||
})
|
||||
function synthesize(text, speakerJsonKey="", styleWav="") {
|
||||
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker=${encodeURIComponent(speakerJsonKey)}&style-wav=${encodeURIComponent(styleWav)}` , {cache: 'no-cache'})
|
||||
.then(function(res) {
|
||||
if (!res.ok) throw Error(res.statusText)
|
||||
return res.blob()
|
||||
}).then(function(blob) {
|
||||
q('#message').textContent = ''
|
||||
q('#speak-button').disabled = false
|
||||
q('#audio').src = URL.createObjectURL(blob)
|
||||
q('#audio').hidden = false
|
||||
}).catch(function(err) {
|
||||
q('#message').textContent = 'Error: ' + err.message
|
||||
q('#speak-button').disabled = false
|
||||
})
|
||||
}
|
||||
</script>
|
||||
})
|
||||
function synthesize(text, speaker_id = "", style_wav = "") {
|
||||
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
|
||||
.then(function (res) {
|
||||
if (!res.ok) throw Error(res.statusText)
|
||||
return res.blob()
|
||||
}).then(function (blob) {
|
||||
q('#message').textContent = ''
|
||||
q('#speak-button').disabled = false
|
||||
q('#audio').src = URL.createObjectURL(blob)
|
||||
q('#audio').hidden = false
|
||||
}).catch(function (err) {
|
||||
q('#message').textContent = 'Error: ' + err.message
|
||||
q('#speak-button').disabled = false
|
||||
})
|
||||
}
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
</html>
|
Loading…
Reference in New Issue