html formatting, enable multi-speaker model on the server with a dropdown menu to select the speaker

2021-04-22 15:22:36 +02:00 · 2021-04-22 15:22:36 +02:00 · ad047c8195
parent f9f3d04d14
commit ad047c8195
2 changed files with 137 additions and 94 deletions
--- a/TTS/server/server.py
+++ b/TTS/server/server.py
@ -32,12 +32,19 @@ def create_argparser():
        "--model_name",
        type=str,
        default="tts_models/en/ljspeech/tacotron2-DDC",
-        help="Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
+        help=
+        "Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>",
    )
-    parser.add_argument("--vocoder_name", type=str, default=None, help="name of one of the released vocoder models.")
+    parser.add_argument("--vocoder_name",
+                        type=str,
+                        default=None,
+                        help="name of one of the released vocoder models.")

-     # Args for running custom models
-    parser.add_argument("--config_path", default=None, type=str, help="Path to model config file.")
+    # Args for running custom models
+    parser.add_argument("--config_path",
+                        default=None,
+                        type=str,
+                        help="Path to model config file.")
    parser.add_argument(
        "--model_path",
        type=str,
@ -47,15 +54,34 @@ def create_argparser():
    parser.add_argument(
        "--vocoder_path",
        type=str,
-        help="Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
+        help=
+        "Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
        default=None,
    )
-    parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
-    parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
-    parser.add_argument("--port", type=int, default=5002, help="port to listen on.")
-    parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
-    parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
-    parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
+    parser.add_argument("--vocoder_config_path",
+                        type=str,
+                        help="Path to vocoder model config file.",
+                        default=None)
+    parser.add_argument("--speakers_file_path",
+                        type=str,
+                        help="JSON file for multi-speaker model.",
+                        default=None)
+    parser.add_argument("--port",
+                        type=int,
+                        default=5002,
+                        help="port to listen on.")
+    parser.add_argument("--use_cuda",
+                        type=convert_boolean,
+                        default=False,
+                        help="true to use CUDA.")
+    parser.add_argument("--debug",
+                        type=convert_boolean,
+                        default=False,
+                        help="true to enable Flask debug mode.")
+    parser.add_argument("--show_details",
+                        type=convert_boolean,
+                        default=False,
+                        help="Generate model detail page.")
    return parser


@ -83,11 +109,14 @@ if args.list_models:

 # CASE2: load pre-trained model paths
 if args.model_name is not None and not args.model_path:
-    model_path, config_path, model_item = manager.download_model(args.model_name)
-    args.vocoder_name = model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name
+    model_path, config_path, model_item = manager.download_model(
+        args.model_name)
+    args.vocoder_name = model_item[
+        "default_vocoder"] if args.vocoder_name is None else args.vocoder_name

 if args.vocoder_name is not None and not args.vocoder_path:
-    vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
+    vocoder_path, vocoder_config_path, _ = manager.download_model(
+        args.vocoder_name)

 # CASE3: set custome model paths
 if args.model_path is not None:
@ -100,11 +129,11 @@ if args.vocoder_path is not None:
    vocoder_config_path = args.vocoder_config_path

 # load models
-synthesizer = Synthesizer(
-    model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, args.use_cuda
-)
+synthesizer = Synthesizer(model_path, config_path, speakers_file_path,
+                          vocoder_path, vocoder_config_path, args.use_cuda)

-use_speaker_embedding = synthesizer.tts_config.get("use_external_speaker_embedding_file", False)
+use_speaker_embedding = synthesizer.tts_config.get(
+    "use_external_speaker_embedding_file", False)
 use_gst = synthesizer.tts_config.get("use_gst", False)
 app = Flask(__name__)

@ -131,9 +160,11 @@ def style_wav_uri_to_dict(style_wav: str) -> Union[str, dict]:

@app.route("/")
 def index():
-    return render_template(
-        "index.html", show_details=args.show_details, use_speaker_embedding=use_speaker_embedding, use_gst=use_gst
-    )
+    # NOTE(review): assumes speaker_manager can be unset/None for single-speaker models -- confirm in Synthesizer.
+    speaker_manager = getattr(synthesizer, "speaker_manager", None)
+    return render_template("index.html", show_details=args.show_details,
+                           use_speaker_embedding=use_speaker_embedding, use_gst=use_gst,
+                           speaker_ids=getattr(speaker_manager, "speaker_ids", []))


@app.route("/details")
@ -156,8 +187,8 @@ def details():
@app.route("/api/tts", methods=["GET"])
 def tts():
    text = request.args.get("text")
-    speaker_idx = request.args.get("speaker", "")
-    style_wav = request.args.get("style-wav", "")
+    speaker_idx = request.args.get("speaker_id", request.args.get("speaker", ""))  # legacy key: "speaker"
+    style_wav = request.args.get("style_wav", request.args.get("style-wav", ""))  # legacy key: "style-wav"

    style_wav = style_wav_uri_to_dict(style_wav)
    print(" > Model input: {}".format(text))
--- a/TTS/server/templates/index.html
+++ b/TTS/server/templates/index.html
@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">

-  <head>
+<head>

    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
@ -12,24 +12,26 @@

    <!-- Bootstrap core CSS -->
    <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
-     integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous" rel="stylesheet">
+        integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
+        rel="stylesheet">

    <!-- Custom styles for this template -->
    <style>
-      body {
-        padding-top: 54px;
-      }
-      @media (min-width: 992px) {
        body {
-          padding-top: 56px;
+            padding-top: 54px;
        }
-      }

+        @media (min-width: 992px) {
+            body {
+                padding-top: 56px;
+            }
+        }
    </style>
-  </head>
+</head>

-  <body>
-    <a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
+<body>
+    <a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
+            src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>

    <!-- Navigation -->
    <!--
@ -54,78 +56,88 @@

    <!-- Page Content -->
    <div class="container">
-      <div class="row">
-        <div class="col-lg-12 text-center">
-            <img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle" width="512"/>
+        <div class="row">
+            <div class="col-lg-12 text-center">
+                <img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle"
+                    width="512" />

-          <ul class="list-unstyled">
-          </ul>
-          {%if use_speaker_embedding%}
-          <input id="speaker-json-key" placeholder="speaker json key.." size=45 type="text" name="speaker-json-key">
-          {%endif%}
+                <ul class="list-unstyled">
+                </ul>

-          {%if use_gst%}
-          <input value='{"0": 0.1}' id="style-wav" placeholder="style wav (dict or path ot wav).." size=45 type="text" name="style-wav">
-          {%endif%}
+                {%if use_gst%}{# GST style input; do_tts() sends its value as style_wav #}
+                <input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path to wav).." size=45
+                    type="text" name="style_wav">
+                {%endif%}

-          <input id="text" placeholder="Type here..." size=45 type="text" name="text">
-          <button id="speak-button" name="speak">Speak</button><br/><br/>
-          {%if show_details%}
-            <button id="details-button" onclick="location.href = 'details'" name="model-details">Model Details</button><br/><br/>
-          {%endif%}
-          <audio id="audio" controls autoplay hidden></audio>
-          <p id="message"></p>
+                <input id="text" placeholder="Type here..." size=45 type="text" name="text">
+                <button id="speak-button" name="speak">Speak</button><br /><br />
+
+                {%if use_speaker_embedding%}{# speaker picker read by do_tts(); exactly one option (the last) is pre-selected #}
+                Choose a speaker:
+                <select id="speaker_id" name="speaker_id">
+                    {% for speaker_id in speaker_ids %}
+                    <option value="{{speaker_id}}"{% if loop.last %} selected{% endif %}>{{speaker_id}}</option>
+                    {% endfor %}
+                </select><br /><br />
+                {%endif%}
+
+                {%if show_details%}
+                <button id="details-button" onclick="location.href = 'details'" name="model-details">Model
+                    Details</button><br /><br />
+                {%endif%}
+                <audio id="audio" controls autoplay hidden></audio>
+                <p id="message"></p>
+            </div>
        </div>
-      </div>
    </div>

    <!-- Bootstrap core JavaScript -->
    <script>
-            function getTextValue(textId) {
-              const container = q(textId)
-              if (container) {
+        function getTextValue(textId) {
+            const container = q(textId)
+            if (container) {
                return container.value
-              } 
-              return ""
            }
-            function q(selector) {return document.querySelector(selector)}
-            q('#text').focus()
-            function do_tts(e) {
-                const text = q('#text').value
-                const speakerJsonKey = getTextValue('#speaker-json-key')
-                const styleWav = getTextValue('#style-wav')
-                if (text) {
-                    q('#message').textContent = 'Synthesizing...'
-                    q('#speak-button').disabled = true
-                    q('#audio').hidden = true
-                    synthesize(text, speakerJsonKey, styleWav)
-                }
-                e.preventDefault()
-                return false
+            return ""
+        }
+        function q(selector) { return document.querySelector(selector) }
+        q('#text').focus()
+        function do_tts(e) {
+            const text = q('#text').value
+            const speaker_id = getTextValue('#speaker_id')
+            const style_wav = getTextValue('#style_wav')
+            if (text) {
+                q('#message').textContent = 'Synthesizing...'
+                q('#speak-button').disabled = true
+                q('#audio').hidden = true
+                synthesize(text, speaker_id, style_wav)
            }
-            q('#speak-button').addEventListener('click', do_tts)
-            q('#text').addEventListener('keyup', function(e) {
-              if (e.keyCode == 13) { // enter
+            e.preventDefault()
+            return false
+        }
+        q('#speak-button').addEventListener('click', do_tts)
+        q('#text').addEventListener('keyup', function (e) {
+            if (e.keyCode == 13) { // enter
                do_tts(e)
-              }
-            })
-            function synthesize(text, speakerJsonKey="", styleWav="") {
-                fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker=${encodeURIComponent(speakerJsonKey)}&style-wav=${encodeURIComponent(styleWav)}` , {cache: 'no-cache'})
-                    .then(function(res) {
-                        if (!res.ok) throw Error(res.statusText)
-                            return res.blob()
-                        }).then(function(blob) {
-                            q('#message').textContent = ''
-                            q('#speak-button').disabled = false
-                            q('#audio').src = URL.createObjectURL(blob)
-                            q('#audio').hidden = false
-                        }).catch(function(err) {
-                            q('#message').textContent = 'Error: ' + err.message
-                            q('#speak-button').disabled = false
-                        })
            }
-        </script>
+        })
+        function synthesize(text, speaker_id = "", style_wav = "") {
+            fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
+                .then(function (res) {
+                    if (!res.ok) throw Error(res.statusText)
+                    return res.blob()
+                }).then(function (blob) {
+                    q('#message').textContent = ''
+                    q('#speak-button').disabled = false
+                    q('#audio').src = URL.createObjectURL(blob)
+                    q('#audio').hidden = false
+                }).catch(function (err) {
+                    q('#message').textContent = 'Error: ' + err.message
+                    q('#speak-button').disabled = false
+                })
+        }
+    </script>

-  </body>
+</body>

-</html>
+</html>