Update README, set default models for synthesize.py and server.py, and disable verbose output when initializing AudioProcessor.

2021-01-27 11:46:01 +01:00 · 2021-01-27 11:46:01 +01:00 · 534e3c67c6
parent 3d46d544ad
commit 534e3c67c6
4 changed files with 12 additions and 9 deletions
--- a/README.md
+++ b/README.md
@ -8,11 +8,11 @@
 [![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS)
 [![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/coqui-ai/TTS/blob/master/CODE_OF_CONDUCT.md)
-:loudspeaker: [English Voice Samples](https://erogol.github.io/ddc-samples/) and [SoundCloud playlist](https://soundcloud.com/user-565970875/pocket-article-wavernn-and-tacotron2)
+📢 [English Voice Samples](https://erogol.github.io/ddc-samples/) and [SoundCloud playlist](https://soundcloud.com/user-565970875/pocket-article-wavernn-and-tacotron2)
-:man_cook:  [TTS training recipes](https://github.com/erogol/TTS_recipes)
+👩🏽‍🍳  [TTS training recipes](https://github.com/erogol/TTS_recipes)
-:page_facing_up: [Text-to-Speech paper collection](https://github.com/erogol/TTS-papers)
+📄 [Text-to-Speech paper collection](https://github.com/erogol/TTS-papers)
 ## 💬 Where to ask questions
 Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly, so that more people can benefit from it.
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@ -35,6 +35,9 @@ def main():
    # list provided models
    ./TTS/bin/synthesize.py --list_models
    # run tts with default models.
    ./TTS/bin/synthesize.py --text "Text for TTS"
    # run a model from the list
    ./TTS/bin/synthesize.py --text "Text for TTS" --model_name "<language>/<dataset>/<model_name>" --vocoder_name "<language>/<dataset>/<model_name>" --output_path
@ -67,14 +70,14 @@ def main():
    parser.add_argument(
        '--model_name',
        type=str,
-        default=None,
+        default="tts_models/en/ljspeech/speedy-speech-wn",
        help=
        'Name of one of the pre-trained tts models in format <language>/<dataset>/<model_name>'
    )
    parser.add_argument(
        '--vocoder_name',
        type=str,
-        default=None,
+        default="vocoder_models/en/ljspeech/mulitband-melgan",
        help=
        'Name of one of the pre-trained  vocoder models in format <language>/<dataset>/<model_name>'
    )
--- a/TTS/server/server.py
+++ b/TTS/server/server.py
@ -17,8 +17,8 @@ def create_argparser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--list_models', type=convert_boolean, nargs='?', const=True, default=False, help='list available pre-trained tts and vocoder models.')
-    parser.add_argument('--model_name', type=str, help='name of one of the released tts models.')
+    parser.add_argument('--model_name', type=str, default="tts_models/en/ljspeech/speedy-speech-wn", help='name of one of the released tts models.')
-    parser.add_argument('--vocoder_name', type=str, help='name of one of the released vocoder models.')
+    parser.add_argument('--vocoder_name', type=str, default="vocoder_models/en/ljspeech/mulitband-melgan", help='name of one of the released vocoder models.')
    parser.add_argument('--tts_checkpoint', type=str, help='path to custom tts checkpoint file')
    parser.add_argument('--tts_config', type=str, help='path to custom tts config.json file')
    parser.add_argument('--tts_speakers', type=str, help='path to JSON file containing speaker ids, if speaker ids are used in the model')
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@ -79,7 +79,7 @@ class Synthesizer(object):
        self.tts_config = load_config(tts_config)
        self.use_phonemes = self.tts_config.use_phonemes
-        self.ap = AudioProcessor(**self.tts_config.audio)
+        self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)
        if 'characters' in self.tts_config.keys():
            symbols, phonemes = make_symbols(**self.tts_config.characters)
@ -96,7 +96,7 @@ class Synthesizer(object):
    def load_vocoder(self, model_file, model_config, use_cuda):
        self.vocoder_config = load_config(model_config)
-        self.vocoder_ap = AudioProcessor(**self.vocoder_config['audio'])
+        self.vocoder_ap = AudioProcessor(verbose=False, **self.vocoder_config['audio'])
        self.vocoder_model = setup_generator(self.vocoder_config)
        self.vocoder_model.load_checkpoint(self.vocoder_config, model_file, eval=True)
        if use_cuda: