From 3c80f7acd9f129410fd09c80a82b769b96706485 Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Thu, 18 Jul 2019 16:41:50 +0200 Subject: [PATCH] Fix synthesize.py for multi-speaker models and for running without WaveRNN --- synthesize.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/synthesize.py b/synthesize.py index 2082879b..33a31c69 100644 --- a/synthesize.py +++ b/synthesize.py @@ -9,8 +9,6 @@ from utils.generic_utils import load_config, setup_model from utils.text.symbols import symbols, phonemes from utils.audio import AudioProcessor -from WaveRNN.models.wavernn import Model as VocoderModel - def tts(model, vocoder_model, @@ -77,10 +75,18 @@ if __name__ == "__main__": type=bool, help="If True, vocoder model uses faster batch processing.", default=True) + parser.add_argument( + '--speakers_json', + type=str, + help="JSON file for multi-speaker model.", + default="" + ) args = parser.parse_args() if args.vocoder_path != "": assert args.use_cuda, " [!] Enable cuda for vocoder." + from WaveRNN.models.wavernn import Model as VocoderModel + # load the config C = load_config(args.config_path) C.forward_attn_mask = True @@ -88,9 +94,16 @@ if __name__ == "__main__": # load the audio processor ap = AudioProcessor(**C.audio) + # load speakers + if args.speakers_json != '': + speakers = json.load(open(args.speakers_json, 'r')) + num_speakers = len(speakers) + else: + num_speakers = 0 + # load the model num_chars = len(phonemes) if C.use_phonemes else len(symbols) - model = setup_model(num_chars, C) + model = setup_model(num_chars, num_speakers, C) cp = torch.load(args.model_path) model.load_state_dict(cp['model']) model.eval()