From bdf69446653fe84e6fd69fb91c2bf99adfd7efff Mon Sep 17 00:00:00 2001 From: SanjaESC Date: Sun, 12 Jul 2020 10:40:33 +0200 Subject: [PATCH] fix fft_size key error --- mozilla_voice_tts/tts/models/tacotron_abstract.py | 1 + tests/inputs/test_config.json | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/mozilla_voice_tts/tts/models/tacotron_abstract.py b/mozilla_voice_tts/tts/models/tacotron_abstract.py index 13c3e948..d1148be5 100644 --- a/mozilla_voice_tts/tts/models/tacotron_abstract.py +++ b/mozilla_voice_tts/tts/models/tacotron_abstract.py @@ -177,6 +177,7 @@ class TacotronAbstract(ABC, nn.Module): elif style_input is None: gst_outputs = torch.zeros(1, 1, self.gst_embedding_dim).to(device) else: + # pylint: disable=not-callable gst_outputs = self.gst_layer(style_input) embedded_gst = gst_outputs.repeat(1, inputs.size(1), 1) return inputs, embedded_gst diff --git a/tests/inputs/test_config.json b/tests/inputs/test_config.json index 6da13bfc..b34a53a8 100644 --- a/tests/inputs/test_config.json +++ b/tests/inputs/test_config.json @@ -2,7 +2,7 @@ "audio":{ "audio_processor": "audio", // to use dictate different audio processors, if available. "num_mels": 80, // size of the mel spec frame. - "num_freq": 513, // number of stft frequency levels. Size of the linear spectogram frame. + "fft_size": 1024, // stft FFT size in samples. The linear spectrogram frame has fft_size / 2 + 1 (= 513) frequency bins. "sample_rate": 22050, // wav sample-rate. If different than the original data, it is resampled. "frame_length_ms": null, // stft window length in ms. "frame_shift_ms": null, // stft window hop-lengh in ms. @@ -51,5 +51,15 @@ "output_path": "result", "min_seq_len": 0, "max_seq_len": 300, - "log_dir": "tests/outputs/" + "log_dir": "tests/outputs/", + + "use_speaker_embedding": false, + "use_gst": false, + "gst": { + "gst_style_input": null, + "gst_embedding_dim": 512, + "gst_num_heads": 4, + "gst_style_tokens": 10 + } + }