fix fft_size key error

pull/10/head
SanjaESC 2020-07-12 10:40:33 +02:00 committed by erogol
parent 8d0d4919fd
commit bdf6944665
2 changed files with 13 additions and 2 deletions

View File

@ -177,6 +177,7 @@ class TacotronAbstract(ABC, nn.Module):
elif style_input is None:
gst_outputs = torch.zeros(1, 1, self.gst_embedding_dim).to(device)
else:
# pylint: disable=not-callable
gst_outputs = self.gst_layer(style_input)
embedded_gst = gst_outputs.repeat(1, inputs.size(1), 1)
return inputs, embedded_gst

View File

@ -2,7 +2,7 @@
"audio":{
"audio_processor": "audio", // selects which audio processor to use, if different ones are available.
"num_mels": 80, // size of the mel spec frame.
"num_freq": 513, // number of stft frequency levels. Size of the linear spectrogram frame.
"fft_size": 1024, // number of stft frequency levels. Size of the linear spectrogram frame.
"sample_rate": 22050, // wav sample-rate. If different than the original data, it is resampled.
"frame_length_ms": null, // stft window length in ms.
"frame_shift_ms": null, // stft window hop-length in ms.
@ -51,5 +51,15 @@
"output_path": "result",
"min_seq_len": 0,
"max_seq_len": 300,
"log_dir": "tests/outputs/"
"log_dir": "tests/outputs/",
"use_speaker_embedding": false,
"use_gst": false,
"gst": {
"gst_style_input": null,
"gst_embedding_dim": 512,
"gst_num_heads": 4,
"gst_style_tokens": 10
},
}