diff --git a/config.json b/config.json
index 604794ad..ed9764b1 100644
--- a/config.json
+++ b/config.json
@@ -7,7 +7,6 @@
     "preemphasis": 0.97,
     "min_level_db": -100,
     "ref_level_db": 20,
-    "hidden_size": 128,
     "embedding_size": 256,
     "text_cleaner": "english_cleaners",
diff --git a/models/tacotron.py b/models/tacotron.py
index a485d68e..05bb1292 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -8,7 +8,7 @@ from TTS.layers.tacotron import Prenet, Encoder, Decoder, CBHG
 class Tacotron(nn.Module):
     def __init__(self, embedding_dim=256, linear_dim=1025, mel_dim=80,
-                 freq_dim=1025, r=5, padding_idx=None):
+                 r=5, padding_idx=None):
         super(Tacotron, self).__init__()
         self.r = r
@@ -24,7 +24,7 @@ class Tacotron(nn.Module):
         self.decoder = Decoder(256, mel_dim, r)
         self.postnet = CBHG(mel_dim, K=8, projections=[256, mel_dim])
-        self.last_linear = nn.Linear(mel_dim * 2, freq_dim)
+        self.last_linear = nn.Linear(mel_dim * 2, linear_dim)

     def forward(self, characters, mel_specs=None):
diff --git a/train.py b/train.py
index 77288f77..87908717 100644
--- a/train.py
+++ b/train.py
@@ -332,9 +332,8 @@ def main(args):
                             pin_memory=True)

     model = Tacotron(c.embedding_size,
-                     c.hidden_size,
-                     c.num_mels,
                      c.num_freq,
+                     c.num_mels,
                      c.r)

     optimizer = optim.Adam(model.parameters(), lr=c.lr)