mirror of https://github.com/coqui-ai/TTS.git
config update and initial bias for graves attention
parent 926a4d36ce
commit b904bc02d6
@@ -34,7 +34,6 @@
     "reinit_layers": [],    // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.

-<<<<<<< HEAD
     // TRAINING
     "batch_size": 32,       // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
     "eval_batch_size":16,
@@ -48,9 +47,6 @@
     "test_sentences_file": null,    // set a file to load sentences to be used for testing. If it is null then we use default english sentences.

     // OPTIMIZER
-=======
-    "model": "Tacotron2",           // one of the model in models/
->>>>>>> config update and bug fixes
     "grad_clip": 1,                 // upper limit for gradients for clipping.
     "epochs": 1000,                 // total number of epochs to train.
     "lr": 0.0001,                   // Initial learning rate. If Noam decay is active, maximum learning rate.
@@ -63,12 +59,8 @@
     "prenet_type": "original",      // "original" or "bn".
     "prenet_dropout": true,         // enable/disable dropout at prenet.

-<<<<<<< HEAD
     // ATTENTION
-    "attention_type": "original",   // 'original' or 'graves'
-=======
     "attention_type": "graves",     // 'original' or 'graves'
->>>>>>> config update and bug fixes
     "attention_heads": 5,           // number of attention heads (only for 'graves')
     "attention_norm": "sigmoid",    // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
     "windowing": false,             // Enables attention windowing. Used only in eval mode.
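For reference, once the conflict markers are dropped, the attention-related settings boil down to the block below. This is a minimal sketch, mirroring the JSON above as a Python dict and assuming the 'graves' side of the conflict is the one kept (consistent with the commit title); the "attention_heads" value of 5 is the K that sizes the 3*K output layer in the GravesAttention change that follows.

# A minimal sketch, not the config file itself; values copied from the hunks above.
attention_config = {
    "attention_type": "graves",   # 'original' or 'graves'
    "attention_heads": 5,         # K mixture components, used only by 'graves'
    "attention_norm": "sigmoid",  # softmax or sigmoid
    "windowing": False,           # attention windowing, used only in eval mode
}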
@@ -119,11 +119,16 @@ class GravesAttention(nn.Module):
         self.epsilon = 1e-5
         self.J = None
         self.N_a = nn.Sequential(
-            nn.Linear(query_dim, query_dim),
+            nn.Linear(query_dim, query_dim, bias=True),
             nn.Tanh(),
-            nn.Linear(query_dim, 3*K))
+            nn.Linear(query_dim, 3*K, bias=True))
         self.attention_weights = None
         self.mu_prev = None
+        self.init_layers()
+
+    def init_layers(self):
+        torch.nn.init.constant_(self.N_a[2].bias[10:15], 0.5)
+        torch.nn.init.constant_(self.N_a[2].bias[5:10], 10)

     def init_states(self, inputs):
         if self.J is None or inputs.shape[1] > self.J.shape[-1]:
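The final Linear in N_a emits 3*K values per decoder step (K = attention_heads = 5), which Graves-style GMM attention typically splits into K mixture gains, K width-related terms, and K step-related terms. Under that assumption, the new bias initialization pre-sets the second group of K outputs to 10 and the third group to 0.5 before any training. A minimal sketch of how the bias slices line up with those groups; the group names are illustrative, not the repository's exact forward pass:

import torch

K = 5                                       # attention_heads in the config above
query_dim = 128                             # illustrative value, not from the repo
layer = torch.nn.Linear(query_dim, 3 * K, bias=True)
torch.nn.init.constant_(layer.bias[10:15], 0.5)  # third group of K outputs
torch.nn.init.constant_(layer.bias[5:10], 10)    # second group of K outputs

with torch.no_grad():
    out = layer(torch.zeros(1, query_dim))  # zero input -> output equals the bias
    g, b, k = out.view(1, 3, K).unbind(dim=1)
print(g)  # zeros: mixture gains start flat (uniform if a softmax is applied later)
print(b)  # tens: a strong starting value for the width-related terms (assumed role)
print(k)  # 0.5: a small positive start for the step-related terms (assumed role)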
train.py
@@ -198,7 +198,7 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st, scheduler,

         loss.backward()
         optimizer, current_lr = adam_weight_decay(optimizer)
-        grad_norm, _ = check_update(model, c.grad_clip)
+        grad_norm, _ = check_update(model.decoder, c.grad_clip)
         optimizer.step()

         # compute alignment score
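This change narrows gradient clipping from the whole model to the decoder's parameters only. The repository's check_update implementation is not shown in this commit; the sketch below is a rough approximation of what a helper with that call shape typically does (clip to c.grad_clip and flag non-finite gradients), with names of my own:

import torch

def check_update_sketch(module: torch.nn.Module, grad_clip: float):
    """Clip gradients of one module (e.g. model.decoder) and report the norm.

    A hedged approximation of a check_update-style helper, not the
    repository's actual implementation.
    """
    grad_norm = torch.nn.utils.clip_grad_norm_(module.parameters(), grad_clip)
    skip_step = not torch.isfinite(grad_norm)  # skip the optimizer step on inf/NaN
    return grad_norm, skip_step

Clipping only model.decoder leaves the rest of the network's gradients untouched, presumably to focus the guard on the decoder/attention stack where spikes are most likely.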