libri tts config, and bug fix

pull/10/head
Eren Golge 2019-07-16 15:17:38 +02:00
parent a953961535
commit aec7f02817
2 changed files with 7 additions and 7 deletions

@@ -37,22 +37,22 @@
     "lr": 0.0001, // Initial learning rate. If Noam decay is active, maximum learning rate.
     "lr_decay": false, // if true, Noam learning rate decay is applied through training.
     "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
-    "windowing": false, // Enables attention windowing. Used only in eval mode.
     "memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
     "attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
     "prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
     "prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
-    "use_forward_attn": true, // ONLY TACOTRON2 - enable/disable forward attention. In general, it aligns faster.
+    "windowing": false, // Enables attention windowing. Used only in eval mode.
+    "use_forward_attn": false, // ONLY TACOTRON2 - enable/disable forward attention. In general, it aligns faster.
     "forward_attn_mask": false,
     "transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
-    "location_attn": false, // ONLY TACOTRON2 - enable/disable location-sensitive attention. It is enabled for TACOTRON by default.
+    "location_attn": true, // ONLY TACOTRON2 - enable/disable location-sensitive attention. It is enabled for TACOTRON by default.
     "loss_masking": true, // enable/disable loss masking against the sequence padding.
     "enable_eos_bos_chars": false, // enable/disable beginning-of-sentence and end-of-sentence chars.
     "stopnet": true, // Train stopnet predicting the end of synthesis.
     "separate_stopnet": true, // Train stopnet separately if 'stopnet==true'. It prevents the stopnet loss from influencing the rest of the model. It yields a better model, but it trains SLOWER.
-    "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
-    "batch_size": 16, // Batch size for training. Lower values than 32 might cause hard-to-learn attention.
+    "tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
+    "batch_size": 24, // Batch size for training. Lower values than 32 might cause hard-to-learn attention.
     "eval_batch_size": 16,
     "r": 1, // Number of frames to predict per step.
     "wd": 0.000001, // Weight decay weight.
@@ -61,7 +61,7 @@
     "print_step": 10, // Number of steps between training logs on console.
     "batch_group_size": 0, // Number of batches to shuffle after bucketing.
-    "run_eval": false,
+    "run_eval": true,
     "test_delay_epochs": 5, // Until attention is aligned, testing only wastes computation time.
     "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null, default English sentences are used.
     "data_path": "/home/erogol/Data/Libri-TTS/train-clean-360/", // DATASET-RELATED: can be overwritten from command argument

@@ -275,7 +275,7 @@ def setup_model(num_chars, num_speakers, c):
     elif c.model.lower() == "tacotron2":
         model = MyModel(
             num_chars=num_chars,
-            num_speakers=c.num_speakers,
+            num_speakers=num_speakers,
             r=c.r,
             attn_win=c.windowing,
             attn_norm=c.attention_norm,
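The one-line change in setup_model is the bug fix from the commit title: the function already receives the dataset-derived speaker count as its num_speakers argument, but the Tacotron2 branch read c.num_speakers off the config object instead, which breaks when the config defines no such key. A self-contained illustration of why the argument is the right source — MyModel and the metadata shape here are stand-ins, not the repo's real classes:

    # Stand-in for Tacotron2; the real constructor takes many more arguments.
    class MyModel:
        def __init__(self, num_chars, num_speakers):
            self.num_chars = num_chars
            self.num_speakers = num_speakers

    def setup_model(num_chars, num_speakers, c):
        # Before the fix this read the count from `c`, which fails for
        # configs that never define num_speakers; the argument always exists.
        return MyModel(num_chars=num_chars, num_speakers=num_speakers)

    # The count comes from dataset metadata, not from the config:
    train_meta = [("wav1", "text a", "spk_1"), ("wav2", "text b", "spk_2")]  # assumed shape
    speakers = sorted({spk for _, _, spk in train_meta})
    model = setup_model(num_chars=129, num_speakers=len(speakers), c={})
    assert model.num_speakers == 2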