"prenet_dropout":true,// ONLY TACOTRON2 - enable/disable dropout at prenet.
"use_forward_attn":true,// ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
"transition_agent":true,// ONLY TACOTRON2 - enable/disable transition agent of forward attention.
"location_attn":false,// ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
"loss_masking":true,// enable / disable loss masking against the sequence padding.
"enable_eos_bos_chars":false,// enable/disable beginning of sentence and end of sentence chars.
"stopnet":true,// Train stopnet predicting the end of synthesis.
"separate_stopnet":true,// Train stopnet seperately if 'stopnet==true'. It prevents stopnet loss to influence the rest of the model. It causes a better model, but it trains SLOWER.
"tb_model_param_stats":false,// true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
"batch_size":32,// Batch size for training. Lower values than 32 might cause hard to learn attention.
"eval_batch_size":16,
"r":5,// Number of frames to predict for step.
"wd":0.000001,// Weight decay weight.
"checkpoint":true,// If true, it saves checkpoints per "save_step"
"save_step":1000,// Number of training steps expected to save traning stats and checkpoints.
"print_step":10,// Number of steps to log traning on console.
"batch_group_size":0,//Number of batches to shuffle after bucketing.
"run_eval":true,
"test_delay_epochs":5,//Until attention is aligned, testing only wastes computation time.
"data_path":"/media/erogol/data_ssd/Data/Mozilla/",// DATASET-RELATED: can overwritten from command argument
"meta_file_train":"metadata_train.txt",// DATASET-RELATED: metafile for training dataloader.
"meta_file_val":"metadata_val.txt",// DATASET-RELATED: metafile for evaluation dataloader.
"dataset":"mozilla",// DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
"min_seq_len":0,// DATASET-RELATED: minimum text length to use in training
"max_seq_len":150,// DATASET-RELATED: maximum text length
"output_path":"../keep/",// DATASET-RELATED: output path for all training outputs.
"num_loader_workers":4,// number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers":4,// number of evaluation data loader processes.
"phoneme_cache_path":"mozilla_us_phonemes",// phoneme computation is slow, therefore, it caches results in the given folder.
"use_phonemes":true,// use phonemes instead of raw characters. It is suggested for better pronounciation.
"phoneme_language":"en-us",// depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages