From 73581cd94cea080ffb09be443f3817d2ef3fb7dc Mon Sep 17 00:00:00 2001
From: erogol
Date: Thu, 29 Oct 2020 16:50:07 +0100
Subject: [PATCH] renaming train scripts and updating tests

---
 README.md                                        | 16 +++++++++-------
 TTS/bin/{train_tts.py => train_tacotron.py}      | 13 +++++--------
 ...train_gan_vocoder.py => train_vocoder_gan.py} |  0
 ...ain_wavegrad.py => train_vocoder_wavegrad.py} |  8 +++-----
 ...vernn_vocoder.py => train_vocoder_wavernn.py} |  0
 TTS/tts/configs/config.json                      |  9 ++++++---
 run_tests.sh                                     |  3 ++-
 tests/inputs/test_train_config.json              | 10 ++++++++++
 ...{test_tts_train.sh => test_tacotron_train.sh} |  0
 tests/test_vocoder_gan_train.sh                  |  4 ++--
 tests/test_vocoder_wavernn_train.sh              |  4 ++--
 11 files changed, 39 insertions(+), 28 deletions(-)
 rename TTS/bin/{train_tts.py => train_tacotron.py} (98%)
 rename TTS/bin/{train_gan_vocoder.py => train_vocoder_gan.py} (100%)
 rename TTS/bin/{train_wavegrad.py => train_vocoder_wavegrad.py} (99%)
 rename TTS/bin/{train_wavernn_vocoder.py => train_vocoder_wavernn.py} (100%)
 rename tests/{test_tts_train.sh => test_tacotron_train.sh} (100%)

diff --git a/README.md b/README.md
index 5b048c42..7488103c 100644
--- a/README.md
+++ b/README.md
@@ -150,23 +150,25 @@ head -n 12000 metadata_shuf.csv > metadata_train.csv
 tail -n 1100 metadata_shuf.csv > metadata_val.csv
 ```
 
-To train a new model, you need to define your own ```config.json``` file (check the example) and call with the command below. You also set the model architecture in ```config.json```.
+To train a new model, you need to write your own ```config.json``` file that sets the model details, training configuration and more (check the examples). Then call the corresponding train script.
 
-```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json```
+For instance, to train a Tacotron or Tacotron2 model on the LJSpeech dataset, run the command below.
+
+```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json```
 
 To fine-tune a model, use ```--restore_path```.
 
-```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
+```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
 
 To continue an old training run, use ```--continue_path```.
 
-```python TTS/bin/train_tts.py --continue_path /path/to/your/run_folder/```
+```python TTS/bin/train_tacotron.py --continue_path /path/to/your/run_folder/```
 
-For multi-GPU training use ```distribute.py```. It enables process based multi-GPU training where each process uses a single GPU.
+For multi-GPU training, call ```distribute.py```. It runs any of the provided train scripts in a multi-GPU setting.
 
-```CUDA_VISIBLE_DEVICES="0,1,4" TTS/bin/distribute.py --script train_tts.py --config_path TTS/tts/configs/config.json```
+```CUDA_VISIBLE_DEVICES="0,1,4" python TTS/bin/distribute.py --script train_tacotron.py --config_path TTS/tts/configs/config.json```
 
-Each run creates a new output folder and ```config.json``` is copied under this folder.
+Each run creates a new output folder containing the used ```config.json```, model checkpoints and tensorboard logs. If the run errors out or is interrupted before any checkpoint is written to the output folder, the whole folder is removed.
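Note on the ```distribute.py``` usage in the README hunk above: the README describes process-based multi-GPU training where each process is pinned to a single GPU. Below is a minimal sketch of that launching pattern, assuming a simple subprocess-per-GPU design; the ```launch``` helper and the ```--rank``` flag are hypothetical illustrations, not the actual ```TTS/bin/distribute.py``` interface.

```python
import os
import subprocess
import sys

def launch(script, config_path, gpu_ids):
    """Spawn one training process per GPU, each seeing only a single device."""
    procs = []
    for rank, gpu in enumerate(gpu_ids):
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = str(gpu)   # isolate one GPU per process
        cmd = [sys.executable, script,
               "--config_path", config_path,
               "--rank", str(rank)]              # hypothetical rank flag
        procs.append(subprocess.Popen(cmd, env=env))
    for p in procs:
        p.wait()

if __name__ == "__main__":
    launch("TTS/bin/train_tacotron.py", "TTS/tts/configs/config.json", [0, 1, 4])
```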
diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tacotron.py
similarity index 98%
rename from TTS/bin/train_tts.py
rename to TTS/bin/train_tacotron.py
index 8029ab21..dd9f0e55 100644
--- a/TTS/bin/train_tts.py
+++ b/TTS/bin/train_tacotron.py
@@ -7,27 +7,25 @@ import os
 import sys
 import time
 import traceback
+from random import randrange
 
 import numpy as np
 import torch
-
-from random import randrange
 from torch.utils.data import DataLoader
 
 from TTS.tts.datasets.preprocess import load_meta_data
 from TTS.tts.datasets.TTSDataset import MyDataset
 from TTS.tts.layers.losses import TacotronLoss
-from TTS.tts.utils.distribute import (DistributedSampler,
-                                      apply_gradient_allreduce,
-                                      init_distributed, reduce_tensor)
-from TTS.tts.utils.generic_utils import setup_model, check_config_tts
+from TTS.tts.utils.generic_utils import check_config_tts, setup_model
 from TTS.tts.utils.io import save_best_model, save_checkpoint
 from TTS.tts.utils.measures import alignment_diagonal_score
-from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
+from TTS.tts.utils.speakers import load_speaker_mapping, parse_speakers
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.console_logger import ConsoleLogger
+from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce,
+                                  init_distributed, reduce_tensor)
 from TTS.utils.generic_utils import (KeepAverage, count_parameters,
                                      create_experiment_folder, get_git_branch,
                                      remove_experiment_folder, set_init_dict)
@@ -38,7 +36,6 @@ from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
                                 gradual_training_scheduler, set_weight_decay,
                                 setup_torch_training_env)
 
-
 use_cuda, num_gpus = setup_torch_training_env(True, False)
diff --git a/TTS/bin/train_gan_vocoder.py b/TTS/bin/train_vocoder_gan.py
similarity index 100%
rename from TTS/bin/train_gan_vocoder.py
rename to TTS/bin/train_vocoder_gan.py
diff --git a/TTS/bin/train_wavegrad.py b/TTS/bin/train_vocoder_wavegrad.py
similarity index 99%
rename from TTS/bin/train_wavegrad.py
rename to TTS/bin/train_vocoder_wavegrad.py
index 13434979..96191569 100644
--- a/TTS/bin/train_wavegrad.py
+++ b/TTS/bin/train_vocoder_wavegrad.py
@@ -132,10 +132,6 @@ def train(model, criterion, optimizer,
 
         optimizer.zero_grad()
 
-        # schedule update
-        if scheduler is not None:
-            scheduler.step()
-
         # backward pass with loss scaling
         if c.mixed_precision:
             scaler.scale(loss).backward()
@@ -150,7 +146,9 @@ def train(model, criterion, optimizer,
                                         c.clip_grad)
             optimizer.step()
 
-
+        # schedule update
+        if scheduler is not None:
+            scheduler.step()
 
         # disconnect loss values
         loss_dict = dict()
diff --git a/TTS/bin/train_wavernn_vocoder.py b/TTS/bin/train_vocoder_wavernn.py
similarity index 100%
rename from TTS/bin/train_wavernn_vocoder.py
rename to TTS/bin/train_vocoder_wavernn.py
diff --git a/TTS/tts/configs/config.json b/TTS/tts/configs/config.json
index 1b63b037..55f9306c 100644
--- a/TTS/tts/configs/config.json
+++ b/TTS/tts/configs/config.json
@@ -68,11 +68,14 @@
     "apex_amp_level": null, // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate.
 
     // LOSS SETTINGS
-    "loss_masking": true,       // enable / disable loss masking against the sequence padding.
+    "loss_masking": false,      // enable / disable loss masking against the sequence padding.
"decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled - "ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled. - "diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. + "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled + "postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled // VALIDATION "run_eval": true, diff --git a/run_tests.sh b/run_tests.sh index 998d8ec4..46f18f01 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -6,9 +6,10 @@ TF_CPP_MIN_LOG_LEVEL=3 # runtime tests ./tests/test_server_package.sh && \ ./tests/test_tts_train.sh && \ +./tests/test_glow-tts_train.sh && \ ./tests/test_vocoder_gan_train.sh && \ ./tests/test_vocoder_wavernn_train.sh && \ -./tests/test_glow-tts_train.sh && \ +./tests/test_vocoder_wavegrad_train.sh && \ # linter check cardboardlinter --refspec master \ No newline at end of file diff --git a/tests/inputs/test_train_config.json b/tests/inputs/test_train_config.json index ddb71384..2e2d6d46 100644 --- a/tests/inputs/test_train_config.json +++ b/tests/inputs/test_train_config.json @@ -74,6 +74,16 @@ "test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time. "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + // LOSS SETTINGS + "loss_masking": false, // enable / disable loss masking against the sequence padding. + "decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled + "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled + "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. + "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled + "postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + // OPTIMIZER "noam_schedule": false, // use noam warmup and lr schedule. "grad_clip": 1.0, // upper limit for gradients for clipping. 
diff --git a/tests/test_tts_train.sh b/tests/test_tacotron_train.sh
similarity index 100%
rename from tests/test_tts_train.sh
rename to tests/test_tacotron_train.sh
diff --git a/tests/test_vocoder_gan_train.sh b/tests/test_vocoder_gan_train.sh
index 75773cc3..474ef9a7 100755
--- a/tests/test_vocoder_gan_train.sh
+++ b/tests/test_vocoder_gan_train.sh
@@ -5,11 +5,11 @@ echo "$BASEDIR"
 # create run dir
 mkdir $BASEDIR/train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
diff --git a/tests/test_vocoder_wavernn_train.sh b/tests/test_vocoder_wavernn_train.sh
index f2e32116..ffa30d40 100755
--- a/tests/test_vocoder_wavernn_train.sh
+++ b/tests/test_vocoder_wavernn_train.sh
@@ -5,11 +5,11 @@ echo "$BASEDIR"
 # create run dir
 mkdir $BASEDIR/train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
\ No newline at end of file
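One remark on the ```train_vocoder_wavegrad.py``` hunks earlier in this patch: they move ```scheduler.step()``` from before the backward pass to after ```optimizer.step()```, which matches the ordering PyTorch has required since 1.1.0 (calling the scheduler first skips the initial learning-rate value and triggers a warning). A minimal standalone sketch of the corrected order, with a dummy model and loop standing in for the real training code:

```python
import torch

# Dummy stand-ins for the real model/optimizer/scheduler setup.
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000)

for step in range(5):  # dummy training loop
    optimizer.zero_grad()
    loss = model(torch.randn(4, 10)).mean()
    loss.backward()
    optimizer.step()            # update the weights first...
    if scheduler is not None:
        scheduler.step()        # ...then advance the LR schedule
```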