renaming train scripts and updating tests

2020-10-29 16:50:07 +01:00 · 2020-10-29 16:50:07 +01:00 · 73581cd94c
parent 39c71ee8a9
commit 73581cd94c
11 changed files with 39 additions and 28 deletions
--- a/README.md
+++ b/README.md
@ -150,23 +150,25 @@ head -n 12000 metadata_shuf.csv > metadata_train.csv
 tail -n 1100 metadata_shuf.csv > metadata_val.csv
 ```

-To train a new model, you need to define your own ```config.json``` file (check the example) and call with the command below. You also set the model architecture in  ```config.json```.
+To train a new model, you need to create your own ```config.json``` that specifies the model details, training configuration and more (check the examples). Then call the corresponding train script.

-```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json```
+For instance, in order to train a tacotron or tacotron2 model on LJSpeech dataset, follow these steps.
+
+```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json```

 To fine-tune a model, use ```--restore_path```.

-```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
+```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```

 To continue an old training run, use ```--continue_path```.

-```python TTS/bin/train_tts.py --continue_path /path/to/your/run_folder/```
+```python TTS/bin/train_tacotron.py --continue_path /path/to/your/run_folder/```

-For multi-GPU training use ```distribute.py```. It enables process based multi-GPU training where each process uses a single GPU.
+For multi-GPU training, call ```distribute.py```. It runs any provided train script in a multi-GPU setting.

-```CUDA_VISIBLE_DEVICES="0,1,4" TTS/bin/distribute.py --script train_tts.py --config_path TTS/tts/configs/config.json```
+```CUDA_VISIBLE_DEVICES="0,1,4" python TTS/bin/distribute.py --script train_tacotron.py --config_path TTS/tts/configs/config.json```

-Each run creates a new output folder and ```config.json``` is copied under this folder.
+Each run creates a new output folder containing the ```config.json``` that was used, the model checkpoints and the tensorboard logs.

 In case of any error or interrupted execution, if there is no checkpoint yet under the output folder, the whole folder is going to be removed.

--- a/TTS/bin/train_tacotron.py
+++ b/TTS/bin/train_tacotron.py
@ -7,27 +7,25 @@ import os
 import sys
 import time
 import traceback
+from random import randrange

 import numpy as np
 import torch
-
-from random import randrange
 from torch.utils.data import DataLoader
 from TTS.tts.datasets.preprocess import load_meta_data
 from TTS.tts.datasets.TTSDataset import MyDataset
 from TTS.tts.layers.losses import TacotronLoss
-from TTS.tts.utils.distribute import (DistributedSampler,
-                                      apply_gradient_allreduce,
-                                      init_distributed, reduce_tensor)
-from TTS.tts.utils.generic_utils import setup_model, check_config_tts
+from TTS.tts.utils.generic_utils import check_config_tts, setup_model
 from TTS.tts.utils.io import save_best_model, save_checkpoint
 from TTS.tts.utils.measures import alignment_diagonal_score
-from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
+from TTS.tts.utils.speakers import load_speaker_mapping, parse_speakers
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.console_logger import ConsoleLogger
+from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce,
+                                  init_distributed, reduce_tensor)
 from TTS.utils.generic_utils import (KeepAverage, count_parameters,
                                     create_experiment_folder, get_git_branch,
                                     remove_experiment_folder, set_init_dict)
@ -38,7 +36,6 @@ from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
                                gradual_training_scheduler, set_weight_decay,
                                setup_torch_training_env)

-
 use_cuda, num_gpus = setup_torch_training_env(True, False)


--- a/TTS/bin/train_vocoder_gan.py
+++ b/TTS/bin/train_vocoder_gan.py
--- a/TTS/bin/train_vocoder_wavegrad.py
+++ b/TTS/bin/train_vocoder_wavegrad.py
@ -132,10 +132,6 @@ def train(model, criterion, optimizer,

        optimizer.zero_grad()

-         # schedule update
-        if scheduler is not None:
-            scheduler.step()
-
        # backward pass with loss scaling
        if c.mixed_precision:
            scaler.scale(loss).backward()
@ -150,7 +146,9 @@ def train(model, criterion, optimizer,
                                           c.clip_grad)
            optimizer.step()

-
+        # schedule update
+        if scheduler is not None:
+            scheduler.step()

        # disconnect loss values
        loss_dict = dict()
--- a/TTS/bin/train_vocoder_wavernn.py
+++ b/TTS/bin/train_vocoder_wavernn.py
--- a/TTS/tts/configs/config.json
+++ b/TTS/tts/configs/config.json
@ -68,11 +68,14 @@
    "apex_amp_level": null,     // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate.

    // LOSS SETTINGS
-    "loss_masking": true,       // enable / disable loss masking against the sequence padding.
+    "loss_masking": false,       // enable / disable loss masking against the sequence padding.
    "decoder_loss_alpha": 0.5,  // decoder loss weight. If > 0, it is enabled
    "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
-    "ga_alpha": 5.0,           // weight for guided attention loss. If > 0, guided attention is enabled.
-    "diff_spec_alpha": 0.25,     // differential spectral loss weight. If > 0, it is enabled
+    "ga_alpha": 10.0,           // weight for guided attention loss. If > 0, guided attention is enabled.
+    "decoder_diff_spec_alpha": 0.25,     // decoder differential spectral loss weight. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25,     // postnet differential spectral loss weight. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5,     // decoder SSIM loss weight. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25,     // postnet SSIM loss weight. If > 0, it is enabled

    // VALIDATION
    "run_eval": true,
--- a/run_tests.sh
+++ b/run_tests.sh
@ -6,9 +6,10 @@ TF_CPP_MIN_LOG_LEVEL=3
 # runtime tests
 ./tests/test_server_package.sh && \
 ./tests/test_tts_train.sh && \
+./tests/test_glow-tts_train.sh && \
 ./tests/test_vocoder_gan_train.sh && \
 ./tests/test_vocoder_wavernn_train.sh && \
-./tests/test_glow-tts_train.sh && \
+./tests/test_vocoder_wavegrad_train.sh && \

 # linter check
 cardboardlinter --refspec master
--- a/tests/inputs/test_train_config.json
+++ b/tests/inputs/test_train_config.json
@ -74,6 +74,16 @@
    "test_delay_epochs": 0,  //Until attention is aligned, testing only wastes computation time.
    "test_sentences_file": null,  // set a file to load sentences to be used for testing. If it is null then we use default english sentences.

+    // LOSS SETTINGS
+    "loss_masking": false,       // enable / disable loss masking against the sequence padding.
+    "decoder_loss_alpha": 0.5,  // decoder loss weight. If > 0, it is enabled
+    "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
+    "ga_alpha": 10.0,           // weight for guided attention loss. If > 0, guided attention is enabled.
+    "decoder_diff_spec_alpha": 0.25,     // decoder differential spectral loss weight. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25,     // postnet differential spectral loss weight. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5,     // decoder SSIM loss weight. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25,     // postnet SSIM loss weight. If > 0, it is enabled
+
    // OPTIMIZER
    "noam_schedule": false,        // use noam warmup and lr schedule.
    "grad_clip": 1.0,              // upper limit for gradients for clipping.
--- a/tests/test_tacotron_train.sh
+++ b/tests/test_tacotron_train.sh
--- a/tests/test_vocoder_gan_train.sh
+++ b/tests/test_vocoder_gan_train.sh
@ -5,11 +5,11 @@ echo "$BASEDIR"
 # create run dir
 mkdir $BASEDIR/train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_gan_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
--- a/tests/test_vocoder_wavernn_train.sh
+++ b/tests/test_vocoder_wavernn_train.sh
@ -5,11 +5,11 @@ echo "$BASEDIR"
 # create run dir
 mkdir $BASEDIR/train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_wavernn_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER