renaming train scripts and updating tests

pull/10/head
erogol 2020-10-29 16:50:07 +01:00
parent 39c71ee8a9
commit 73581cd94c
11 changed files with 39 additions and 28 deletions

View File

@ -150,23 +150,25 @@ head -n 12000 metadata_shuf.csv > metadata_train.csv
tail -n 1100 metadata_shuf.csv > metadata_val.csv
```
To train a new model, you need to define your own ```config.json``` file (check the example) and call with the command below. You also set the model architecture in ```config.json```.
To train a new model, you need to write your own ```config.json``` that defines the model details, training configuration and more (check the examples). Then call the corresponding train script.
```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json```
For instance, in order to train a Tacotron or Tacotron2 model on the LJSpeech dataset, run the command below.
```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json```
To fine-tune a model, use ```--restore_path```.
```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
To continue an old training run, use ```--continue_path```.
```python TTS/bin/train_tts.py --continue_path /path/to/your/run_folder/```
```python TTS/bin/train_tacotron.py --continue_path /path/to/your/run_folder/```
For multi-GPU training use ```distribute.py```. It enables process based multi-GPU training where each process uses a single GPU.
For multi-GPU training, call ```distribute.py```. It runs any provided train script in a multi-GPU setting, with one process per GPU.
```CUDA_VISIBLE_DEVICES="0,1,4" TTS/bin/distribute.py --script train_tts.py --config_path TTS/tts/configs/config.json```
```CUDA_VISIBLE_DEVICES="0,1,4" python TTS/bin/distribute.py --script train_tacotron.py --config_path TTS/tts/configs/config.json```
Each run creates a new output folder and ```config.json``` is copied under this folder.
Each run creates a new output folder containing the used ```config.json```, model checkpoints and Tensorboard logs.
In case of an error or an interrupted execution, if there is no checkpoint yet under the output folder, the whole folder is removed.

View File

@ -7,27 +7,25 @@ import os
import sys
import time
import traceback
from random import randrange
import numpy as np
import torch
from random import randrange
from torch.utils.data import DataLoader
from TTS.tts.datasets.preprocess import load_meta_data
from TTS.tts.datasets.TTSDataset import MyDataset
from TTS.tts.layers.losses import TacotronLoss
from TTS.tts.utils.distribute import (DistributedSampler,
apply_gradient_allreduce,
init_distributed, reduce_tensor)
from TTS.tts.utils.generic_utils import setup_model, check_config_tts
from TTS.tts.utils.generic_utils import check_config_tts, setup_model
from TTS.tts.utils.io import save_best_model, save_checkpoint
from TTS.tts.utils.measures import alignment_diagonal_score
from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
from TTS.tts.utils.speakers import load_speaker_mapping, parse_speakers
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
from TTS.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce,
init_distributed, reduce_tensor)
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict)
@ -38,7 +36,6 @@ from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
gradual_training_scheduler, set_weight_decay,
setup_torch_training_env)
use_cuda, num_gpus = setup_torch_training_env(True, False)

View File

@ -132,10 +132,6 @@ def train(model, criterion, optimizer,
optimizer.zero_grad()
# schedule update
if scheduler is not None:
scheduler.step()
# backward pass with loss scaling
if c.mixed_precision:
scaler.scale(loss).backward()
@ -150,7 +146,9 @@ def train(model, criterion, optimizer,
c.clip_grad)
optimizer.step()
# schedule update
if scheduler is not None:
scheduler.step()
# disconnect loss values
loss_dict = dict()
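
This hunk moves the learning-rate scheduler update so it runs after the optimizer step, which is the order PyTorch expects since 1.1.0 (calling `scheduler.step()` before `optimizer.step()` skips the first value of the schedule). Below is a minimal sketch of the resulting order, using generic stand-ins (a toy `Linear` model, `Adam` and `StepLR`) rather than the actual Tacotron model, optimizer and `NoamLR` scheduler built by `train_tacotron.py`:

```python
import torch

# toy stand-ins; the real script builds a Tacotron model, its optimizer, NoamLR, etc.
mixed_precision = False
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000)
scaler = torch.cuda.amp.GradScaler(enabled=mixed_precision)

def train_step(batch, target):
    optimizer.zero_grad()
    loss = torch.nn.functional.mse_loss(model(batch), target)
    if mixed_precision:
        # backward pass with loss scaling, as in the mixed-precision branch above
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    else:
        loss.backward()
        optimizer.step()
    # schedule update: now *after* the optimizer step, which is what this change does
    if scheduler is not None:
        scheduler.step()
    return loss.item()
```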

View File

@ -68,11 +68,14 @@
"apex_amp_level": null, // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate.
// LOSS SETTINGS
"loss_masking": true, // enable / disable loss masking against the sequence padding.
"loss_masking": false, // enable / disable loss masking against the sequence padding.
"decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
"postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
"ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled
"postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
// VALIDATION
"run_eval": true,

View File

@ -6,9 +6,10 @@ TF_CPP_MIN_LOG_LEVEL=3
# runtime tests
./tests/test_server_package.sh && \
./tests/test_tts_train.sh && \
./tests/test_glow-tts_train.sh && \
./tests/test_vocoder_gan_train.sh && \
./tests/test_vocoder_wavernn_train.sh && \
./tests/test_glow-tts_train.sh && \
./tests/test_vocoder_wavegrad_train.sh && \
# linter check
cardboardlinter --refspec master

View File

@ -74,6 +74,16 @@
"test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time.
"test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
// LOSS SETTINGS
"loss_masking": false, // enable / disable loss masking against the sequence padding.
"decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
"postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled
"postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
// OPTIMIZER
"noam_schedule": false, // use noam warmup and lr schedule.
"grad_clip": 1.0, // upper limit for gradients for clipping.

View File

@ -5,11 +5,11 @@ echo "$BASEDIR"
# create run dir
mkdir $BASEDIR/train_outputs
# run training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
# find the training folder
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
echo $LATEST_FOLDER
# continue the previous training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
# remove all the outputs
rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER

View File

@ -5,11 +5,11 @@ echo "$BASEDIR"
# create run dir
mkdir $BASEDIR/train_outputs
# run training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
# find the training folder
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
echo $LATEST_FOLDER
# continue the previous training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
# remove all the outputs
rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER