mirror of https://github.com/coqui-ai/TTS.git
renaming train scripts and updating tests
parent
39c71ee8a9
commit
73581cd94c
16
README.md
16
README.md
|
@ -150,23 +150,25 @@ head -n 12000 metadata_shuf.csv > metadata_train.csv
|
|||
tail -n 1100 metadata_shuf.csv > metadata_val.csv
|
||||
```
|
||||
|
||||
To train a new model, you need to define your own ```config.json``` file (check the example) and call with the command below. You also set the model architecture in ```config.json```.
|
||||
To train a new model, you need to define your own ```config.json``` to define model details, training configuration and more (check the examples). Then call the corresponding train script.
|
||||
|
||||
```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json```
|
||||
For instance, in order to train a tacotron or tacotron2 model on LJSpeech dataset, follow these steps.
|
||||
|
||||
```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json```
|
||||
|
||||
To fine-tune a model, use ```--restore_path```.
|
||||
|
||||
```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
|
||||
```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
|
||||
|
||||
To continue an old training run, use ```--continue_path```.
|
||||
|
||||
```python TTS/bin/train_tts.py --continue_path /path/to/your/run_folder/```
|
||||
```python TTS/bin/train_tacotron.py --continue_path /path/to/your/run_folder/```
|
||||
|
||||
For multi-GPU training use ```distribute.py```. It enables process based multi-GPU training where each process uses a single GPU.
|
||||
For multi-GPU training, call ```distribute.py```. It runs any provided train script in a multi-GPU setting.
|
||||
|
||||
```CUDA_VISIBLE_DEVICES="0,1,4" TTS/bin/distribute.py --script train_tts.py --config_path TTS/tts/configs/config.json```
|
||||
```CUDA_VISIBLE_DEVICES="0,1,4" python TTS/bin/distribute.py --script train_tacotron.py --config_path TTS/tts/configs/config.json```
|
||||
|
||||
Each run creates a new output folder and ```config.json``` is copied under this folder.
|
||||
Each run creates a new output folder accommodating the used ```config.json```, model checkpoints and tensorboard logs.
|
||||
|
||||
In case of any error or interrupted execution, if there is no checkpoint yet under the output folder, the whole folder is going to be removed.
|
||||
|
||||
|
|
|
@ -7,27 +7,25 @@ import os
|
|||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from random import randrange
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from random import randrange
|
||||
from torch.utils.data import DataLoader
|
||||
from TTS.tts.datasets.preprocess import load_meta_data
|
||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||
from TTS.tts.layers.losses import TacotronLoss
|
||||
from TTS.tts.utils.distribute import (DistributedSampler,
|
||||
apply_gradient_allreduce,
|
||||
init_distributed, reduce_tensor)
|
||||
from TTS.tts.utils.generic_utils import setup_model, check_config_tts
|
||||
from TTS.tts.utils.generic_utils import check_config_tts, setup_model
|
||||
from TTS.tts.utils.io import save_best_model, save_checkpoint
|
||||
from TTS.tts.utils.measures import alignment_diagonal_score
|
||||
from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
|
||||
from TTS.tts.utils.speakers import load_speaker_mapping, parse_speakers
|
||||
from TTS.tts.utils.synthesis import synthesis
|
||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.console_logger import ConsoleLogger
|
||||
from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce,
|
||||
init_distributed, reduce_tensor)
|
||||
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
|
||||
create_experiment_folder, get_git_branch,
|
||||
remove_experiment_folder, set_init_dict)
|
||||
|
@ -38,7 +36,6 @@ from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
|
|||
gradual_training_scheduler, set_weight_decay,
|
||||
setup_torch_training_env)
|
||||
|
||||
|
||||
use_cuda, num_gpus = setup_torch_training_env(True, False)
|
||||
|
||||
|
|
@ -132,10 +132,6 @@ def train(model, criterion, optimizer,
|
|||
|
||||
optimizer.zero_grad()
|
||||
|
||||
# schedule update
|
||||
if scheduler is not None:
|
||||
scheduler.step()
|
||||
|
||||
# backward pass with loss scaling
|
||||
if c.mixed_precision:
|
||||
scaler.scale(loss).backward()
|
||||
|
@ -150,7 +146,9 @@ def train(model, criterion, optimizer,
|
|||
c.clip_grad)
|
||||
optimizer.step()
|
||||
|
||||
|
||||
# schedule update
|
||||
if scheduler is not None:
|
||||
scheduler.step()
|
||||
|
||||
# disconnect loss values
|
||||
loss_dict = dict()
|
|
@ -68,11 +68,14 @@
|
|||
"apex_amp_level": null, // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate.
|
||||
|
||||
// LOSS SETTINGS
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
"loss_masking": false, // enable / disable loss masking against the sequence padding.
|
||||
"decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
|
||||
"postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
|
||||
"ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled.
|
||||
"diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
|
||||
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
|
||||
"decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
|
||||
"postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
|
||||
"decoder_ssim_alpha": 0.5, // decoder SSIM loss weight. If > 0, it is enabled
|
||||
"postnet_ssim_alpha": 0.25, // postnet SSIM loss weight. If > 0, it is enabled
|
||||
|
||||
// VALIDATION
|
||||
"run_eval": true,
|
||||
|
|
|
@ -6,9 +6,10 @@ TF_CPP_MIN_LOG_LEVEL=3
|
|||
# runtime tests
|
||||
./tests/test_server_package.sh && \
|
||||
./tests/test_tts_train.sh && \
|
||||
./tests/test_glow-tts_train.sh && \
|
||||
./tests/test_vocoder_gan_train.sh && \
|
||||
./tests/test_vocoder_wavernn_train.sh && \
|
||||
./tests/test_glow-tts_train.sh && \
|
||||
./tests/test_vocoder_wavegrad_train.sh && \
|
||||
|
||||
# linter check
|
||||
cardboardlinter --refspec master
|
|
@ -74,6 +74,16 @@
|
|||
"test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time.
|
||||
"test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
|
||||
|
||||
// LOSS SETTINGS
|
||||
"loss_masking": false, // enable / disable loss masking against the sequence padding.
|
||||
"decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
|
||||
"postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
|
||||
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
|
||||
"decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
|
||||
"postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
|
||||
"decoder_ssim_alpha": 0.5, // decoder SSIM loss weight. If > 0, it is enabled
|
||||
"postnet_ssim_alpha": 0.25, // postnet SSIM loss weight. If > 0, it is enabled
|
||||
|
||||
// OPTIMIZER
|
||||
"noam_schedule": false, // use noam warmup and lr schedule.
|
||||
"grad_clip": 1.0, // upper limit for gradients for clipping.
|
||||
|
|
|
@ -5,11 +5,11 @@ echo "$BASEDIR"
|
|||
# create run dir
|
||||
mkdir $BASEDIR/train_outputs
|
||||
# run training
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
|
||||
# find the training folder
|
||||
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
|
||||
echo $LATEST_FOLDER
|
||||
# continue the previous training
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
|
||||
# remove all the outputs
|
||||
rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
|
||||
|
|
|
@ -5,11 +5,11 @@ echo "$BASEDIR"
|
|||
# create run dir
|
||||
mkdir $BASEDIR/train_outputs
|
||||
# run training
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
|
||||
# find the training folder
|
||||
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
|
||||
echo $LATEST_FOLDER
|
||||
# continue the previous training
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
|
||||
# remove all the outputs
|
||||
rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
|
Loading…
Reference in New Issue