TTS/train.py

209 lines
8.0 KiB
Python
Raw Normal View History

2018-01-22 09:48:59 +00:00
import os
import sys
import time
2018-01-26 10:07:07 +00:00
import datetime
2018-01-22 16:20:20 +00:00
import shutil
2018-01-22 09:48:59 +00:00
import torch
import signal
import argparse
2018-01-22 16:20:20 +00:00
import importlib
import pickle
2018-01-22 09:48:59 +00:00
import numpy as np
import torch.nn as nn
from torch import optim
2018-01-22 14:58:12 +00:00
from torch.autograd import Variable
2018-01-22 09:48:59 +00:00
from torch.utils.data import DataLoader
2018-01-26 10:07:07 +00:00
from torch.optim.lr_scheduler import ReduceLROnPlateau
2018-01-25 15:07:46 +00:00
from tensorboardX import SummaryWriter
2018-01-22 09:48:59 +00:00
from utils.generic_utils import (Progbar, remove_experiment_folder,
2018-01-22 16:20:20 +00:00
create_experiment_folder, save_checkpoint,
load_config)
2018-01-22 09:48:59 +00:00
from utils.model import get_param_size
from datasets.LJSpeech import LJSpeechDataset
2018-01-22 14:58:12 +00:00
from models.tacotron import Tacotron
2018-01-22 09:48:59 +00:00
use_cuda = torch.cuda.is_available()


def main(args):
    """Train a Tacotron model on the LJSpeech dataset.

    Loads the JSON config at ``args.config_path``, builds the dataset,
    model, optimizer and LR scheduler, optionally restores the checkpoint
    for ``args.restore_step``, then runs the training loop while logging
    to TensorBoard and saving periodic checkpoints.

    Args:
        args: argparse namespace with ``config_path`` (str) and
            ``restore_step`` (int, 0 for a fresh run).
    """
    # setup output paths and read configs
    c = load_config(args.config_path)
    root_path = os.path.dirname(os.path.realpath(__file__))
    OUT_PATH = os.path.join(root_path, c.output_path)
    OUT_PATH = create_experiment_folder(OUT_PATH)
    CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
    shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

    # save config to a tmp place to be loaded by subsequent modules.
    file_name = str(os.getpid())
    tmp_path = os.path.join("/tmp/", file_name + '_tts')
    # use a context manager so the handle is closed instead of leaked
    with open(tmp_path, "wb") as tmp_file:
        pickle.dump(c, tmp_file)

    # setup tensorboard
    LOG_DIR = OUT_PATH
    tb = SummaryWriter(LOG_DIR)

    # Ctrl+C handler to remove the (possibly empty) experiment folder
    def signal_handler(signal, frame):
        print(" !! Pressed Ctrl+C !!")
        remove_experiment_folder(OUT_PATH)
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata.csv'),
                              os.path.join(c.data_path, 'wavs'),
                              c.r,
                              c.sample_rate,
                              c.text_cleaner,
                              c.num_mels,
                              c.min_level_db,
                              c.frame_shift_ms,
                              c.frame_length_ms,
                              c.preemphasis,
                              c.ref_level_db,
                              c.num_freq,
                              c.power
                              )

    model = Tacotron(c.embedding_size,
                     c.hidden_size,
                     c.num_mels,
                     c.num_freq,
                     c.r)
    if use_cuda:
        model = nn.DataParallel(model.cuda())

    optimizer = optim.Adam(model.parameters(), lr=c.lr)

    # Restore a checkpoint for the requested step if one exists, otherwise
    # start from scratch. Catch only missing-file/corrupt-checkpoint errors;
    # the previous bare ``except`` silently hid real bugs.
    try:
        checkpoint = torch.load(os.path.join(
            CHECKPOINT_PATH, 'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n > Model restored from step %d\n" % args.restore_step)
    except (IOError, OSError, KeyError, RuntimeError):
        print("\n > Starting a new training")
    model = model.train()

    if not os.path.exists(CHECKPOINT_PATH):
        os.mkdir(CHECKPOINT_PATH)

    if use_cuda:
        criterion = nn.L1Loss().cuda()
    else:
        criterion = nn.L1Loss()

    # number of linear-spectrogram bins below 3000 Hz (Nyquist = sr/2) that
    # receive extra loss weight
    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)

    lr_scheduler = ReduceLROnPlateau(optimizer, factor=c.lr_decay,
                                     patience=c.lr_patience, verbose=True)
    epoch_time = 0
    for epoch in range(c.epochs):

        dataloader = DataLoader(dataset, batch_size=c.batch_size,
                                shuffle=True, collate_fn=dataset.collate_fn,
                                drop_last=True,
                                num_workers=c.num_loader_workers)
        print("\n | > Epoch {}/{}".format(epoch, c.epochs))
        # integer division: Progbar expects a step count, not a float
        progbar = Progbar(len(dataset) // c.batch_size)

        for i, data in enumerate(dataloader):
            text_input = data[0]
            magnitude_input = data[1]
            mel_input = data[2]

            current_step = i + args.restore_step + epoch * len(dataloader) + 1

            optimizer.zero_grad()

            # Teacher forcing input: replace the first mel frame with a
            # zero "go" frame.
            try:
                mel_input = np.concatenate((np.zeros(
                    [c.batch_size, 1, c.num_mels], dtype=np.float32),
                    mel_input[:, 1:, :]), axis=1)
            except ValueError as err:
                # np.concatenate raises ValueError on shape mismatch; keep
                # the historical TypeError for callers that catch it
                raise TypeError("not same dimension") from err

            if use_cuda:
                text_input_var = Variable(torch.from_numpy(text_input).type(
                    torch.cuda.LongTensor), requires_grad=False).cuda()
                mel_input_var = Variable(torch.from_numpy(mel_input).type(
                    torch.cuda.FloatTensor), requires_grad=False).cuda()
                mel_spec_var = Variable(torch.from_numpy(mel_input).type(
                    torch.cuda.FloatTensor), requires_grad=False).cuda()
                linear_spec_var = Variable(torch.from_numpy(magnitude_input)
                                           .type(torch.cuda.FloatTensor),
                                           requires_grad=False).cuda()
            else:
                text_input_var = Variable(torch.from_numpy(text_input).type(
                    torch.LongTensor), requires_grad=False)
                mel_input_var = Variable(torch.from_numpy(mel_input).type(
                    torch.FloatTensor), requires_grad=False)
                mel_spec_var = Variable(torch.from_numpy(
                    mel_input).type(torch.FloatTensor), requires_grad=False)
                linear_spec_var = Variable(torch.from_numpy(
                    magnitude_input).type(torch.FloatTensor),
                    requires_grad=False)

            mel_output, linear_output, alignments =\
                model.forward(text_input_var, mel_input_var)

            mel_loss = criterion(mel_output, mel_spec_var)
            # L1 on the linear spectrogram, with half the weight placed on
            # the low "priority" frequency bins
            linear_loss = torch.abs(linear_output - linear_spec_var)
            linear_loss = 0.5 * \
                torch.mean(linear_loss) + 0.5 * \
                torch.mean(linear_loss[:, :n_priority_freq, :])
            loss = mel_loss + linear_loss
            # guard the device move: the old unconditional ``loss.cuda()``
            # crashed on CPU-only machines
            if use_cuda:
                loss = loss.cuda()

            start_time = time.time()
            loss.backward()
            nn.utils.clip_grad_norm(model.parameters(), 1.)
            optimizer.step()
            step_time = time.time() - start_time
            epoch_time += step_time

            progbar.update(i + 1, values=[('total_loss', loss.data[0]),
                                          ('linear_loss',
                                           linear_loss.data[0]),
                                          ('mel_loss', mel_loss.data[0])])

            tb.add_scalar('Train/TotalLoss', loss.data[0], current_step)
            tb.add_scalar('Train/LinearLoss', linear_loss.data[0],
                          current_step)
            tb.add_scalar('Train/MelLoss', mel_loss.data[0], current_step)
            tb.add_scalar('LearningRate', optimizer.param_groups[0]['lr'],
                          current_step)
            tb.add_scalar('Time/StepTime', step_time, current_step)

            if current_step % c.save_step == 0:
                checkpoint_file = 'checkpoint_{}.pth.tar'.format(current_step)
                # save under CHECKPOINT_PATH so the restore branch above can
                # find it; it used to be written to OUT_PATH and was never
                # found on restart
                checkpoint_path = os.path.join(CHECKPOINT_PATH,
                                               checkpoint_file)
                save_checkpoint({'model': model.state_dict(),
                                 'optimizer': optimizer.state_dict(),
                                 'step': current_step,
                                 'total_loss': loss.data[0],
                                 'linear_loss': linear_loss.data[0],
                                 'mel_loss': mel_loss.data[0],
                                 'date': datetime.date.today().strftime(
                                     "%B %d, %Y")},
                                checkpoint_path)
                print("\n | > Checkpoint is saved : {}".format(
                    checkpoint_path))

        # drive the plateau scheduler with the last loss of the epoch
        lr_scheduler.step(loss.data[0])
        tb.add_scalar('Time/EpochTime', epoch_time, epoch)
        epoch_time = 0
2018-01-22 09:48:59 +00:00
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # default of 0 means "fresh run"; the old default of 128 silently
    # offset current_step of every new training by 128 and attempted a
    # pointless checkpoint restore
    parser.add_argument('--restore_step', type=int,
                        help='Global step to restore checkpoint', default=0)
    # required: without it a missing --config_path crashed deep inside
    # load_config instead of producing a clean usage error
    parser.add_argument('--config_path', type=str, required=True,
                        help='path to config file for training')
    args = parser.parse_args()
    main(args)