mirror of https://github.com/coqui-ai/TTS.git
Update train.py for stop token prediciton
parent
8be07ee3c5
commit
31cc9c5443
53
train.py
53
train.py
|
@ -59,12 +59,12 @@ LOG_DIR = OUT_PATH
|
||||||
tb = SummaryWriter(LOG_DIR)
|
tb = SummaryWriter(LOG_DIR)
|
||||||
|
|
||||||
|
|
||||||
def train(model, criterion, data_loader, optimizer, epoch):
|
def train(model, criterion, criterion_st, data_loader, optimizer, epoch):
|
||||||
model = model.train()
|
model = model.train()
|
||||||
epoch_time = 0
|
epoch_time = 0
|
||||||
avg_linear_loss = 0
|
avg_linear_loss = 0
|
||||||
avg_mel_loss = 0
|
avg_mel_loss = 0
|
||||||
|
avg_stop_loss = 0
|
||||||
print(" | > Epoch {}/{}".format(epoch, c.epochs))
|
print(" | > Epoch {}/{}".format(epoch, c.epochs))
|
||||||
progbar = Progbar(len(data_loader.dataset) / c.batch_size)
|
progbar = Progbar(len(data_loader.dataset) / c.batch_size)
|
||||||
n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
|
n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
|
||||||
|
@ -96,16 +96,17 @@ def train(model, criterion, data_loader, optimizer, epoch):
|
||||||
linear_input = linear_input.cuda()
|
linear_input = linear_input.cuda()
|
||||||
|
|
||||||
# forward pass
|
# forward pass
|
||||||
mel_output, linear_output, alignments =\
|
mel_output, linear_output, alignments, stop_tokens =\
|
||||||
model.forward(text_input, mel_input)
|
model.forward(text_input, mel_input)
|
||||||
|
|
||||||
# loss computation
|
# loss computation
|
||||||
|
stop_loss = criterion_st(stop_tokens, stop_target)
|
||||||
mel_loss = criterion(mel_output, mel_input, mel_lengths)
|
mel_loss = criterion(mel_output, mel_input, mel_lengths)
|
||||||
linear_loss = 0.5 * criterion(linear_output, linear_input, mel_lengths) \
|
linear_loss = 0.5 * criterion(linear_output, linear_input, mel_lengths) \
|
||||||
+ 0.5 * criterion(linear_output[:, :, :n_priority_freq],
|
+ 0.5 * criterion(linear_output[:, :, :n_priority_freq],
|
||||||
linear_input[:, :, :n_priority_freq],
|
linear_input[:, :, :n_priority_freq],
|
||||||
mel_lengths)
|
mel_lengths)
|
||||||
loss = mel_loss + linear_loss
|
loss = mel_loss + linear_loss + stop_loss
|
||||||
|
|
||||||
# backpass and check the grad norm
|
# backpass and check the grad norm
|
||||||
loss.backward()
|
loss.backward()
|
||||||
|
@ -123,9 +124,11 @@ def train(model, criterion, data_loader, optimizer, epoch):
|
||||||
progbar.update(num_iter+1, values=[('total_loss', loss.item()),
|
progbar.update(num_iter+1, values=[('total_loss', loss.item()),
|
||||||
('linear_loss', linear_loss.item()),
|
('linear_loss', linear_loss.item()),
|
||||||
('mel_loss', mel_loss.item()),
|
('mel_loss', mel_loss.item()),
|
||||||
|
('stop_loss', stop_loss.item()),
|
||||||
('grad_norm', grad_norm.item())])
|
('grad_norm', grad_norm.item())])
|
||||||
avg_linear_loss += linear_loss.item()
|
avg_linear_loss += linear_loss.item()
|
||||||
avg_mel_loss += mel_loss.item()
|
avg_mel_loss += mel_loss.item()
|
||||||
|
avg_stop_loss += stop_loss.item()
|
||||||
|
|
||||||
# Plot Training Iter Stats
|
# Plot Training Iter Stats
|
||||||
tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step)
|
tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step)
|
||||||
|
@ -172,24 +175,26 @@ def train(model, criterion, data_loader, optimizer, epoch):
|
||||||
|
|
||||||
avg_linear_loss /= (num_iter + 1)
|
avg_linear_loss /= (num_iter + 1)
|
||||||
avg_mel_loss /= (num_iter + 1)
|
avg_mel_loss /= (num_iter + 1)
|
||||||
avg_total_loss = avg_mel_loss + avg_linear_loss
|
avg_stop_loss /= (num_iter + 1)
|
||||||
|
avg_total_loss = avg_mel_loss + avg_linear_loss + avg_stop_loss
|
||||||
|
|
||||||
# Plot Training Epoch Stats
|
# Plot Training Epoch Stats
|
||||||
tb.add_scalar('TrainEpochLoss/TotalLoss', avg_total_loss, current_step)
|
tb.add_scalar('TrainEpochLoss/TotalLoss', avg_total_loss, current_step)
|
||||||
tb.add_scalar('TrainEpochLoss/LinearLoss', avg_linear_loss, current_step)
|
tb.add_scalar('TrainEpochLoss/LinearLoss', avg_linear_loss, current_step)
|
||||||
tb.add_scalar('TrainEpochLoss/MelLoss', avg_mel_loss, current_step)
|
tb.add_scalar('TrainEpochLoss/MelLoss', avg_mel_loss, current_step)
|
||||||
|
tb.add_scalar('TrainEpochLoss/StopLoss', avg_stop_loss, current_step)
|
||||||
tb.add_scalar('Time/EpochTime', epoch_time, epoch)
|
tb.add_scalar('Time/EpochTime', epoch_time, epoch)
|
||||||
epoch_time = 0
|
epoch_time = 0
|
||||||
|
|
||||||
return avg_linear_loss, current_step
|
return avg_linear_loss, current_step
|
||||||
|
|
||||||
|
|
||||||
def evaluate(model, criterion, data_loader, current_step):
|
def evaluate(model, criterion, criterion_st, data_loader, current_step):
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
epoch_time = 0
|
epoch_time = 0
|
||||||
avg_linear_loss = 0
|
avg_linear_loss = 0
|
||||||
avg_mel_loss = 0
|
avg_mel_loss = 0
|
||||||
|
avg_stop_loss = 0
|
||||||
print(" | > Validation")
|
print(" | > Validation")
|
||||||
progbar = Progbar(len(data_loader.dataset) / c.batch_size)
|
progbar = Progbar(len(data_loader.dataset) / c.batch_size)
|
||||||
n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
|
n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
|
||||||
|
@ -203,6 +208,11 @@ def evaluate(model, criterion, data_loader, current_step):
|
||||||
linear_input = data[2]
|
linear_input = data[2]
|
||||||
mel_input = data[3]
|
mel_input = data[3]
|
||||||
mel_lengths = data[4]
|
mel_lengths = data[4]
|
||||||
|
stop_target = data[5]
|
||||||
|
|
||||||
|
# set stop targets view, we predict a single stop token per r frames prediction
|
||||||
|
stop_target = stop_target.view(text_input.shape[0], stop_target.size(1) // c.r, -1)
|
||||||
|
stop_target = (stop_target.sum(2) > 0.0).float()
|
||||||
|
|
||||||
# dispatch data to GPU
|
# dispatch data to GPU
|
||||||
if use_cuda:
|
if use_cuda:
|
||||||
|
@ -210,18 +220,20 @@ def evaluate(model, criterion, data_loader, current_step):
|
||||||
mel_input = mel_input.cuda()
|
mel_input = mel_input.cuda()
|
||||||
mel_lengths = mel_lengths.cuda()
|
mel_lengths = mel_lengths.cuda()
|
||||||
linear_input = linear_input.cuda()
|
linear_input = linear_input.cuda()
|
||||||
|
stop_target = stop_target.cuda()
|
||||||
|
|
||||||
# forward pass
|
# forward pass
|
||||||
mel_output, linear_output, alignments =\
|
mel_output, linear_output, alignments, stop_tokens =\
|
||||||
model.forward(text_input, mel_input)
|
model.forward(text_input, mel_spec)
|
||||||
|
|
||||||
# loss computation
|
# loss computation
|
||||||
|
stop_loss = criterion_st(stop_tokens, stop_target)
|
||||||
mel_loss = criterion(mel_output, mel_input, mel_lengths)
|
mel_loss = criterion(mel_output, mel_input, mel_lengths)
|
||||||
linear_loss = 0.5 * criterion(linear_output, linear_input, mel_lengths) \
|
linear_loss = 0.5 * criterion(linear_output, linear_input, mel_lengths) \
|
||||||
+ 0.5 * criterion(linear_output[:, :, :n_priority_freq],
|
+ 0.5 * criterion(linear_output[:, :, :n_priority_freq],
|
||||||
linear_input[:, :, :n_priority_freq],
|
linear_input[:, :, :n_priority_freq],
|
||||||
mel_lengths)
|
mel_lengths)
|
||||||
loss = mel_loss + linear_loss
|
loss = mel_loss + linear_loss + stop_loss
|
||||||
|
|
||||||
step_time = time.time() - start_time
|
step_time = time.time() - start_time
|
||||||
epoch_time += step_time
|
epoch_time += step_time
|
||||||
|
@ -229,10 +241,12 @@ def evaluate(model, criterion, data_loader, current_step):
|
||||||
# update
|
# update
|
||||||
progbar.update(num_iter+1, values=[('total_loss', loss.item()),
|
progbar.update(num_iter+1, values=[('total_loss', loss.item()),
|
||||||
('linear_loss', linear_loss.item()),
|
('linear_loss', linear_loss.item()),
|
||||||
('mel_loss', mel_loss.item())])
|
('mel_loss', mel_loss.item()),
|
||||||
|
('stop_loss', stop_loss.item())])
|
||||||
|
|
||||||
avg_linear_loss += linear_loss.item()
|
avg_linear_loss += linear_loss.item()
|
||||||
avg_mel_loss += mel_loss.item()
|
avg_mel_loss += mel_loss.item()
|
||||||
|
avg_stop_loss += stop_loss.item()
|
||||||
|
|
||||||
# Diagnostic visualizations
|
# Diagnostic visualizations
|
||||||
idx = np.random.randint(mel_input.shape[0])
|
idx = np.random.randint(mel_input.shape[0])
|
||||||
|
@ -263,13 +277,15 @@ def evaluate(model, criterion, data_loader, current_step):
|
||||||
|
|
||||||
# compute average losses
|
# compute average losses
|
||||||
avg_linear_loss /= (num_iter + 1)
|
avg_linear_loss /= (num_iter + 1)
|
||||||
avg_mel_loss /= (num_iter + 1)
|
avg_stop_loss /= (num_iter + 1)
|
||||||
avg_total_loss = avg_mel_loss + avg_linear_loss
|
avg_total_loss = avg_mel_loss + avg_linear_loss + stop_loss
|
||||||
|
|
||||||
# Plot Learning Stats
|
# Plot Learning Stats
|
||||||
tb.add_scalar('ValEpochLoss/TotalLoss', avg_total_loss, current_step)
|
tb.add_scalar('ValEpochLoss/TotalLoss', avg_total_loss, current_step)
|
||||||
tb.add_scalar('ValEpochLoss/LinearLoss', avg_linear_loss, current_step)
|
tb.add_scalar('ValEpochLoss/LinearLoss', avg_linear_loss, current_step)
|
||||||
tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step)
|
tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step)
|
||||||
|
tb.add_scalar('ValEpochLoss/Stop_loss', avg_stop_loss, current_step)
|
||||||
|
|
||||||
return avg_linear_loss
|
return avg_linear_loss
|
||||||
|
|
||||||
|
|
||||||
|
@ -324,10 +340,8 @@ def main(args):
|
||||||
|
|
||||||
optimizer = optim.Adam(model.parameters(), lr=c.lr)
|
optimizer = optim.Adam(model.parameters(), lr=c.lr)
|
||||||
|
|
||||||
if use_cuda:
|
|
||||||
criterion = L1LossMasked().cuda()
|
|
||||||
else:
|
|
||||||
criterion = L1LossMasked()
|
criterion = L1LossMasked()
|
||||||
|
criterion_st = nn.BCELoss()
|
||||||
|
|
||||||
if args.restore_path:
|
if args.restore_path:
|
||||||
checkpoint = torch.load(args.restore_path)
|
checkpoint = torch.load(args.restore_path)
|
||||||
|
@ -344,6 +358,8 @@ def main(args):
|
||||||
|
|
||||||
if use_cuda:
|
if use_cuda:
|
||||||
model = nn.DataParallel(model.cuda())
|
model = nn.DataParallel(model.cuda())
|
||||||
|
criterion.cuda()
|
||||||
|
criterion_st.cuda()
|
||||||
|
|
||||||
num_params = count_parameters(model)
|
num_params = count_parameters(model)
|
||||||
print(" | > Model has {} parameters".format(num_params))
|
print(" | > Model has {} parameters".format(num_params))
|
||||||
|
@ -356,15 +372,14 @@ def main(args):
|
||||||
|
|
||||||
for epoch in range(0, c.epochs):
|
for epoch in range(0, c.epochs):
|
||||||
train_loss, current_step = train(
|
train_loss, current_step = train(
|
||||||
model, criterion, train_loader, optimizer, epoch)
|
model, criterion, criterion_st, train_loader, optimizer, epoch)
|
||||||
val_loss = evaluate(model, criterion, val_loader, current_step)
|
val_loss = evaluate(model, criterion, criterion_st, val_loader, current_step)
|
||||||
best_loss = save_best_model(model, optimizer, val_loss,
|
best_loss = save_best_model(model, optimizer, val_loss,
|
||||||
best_loss, OUT_PATH,
|
best_loss, OUT_PATH,
|
||||||
current_step, epoch)
|
current_step, epoch)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
try:
|
try:
|
||||||
main(args)
|
main(args)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
|
Loading…
Reference in New Issue