Mirror of https://github.com/coqui-ai/TTS.git

Commit 7bc27fac82 (parent 95654de860): Remove variables
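The diff below drops the pre-0.4 `torch.autograd.Variable` API in favor of plain tensors. As a rough sketch of the recurring pattern (illustrative, not code from this repo):

import torch as T

# Old (PyTorch < 0.4): wrap tensors to make them autograd-aware
#   dummy_input = T.autograd.Variable(T.rand(4, 128))
#   value = loss.data[0]

# New (PyTorch >= 0.4): Tensor and Variable are merged
dummy_input = T.rand(4, 128)    # autograd-ready as-is
loss = dummy_input.sum()
value = loss.item()             # scalar extraction; replaces loss.data[0]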
@@ -9,7 +9,7 @@ class PrenetTests(unittest.TestCase):
     def test_in_out(self):
         layer = Prenet(128, out_features=[256, 128])
-        dummy_input = T.autograd.Variable(T.rand(4, 128))
+        dummy_input = T.rand(4, 128)

         print(layer)
         output = layer(dummy_input)
@@ -21,7 +21,7 @@ class CBHGTests(unittest.TestCase):
     def test_in_out(self):
         layer = CBHG(128, K=6, projections=[128, 128], num_highways=2)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 128))
+        dummy_input = T.rand(4, 8, 128)

         print(layer)
         output = layer(dummy_input)
@@ -34,8 +34,8 @@ class DecoderTests(unittest.TestCase):
     def test_in_out(self):
         layer = Decoder(in_features=256, memory_dim=80, r=2)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 256))
-        dummy_memory = T.autograd.Variable(T.rand(4, 2, 80))
+        dummy_input = T.rand(4, 8, 256)
+        dummy_memory = T.rand(4, 2, 80)

         output, alignment = layer(dummy_input, dummy_memory)
@@ -48,7 +48,7 @@ class EncoderTests(unittest.TestCase):
     def test_in_out(self):
         layer = Encoder(128)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 128))
+        dummy_input = T.rand(4, 8, 128)

         print(layer)
         output = layer(dummy_input)
@@ -62,24 +62,22 @@ class L1LossMaskedTests(unittest.TestCase):
     def test_in_out(self):
         layer = L1LossMasked()
-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.ones(4) * 8).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.ones(4, 8, 128).float()
+        dummy_length = (T.ones(4) * 8).long()
         output = layer(dummy_input, dummy_target, dummy_length)
-        assert output.shape[0] == 0
-        assert len(output.shape) == 1
-        assert output.data[0] == 0.0
+        assert output.item() == 0.0

-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.zeros(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.ones(4) * 8).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.zeros(4, 8, 128).float()
+        dummy_length = (T.ones(4) * 8).long()
         output = layer(dummy_input, dummy_target, dummy_length)
-        assert output.data[0] == 1.0, "1.0 vs {}".format(output.data[0])
+        assert output.item() == 1.0, "1.0 vs {}".format(output.item())

-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.zeros(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.arange(5, 9)).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.zeros(4, 8, 128).float()
+        dummy_length = (T.arange(5, 9)).long()
         mask = ((_sequence_mask(dummy_length).float() - 1.0)
                 * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
-        assert output.data[0] == 1.0, "1.0 vs {}".format(output.data[0])
+        assert output.item() == 1.0, "1.0 vs {}".format(output.item())
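For context on why the expected values above come out to exactly 1.0: a masked L1 loss averages the absolute error only over positions inside each sequence's true length, so the -100.0 offsets injected at padded positions do not change the result. A minimal sketch, assuming a mask built like `_sequence_mask`; the names `sequence_mask` and `l1_loss_masked` are hypothetical, not the repo's implementation:

import torch
import torch.nn.functional as F

def sequence_mask(lengths, max_len):
    # [B, T] mask: True inside each sequence, False over padding
    steps = torch.arange(max_len, device=lengths.device)
    return steps.unsqueeze(0) < lengths.unsqueeze(1)

def l1_loss_masked(x, target, lengths):
    # mean absolute error over valid (unpadded) positions only
    mask = sequence_mask(lengths, x.size(1)).unsqueeze(2).float()
    loss = F.l1_loss(x * mask, target * mask, reduction='sum')
    return loss / (mask.sum() * x.size(2))

x = torch.ones(4, 8, 128)
target = torch.zeros(4, 8, 128)
lengths = torch.arange(5, 9)                     # valid lengths 5..8
print(l1_loss_masked(x, target, lengths).item())  # 1.0; padding ignored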
train.py: 94 changed lines
@@ -12,7 +12,6 @@ import numpy as np
 import torch.nn as nn
 from torch import optim
-from torch.autograd import Variable
 from torch.utils.data import DataLoader
 from tensorboardX import SummaryWriter
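Dropping the import is safe because, since PyTorch 0.4, `Variable` is a deprecated no-op kept only for backward compatibility (illustrative check):

import torch
from torch.autograd import Variable   # deprecated since PyTorch 0.4

t = torch.rand(2)
v = Variable(t)                        # no-op: returns a plain Tensor
print(type(v) is torch.Tensor)         # True; the wrapper class is gone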
@@ -94,41 +93,41 @@ def train(model, criterion, data_loader, optimizer, epoch):
         optimizer.zero_grad()

         # convert inputs to variables
-        text_input_var = Variable(text_input)
-        mel_spec_var = Variable(mel_input)
-        mel_lengths_var = Variable(mel_lengths)
-        linear_spec_var = Variable(linear_input, volatile=True)
+        text_input.requires_grad_()
+        mel_spec.requires_grad_()
+        # mel_lengths.requires_grad_()
+        # linear_spec.requires_grad_()

         # dispatch data to GPU
         if use_cuda:
-            text_input_var = text_input_var.cuda()
-            mel_spec_var = mel_spec_var.cuda()
-            mel_lengths_var = mel_lengths_var.cuda()
-            linear_spec_var = linear_spec_var.cuda()
+            text_input = text_input.cuda()
+            mel_spec = mel_spec.cuda()
+            mel_lengths = mel_lengths.cuda()
+            linear_spec = linear_spec.cuda()

         # create attention mask
         if c.mk > 0.0:
-            N = text_input_var.shape[1]
-            T = mel_spec_var.shape[1] // c.r
+            N = text_input.shape[1]
+            T = mel_spec.shape[1] // c.r
             M = create_attn_mask(N, T, 0.03)
             mk = mk_decay(c.mk, c.epochs, epoch)

         # forward pass
         mel_output, linear_output, alignments =\
-            model.forward(text_input_var, mel_spec_var)
+            model.forward(text_input, mel_spec)

         # loss computation
-        mel_loss = criterion(mel_output, mel_spec_var, mel_lengths_var)
-        linear_loss = 0.5 * criterion(linear_output, linear_spec_var, mel_lengths_var) \
+        mel_loss = criterion(mel_output, mel_spec, mel_lengths)
+        linear_loss = 0.5 * criterion(linear_output, linear_spec, mel_lengths) \
             + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
-                              linear_spec_var[:, :, :n_priority_freq],
-                              mel_lengths_var)
+                              linear_spec[:, :, :n_priority_freq],
+                              mel_lengths)
         loss = mel_loss + linear_loss
         if c.mk > 0.0:
-            attention_loss = criterion(alignments, M, mel_lengths_var)
+            attention_loss = criterion(alignments, M, mel_lengths)
             loss += mk * attention_loss
-            avg_attn_loss += attention_loss.data[0]
-            progbar_display['attn_loss'] = attention_loss.data[0]
+            avg_attn_loss += attention_loss.item()
+            progbar_display['attn_loss'] = attention_loss.item()

         # backpass and check the grad norm
         loss.backward()
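Two post-0.4 idioms appear in this hunk: in-place `requires_grad_()` replaces the `Variable(...)` wrapper, and `.item()` replaces `.data[0]` for reading a Python scalar out of a 0-dim loss tensor. A minimal sketch with hypothetical stand-in tensors; note that only floating-point tensors may require gradients, which is consistent with the length tensors staying commented out above:

import torch

mel_spec = torch.rand(4, 8, 80)   # float tensor: may track gradients
mel_spec.requires_grad_()         # in-place; replaces Variable(mel_spec)

loss = mel_spec.abs().mean()      # 0-dim loss tensor
print(loss.item())                # Python float; replaces loss.data[0]

mel_lengths = (torch.ones(4) * 8).long()
# mel_lengths.requires_grad_()    # would raise: only float dtypes can
#                                 # require gradients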
@@ -142,21 +141,21 @@ def train(model, criterion, data_loader, optimizer, epoch):
         step_time = time.time() - start_time
         epoch_time += step_time

-        progbar_display['total_loss'] = loss.data[0]
-        progbar_display['linear_loss'] = linear_loss.data[0]
-        progbar_display['mel_loss'] = mel_loss.data[0]
+        progbar_display['total_loss'] = loss.item()
+        progbar_display['linear_loss'] = linear_loss.item()
+        progbar_display['mel_loss'] = mel_loss.item()
         progbar_display['grad_norm'] = grad_norm

         # update
         progbar.update(num_iter+1, values=list(progbar_display.items()))
-        avg_linear_loss += linear_loss.data[0]
-        avg_mel_loss += mel_loss.data[0]
+        avg_linear_loss += linear_loss.item()
+        avg_mel_loss += mel_loss.item()

         # Plot Training Iter Stats
-        tb.add_scalar('TrainIterLoss/TotalLoss', loss.data[0], current_step)
-        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.data[0],
+        tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step)
+        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.item(),
                       current_step)
-        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.data[0], current_step)
+        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.item(), current_step)
         tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
                       current_step)
         tb.add_scalar('Params/GradNorm', grad_norm, current_step)
@@ -165,12 +164,12 @@ def train(model, criterion, data_loader, optimizer, epoch):
         if current_step % c.save_step == 0:
             if c.checkpoint:
                 # save model
-                save_checkpoint(model, optimizer, linear_loss.data[0],
+                save_checkpoint(model, optimizer, linear_loss.item(),
                                 OUT_PATH, current_step, epoch)

             # Diagnostic visualizations
             const_spec = linear_output[0].data.cpu().numpy()
-            gt_spec = linear_spec_var[0].data.cpu().numpy()
+            gt_spec = linear_spec[0].data.cpu().numpy()

             const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
             gt_spec = plot_spectrogram(gt_spec, data_loader.dataset.ap)
@@ -221,6 +220,7 @@ def evaluate(model, criterion, data_loader, current_step):
     print(" | > Validation")
     progbar = Progbar(len(data_loader.dataset) / c.batch_size)
     n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
+    with torch.no_grad():
     for num_iter, data in enumerate(data_loader):
         start_time = time.time()
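`volatile=True` was removed in PyTorch 0.4; the replacement for inference is the `torch.no_grad()` context manager, which the new line applies to the whole validation loop. A minimal sketch with a stand-in model:

import torch

model = torch.nn.Linear(128, 80)   # stand-in for the TTS model

with torch.no_grad():              # replaces Variable(x, volatile=True)
    out = model(torch.rand(4, 128))

print(out.requires_grad)           # False: no autograd graph is recorded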
@@ -231,47 +231,41 @@ def evaluate(model, criterion, data_loader, current_step):
             mel_input = data[3]
             mel_lengths = data[4]

-            # convert inputs to variables
-            text_input_var = Variable(text_input)
-            mel_spec_var = Variable(mel_input)
-            mel_lengths_var = Variable(mel_lengths)
-            linear_spec_var = Variable(linear_input, volatile=True)
-
             # dispatch data to GPU
             if use_cuda:
-                text_input_var = text_input_var.cuda()
-                mel_spec_var = mel_spec_var.cuda()
-                mel_lengths_var = mel_lengths_var.cuda()
-                linear_spec_var = linear_spec_var.cuda()
+                text_input = text_input.cuda()
+                mel_spec = mel_spec.cuda()
+                mel_lengths = mel_lengths.cuda()
+                linear_spec = linear_spec.cuda()

             # forward pass
             mel_output, linear_output, alignments =\
-                model.forward(text_input_var, mel_spec_var)
+                model.forward(text_input, mel_spec)

             # loss computation
-            mel_loss = criterion(mel_output, mel_spec_var, mel_lengths_var)
-            linear_loss = 0.5 * criterion(linear_output, linear_spec_var, mel_lengths_var) \
+            mel_loss = criterion(mel_output, mel_spec, mel_lengths)
+            linear_loss = 0.5 * criterion(linear_output, linear_spec, mel_lengths) \
                 + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
-                                  linear_spec_var[:, :, :n_priority_freq],
-                                  mel_lengths_var)
+                                  linear_spec[:, :, :n_priority_freq],
+                                  mel_lengths)
             loss = mel_loss + linear_loss

             step_time = time.time() - start_time
             epoch_time += step_time

             # update
-            progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
+            progbar.update(num_iter+1, values=[('total_loss', loss.item()),
                                                ('linear_loss',
-                                                linear_loss.data[0]),
-                                               ('mel_loss', mel_loss.data[0])])
+                                                linear_loss.item()),
+                                               ('mel_loss', mel_loss.item())])

-            avg_linear_loss += linear_loss.data[0]
-            avg_mel_loss += mel_loss.data[0]
+            avg_linear_loss += linear_loss.item()
+            avg_mel_loss += mel_loss.item()

             # Diagnostic visualizations
             idx = np.random.randint(mel_input.shape[0])
             const_spec = linear_output[idx].data.cpu().numpy()
-            gt_spec = linear_spec_var[idx].data.cpu().numpy()
+            gt_spec = linear_spec[idx].data.cpu().numpy()
             align_img = alignments[idx].data.cpu().numpy()

             const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
|
Loading…
Reference in New Issue