mirror of https://github.com/coqui-ai/TTS.git

commit 7bc27fac82 (parent 95654de860): Remove variables
@@ -9,7 +9,7 @@ class PrenetTests(unittest.TestCase):
     def test_in_out(self):
         layer = Prenet(128, out_features=[256, 128])
-        dummy_input = T.autograd.Variable(T.rand(4, 128))
+        dummy_input = T.rand(4, 128)
 
         print(layer)
         output = layer(dummy_input)
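
The same one-line change repeats through every hunk of this commit: since PyTorch 0.4, Tensor and torch.autograd.Variable are merged, the wrapper is a deprecated no-op, and plain tensors carry autograd state themselves. A minimal sketch of the before/after, using a stand-in nn.Linear in place of the Prenet so it runs on its own:

import torch as T
import torch.nn as nn

layer = nn.Linear(128, 256)  # stand-in for Prenet; any nn.Module behaves the same

# PyTorch < 0.4: inputs had to be wrapped for autograd
# dummy_input = T.autograd.Variable(T.rand(4, 128))

# PyTorch >= 0.4: a plain tensor is already autograd-aware
dummy_input = T.rand(4, 128)
output = layer(dummy_input)
print(output.shape)  # torch.Size([4, 256])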

@@ -21,7 +21,7 @@ class CBHGTests(unittest.TestCase):
     def test_in_out(self):
         layer = CBHG(128, K=6, projections=[128, 128], num_highways=2)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 128))
+        dummy_input = T.rand(4, 8, 128)
 
         print(layer)
         output = layer(dummy_input)

@@ -34,8 +34,8 @@ class DecoderTests(unittest.TestCase):
     def test_in_out(self):
         layer = Decoder(in_features=256, memory_dim=80, r=2)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 256))
-        dummy_memory = T.autograd.Variable(T.rand(4, 2, 80))
+        dummy_input = T.rand(4, 8, 256)
+        dummy_memory = T.rand(4, 2, 80)
 
         output, alignment = layer(dummy_input, dummy_memory)
 
@@ -48,7 +48,7 @@ class EncoderTests(unittest.TestCase):
     def test_in_out(self):
         layer = Encoder(128)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 128))
+        dummy_input = T.rand(4, 8, 128)
 
         print(layer)
         output = layer(dummy_input)

@@ -62,24 +62,22 @@ class L1LossMaskedTests(unittest.TestCase):
     def test_in_out(self):
         layer = L1LossMasked()
-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.ones(4) * 8).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.ones(4, 8, 128).float()
+        dummy_length = (T.ones(4) * 8).long()
         output = layer(dummy_input, dummy_target, dummy_length)
-        assert output.shape[0] == 0
-        assert len(output.shape) == 1
-        assert output.data[0] == 0.0
+        assert output.item() == 0.0
 
-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.zeros(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.ones(4) * 8).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.zeros(4, 8, 128).float()
+        dummy_length = (T.ones(4) * 8).long()
         output = layer(dummy_input, dummy_target, dummy_length)
-        assert output.data[0] == 1.0, "1.0 vs {}".format(output.data[0])
+        assert output.item() == 1.0, "1.0 vs {}".format(output.data[0])
 
-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.zeros(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.arange(5, 9)).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.zeros(4, 8, 128).float()
+        dummy_length = (T.arange(5, 9)).long()
         mask = ((_sequence_mask(dummy_length).float() - 1.0)
                 * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
-        assert output.data[0] == 1.0, "1.0 vs {}".format(output.data[0])
+        assert output.item() == 1.0, "1.0 vs {}".format(output.data[0])
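
In the last block above, the test corrupts only the padded positions: _sequence_mask(dummy_length) is 1 inside each sequence and 0 in the padding, so (mask - 1.0) * 100.0 adds large values exactly where the masked loss must ignore them, and the loss should stay 1.0. A minimal sketch of what such a helper computes (a hypothetical sequence_mask, assumed to mirror the repo's _sequence_mask):

import torch

def sequence_mask(lengths):
    # (B, T_max) mask: 1.0 for valid steps, 0.0 for padding
    max_len = lengths.max().item()
    steps = torch.arange(max_len).unsqueeze(0)      # (1, T_max)
    return (steps < lengths.unsqueeze(1)).float()   # (B, T_max)

lengths = torch.arange(5, 9)  # as in the test: lengths 5, 6, 7, 8
print(sequence_mask(lengths)[0])  # tensor([1., 1., 1., 1., 1., 0., 0., 0.])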

train.py (132 changed lines)

@@ -12,7 +12,6 @@ import numpy as np
 import torch.nn as nn
 from torch import optim
-from torch.autograd import Variable
 from torch.utils.data import DataLoader
 from tensorboardX import SummaryWriter
 
@@ -94,41 +93,41 @@ def train(model, criterion, data_loader, optimizer, epoch):
         optimizer.zero_grad()
 
         # convert inputs to variables
-        text_input_var = Variable(text_input)
-        mel_spec_var = Variable(mel_input)
-        mel_lengths_var = Variable(mel_lengths)
-        linear_spec_var = Variable(linear_input, volatile=True)
+        text_input.requires_grad_()
+        mel_spec.requires_grad_()
+        # mel_lengths.requires_grad_()
+        # linear_spec.requires_grad_()
 
         # dispatch data to GPU
         if use_cuda:
-            text_input_var = text_input_var.cuda()
-            mel_spec_var = mel_spec_var.cuda()
-            mel_lengths_var = mel_lengths_var.cuda()
-            linear_spec_var = linear_spec_var.cuda()
+            text_input = text_input.cuda()
+            mel_spec = mel_spec.cuda()
+            mel_lengths = mel_lengths.cuda()
+            linear_spec = linear_spec.cuda()
 
         # create attention mask
         if c.mk > 0.0:
-            N = text_input_var.shape[1]
-            T = mel_spec_var.shape[1] // c.r
+            N = text_input.shape[1]
+            T = mel_spec.shape[1] // c.r
             M = create_attn_mask(N, T, 0.03)
             mk = mk_decay(c.mk, c.epochs, epoch)
 
         # forward pass
         mel_output, linear_output, alignments =\
-            model.forward(text_input_var, mel_spec_var)
+            model.forward(text_input, mel_spec)
 
         # loss computation
-        mel_loss = criterion(mel_output, mel_spec_var, mel_lengths_var)
-        linear_loss = 0.5 * criterion(linear_output, linear_spec_var, mel_lengths_var) \
+        mel_loss = criterion(mel_output, mel_spec, mel_lengths)
+        linear_loss = 0.5 * criterion(linear_output, linear_spec, mel_lengths) \
             + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
-                              linear_spec_var[:, :, :n_priority_freq],
-                              mel_lengths_var)
+                              linear_spec[:, :, :n_priority_freq],
+                              mel_lengths)
         loss = mel_loss + linear_loss
         if c.mk > 0.0:
-            attention_loss = criterion(alignments, M, mel_lengths_var)
+            attention_loss = criterion(alignments, M, mel_lengths)
             loss += mk * attention_loss
-            avg_attn_loss += attention_loss.data[0]
-            progbar_display['attn_loss'] = attention_loss.data[0]
+            avg_attn_loss += attention_loss.item()
+            progbar_display['attn_loss'] = attention_loss.item()
 
         # backpass and check the grad norm
         loss.backward()
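
A caveat on the requires_grad_() calls introduced above: training the model's weights does not require gradients on the inputs, because nn.Module parameters require grad by default; and if text_input is the usual LongTensor of character ids, requires_grad_() would raise, since only floating-point tensors can require gradients. A minimal sketch of the distinction, with a stand-in nn.Embedding:

import torch
import torch.nn as nn

model = nn.Embedding(30, 8)                 # parameters require grad by default
text_input = torch.randint(0, 30, (4, 12))  # LongTensor of token ids

# text_input.requires_grad_()  # would raise: only floating-point tensors
#                              # can require gradients

loss = model(text_input).sum()
loss.backward()                 # gradients still reach the model's weights
print(model.weight.grad.shape)  # torch.Size([30, 8])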

@@ -142,21 +141,21 @@ def train(model, criterion, data_loader, optimizer, epoch):
         step_time = time.time() - start_time
         epoch_time += step_time
 
-        progbar_display['total_loss'] = loss.data[0]
-        progbar_display['linear_loss'] = linear_loss.data[0]
-        progbar_display['mel_loss'] = mel_loss.data[0]
+        progbar_display['total_loss'] = loss.item()
+        progbar_display['linear_loss'] = linear_loss.item()
+        progbar_display['mel_loss'] = mel_loss.item()
         progbar_display['grad_norm'] = grad_norm
 
         # update
         progbar.update(num_iter+1, values=list(progbar_display.items()))
-        avg_linear_loss += linear_loss.data[0]
-        avg_mel_loss += mel_loss.data[0]
+        avg_linear_loss += linear_loss.item()
+        avg_mel_loss += mel_loss.item()
 
         # Plot Training Iter Stats
-        tb.add_scalar('TrainIterLoss/TotalLoss', loss.data[0], current_step)
-        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.data[0],
+        tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step)
+        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.item(),
                       current_step)
-        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.data[0], current_step)
+        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.item(), current_step)
         tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
                       current_step)
         tb.add_scalar('Params/GradNorm', grad_norm, current_step)
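
The .data[0] -> .item() substitutions here and elsewhere in this commit follow from another PyTorch 0.4 change: reduced losses are now 0-dimensional tensors, so indexing them with [0] fails, while .item() returns the detached Python scalar that logging code wants. A minimal sketch:

import torch
import torch.nn.functional as F

loss = F.l1_loss(torch.ones(3), torch.zeros(3))
print(loss.dim())   # 0 -- a 0-dim tensor; loss.data[0] raises an IndexError
print(loss.item())  # 1.0 -- a plain Python float, safe for progbar/TensorBoard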

@@ -165,12 +164,12 @@ def train(model, criterion, data_loader, optimizer, epoch):
         if current_step % c.save_step == 0:
             if c.checkpoint:
                 # save model
-                save_checkpoint(model, optimizer, linear_loss.data[0],
+                save_checkpoint(model, optimizer, linear_loss.item(),
                                 OUT_PATH, current_step, epoch)
 
             # Diagnostic visualizations
             const_spec = linear_output[0].data.cpu().numpy()
-            gt_spec = linear_spec_var[0].data.cpu().numpy()
+            gt_spec = linear_spec[0].data.cpu().numpy()
 
             const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
             gt_spec = plot_spectrogram(gt_spec, data_loader.dataset.ap)

@@ -221,57 +220,52 @@ def evaluate(model, criterion, data_loader, current_step):
     print(" | > Validation")
     progbar = Progbar(len(data_loader.dataset) / c.batch_size)
     n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
-    for num_iter, data in enumerate(data_loader):
-        start_time = time.time()
-
-        # setup input data
-        text_input = data[0]
-        text_lengths = data[1]
-        linear_input = data[2]
-        mel_input = data[3]
-        mel_lengths = data[4]
-
-        # convert inputs to variables
-        text_input_var = Variable(text_input)
-        mel_spec_var = Variable(mel_input)
-        mel_lengths_var = Variable(mel_lengths)
-        linear_spec_var = Variable(linear_input, volatile=True)
-
-        # dispatch data to GPU
-        if use_cuda:
-            text_input_var = text_input_var.cuda()
-            mel_spec_var = mel_spec_var.cuda()
-            mel_lengths_var = mel_lengths_var.cuda()
-            linear_spec_var = linear_spec_var.cuda()
-
-        # forward pass
-        mel_output, linear_output, alignments =\
-            model.forward(text_input_var, mel_spec_var)
-
-        # loss computation
-        mel_loss = criterion(mel_output, mel_spec_var, mel_lengths_var)
-        linear_loss = 0.5 * criterion(linear_output, linear_spec_var, mel_lengths_var) \
-            + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
-                              linear_spec_var[:, :, :n_priority_freq],
-                              mel_lengths_var)
-        loss = mel_loss + linear_loss
-
-        step_time = time.time() - start_time
-        epoch_time += step_time
-
-        # update
-        progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
-                                           ('linear_loss',
-                                            linear_loss.data[0]),
-                                           ('mel_loss', mel_loss.data[0])])
-
-        avg_linear_loss += linear_loss.data[0]
-        avg_mel_loss += mel_loss.data[0]
+    with torch.no_grad():
+        for num_iter, data in enumerate(data_loader):
+            start_time = time.time()
+
+            # setup input data
+            text_input = data[0]
+            text_lengths = data[1]
+            linear_input = data[2]
+            mel_input = data[3]
+            mel_lengths = data[4]
+
+            # dispatch data to GPU
+            if use_cuda:
+                text_input = text_input.cuda()
+                mel_spec = mel_spec.cuda()
+                mel_lengths = mel_lengths.cuda()
+                linear_spec = linear_spec.cuda()
+
+            # forward pass
+            mel_output, linear_output, alignments =\
+                model.forward(text_input, mel_spec)
+
+            # loss computation
+            mel_loss = criterion(mel_output, mel_spec, mel_lengths)
+            linear_loss = 0.5 * criterion(linear_output, linear_spec, mel_lengths) \
+                + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
+                                  linear_spec[:, :, :n_priority_freq],
+                                  mel_lengths)
+            loss = mel_loss + linear_loss
+
+            step_time = time.time() - start_time
+            epoch_time += step_time
+
+            # update
+            progbar.update(num_iter+1, values=[('total_loss', loss.item()),
+                                               ('linear_loss',
+                                                linear_loss.item()),
+                                               ('mel_loss', mel_loss.item())])
+
+            avg_linear_loss += linear_loss.item()
+            avg_mel_loss += mel_loss.item()
 
     # Diagnostic visualizations
     idx = np.random.randint(mel_input.shape[0])
     const_spec = linear_output[idx].data.cpu().numpy()
-    gt_spec = linear_spec_var[idx].data.cpu().numpy()
+    gt_spec = linear_spec[idx].data.cpu().numpy()
     align_img = alignments[idx].data.cpu().numpy()
 
     const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
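
The evaluate() rewrite replaces the removed volatile=True flag with a torch.no_grad() block, the PyTorch >= 0.4 way to skip graph construction at inference. (Note that inside the new loop the loader still yields mel_input/linear_input while the code reads mel_spec/linear_spec, which this hunk never assigns, so a follow-up rename looks necessary for it to run.) A minimal sketch of the pattern, with a stand-in nn.Linear:

import torch
import torch.nn as nn

model = nn.Linear(128, 80)  # stand-in for the TTS model
batch = torch.rand(4, 8, 128)

with torch.no_grad():       # replaces Variable(x, volatile=True)
    output = model(batch)

print(output.requires_grad)  # False: no autograd graph is recorded,
                             # saving memory during validation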