Remove variables

pull/10/head
Eren Golge 2018-04-25 08:02:56 -07:00
parent 95654de860
commit 7bc27fac82
2 changed files with 80 additions and 88 deletions
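
This commit ports the code to the PyTorch 0.4 API, where Tensor and Variable were merged: the explicit torch.autograd.Variable(...) wrappers go away, volatile=True inference is replaced by the torch.no_grad() context manager, and scalar losses are read with .item() instead of .data[0]. A minimal sketch of the before/after pattern (illustrative only, not code from this repo):

    import torch

    # Pre-0.4 style removed by this commit:
    #   x = torch.autograd.Variable(torch.rand(4, 128))
    #   y = torch.autograd.Variable(torch.rand(4, 128), volatile=True)
    #   scalar = loss.data[0]

    # PyTorch >= 0.4 equivalents:
    x = torch.rand(4, 128, requires_grad=True)  # tensors track autograd directly
    with torch.no_grad():                       # replaces volatile=True
        y = torch.rand(4, 128)
    loss = x.sum()
    scalar = loss.item()                        # replaces loss.data[0]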


@@ -9,7 +9,7 @@ class PrenetTests(unittest.TestCase):
     def test_in_out(self):
         layer = Prenet(128, out_features=[256, 128])
-        dummy_input = T.autograd.Variable(T.rand(4, 128))
+        dummy_input = T.rand(4, 128)
         print(layer)
         output = layer(dummy_input)
@@ -21,7 +21,7 @@ class CBHGTests(unittest.TestCase):
     def test_in_out(self):
         layer = CBHG(128, K=6, projections=[128, 128], num_highways=2)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 128))
+        dummy_input = T.rand(4, 8, 128)
         print(layer)
         output = layer(dummy_input)
@@ -34,8 +34,8 @@ class DecoderTests(unittest.TestCase):
     def test_in_out(self):
         layer = Decoder(in_features=256, memory_dim=80, r=2)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 256))
-        dummy_memory = T.autograd.Variable(T.rand(4, 2, 80))
+        dummy_input = T.rand(4, 8, 256)
+        dummy_memory = T.rand(4, 2, 80)
         output, alignment = layer(dummy_input, dummy_memory)
@@ -48,7 +48,7 @@ class EncoderTests(unittest.TestCase):
     def test_in_out(self):
         layer = Encoder(128)
-        dummy_input = T.autograd.Variable(T.rand(4, 8, 128))
+        dummy_input = T.rand(4, 8, 128)
         print(layer)
         output = layer(dummy_input)
@@ -62,24 +62,22 @@ class L1LossMaskedTests(unittest.TestCase):
     def test_in_out(self):
         layer = L1LossMasked()
-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.ones(4) * 8).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.ones(4, 8, 128).float()
+        dummy_length = (T.ones(4) * 8).long()
         output = layer(dummy_input, dummy_target, dummy_length)
-        assert output.shape[0] == 0
-        assert len(output.shape) == 1
-        assert output.data[0] == 0.0
+        assert output.item() == 0.0

-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.zeros(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.ones(4) * 8).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.zeros(4, 8, 128).float()
+        dummy_length = (T.ones(4) * 8).long()
         output = layer(dummy_input, dummy_target, dummy_length)
-        assert output.data[0] == 1.0, "1.0 vs {}".format(output.data[0])
+        assert output.item() == 1.0, "1.0 vs {}".format(output.data[0])

-        dummy_input = T.autograd.Variable(T.ones(4, 8, 128).float())
-        dummy_target = T.autograd.Variable(T.zeros(4, 8, 128).float())
-        dummy_length = T.autograd.Variable((T.arange(5, 9)).long())
+        dummy_input = T.ones(4, 8, 128).float()
+        dummy_target = T.zeros(4, 8, 128).float()
+        dummy_length = (T.arange(5, 9)).long()
         mask = ((_sequence_mask(dummy_length).float() - 1.0)
                 * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
-        assert output.data[0] == 1.0, "1.0 vs {}".format(output.data[0])
+        assert output.item() == 1.0, "1.0 vs {}".format(output.data[0])
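
The last test perturbs only the padded time steps (everything past each sequence's length) by a large offset and asserts the loss is unchanged, which is exactly the guarantee L1LossMasked must provide. A minimal sketch of a sequence-mask helper in the spirit of _sequence_mask (hypothetical; the repo's implementation may differ):

    import torch as T

    def sequence_mask(lengths, max_len=None):
        # mask[i, t] is True while t < lengths[i], False on padded positions.
        max_len = max_len or int(lengths.max())
        steps = T.arange(max_len, dtype=lengths.dtype)
        return steps.unsqueeze(0) < lengths.unsqueeze(1)

    # sequence_mask(T.tensor([2, 4])) ->
    # [[True, True, False, False],
    #  [True, True, True,  True ]]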

train.py

@@ -12,7 +12,6 @@ import numpy as np
 import torch.nn as nn
 from torch import optim
-from torch.autograd import Variable
 from torch.utils.data import DataLoader
 from tensorboardX import SummaryWriter
@@ -94,41 +93,41 @@ def train(model, criterion, data_loader, optimizer, epoch):
         optimizer.zero_grad()

         # convert inputs to variables
-        text_input_var = Variable(text_input)
-        mel_spec_var = Variable(mel_input)
-        mel_lengths_var = Variable(mel_lengths)
-        linear_spec_var = Variable(linear_input, volatile=True)
+        text_input.requires_grad_()
+        mel_spec.requires_grad_()
+        # mel_lengths.requires_grad_()
+        # linear_spec.requires_grad_()

         # dispatch data to GPU
         if use_cuda:
-            text_input_var = text_input_var.cuda()
-            mel_spec_var = mel_spec_var.cuda()
-            mel_lengths_var = mel_lengths_var.cuda()
-            linear_spec_var = linear_spec_var.cuda()
+            text_input = text_input.cuda()
+            mel_spec = mel_spec.cuda()
+            mel_lengths = mel_lengths.cuda()
+            linear_spec = linear_spec.cuda()

         # create attention mask
         if c.mk > 0.0:
-            N = text_input_var.shape[1]
-            T = mel_spec_var.shape[1] // c.r
+            N = text_input.shape[1]
+            T = mel_spec.shape[1] // c.r
             M = create_attn_mask(N, T, 0.03)
             mk = mk_decay(c.mk, c.epochs, epoch)

         # forward pass
         mel_output, linear_output, alignments =\
-            model.forward(text_input_var, mel_spec_var)
+            model.forward(text_input, mel_spec)

         # loss computation
-        mel_loss = criterion(mel_output, mel_spec_var, mel_lengths_var)
-        linear_loss = 0.5 * criterion(linear_output, linear_spec_var, mel_lengths_var) \
+        mel_loss = criterion(mel_output, mel_spec, mel_lengths)
+        linear_loss = 0.5 * criterion(linear_output, linear_spec, mel_lengths) \
             + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
-                              linear_spec_var[:, :, :n_priority_freq],
-                              mel_lengths_var)
+                              linear_spec[:, :, :n_priority_freq],
+                              mel_lengths)
         loss = mel_loss + linear_loss
         if c.mk > 0.0:
-            attention_loss = criterion(alignments, M, mel_lengths_var)
+            attention_loss = criterion(alignments, M, mel_lengths)
             loss += mk * attention_loss
-            avg_attn_loss += attention_loss.data[0]
-            progbar_display['attn_loss'] = attention_loss.data[0]
+            avg_attn_loss += attention_loss.item()
+            progbar_display['attn_loss'] = attention_loss.item()

         # backpass and check the grad norm
         loss.backward()
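
Note the two commented-out lines above: requires_grad_() is the in-place toggle for autograd tracking, and it is only legal on floating-point tensors, so the integer mel_lengths cannot be marked; linear_spec is a target used only inside the loss, so it needs no gradient either. A small illustration of the rule (assuming PyTorch >= 0.4):

    import torch

    mel_spec = torch.rand(4, 80)
    mel_spec.requires_grad_()       # fine: float tensor, gradients tracked from here on
    mel_lengths = torch.ones(4).long()
    # mel_lengths.requires_grad_() # raises RuntimeError: only floating-point tensors
    #                              # can require gradients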
@@ -142,21 +141,21 @@ def train(model, criterion, data_loader, optimizer, epoch):
         step_time = time.time() - start_time
         epoch_time += step_time

-        progbar_display['total_loss'] = loss.data[0]
-        progbar_display['linear_loss'] = linear_loss.data[0]
-        progbar_display['mel_loss'] = mel_loss.data[0]
+        progbar_display['total_loss'] = loss.item()
+        progbar_display['linear_loss'] = linear_loss.item()
+        progbar_display['mel_loss'] = mel_loss.item()
         progbar_display['grad_norm'] = grad_norm

         # update
         progbar.update(num_iter+1, values=list(progbar_display.items()))
-        avg_linear_loss += linear_loss.data[0]
-        avg_mel_loss += mel_loss.data[0]
+        avg_linear_loss += linear_loss.item()
+        avg_mel_loss += mel_loss.item()

         # Plot Training Iter Stats
-        tb.add_scalar('TrainIterLoss/TotalLoss', loss.data[0], current_step)
-        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.data[0],
+        tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step)
+        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.item(),
                       current_step)
-        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.data[0], current_step)
+        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.item(), current_step)
         tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
                       current_step)
         tb.add_scalar('Params/GradNorm', grad_norm, current_step)
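
All the logging reads switch from .data[0] to .item(). Since PyTorch 0.4, reduced losses are 0-dimensional tensors: indexing them with [0] is deprecated and an error in later releases, while .item() returns a plain Python number. For example:

    import torch
    import torch.nn.functional as F

    loss = F.l1_loss(torch.ones(3), torch.zeros(3))
    print(loss.shape)   # torch.Size([]) -- a 0-dim tensor
    print(loss.item())  # 1.0, the supported scalar accessor
    # loss.data[0]      # IndexError on modern PyTorch: can't index a 0-dim tensor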
@@ -165,12 +164,12 @@ def train(model, criterion, data_loader, optimizer, epoch):
         if current_step % c.save_step == 0:
             if c.checkpoint:
                 # save model
-                save_checkpoint(model, optimizer, linear_loss.data[0],
+                save_checkpoint(model, optimizer, linear_loss.item(),
                                 OUT_PATH, current_step, epoch)

             # Diagnostic visualizations
             const_spec = linear_output[0].data.cpu().numpy()
-            gt_spec = linear_spec_var[0].data.cpu().numpy()
+            gt_spec = linear_spec[0].data.cpu().numpy()
             const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
             gt_spec = plot_spectrogram(gt_spec, data_loader.dataset.ap)
@@ -221,57 +220,52 @@ def evaluate(model, criterion, data_loader, current_step):
     print(" | > Validation")
     progbar = Progbar(len(data_loader.dataset) / c.batch_size)
     n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
-    for num_iter, data in enumerate(data_loader):
-        start_time = time.time()
+    with torch.no_grad():
+        for num_iter, data in enumerate(data_loader):
+            start_time = time.time()

-        # setup input data
-        text_input = data[0]
-        text_lengths = data[1]
-        linear_input = data[2]
-        mel_input = data[3]
-        mel_lengths = data[4]
+            # setup input data
+            text_input = data[0]
+            text_lengths = data[1]
+            linear_input = data[2]
+            mel_input = data[3]
+            mel_lengths = data[4]

-        # convert inputs to variables
-        text_input_var = Variable(text_input)
-        mel_spec_var = Variable(mel_input)
-        mel_lengths_var = Variable(mel_lengths)
-        linear_spec_var = Variable(linear_input, volatile=True)
-
-        # dispatch data to GPU
-        if use_cuda:
-            text_input_var = text_input_var.cuda()
-            mel_spec_var = mel_spec_var.cuda()
-            mel_lengths_var = mel_lengths_var.cuda()
-            linear_spec_var = linear_spec_var.cuda()
+            # dispatch data to GPU
+            if use_cuda:
+                text_input = text_input.cuda()
+                mel_spec = mel_spec.cuda()
+                mel_lengths = mel_lengths.cuda()
+                linear_spec = linear_spec.cuda()

-        # forward pass
-        mel_output, linear_output, alignments =\
-            model.forward(text_input_var, mel_spec_var)
+            # forward pass
+            mel_output, linear_output, alignments =\
+                model.forward(text_input, mel_spec)

-        # loss computation
-        mel_loss = criterion(mel_output, mel_spec_var, mel_lengths_var)
-        linear_loss = 0.5 * criterion(linear_output, linear_spec_var, mel_lengths_var) \
-            + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
-                              linear_spec_var[:, :, :n_priority_freq],
-                              mel_lengths_var)
-        loss = mel_loss + linear_loss
+            # loss computation
+            mel_loss = criterion(mel_output, mel_spec, mel_lengths)
+            linear_loss = 0.5 * criterion(linear_output, linear_spec, mel_lengths) \
+                + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
+                                  linear_spec[:, :, :n_priority_freq],
+                                  mel_lengths)
+            loss = mel_loss + linear_loss

-        step_time = time.time() - start_time
-        epoch_time += step_time
+            step_time = time.time() - start_time
+            epoch_time += step_time

-        # update
-        progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
-                                           ('linear_loss',
-                                            linear_loss.data[0]),
-                                           ('mel_loss', mel_loss.data[0])])
-        avg_linear_loss += linear_loss.data[0]
-        avg_mel_loss += mel_loss.data[0]
+            # update
+            progbar.update(num_iter+1, values=[('total_loss', loss.item()),
+                                               ('linear_loss',
+                                                linear_loss.item()),
+                                               ('mel_loss', mel_loss.item())])
+            avg_linear_loss += linear_loss.item()
+            avg_mel_loss += mel_loss.item()

-        # Diagnostic visualizations
-        idx = np.random.randint(mel_input.shape[0])
-        const_spec = linear_output[idx].data.cpu().numpy()
-        gt_spec = linear_spec_var[idx].data.cpu().numpy()
-        align_img = alignments[idx].data.cpu().numpy()
-        const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
+            # Diagnostic visualizations
+            idx = np.random.randint(mel_input.shape[0])
+            const_spec = linear_output[idx].data.cpu().numpy()
+            gt_spec = linear_spec[idx].data.cpu().numpy()
+            align_img = alignments[idx].data.cpu().numpy()
+            const_spec = plot_spectrogram(const_spec, data_loader.dataset.ap)
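
The structural change in evaluate() replaces the old volatile=True flag, which suppressed graph construction for inference, with a torch.no_grad() block around the whole validation loop, the PyTorch >= 0.4 idiom. A minimal sketch of the pattern:

    import torch

    model = torch.nn.Linear(10, 2)
    batch = torch.rand(4, 10)

    model.eval()                  # switch off dropout / batch-norm updates
    with torch.no_grad():         # replaces Variable(x, volatile=True)
        output = model(batch)     # no autograd graph is built
    print(output.requires_grad)   # False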