# TTS/tests/test_tacotron_model.py

import os
import copy
import torch
import unittest

from torch import optim
from torch import nn
from TTS.utils.io import load_config
from TTS.layers.losses import L1LossMasked
from TTS.models.tacotron import Tacotron

#pylint: disable=unused-variable

# Seed torch's RNG so the random tensors built in the tests are repeatable.
torch.manual_seed(1)

# Use the first CUDA device when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# The test configuration file sits in the same directory as this module.
file_path = os.path.dirname(os.path.realpath(__file__))
c = load_config(os.path.join(file_path, 'test_config.json'))


def count_parameters(model):
    r"""Return the total number of elements in the trainable parameters.

    Iterates over ``model.parameters()`` and adds up ``numel()`` for every
    parameter whose ``requires_grad`` flag is set; other parameters are
    skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


class TacotronTrainTest(unittest.TestCase):
    """Smoke test: a few optimisation steps must change every parameter."""

    @staticmethod
    def test_train_step():
        """Run 5 Adam steps on random data and check all parameters moved.

        A batch of 8 random character sequences, spectrograms and speaker
        ids is fed to a multi-speaker ``Tacotron``. The loss is the sum of
        the masked L1 losses on the mel and linear outputs plus a
        BCE-with-logits loss on the stop tokens. Afterwards every parameter
        is compared with a deep copy taken before training.
        """
        input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
        # Guarantee that one sequence spans the full padded input width.
        input_lengths[-1] = 128
        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
        linear_spec = torch.rand(8, 30, c.audio['num_freq']).to(device)
        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
        stop_targets = torch.zeros(8, 30, 1).float().to(device)
        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)

        # Mark the frames at or beyond each sample's OWN length as stop
        # frames (indexing every row here would give all samples the
        # shortest length in the batch).
        for sample_idx, mel_length in enumerate(mel_lengths):
            stop_targets[sample_idx, int(mel_length.item()):, 0] = 1.0

        # Group the frames into chunks of ``c.r``; a chunk is a stop step
        # as soon as any of its frames is a stop frame.
        stop_targets = stop_targets.view(input_dummy.shape[0],
                                         stop_targets.size(1) // c.r, -1)
        stop_targets = (stop_targets.sum(2) > 0.0).float()

        criterion = L1LossMasked(seq_len_norm=False).to(device)
        criterion_st = nn.BCEWithLogitsLoss().to(device)
        model = Tacotron(
            num_chars=32,
            num_speakers=5,
            postnet_output_dim=c.audio['num_freq'],
            decoder_output_dim=c.audio['num_mels'],
            r=c.r,
            memory_size=c.memory_size
        ).to(device)
        model.train()
        print(" > Num parameters for Tacotron model:%s" %
              (count_parameters(model)))
        model_ref = copy.deepcopy(model)
        # Sanity check: the reference copy must start out element-wise
        # identical (a summed difference could cancel out and hide a
        # mismatch).
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert torch.equal(param, param_ref), param
        optimizer = optim.Adam(model.parameters(), lr=c.lr)
        for _ in range(5):
            mel_out, linear_out, _, stop_tokens = model(
                input_dummy, input_lengths, mel_spec, speaker_ids)
            optimizer.zero_grad()
            loss = criterion(mel_out, mel_spec, mel_lengths)
            stop_loss = criterion_st(stop_tokens, stop_targets)
            loss = loss + criterion(linear_out, linear_spec,
                                    mel_lengths) + stop_loss
            loss.backward()
            optimizer.step()
        # Every parameter must differ from its pre-training value in at
        # least one element.
        for count, (param, param_ref) in enumerate(
                zip(model.parameters(), model_ref.parameters())):
            assert (param != param_ref).any(
            ), "param {} with shape {} not updated!! \n{}\n{}".format(
                count, param.shape, param, param_ref)


class TacotronGSTTrainTest(unittest.TestCase):
    """Smoke test: training the GST variant must change every parameter."""

    @staticmethod
    def test_train_step():
        """Run 10 Adam steps on random data and check all parameters moved.

        Same procedure as the plain Tacotron train test, but the model is
        built with ``gst=True`` and the spectrograms are 120 frames long.
        The loss is the sum of the masked L1 losses on the mel and linear
        outputs plus a BCE-with-logits loss on the stop tokens. Afterwards
        every parameter is compared with a deep copy taken before training.
        """
        input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
        # Guarantee that one sequence spans the full padded input width.
        input_lengths[-1] = 128
        mel_spec = torch.rand(8, 120, c.audio['num_mels']).to(device)
        linear_spec = torch.rand(8, 120, c.audio['num_freq']).to(device)
        mel_lengths = torch.randint(20, 120, (8, )).long().to(device)
        stop_targets = torch.zeros(8, 120, 1).float().to(device)
        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)

        # Mark the frames at or beyond each sample's OWN length as stop
        # frames (indexing every row here would give all samples the
        # shortest length in the batch).
        for sample_idx, mel_length in enumerate(mel_lengths):
            stop_targets[sample_idx, int(mel_length.item()):, 0] = 1.0

        # Group the frames into chunks of ``c.r``; a chunk is a stop step
        # as soon as any of its frames is a stop frame.
        stop_targets = stop_targets.view(input_dummy.shape[0],
                                         stop_targets.size(1) // c.r, -1)
        stop_targets = (stop_targets.sum(2) > 0.0).float()

        criterion = L1LossMasked(seq_len_norm=False).to(device)
        criterion_st = nn.BCEWithLogitsLoss().to(device)
        model = Tacotron(
            num_chars=32,
            num_speakers=5,
            gst=True,
            postnet_output_dim=c.audio['num_freq'],
            decoder_output_dim=c.audio['num_mels'],
            r=c.r,
            memory_size=c.memory_size
        ).to(device)
        model.train()
        print(model)
        print(" > Num parameters for Tacotron GST model:%s" %
              (count_parameters(model)))
        model_ref = copy.deepcopy(model)
        # Sanity check: the reference copy must start out element-wise
        # identical (a summed difference could cancel out and hide a
        # mismatch).
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert torch.equal(param, param_ref), param
        optimizer = optim.Adam(model.parameters(), lr=c.lr)
        for _ in range(10):
            mel_out, linear_out, _, stop_tokens = model(
                input_dummy, input_lengths, mel_spec, speaker_ids)
            optimizer.zero_grad()
            loss = criterion(mel_out, mel_spec, mel_lengths)
            stop_loss = criterion_st(stop_tokens, stop_targets)
            loss = loss + criterion(linear_out, linear_spec,
                                    mel_lengths) + stop_loss
            loss.backward()
            optimizer.step()
        # Every parameter must differ from its pre-training value in at
        # least one element.
        for count, (param, param_ref) in enumerate(
                zip(model.parameters(), model_ref.parameters())):
            assert (param != param_ref).any(
            ), "param {} with shape {} not updated!! \n{}\n{}".format(
                count, param.shape, param, param_ref)
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`import os`
			`import copy`
			`import torch`
			`import unittest`

			`from torch import optim`
			`from torch import nn`
add tf tacotron2 test and edit test utils imports after utils refactoring 2020-05-18 09:34:13 +00:00			`from TTS.utils.io import load_config`
Fix installation by using an explicit symlink 2019-08-29 09:49:53 +00:00			`from TTS.layers.losses import L1LossMasked`
			`from TTS.models.tacotron import Tacotron`
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00
Address even more lint problems 2019-07-19 09:48:12 +00:00			`#pylint: disable=unused-variable`

Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`torch.manual_seed(1)`
			`use_cuda = torch.cuda.is_available()`
			`device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")`

			`file_path = os.path.dirname(os.path.realpath(__file__))`
			`c = load_config(os.path.join(file_path, 'test_config.json'))`


Set tacotron model parameters to adap to common_layers.py - Prenet and Attention 2019-05-27 12:40:28 +00:00			`def count_parameters(model):`
			`r"""Count number of trainable parameters in a network"""`
			`return sum(p.numel() for p in model.parameters() if p.requires_grad)`


formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`class TacotronTrainTest(unittest.TestCase):`
			`@staticmethod`
			`def test_train_step():`
			`input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)`
			`input_lengths = torch.randint(100, 129, (8, )).long().to(device)`
			`input_lengths[-1] = 128`
			`mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)`
			`linear_spec = torch.rand(8, 30, c.audio['num_freq']).to(device)`
			`mel_lengths = torch.randint(20, 30, (8, )).long().to(device)`
			`stop_targets = torch.zeros(8, 30, 1).float().to(device)`
			`speaker_ids = torch.randint(0, 5, (8, )).long().to(device)`

			`for idx in mel_lengths:`
			`stop_targets[:, int(idx.item()):, 0] = 1.0`

			`stop_targets = stop_targets.view(input_dummy.shape[0],`
			`stop_targets.size(1) // c.r, -1)`
			`stop_targets = (stop_targets.sum(2) >`
			`0.0).unsqueeze(2).float().squeeze()`

test updates 2020-01-15 22:10:11 +00:00			`criterion = L1LossMasked(seq_len_norm=False).to(device)`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`criterion_st = nn.BCEWithLogitsLoss().to(device)`
			`model = Tacotron(`
			`num_chars=32,`
			`num_speakers=5,`
bug fixes, linter update and test updates 2019-10-29 13:28:49 +00:00			`postnet_output_dim=c.audio['num_freq'],`
			`decoder_output_dim=c.audio['num_mels'],`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`r=c.r,`
			`memory_size=c.memory_size`
			`).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor`
			`model.train()`
			`print(" > Num parameters for Tacotron model:%s" %`
			`(count_parameters(model)))`
			`model_ref = copy.deepcopy(model)`
			`count = 0`
			`for param, param_ref in zip(model.parameters(),`
			`model_ref.parameters()):`
			`assert (param - param_ref).sum() == 0, param`
			`count += 1`
			`optimizer = optim.Adam(model.parameters(), lr=c.lr)`
			`for _ in range(5):`
			`mel_out, linear_out, align, stop_tokens = model.forward(`
			`input_dummy, input_lengths, mel_spec, speaker_ids)`
			`optimizer.zero_grad()`
			`loss = criterion(mel_out, mel_spec, mel_lengths)`
			`stop_loss = criterion_st(stop_tokens, stop_targets)`
			`loss = loss + criterion(linear_out, linear_spec,`
			`mel_lengths) + stop_loss`
			`loss.backward()`
			`optimizer.step()`
			`# check parameter changes`
			`count = 0`
			`for param, param_ref in zip(model.parameters(),`
			`model_ref.parameters()):`
			`# ignore pre-higway layer since it works conditional`
			`# if count not in [145, 59]:`
			`assert (param != param_ref).any(`
			`), "param {} with shape {} not updated!! \n{}\n{}".format(`
			`count, param.shape, param, param_ref)`
			`count += 1`
update TacotronGST and its test. Inherit it from Tacotron class 2019-09-12 21:06:59 +00:00

			`class TacotronGSTTrainTest(unittest.TestCase):`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`@staticmethod`
			`def test_train_step():`
			`input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)`
bug fix for tacotron and tests update 2019-03-06 12:43:29 +00:00			`input_lengths = torch.randint(100, 129, (8, )).long().to(device)`
test bug fix 2019-03-12 08:52:01 +00:00			`input_lengths[-1] = 128`
update TacotronGST and its test. Inherit it from Tacotron class 2019-09-12 21:06:59 +00:00			`mel_spec = torch.rand(8, 120, c.audio['num_mels']).to(device)`
			`linear_spec = torch.rand(8, 120, c.audio['num_freq']).to(device)`
			`mel_lengths = torch.randint(20, 120, (8, )).long().to(device)`
			`stop_targets = torch.zeros(8, 120, 1).float().to(device)`
fix unittests for the latest updates 2019-07-19 09:12:48 +00:00			`speaker_ids = torch.randint(0, 5, (8, )).long().to(device)`
pep8 format all 2018-08-02 14:34:17 +00:00
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`for idx in mel_lengths:`
			`stop_targets[:, int(idx.item()):, 0] = 1.0`
pep8 format all 2018-08-02 14:34:17 +00:00
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`stop_targets = stop_targets.view(input_dummy.shape[0],`
pep8 format all 2018-08-02 14:34:17 +00:00			`stop_targets.size(1) // c.r, -1)`
Set tacotron model parameters to adap to common_layers.py - Prenet and Attention 2019-05-27 12:40:28 +00:00			`stop_targets = (stop_targets.sum(2) >`
			`0.0).unsqueeze(2).float().squeeze()`
pep8 format all 2018-08-02 14:34:17 +00:00
test updates 2020-01-15 22:10:11 +00:00			`criterion = L1LossMasked(seq_len_norm=False).to(device)`
Update tests 2019-03-25 23:48:35 +00:00			`criterion_st = nn.BCEWithLogitsLoss().to(device)`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`model = Tacotron(`
fix argument order for testing models 2019-07-19 09:49:03 +00:00			`num_chars=32,`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`num_speakers=5,`
			`gst=True,`
bug fixes, linter update and test updates 2019-10-29 13:28:49 +00:00			`postnet_output_dim=c.audio['num_freq'],`
			`decoder_output_dim=c.audio['num_mels'],`
Set tacotron model parameters to adap to common_layers.py - Prenet and Attention 2019-05-27 12:40:28 +00:00			`r=c.r,`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`memory_size=c.memory_size`
			`).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor`
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`model.train()`
update TacotronGST and its test. Inherit it from Tacotron class 2019-09-12 21:06:59 +00:00			`print(model)`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`print(" > Num parameters for Tacotron GST model:%s" %`
			`(count_parameters(model)))`
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`model_ref = copy.deepcopy(model)`
			`count = 0`
pep8 format all 2018-08-02 14:34:17 +00:00			`for param, param_ref in zip(model.parameters(),`
			`model_ref.parameters()):`
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`assert (param - param_ref).sum() == 0, param`
			`count += 1`
			`optimizer = optim.Adam(model.parameters(), lr=c.lr)`
update TacotronGST and its test. Inherit it from Tacotron class 2019-09-12 21:06:59 +00:00			`for _ in range(10):`
pep8 format all 2018-08-02 14:34:17 +00:00			`mel_out, linear_out, align, stop_tokens = model.forward(`
formatting, merge GST model with Tacotron 2019-09-24 14:18:48 +00:00			`input_dummy, input_lengths, mel_spec, speaker_ids)`
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`optimizer.zero_grad()`
pep8 format all 2018-08-02 14:34:17 +00:00			`loss = criterion(mel_out, mel_spec, mel_lengths)`
add stop token to tacotron testing 2018-04-30 13:01:02 +00:00			`stop_loss = criterion_st(stop_tokens, stop_targets)`
pep8 format all 2018-08-02 14:34:17 +00:00			`loss = loss + criterion(linear_out, linear_spec,`
			`mel_lengths) + stop_loss`
Loss bug fix - target_flat vs target 2018-05-10 22:59:05 +00:00			`loss.backward()`
			`optimizer.step()`
			`# check parameter changes`
			`count = 0`
pep8 format all 2018-08-02 14:34:17 +00:00			`for param, param_ref in zip(model.parameters(),`
			`model_ref.parameters()):`
			`# ignore pre-higway layer since it works conditional`
Formating and printing more about the model 2018-08-08 16:45:02 +00:00			`assert (param != param_ref).any(`
			`), "param {} with shape {} not updated!! \n{}\n{}".format(`
			`count, param.shape, param, param_ref)`
Fix Pylint issues 2019-07-19 06:46:23 +00:00			`count += 1`