TTS/utils/generic_utils.py

import os
import sys
import glob
import time
import shutil
import datetime
import json
import torch
import subprocess
import numpy as np
from collections import OrderedDict
from torch.autograd import Variable
from utils.text import text_to_sequence


class AttrDict(dict):
    """Dictionary whose entries are also reachable as attributes.

    The instance namespace is aliased to the mapping itself, so ``d.key``
    and ``d['key']`` always read and write the same entry.
    """

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Share one storage between attribute access and item access.
        self.__dict__ = self


def load_config(config_path):
    """Load a JSON configuration file into an ``AttrDict``.

    Args:
        config_path: path of the JSON file to read.

    Returns:
        AttrDict: the top-level keys of the JSON document, accessible both
        as items and as attributes.
    """
    config = AttrDict()
    # The context manager closes the handle even when JSON parsing fails.
    with open(config_path, "r") as config_file:
        config.update(json.load(config_file))
    return config


def get_commit_hash():
    """Return the short git hash of HEAD, refusing to run on a dirty tree.

    Adapted from
    https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script

    Returns:
        str: output of ``git rev-parse --short HEAD`` with whitespace
        stripped.

    Raises:
        RuntimeError: if ``git diff-index --quiet HEAD`` exits with a
            non-zero status or the git executable cannot be launched.
    """
    try:
        # Verify client is clean: a non-zero exit status raises.
        subprocess.check_output(['git', 'diff-index', '--quiet', 'HEAD'])
    except (subprocess.CalledProcessError, OSError) as err:
        # Only git failures are translated; KeyboardInterrupt and
        # SystemExit are no longer swallowed by a bare ``except``.
        raise RuntimeError(
            " !! Commit before training to get the commit hash.") from err
    commit = subprocess.check_output(['git', 'rev-parse', '--short',
                                      'HEAD']).decode().strip()
    print(' > Git Hash: {}'.format(commit))
    return commit


def create_experiment_folder(root_path, model_name, debug):
    """Create the output directory of a run and return its path.

    The directory name is ``<timestamp>-<model_name>-<tag>`` where the tag is
    the literal ``debug`` when ``debug`` is truthy and the value returned by
    ``get_commit_hash()`` otherwise.

    Args:
        root_path: parent directory of the run folder.
        model_name: name embedded in the folder name.
        debug: when truthy, skip the git lookup.

    Returns:
        str: path of the (possibly already existing) folder.
    """
    timestamp = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p")
    tag = 'debug' if debug else get_commit_hash()
    folder_name = '-'.join([timestamp, model_name, tag])
    output_folder = os.path.join(root_path, folder_name)
    os.makedirs(output_folder, exist_ok=True)
    print(" > Experiment folder: {}".format(output_folder))
    return output_folder


def remove_experiment_folder(experiment_path):
    """Delete a run folder unless it already holds a ``*.pth.tar`` file.

    Args:
        experiment_path: directory of the run to inspect.
    """
    if glob.glob(experiment_path + "/*.pth.tar"):
        # At least one checkpoint was written: leave everything in place.
        print(" ! Run is kept in {}".format(experiment_path))
        return

    if not os.path.exists(experiment_path):
        return

    shutil.rmtree(experiment_path)
    print(" ! Run is removed from {}".format(experiment_path))


def copy_config_file(config_file, path):
    """Copy ``config_file`` into the directory ``path`` under the same name.

    Args:
        config_file: path of the file to copy.
        path: destination directory.
    """
    destination = os.path.join(path, os.path.basename(config_file))
    shutil.copyfile(config_file, destination)


def _trim_model_state_dict(state_dict):
    r"""Remove the 'module.' prefix from the keys of a state dictionary.

    It is necessary when the dictionary is loaded the next time into a model
    that is not wrapped, which otherwise complains about the prefixed names.

    Args:
        state_dict: mapping from parameter name to value.

    Returns:
        OrderedDict: same values in the same order; keys that start with
        ``module.`` lose that prefix, all other keys are kept unchanged.
    """
    prefix = 'module.'
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        # Only strip when the prefix is really there; slicing blindly would
        # corrupt the names of an unwrapped model.
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v
    return new_state_dict


def save_checkpoint(model, optimizer, optimizer_st, model_loss, out_path,
                    current_step, epoch):
    """Serialize the training state to ``checkpoint_<step>.pth.tar``.

    Args:
        model: object whose ``state_dict()`` is stored under ``'model'``.
        optimizer: object whose ``state_dict()`` is stored under
            ``'optimizer'``.
        optimizer_st: object whose ``state_dict()`` is stored under
            ``'optimizer_st'``.
        model_loss: value stored under ``'linear_loss'``.
        out_path: directory receiving the checkpoint file.
        current_step: step number, used in the file name and stored under
            ``'step'``.
        epoch: value stored under ``'epoch'``.
    """
    file_name = 'checkpoint_{}.pth.tar'.format(current_step)
    target = os.path.join(out_path, file_name)
    print(" | | > Checkpoint saving : {}".format(target))

    snapshot = {}
    snapshot['model'] = model.state_dict()
    snapshot['optimizer'] = optimizer.state_dict()
    snapshot['optimizer_st'] = optimizer_st.state_dict()
    snapshot['step'] = current_step
    snapshot['epoch'] = epoch
    snapshot['linear_loss'] = model_loss
    snapshot['date'] = datetime.date.today().strftime("%B %d, %Y")
    torch.save(snapshot, target)


def save_best_model(model, optimizer, model_loss, best_loss, out_path,
                    current_step, epoch):
    """Write ``best_model.pth.tar`` when the loss beats the best so far.

    Args:
        model: object whose ``state_dict()`` is stored under ``'model'``.
        optimizer: object whose ``state_dict()`` is stored under
            ``'optimizer'``.
        model_loss: loss of the current model.
        best_loss: lowest loss seen so far.
        out_path: directory receiving the file.
        current_step: value stored under ``'step'``.
        epoch: value stored under ``'epoch'``.

    Returns:
        ``model_loss`` when it is strictly lower than ``best_loss`` (and the
        model was saved), otherwise ``best_loss`` unchanged.
    """
    if not (model_loss < best_loss):
        # No improvement: nothing is written.
        return best_loss

    snapshot = {}
    snapshot['model'] = model.state_dict()
    snapshot['optimizer'] = optimizer.state_dict()
    snapshot['step'] = current_step
    snapshot['epoch'] = epoch
    snapshot['linear_loss'] = model_loss
    snapshot['date'] = datetime.date.today().strftime("%B %d, %Y")

    target = os.path.join(out_path, 'best_model.pth.tar')
    print(" | > Best model saving with loss {0:.2f} : {1:}".format(
        model_loss, target))
    torch.save(snapshot, target)
    return model_loss


def check_update(model, grad_clip, grad_top):
    r'''Clip the model gradients and flag updates that should be skipped.

    Args:
        model: object whose ``parameters()`` gradients are clipped in place
            by ``torch.nn.utils.clip_grad_norm_``.
        grad_clip: maximum norm handed to ``clip_grad_norm_``.
        grad_top: norm above which the update is flagged.

    Returns:
        tuple: ``(grad_norm, skip_flag)``. ``grad_norm`` is whatever
        ``clip_grad_norm_`` returned; ``skip_flag`` is True when that norm
        is infinite, NaN or greater than ``grad_top``.
    '''
    skip_flag = False
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    # The norm may come back as a tensor (possibly on a GPU); test a plain
    # float so numpy never has to convert a device tensor.
    norm_value = float(grad_norm)
    if np.isinf(norm_value):
        print(" | > Gradient is INF !!")
        skip_flag = True
    elif np.isnan(norm_value):
        # NaN compares False against grad_top, so it needs its own branch.
        print(" | > Gradient is NaN !!")
        skip_flag = True
    elif norm_value > grad_top:
        print(" | > Gradient is above the top limit !!")
        skip_flag = True
    return grad_norm, skip_flag


def lr_decay(init_lr, global_step, warmup_steps):
    r'''Learning rate for ``global_step`` under a warm-up schedule.

    from https://github.com/r9y9/tacotron_pytorch/blob/master/train.py

    The rate is ``init_lr * warmup_steps**0.5`` times the smaller of
    ``step * warmup_steps**-1.5`` and ``step**-0.5``, with
    ``step = global_step + 1``.

    Args:
        init_lr: base learning rate.
        global_step: zero-based training step.
        warmup_steps: number of warm-up steps.

    Returns:
        The learning rate for the given step.
    '''
    warmup = float(warmup_steps)
    step = global_step + 1.
    warmup_term = step * warmup**-1.5
    decay_term = step**-0.5
    return init_lr * warmup**0.5 * np.minimum(warmup_term, decay_term)


def mk_decay(init_mk, max_epoch, n_epoch):
    """Scale ``init_mk`` by the fraction of epochs still remaining.

    Args:
        init_mk: initial value.
        max_epoch: total number of epochs.
        n_epoch: current epoch.

    Returns:
        ``init_mk * (max_epoch - n_epoch) / max_epoch``.
    """
    remaining_fraction = (max_epoch - n_epoch) / max_epoch
    return init_mk * remaining_fraction


def count_parameters(model):
    r"""Count the elements of all parameters that require gradients.

    Args:
        model: object exposing ``parameters()``.

    Returns:
        int: sum of ``numel()`` over the parameters whose ``requires_grad``
        is truthy.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


# from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1
def sequence_mask(sequence_length, max_len=None):
    """Build a mask that is true for the positions inside each sequence.

    Args:
        sequence_length: 1-D tensor holding one length per batch item.
        max_len: number of mask columns; defaults to the largest value in
            ``sequence_length``.

    Returns:
        Tensor of shape ``(batch, max_len)`` on the same device as
        ``sequence_length``; entry ``[b, t]`` is true when
        ``t < sequence_length[b]``.
    """
    if max_len is None:
        max_len = sequence_length.max()
    # A plain int keeps ``torch.arange`` independent of 0-dim tensor inputs.
    max_len = int(max_len)
    # Create the range directly on the input's device; ``.cuda()`` would
    # target the current default GPU, which may be a different one.
    seq_range = torch.arange(0, max_len, dtype=torch.long,
                             device=sequence_length.device)
    # Broadcasting (1, max_len) against (batch, 1) yields (batch, max_len).
    return seq_range.unsqueeze(0) < sequence_length.unsqueeze(1)


def synthesis(model, ap, text, use_cuda, text_cleaner):
    """Run the model on one text and invert its output to a waveform.

    Args:
        model: object whose ``forward`` returns a 4-tuple; the second item is
            used as the linear output and the third as the alignments.
        ap: object providing ``inv_spectrogram``.
        text: input text handed to ``text_to_sequence``.
        use_cuda: when truthy, move the input tensor to the GPU.
        text_cleaner: cleaner name; wrapped in a one-element list for
            ``text_to_sequence``.

    Returns:
        tuple: ``(wav, linear_out, alignments)`` where ``linear_out`` is the
        first batch item of the linear output as a numpy array and ``wav``
        is ``ap.inv_spectrogram(linear_out.T)``.
    """
    seq = np.array(text_to_sequence(text, [text_cleaner]))
    # Cast to int64 on every device: the cast used to happen on the CUDA
    # path only, leaving the CPU dtype to numpy's platform-dependent default.
    chars_var = torch.from_numpy(seq).unsqueeze(0).long()
    if use_cuda:
        chars_var = chars_var.cuda()
    _, linear_out, alignments, _ = model.forward(chars_var)
    linear_out = linear_out[0].data.cpu().numpy()
    wav = ap.inv_spectrogram(linear_out.T)
    return wav, linear_out, alignments
Beginning 2018-01-22 09:48:59 +00:00			`import os`
			`import sys`
			`import glob`
			`import time`
			`import shutil`
			`import datetime`
Change config to json 2018-01-22 16:20:20 +00:00			`import json`
Checkpoint fix 2018-01-26 10:07:07 +00:00			`import torch`
get git hash 2018-04-06 10:53:49 +00:00			`import subprocess`
Beginning 2018-01-22 09:48:59 +00:00			`import numpy as np`
Harmonized teacher-forcing 2018-02-26 13:33:54 +00:00			`from collections import OrderedDict`
bug fix 2018-04-25 12:38:23 +00:00			`from torch.autograd import Variable`
Perform testing on example sentences 2018-07-20 11:10:25 +00:00			`from utils.text import text_to_sequence`
Beginning 2018-01-22 09:48:59 +00:00

Change config to json 2018-01-22 16:20:20 +00:00			`class AttrDict(dict):`
			`def __init__(self, *args, **kwargs):`
			`super(AttrDict, self).__init__(*args, **kwargs)`
			`self.__dict__ = self`


			`def load_config(config_path):`
			`config = AttrDict()`
			`config.update(json.load(open(config_path, "r")))`
pep8 check 2018-04-03 10:24:57 +00:00			`return config`
Change config to json 2018-01-22 16:20:20 +00:00

get git hash 2018-04-06 10:53:49 +00:00			`def get_commit_hash():`
			`"""https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script"""`
get git hash 2 2018-04-06 10:55:44 +00:00			`try:`
pep8 format all 2018-08-02 14:34:17 +00:00			`subprocess.check_output(['git', 'diff-index', '--quiet',`
			`'HEAD']) # Verify client is clean`
get git hash 2 2018-04-06 10:55:44 +00:00			`except:`
pep8 format all 2018-08-02 14:34:17 +00:00			`raise RuntimeError(`
			`" !! Commit before training to get the commit hash.")`
			`commit = subprocess.check_output(['git', 'rev-parse', '--short',`
			`'HEAD']).decode().strip()`
get git hash 2018-04-06 10:53:49 +00:00			`print(' > Git Hash: {}'.format(commit))`
			`return commit`


Allow debug runand don force git hash 2018-05-11 10:49:55 +00:00			`def create_experiment_folder(root_path, model_name, debug):`
Beginning 2018-01-22 09:48:59 +00:00			`""" Create a folder with the current date and time """`
Bug fixes 2018-07-13 13:24:50 +00:00			`date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p")`
Allow debug runand don force git hash 2018-05-11 10:49:55 +00:00			`if debug:`
			`commit_hash = 'debug'`
Remove preemphasis from audio processing 2018-07-13 12:56:05 +00:00			`else:`
Allow debug runand don force git hash 2018-05-11 10:49:55 +00:00			`commit_hash = get_commit_hash()`
pep8 format all 2018-08-02 14:34:17 +00:00			`output_folder = os.path.join(`
			`root_path, date_str + '-' + model_name + '-' + commit_hash)`
Beginning 2018-01-22 09:48:59 +00:00			`os.makedirs(output_folder, exist_ok=True)`
			`print(" > Experiment folder: {}".format(output_folder))`
			`return output_folder`


			`def remove_experiment_folder(experiment_path):`
			`"""Check folder if there is a checkpoint, otherwise remove the folder"""`

pep8 format all 2018-08-02 14:34:17 +00:00			`checkpoint_files = glob.glob(experiment_path + "/*.pth.tar")`
Change config to json 3 2018-01-22 16:29:27 +00:00			`if len(checkpoint_files) < 1:`
fix handling CTRL C 2018-01-24 16:17:49 +00:00			`if os.path.exists(experiment_path):`
			`shutil.rmtree(experiment_path)`
			`print(" ! Run is removed from {}".format(experiment_path))`
Beginning 2018-01-22 09:48:59 +00:00			`else:`
			`print(" ! Run is kept in {}".format(experiment_path))`


			`def copy_config_file(config_file, path):`
			`config_name = os.path.basename(config_file)`
			`out_path = os.path.join(path, config_name)`
			`shutil.copyfile(config_file, out_path)`


Remove DataParallel from the model state before saving 2018-02-21 15:03:53 +00:00			`def _trim_model_state_dict(state_dict):`
			`r"""Remove 'module.' prefix from state dictionary. It is necessary as it`
			`is loded for the next time by model.load_state(). Otherwise, it complains`
			`about the torch.DataParallel()"""`

			`new_state_dict = OrderedDict()`
			`for k, v in state_dict.items():`
pep8 check 2018-04-03 10:24:57 +00:00			name = k[7:] # remove `module.`
Remove DataParallel from the model state before saving 2018-02-21 15:03:53 +00:00			`new_state_dict[name] = v`
			`return new_state_dict`


Checkpoint stop token optimizer 2018-07-20 10:23:44 +00:00			`def save_checkpoint(model, optimizer, optimizer_st, model_loss, out_path,`
updates and debugs 2018-02-13 09:45:52 +00:00			`current_step, epoch):`
small bug fixes 2018-02-09 13:39:58 +00:00			`checkpoint_path = 'checkpoint_{}.pth.tar'.format(current_step)`
			`checkpoint_path = os.path.join(out_path, checkpoint_path)`
logging change 2018-07-18 12:31:09 +00:00			`print(" \| \| > Checkpoint saving : {}".format(checkpoint_path))`
Remove DataParallel from the model state before saving 2018-02-21 15:03:53 +00:00
Dont tim model param names since we don't use DataPatallel initilization 2018-08-10 15:49:21 +00:00			`new_state_dict = model.state_dict()`
pep8 format all 2018-08-02 14:34:17 +00:00			`state = {`
			`'model': new_state_dict,`
			`'optimizer': optimizer.state_dict(),`
			`'optimizer_st': optimizer_st.state_dict(),`
			`'step': current_step,`
			`'epoch': epoch,`
			`'linear_loss': model_loss,`
			`'date': datetime.date.today().strftime("%B %d, %Y")`
			`}`
small bug fixes 2018-02-09 13:39:58 +00:00			`torch.save(state, checkpoint_path)`
updates and debugs 2018-02-13 09:45:52 +00:00

			`def save_best_model(model, optimizer, model_loss, best_loss, out_path,`
			`current_step, epoch):`
small bug fixes 2018-02-09 13:39:58 +00:00			`if model_loss < best_loss:`
Dont tim model param names since we don't use DataPatallel initilization 2018-08-10 15:49:21 +00:00			`new_state_dict = model.state_dict()`
pep8 format all 2018-08-02 14:34:17 +00:00			`state = {`
			`'model': new_state_dict,`
			`'optimizer': optimizer.state_dict(),`
			`'step': current_step,`
			`'epoch': epoch,`
			`'linear_loss': model_loss,`
			`'date': datetime.date.today().strftime("%B %d, %Y")`
			`}`
small bug fixes 2018-02-09 13:39:58 +00:00			`best_loss = model_loss`
updates and debugs 2018-02-13 09:45:52 +00:00			`bestmodel_path = 'best_model.pth.tar'`
small bug fixes 2018-02-09 13:39:58 +00:00			`bestmodel_path = os.path.join(out_path, bestmodel_path)`
bug fix on training avg loss printing and computing 2018-04-12 12:57:52 +00:00			`print(" \| > Best model saving with loss {0:.2f} : {1:}".format(`
pep8 check 2018-04-03 10:24:57 +00:00			`model_loss, bestmodel_path))`
small bug fixes 2018-02-09 13:39:58 +00:00			`torch.save(state, bestmodel_path)`
			`return best_loss`
Beginning 2018-01-22 09:48:59 +00:00

check gradients for big errorenous changes 2018-02-27 15:31:07 +00:00			`def check_update(model, grad_clip, grad_top):`
			`r'''Check model gradient against unexpected jumps and failures'''`
			`skip_flag = False`
grad norm with new pytorch function 2018-05-10 23:25:48 +00:00			`grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)`
check gradients for big errorenous changes 2018-02-27 15:31:07 +00:00			`if np.isinf(grad_norm):`
			`print(" \| > Gradient is INF !!")`
			`skip_flag = True`
			`elif grad_norm > grad_top:`
			`print(" \| > Gradient is above the top limit !!")`
			`skip_flag = True`
			`return grad_norm, skip_flag`


			`def lr_decay(init_lr, global_step, warmup_steps):`
			`r'''from https://github.com/r9y9/tacotron_pytorch/blob/master/train.py'''`
get git hash 2018-04-06 10:53:49 +00:00			`warmup_steps = float(warmup_steps)`
new lr schedule 2018-02-01 16:26:40 +00:00			`step = global_step + 1.`
			`lr = init_lr * warmup_steps**0.5 * np.minimum(step * warmup_steps**-1.5,`
			`step**-0.5)`
			`return lr`

Count total number of model parameters 2018-02-23 14:20:22 +00:00
add mk annealing (mk attn loss contribution) 2018-04-25 12:36:00 +00:00			`def mk_decay(init_mk, max_epoch, n_epoch):`
			`return init_mk * ((max_epoch - n_epoch) / max_epoch)`


Count total number of model parameters 2018-02-23 14:20:22 +00:00			`def count_parameters(model):`
			`r"""Count number of trainable parameters in a network"""`
			`return sum(p.numel() for p in model.parameters() if p.requires_grad)`


Remove preemphasis from audio processing 2018-07-13 12:56:05 +00:00			`# from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1`
			`def sequence_mask(sequence_length, max_len=None):`
			`if max_len is None:`
			`max_len = sequence_length.data.max()`
			`batch_size = sequence_length.size(0)`
			`seq_range = torch.arange(0, max_len).long()`
			`seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)`
			`if sequence_length.is_cuda:`
			`seq_range_expand = seq_range_expand.cuda()`
			`seq_length_expand = (sequence_length.unsqueeze(1)`
			`.expand_as(seq_range_expand))`
			`return seq_range_expand < seq_length_expand`


Perform testing on example sentences 2018-07-20 11:10:25 +00:00			`def synthesis(model, ap, text, use_cuda, text_cleaner):`
pep8 format all 2018-08-02 14:34:17 +00:00			`text_cleaner = [text_cleaner]`
			`seq = np.array(text_to_sequence(text, text_cleaner))`
			`chars_var = torch.from_numpy(seq).unsqueeze(0)`
			`if use_cuda:`
			`chars_var = chars_var.cuda().long()`
			`_, linear_out, alignments, _ = model.forward(chars_var)`
			`linear_out = linear_out[0].data.cpu().numpy()`
			`wav = ap.inv_spectrogram(linear_out.T)`
			`return wav, linear_out, alignments`