a ton of linter updates

pull/367/head
Eren Gölge 2021-03-08 05:06:54 +01:00
parent 4422642ec0
commit 9a48ba3821
45 changed files with 244 additions and 241 deletions

View File

@ -170,7 +170,7 @@ def main():
args.vocoder_name = model_item['default_vocoder'] if args.vocoder_name is None else args.vocoder_name
if args.vocoder_name is not None:
vocoder_path, vocoder_config_path, vocoder_item = manager.download_model(args.vocoder_name)
vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
# CASE3: load custom models
if args.model_path is not None:

View File

@ -573,7 +573,7 @@ def main(args): # pylint: disable=redefined-outer-name
if c.run_eval:
target_loss = eval_avg_loss_dict['avg_loss']
best_loss = save_best_model(target_loss, best_loss, model, optimizer,
global_step, epoch, c.r, OUT_PATH,
global_step, epoch, c.r, OUT_PATH, model_characters,
keep_all_best=keep_all_best, keep_after=keep_after)

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import glob
import os
import sys
import time
@ -535,7 +533,7 @@ def main(args): # pylint: disable=redefined-outer-name
if c.run_eval:
target_loss = eval_avg_loss_dict['avg_loss']
best_loss = save_best_model(target_loss, best_loss, model, optimizer,
global_step, epoch, c.r, OUT_PATH,
global_step, epoch, c.r, OUT_PATH, model_characters,
keep_all_best=keep_all_best, keep_after=keep_after)

View File

@ -648,12 +648,14 @@ def main(args): # pylint: disable=redefined-outer-name
epoch,
c.r,
OUT_PATH,
model_characters,
keep_all_best=keep_all_best,
keep_after=keep_after,
scaler=scaler.state_dict() if c.mixed_precision else None
)
if __name__ == '__main__':
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(

View File

@ -50,7 +50,7 @@ def setup_loader(ap, is_val=False, verbose=False):
sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None
loader = DataLoader(dataset,
batch_size=1 if is_val else c.batch_size,
shuffle=False if num_gpus > 1 else True,
shuffle=num_gpus == 0,
drop_last=False,
sampler=sampler,
num_workers=c.num_val_loader_workers

View File

@ -1,7 +1,7 @@
import collections
import os
import random
from multiprocessing import Manager, Pool
from multiprocessing import Pool
import numpy as np
import torch

View File

@ -3,7 +3,7 @@ from glob import glob
import re
import sys
from pathlib import Path
from typing import List, Tuple
from typing import List
from tqdm import tqdm

View File

@ -366,8 +366,10 @@ class RelativePositionTransformer(nn.Module):
self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
self.ffn_layers.append(
FeedForwardNetwork(hidden_channels,
hidden_channels if (idx + 1) != self.num_layers else out_channels,
FeedForwardNetwork(
hidden_channels,
hidden_channels if
(idx + 1) != self.num_layers else out_channels,
hidden_channels_ffn,
kernel_size,
dropout_p=dropout_p))

View File

@ -75,7 +75,7 @@ class ReferenceEncoder(nn.Module):
# x: 3D tensor [batch_size, post_conv_width,
# num_channels*post_conv_height]
self.recurrence.flatten_parameters()
memory, out = self.recurrence(x)
_, out = self.recurrence(x)
# out: 3D tensor [seq_len==1, batch_size, encoding_size=128]
return out.squeeze(0)

View File

@ -2,13 +2,12 @@ import math
import numpy as np
import torch
from torch import nn
from inspect import signature
from torch.nn import functional
from TTS.tts.utils.generic_utils import sequence_mask
from TTS.tts.utils.ssim import ssim
# pylint: disable=abstract-method Method
# pylint: disable=abstract-method
# related to https://github.com/pytorch/pytorch/issues/42305
class L1LossMasked(nn.Module):
def __init__(self, seq_len_norm):

View File

@ -78,8 +78,7 @@ class RelativePositionTransformerEncoder(nn.Module):
kernel_size=5,
num_res_blocks=3,
num_conv_blocks=1,
dilations=[1, 1, 1]
)
dilations=[1, 1, 1])
self.rel_pos_transformer = RelativePositionTransformer(
hidden_channels, out_channels, hidden_channels, **params)
@ -104,8 +103,7 @@ class ResidualConv1dBNEncoder(nn.Module):
"""
def __init__(self, in_channels, out_channels, hidden_channels, params):
super().__init__()
self.prenet = nn.Sequential(
nn.Conv1d(in_channels, hidden_channels, 1),
self.prenet = nn.Sequential(nn.Conv1d(in_channels, hidden_channels, 1),
nn.ReLU())
self.res_conv_block = ResidualConv1dBNBlock(hidden_channels,
hidden_channels,
@ -183,9 +181,8 @@ class Encoder(nn.Module):
# init encoder
if encoder_type.lower() == "transformer":
# text encoder
self.encoder = RelativePositionTransformerEncoder(in_hidden_channels,
out_channels,
in_hidden_channels,
self.encoder = RelativePositionTransformerEncoder(
in_hidden_channels, out_channels, in_hidden_channels,
encoder_params) # pylint: disable=unexpected-keyword-arg
elif encoder_type.lower() == 'residual_conv_bn':
self.encoder = ResidualConv1dBNEncoder(in_hidden_channels,

View File

@ -32,7 +32,7 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
nd = str(num)
if abs(float(nd)) >= 1e48:
raise ValueError('number out of range')
elif 'e' in nd:
if 'e' in nd:
raise ValueError('scientific notation is not supported')
c_symbol = '正负点' if simp else '正負點'
if o: # formal
@ -69,7 +69,7 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
if int(unit) == 0: # 0000
intresult.append(c_basic[0])
continue
elif nu > 0 and int(unit) == 2: # 0002
if nu > 0 and int(unit) == 2: # 0002
intresult.append(c_twoalt + c_unit2[nu - 1])
continue
ulist = []

View File

@ -135,7 +135,7 @@ def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None):
return model
def is_tacotron(c):
return False if c['model'] in ['speedy_speech', 'glow_tts'] else True
return not c['model'] in ['speedy_speech', 'glow_tts']
def check_config_tts(c):
check_argument('model', c, enum_list=['tacotron', 'tacotron2', 'glow_tts', 'speedy_speech'], restricted=True, val_type=str)

View File

@ -7,7 +7,7 @@ from TTS.utils.io import RenamingUnpickler
def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False, eval=False):
def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False, eval=False): # pylint: disable=redefined-builtin
"""Load ```TTS.tts.models``` checkpoints.
Args:

View File

@ -63,8 +63,8 @@ def parse_speakers(c, args, meta_data_train, OUT_PATH):
speaker_embedding_dim = None
save_speaker_mapping(OUT_PATH, speaker_mapping)
num_speakers = len(speaker_mapping)
print(" > Training with {} speakers: {}".format(len(speakers),
", ".join(speakers)))
print(" > Training with {} speakers: {}".format(
len(speakers), ", ".join(speakers)))
else:
num_speakers = 0
speaker_embedding_dim = None

View File

@ -17,6 +17,7 @@ def create_window(window_size, channel):
window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
return window
def _ssim(img1, img2, window, window_size, channel, size_average=True):
mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
@ -25,9 +26,13 @@ def _ssim(img1, img2, window, window_size, channel, size_average = True):
mu2_sq = mu2.pow(2)
mu1_mu2 = mu1*mu2
sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2
sigma1_sq = F.conv2d(
img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
sigma2_sq = F.conv2d(
img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
sigma12 = F.conv2d(
img1 * img2, window, padding=window_size // 2,
groups=channel) - mu1_mu2
C1 = 0.01**2
C2 = 0.03**2
@ -64,6 +69,7 @@ class SSIM(torch.nn.Module):
return _ssim(img1, img2, window, self.window_size, channel, self.size_average)
def ssim(img1, img2, window_size=11, size_average=True):
(_, channel, _, _) = img1.size()
window = create_window(window_size, channel)

View File

@ -20,9 +20,13 @@ def text_to_seqvec(text, CONFIG):
add_blank=CONFIG['add_blank'] if 'add_blank' in CONFIG.keys() else False),
dtype=np.int32)
else:
seq = np.asarray(
text_to_sequence(text, text_cleaner, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None,
add_blank=CONFIG['add_blank'] if 'add_blank' in CONFIG.keys() else False), dtype=np.int32)
seq = np.asarray(text_to_sequence(
text,
text_cleaner,
tp=CONFIG.characters if 'characters' in CONFIG.keys() else None,
add_blank=CONFIG['add_blank']
if 'add_blank' in CONFIG.keys() else False),
dtype=np.int32)
return seq

View File

@ -144,8 +144,3 @@ class ModelManager(object):
if isinstance(key, str) and len(my_dict[key]) > 0:
return True
return False

View File

@ -4,7 +4,7 @@ from torch import nn
from torch.nn import functional as F
class TorchSTFT(nn.Module):
class TorchSTFT(nn.Module): # pylint: disable=abstract-method
def __init__(self, n_fft, hop_length, win_length, window='hann_window'):
""" Torch based STFT operation """
super(TorchSTFT, self).__init__()

View File

@ -23,7 +23,9 @@ class PositionalEncoding(nn.Module):
def forward(self, x, noise_level):
if x.shape[2] > self.pe.shape[1]:
self.init_pe_matrix(x.shape[1], x.shape[2], x)
return x + noise_level[..., None, None] + self.pe[:, :x.size(2)].repeat(x.shape[0], 1, 1) / self.C
return x + noise_level[..., None,
None] + self.pe[:, :x.size(2)].repeat(
x.shape[0], 1, 1) / self.C
def init_pe_matrix(self, n_channels, max_len, x):
pe = torch.zeros(max_len, n_channels)
@ -172,4 +174,3 @@ class DBlock(nn.Module):
for idx, layer in enumerate(self.main_block):
if len(layer.state_dict()) != 0:
self.main_block[idx] = weight_norm(layer)

View File

@ -79,7 +79,7 @@ class Wavegrad(nn.Module):
return x
def load_noise_schedule(self, path):
beta = np.load(path, allow_pickle=True).item()['beta']
beta = np.load(path, allow_pickle=True).item()['beta'] # pylint: disable=unexpected-keyword-arg
self.compute_noise_level(beta)
@torch.no_grad()
@ -91,8 +91,8 @@ class Wavegrad(nn.Module):
y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0).to(x)
sqrt_alpha_hat = self.noise_level.to(x)
for n in range(len(self.alpha) - 1, -1, -1):
y_n = self.c1[n] * (y_n -
self.c2[n] * self.forward(y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0])))
y_n = self.c1[n] * (y_n - self.c2[n] * self.forward(
y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0])))
if n > 0:
z = torch.randn_like(y_n)
y_n += self.sigma[n - 1] * z

View File

@ -118,9 +118,8 @@ class UpsampleNetwork(nn.Module):
class Upsample(nn.Module):
def __init__(
self, scale, pad, num_res_blocks, feat_dims, compute_dims, res_out_dims, use_aux_net
):
def __init__(self, scale, pad, num_res_blocks, feat_dims, compute_dims,
res_out_dims, use_aux_net):
super().__init__()
self.scale = scale
self.pad = pad

View File

@ -44,9 +44,11 @@ def log_sum_exp(x):
# It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py
def discretized_mix_logistic_loss(
y_hat, y, num_classes=65536, log_scale_min=None, reduce=True
):
def discretized_mix_logistic_loss(y_hat,
y,
num_classes=65536,
log_scale_min=None,
reduce=True):
if log_scale_min is None:
log_scale_min = float(np.log(1e-14))
y_hat = y_hat.permute(0, 2, 1)

View File

@ -7,7 +7,7 @@ import pickle as pickle_tts
from TTS.utils.io import RenamingUnpickler
def load_checkpoint(model, checkpoint_path, use_cuda=False, eval=False):
def load_checkpoint(model, checkpoint_path, use_cuda=False, eval=False): # pylint: disable=redefined-builtin
try:
state = torch.load(checkpoint_path, map_location=torch.device('cpu'))
except ModuleNotFoundError:

View File

@ -217,4 +217,3 @@ class SSIMLossTests(unittest.TestCase):
(sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
output = layer(dummy_input + mask, dummy_target, dummy_length)
assert output.item() == 0, "0 vs {}".format(output.item())

View File

@ -356,4 +356,3 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
), "param {} with shape {} not updated!! \n{}\n{}".format(
count, param.shape, param, param_ref)
count += 1

View File

@ -17,5 +17,5 @@ def test_currency() -> None:
def test_expand_numbers() -> None:
assert "minus one" == phoneme_cleaners("-1")
assert "one" == phoneme_cleaners("1")
assert phoneme_cleaners("-1") == 'minus one'
assert phoneme_cleaners("1") == 'one'

View File

@ -17,7 +17,7 @@ def test_phoneme_to_sequence():
lang = "en-us"
sequence = phoneme_to_sequence(text, text_cleaner, lang)
text_hat = sequence_to_phoneme(sequence)
sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
gt = 'ɹiːsənt ɹᵻsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪŋkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹᵻspɑːnsᵻbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjʊleɪʃən ænd lɜːnɪŋ!'
assert text_hat == text_hat_with_params == gt