Remove SpeedySpeech from .models.json

pull/800/head
Eren Gölge 2021-09-10 17:47:27 +00:00
parent d97952611d
commit 26f76fce22
6 changed files with 17 additions and 43 deletions


@@ -47,15 +47,6 @@
         "license": "MPL",
         "contact": "egolge@coqui.com"
     },
-    "speedy-speech-wn": {
-        "description": "Speedy Speech model with wavenet decoder.",
-        "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.0/tts_models--en--ljspeech--speedy-speech-wn.zip",
-        "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
-        "commit": "77b6145",
-        "author": "Eren Gölge @erogol",
-        "license": "MPL",
-        "contact": "egolge@coqui.com"
-    },
     "vits": {
         "description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
         "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--vits.zip",


@@ -1,15 +1 @@
 from TTS.tts.layers.losses import *
-
-
-def setup_loss(config):
-    if config.model.lower() in ["tacotron", "tacotron2"]:
-        model = TacotronLoss(config)
-    elif config.model.lower() == "glow_tts":
-        model = GlowTTSLoss()
-    elif config.model.lower() == "speedy_speech":
-        model = SpeedySpeechLoss(config)
-    elif config.model.lower() == "align_tts":
-        model = AlignTTSLoss(config)
-    else:
-        raise ValueError(f" [!] loss for model {config.model.lower()} cannot be found.")
-    return model
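Note: with this factory gone there is no central model-name-to-loss lookup any more; the assumption here is that each model class now constructs its own criterion (e.g. through a get_criterion() hook consumed by the trainer). A minimal sketch of that per-model pattern, with illustrative names rather than the exact Coqui TTS API:

    # Sketch only: replaces the removed if/elif factory with a per-model hook.
    # get_criterion() is an assumed/illustrative method name and the loss is a
    # stand-in; this is not the exact Coqui TTS implementation.
    import torch
    from torch import nn

    class ToyForwardModel(nn.Module):
        def get_criterion(self) -> nn.Module:
            # the model knows which loss it needs, so no global registry is required
            return nn.L1Loss()

    model = ToyForwardModel()
    criterion = model.get_criterion()
    print(criterion(torch.zeros(2, 3), torch.ones(2, 3)))  # tensor(1.)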


@@ -1,5 +1,4 @@
 import numpy as np
-import torch
 
 
 def _pad_data(x, length):


@@ -11,11 +11,11 @@ except ModuleNotFoundError:
 class StandardScaler:
-    """StandardScaler for mean-std normalization with the given mean and std values."""
+    """StandardScaler for mean-scale normalization with the given mean and scale values."""
 
-    def __init__(self, mean: np.ndarray = None, std: np.ndarray = None) -> None:
+    def __init__(self, mean: np.ndarray = None, scale: np.ndarray = None) -> None:
         self.mean_ = mean
-        self.std_ = std
+        self.scale_ = scale
 
     def set_stats(self, mean, scale):
         self.mean_ = mean
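Note: the rename only touches attribute naming (std_ becomes scale_), which makes the docstring and __init__ consistent with the scale wording that set_stats() already uses. For reference, a self-contained sketch of the mean-scale normalization the class implements (illustrative, not the exact TTS code):

    # Illustrative mean-scale normalizer: transform() applies y = (x - mean_) / scale_
    # and inverse_transform() undoes it. Method names mirror the sklearn-style
    # convention, but this is a sketch, not the library implementation.
    import numpy as np

    class MeanScaleScaler:
        def __init__(self, mean: np.ndarray = None, scale: np.ndarray = None) -> None:
            self.mean_ = mean
            self.scale_ = scale

        def transform(self, x: np.ndarray) -> np.ndarray:
            return (x - self.mean_) / self.scale_

        def inverse_transform(self, x: np.ndarray) -> np.ndarray:
            return x * self.scale_ + self.mean_

    scaler = MeanScaleScaler(mean=np.array([0.5]), scale=np.array([2.0]))
    assert np.allclose(scaler.inverse_transform(scaler.transform(np.array([1.5]))), [1.5])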


@@ -1,5 +1,3 @@
-import unittest
-
 import torch as T
 
 from TTS.tts.models.forward_tts import ForwardTTS, ForwardTTSArgs
@@ -54,12 +52,12 @@ def model_input_output_test():
     assert (outputs["x_mask"] - x_mask).sum() == 0.0
     assert (outputs["y_mask"] - y_mask).sum() == 0.0
-    assert outputs["alignment_soft"] == None
-    assert outputs["alignment_mas"] == None
-    assert outputs["alignment_logprob"] == None
-    assert outputs["o_alignment_dur"] == None
-    assert outputs["pitch_avg"] == None
-    assert outputs["pitch_avg_gt"] == None
+    assert outputs["alignment_soft"] is None
+    assert outputs["alignment_mas"] is None
+    assert outputs["alignment_logprob"] is None
+    assert outputs["o_alignment_dur"] is None
+    assert outputs["pitch_avg"] is None
+    assert outputs["pitch_avg_gt"] is None
 
     # USE PITCH
     model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=True, use_aligner=False))

@@ -85,10 +83,10 @@ def model_input_output_test():
     assert outputs["pitch_avg"].shape == (2, 1, 21)
     assert outputs["pitch_avg_gt"].shape == (2, 1, 21)
-    assert outputs["alignment_soft"] == None
-    assert outputs["alignment_mas"] == None
-    assert outputs["alignment_logprob"] == None
-    assert outputs["o_alignment_dur"] == None
+    assert outputs["alignment_soft"] is None
+    assert outputs["alignment_mas"] is None
+    assert outputs["alignment_logprob"] is None
+    assert outputs["o_alignment_dur"] is None
 
     # USE ALIGNER NETWORK
     model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=False, use_aligner=True))

@@ -116,8 +114,8 @@ def model_input_output_test():
     assert outputs["alignment_logprob"].shape == (2, 1, durations.sum(1).max(), 21)
     assert outputs["o_alignment_dur"].shape == (2, 21)
-    assert outputs["pitch_avg"] == None
-    assert outputs["pitch_avg_gt"] == None
+    assert outputs["pitch_avg"] is None
+    assert outputs["pitch_avg_gt"] is None
 
     # USE ALIGNER NETWORK AND PITCH
     model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=True, use_aligner=True))
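Note: the == None to is None change is more than style in a test that handles tensor outputs: == dispatches to __eq__, which array/tensor types override to return an elementwise result, so the assert can raise instead of checking for absence, while is None is an identity check that always yields a single bool. A generic illustration (NumPy here, not ForwardTTS itself):

    # Why `is None` is the safe check: `==` can be overridden to return a non-bool.
    # Generic illustration with NumPy; the test above deals with outputs that may
    # be tensors or None.
    import numpy as np

    values = np.array([1.0, 2.0])
    print(values == None)  # elementwise comparison: [False False], not a single bool
    print(values is None)  # identity check: False

    try:
        assert values == None  # ambiguous truth value of a 2-element array
    except ValueError as err:
        print("== None assert raised:", err)

    assert values is not None  # unambiguous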


@@ -1,6 +1,6 @@
 import torch as T
 
-from TTS.tts.utils.helpers import *
+from TTS.tts.utils.helpers import average_over_durations, generate_path, segment, sequence_mask
 
 
 def average_over_durations_test():  # pylint: disable=no-self-use

@@ -47,7 +47,7 @@ def generate_path_test():
     durations = durations * x_mask.squeeze(1)
     y_length = durations.sum(1)
     y_mask = sequence_mask(y_length).unsqueeze(1).long()
-    attn_mask = (torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)).squeeze(1).long()
+    attn_mask = (T.unsqueeze(x_mask, -1) * T.unsqueeze(y_mask, 2)).squeeze(1).long()
     print(attn_mask.shape)
     path = generate_path(durations, attn_mask)
     assert path.shape == (10, 21, durations.sum(1).max().item())
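Note: the torch.unsqueeze to T.unsqueeze fix follows from dropping the star import; torch was presumably only in scope through the wildcard import from TTS.tts.utils.helpers, while this file imports torch as T explicitly. For reference, generate_path expands per-token durations into a hard monotonic alignment (token i claims durations[i] consecutive output frames), which is what the shape assert checks. A small standalone sketch of that expansion (illustrative semantics, not the library code):

    # Sketch of the monotonic duration-to-alignment expansion that generate_path
    # performs (single sequence, no masking). Illustrative only.
    import torch as T

    def expand_durations(durations: T.Tensor) -> T.Tensor:
        """Return a (T_en, T_de) 0/1 path where token i covers durations[i] frames."""
        t_en = durations.shape[0]
        t_de = int(durations.sum().item())
        path = T.zeros(t_en, t_de)
        ends = T.cumsum(durations, dim=0)
        starts = ends - durations
        for i in range(t_en):
            path[i, int(starts[i]) : int(ends[i])] = 1.0
        return path

    print(expand_durations(T.tensor([2, 1, 3])))
    # tensor([[1., 1., 0., 0., 0., 0.],
    #         [0., 0., 1., 0., 0., 0.],
    #         [0., 0., 0., 1., 1., 1.]])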