mirror of https://github.com/coqui-ai/TTS.git

style update

parent: 5aee30443f
commit: 19fb1d743d

Directories touched: TTS, config, speaker_encoder, tts, configs, utils, tests
@@ -16,16 +16,11 @@ from TTS.speaker_encoder.model import SpeakerEncoder
 from TTS.speaker_encoder.utils.io import save_best_model, save_checkpoint
 from TTS.speaker_encoder.utils.visual import plot_embeddings
 from TTS.tts.datasets.preprocess import load_meta_data
+from TTS.utils.arguments import init_training
 from TTS.utils.audio import AudioProcessor
-from TTS.utils.generic_utils import (
-    count_parameters,
-    remove_experiment_folder,
-    set_init_dict,
-)
+from TTS.utils.generic_utils import count_parameters, remove_experiment_folder, set_init_dict
 from TTS.utils.radam import RAdam
 from TTS.utils.training import NoamLR, check_update
-from TTS.utils.arguments import init_training
-

 torch.backends.cudnn.enabled = True
 torch.backends.cudnn.benchmark = True
@@ -1,6 +1,7 @@
 import json
 import os
 import re
+
 import yaml

 from TTS.config.shared_configs import *

@@ -34,7 +35,7 @@ def _search_configs(model_name):

 def _process_model_name(config_dict):
     model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"]
-    model_name = model_name.replace('_generator', '').replace('_discriminator', '')
+    model_name = model_name.replace("_generator", "").replace("_discriminator", "")
     return model_name

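For orientation, `_process_model_name` above only normalizes the model name read from a config dict, stripping the `_generator`/`_discriminator` suffix a GAN vocoder name may carry. A minimal usage sketch, assuming these hunks come from `TTS/config/__init__.py` so the private helper is importable from `TTS.config`; the config dicts are made up for illustration:

    from TTS.config import _process_model_name  # private helper defined in the hunk above

    tts_config = {"model": "glow_tts"}                         # hypothetical config dicts; only the
    vocoder_config = {"generator_model": "melgan_generator"}   # keys read by the helper matter here

    assert _process_model_name(tts_config) == "glow_tts"
    assert _process_model_name(vocoder_config) == "melgan"     # "_generator" suffix stripped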
@@ -1,7 +1,9 @@
-from coqpit import MISSING
-from dataclasses import dataclass, field, asdict
+from dataclasses import asdict, dataclass, field
 from typing import List
-from TTS.config.shared_configs import BaseTrainingConfig, BaseAudioConfig, BaseDatasetConfig
+
+from coqpit import MISSING
+
+from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig


 @dataclass

@@ -13,23 +15,27 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
     datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])

     # model params
-    model_params: dict = field(default_factory=lambda: {
-        "input_dim": 40,
-        "proj_dim": 256,
-        "lstm_dim": 768,
-        "num_lstm_layers": 3,
-        "use_lstm_with_projection": True
-    })
+    model_params: dict = field(
+        default_factory=lambda: {
+            "input_dim": 40,
+            "proj_dim": 256,
+            "lstm_dim": 768,
+            "num_lstm_layers": 3,
+            "use_lstm_with_projection": True,
+        }
+    )

-    storage: dict = field(default_factory=lambda:{
-        "sample_from_storage_p": 0.66,  # the probability with which we'll sample from the DataSet in-memory storage
-        "storage_size": 15,  # the size of the in-memory storage with respect to a single batch
-        "additive_noise": 1e-5  # add very small gaussian noise to the data in order to increase robustness
-    })
+    storage: dict = field(
+        default_factory=lambda: {
+            "sample_from_storage_p": 0.66,  # the probability with which we'll sample from the DataSet in-memory storage
+            "storage_size": 15,  # the size of the in-memory storage with respect to a single batch
+            "additive_noise": 1e-5,  # add very small gaussian noise to the data in order to increase robustness
+        }
+    )

     # training params
     max_train_step: int = 1000  # end training when number of training steps reaches this value.
-    loss: str = 'angleproto'
+    loss: str = "angleproto"
     grad_clip: float = 3.0
     lr: float = 0.0001
     lr_decay: bool = False

@@ -51,4 +57,6 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
     def check_values(self):
         super().check_values()
         c = asdict(self)
-        assert c['model_params']['input_dim'] == self.audio.num_mels, " [!] model input dimendion must be equal to melspectrogram dimension."
+        assert (
+            c["model_params"]["input_dim"] == self.audio.num_mels
+        ), " [!] model input dimendion must be equal to melspectrogram dimension."
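The `check_values` hunk only re-wraps the existing assertion: `model_params["input_dim"]` has to equal `audio.num_mels`. A hedged sketch of what that guards; the import paths and the `audio=` keyword are taken from the test hunks further down, and whether the inherited base-config checks pass with an otherwise default config is an assumption:

    from TTS.config.shared_configs import BaseAudioConfig
    from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig

    config = SpeakerEncoderConfig(audio=BaseAudioConfig(num_mels=40))
    config.check_values()  # default model_params["input_dim"] is 40, so the assert holds

    config.model_params["input_dim"] = 80
    # config.check_values() would now fail with:
    #   " [!] model input dimendion must be equal to melspectrogram dimension."  (typo is in the source)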
@@ -9,6 +9,10 @@ def to_camel(text):


 def setup_model(c):
-    model = SpeakerEncoder(c.model_params["input_dim"], c.model_params["proj_dim"],
-                           c.model_params["lstm_dim"], c.model_params["num_lstm_layers"])
-    return model
+    model = SpeakerEncoder(
+        c.model_params["input_dim"],
+        c.model_params["proj_dim"],
+        c.model_params["lstm_dim"],
+        c.model_params["num_lstm_layers"],
+    )
+    return model
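`setup_model` simply forwards four `model_params` entries to the `SpeakerEncoder` constructor (note that `use_lstm_with_projection` is not passed here). A hedged usage sketch built from names visible in this commit; the import path for `setup_model` is taken from a hunk further down, and it is assumed the default config satisfies the constructor:

    from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
    from TTS.speaker_encoder.utils.generic_utils import setup_model

    c = SpeakerEncoderConfig()  # defaults shown in the config hunk above
    model = setup_model(c)      # equivalent to SpeakerEncoder(40, 256, 768, 3)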
@@ -1,5 +1,6 @@
-import os
 import datetime
+import os
+
 import torch


@@ -19,8 +20,7 @@ def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
     torch.save(state, checkpoint_path)


-def save_best_model(model, optimizer, model_loss, best_loss, out_path,
-                    current_step):
+def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_step):
     if model_loss < best_loss:
         new_state_dict = model.state_dict()
         state = {

@@ -33,7 +33,6 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path,
         best_loss = model_loss
         bestmodel_path = "best_model.pth.tar"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
-        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(
-            model_loss, bestmodel_path))
+        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         torch.save(state, bestmodel_path)
     return best_loss
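Both helpers take the same `(model, optimizer, model_loss, ..., out_path, current_step)` shape, and `save_best_model` returns the updated best loss. A hedged, self-contained sketch of how they are typically chained in a training loop; the toy model, optimizer, losses and output directory below are placeholders, not taken from this commit:

    import os

    import torch

    from TTS.speaker_encoder.utils.io import save_best_model, save_checkpoint

    model = torch.nn.Linear(4, 2)                              # stand-in for SpeakerEncoder
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)   # stand-in for RAdam
    out_path = "/tmp/speaker_encoder_ckpts"
    os.makedirs(out_path, exist_ok=True)

    best_loss = float("inf")
    for step, loss in enumerate([0.9, 0.7, 0.8], start=1):     # pretend per-step losses
        save_checkpoint(model, optimizer, loss, out_path, step)
        best_loss = save_best_model(model, optimizer, loss, best_loss, out_path, step)
    # best_model.pth.tar is rewritten only on the steps where the loss improved.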
@@ -41,7 +41,7 @@ class CharactersConfig(Coqpit):
     characters: str = None
     punctuations: str = None
     phonemes: str = None
-    unique: bool = True # for backwards compatibility of models trained with char sets with duplicates
+    unique: bool = True  # for backwards compatibility of models trained with char sets with duplicates

     def check_values(
         self,
@@ -6,9 +6,9 @@ from typing import Union
 import numpy as np
 import torch

+from TTS.config import load_config
 from TTS.speaker_encoder.utils.generic_utils import setup_model
 from TTS.utils.audio import AudioProcessor
-from TTS.config import load_config


 def make_speakers_json_path(out_path):
@@ -7,10 +7,11 @@ import re
 import shutil
 import subprocess
 import sys
-import torch
 from pathlib import Path
 from typing import List
+
+import torch


 def get_cuda():
     use_cuda = torch.cuda.is_available()
@@ -8,8 +8,8 @@ from shutil import copyfile
 import gdown
 import requests

-from TTS.utils.generic_utils import get_user_data_dir
 from TTS.config import load_config
+from TTS.utils.generic_utils import get_user_data_dir


 class ModelManager(object):
@@ -5,6 +5,7 @@ import numpy as np
 import pysbd
 import torch

+from TTS.config import load_config
 from TTS.tts.utils.generic_utils import setup_model
 from TTS.tts.utils.speakers import SpeakerManager

@@ -13,7 +14,6 @@ from TTS.tts.utils.speakers import SpeakerManager
 from TTS.tts.utils.synthesis import synthesis, trim_silence
 from TTS.tts.utils.text import make_symbols, phonemes, symbols
 from TTS.utils.audio import AudioProcessor
-from TTS.config import load_config
 from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator

@@ -117,7 +117,7 @@ class Synthesizer(object):
         self.use_phonemes = self.tts_config.use_phonemes
         self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)

-        if self.tts_config.has('characters') and self.tts_config.characters:
+        if self.tts_config.has("characters") and self.tts_config.characters:
             symbols, phonemes = make_symbols(**self.tts_config.characters)

         if self.use_phonemes:
@@ -2,8 +2,8 @@ import os
 import unittest

 from tests import get_tests_input_path, get_tests_output_path, get_tests_path
-from TTS.utils.audio import AudioProcessor
 from TTS.config import BaseAudioConfig
+from TTS.utils.audio import AudioProcessor

 TESTS_PATH = get_tests_path()
 OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
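This test now pulls `BaseAudioConfig` from `TTS.config`, and the Synthesizer hunk above shows the matching consumption pattern (`AudioProcessor(verbose=False, **config.audio)`). A hedged sketch combining the two; `num_mels=40` mirrors the speaker-encoder test below, and the Coqpit-style `to_dict()` call is an assumption:

    from TTS.config import BaseAudioConfig
    from TTS.utils.audio import AudioProcessor

    audio_config = BaseAudioConfig(num_mels=40)
    ap = AudioProcessor(verbose=False, **audio_config.to_dict())  # to_dict() assumed to expand into AudioProcessor kwargs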
@@ -6,10 +6,10 @@ import torch
 from torch import optim

 from tests import get_tests_input_path
+from TTS.tts.configs import GlowTTSConfig
 from TTS.tts.layers.losses import GlowTTSLoss
 from TTS.tts.models.glow_tts import GlowTTS
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import GlowTTSConfig

 # pylint: disable=unused-variable

@@ -7,10 +7,11 @@ import torch
 from torch.utils.data import DataLoader

 from tests import get_tests_input_path, get_tests_output_path
+from TTS.tts.configs import BaseTTSConfig
 from TTS.tts.datasets import TTSDataset
 from TTS.tts.datasets.preprocess import ljspeech
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import BaseTTSConfig
+
 # pylint: disable=unused-variable

@@ -18,7 +18,7 @@ OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
 os.makedirs(OUTPATH, exist_ok=True)

 # create a dummy config for testing data loaders.
-c = BaseTTSConfig(text_cleaner='english_cleaners', num_loader_workers=0, batch_size=2)
+c = BaseTTSConfig(text_cleaner="english_cleaners", num_loader_workers=0, batch_size=2)
 c.r = 5
 c.data_path = "tests/data/ljspeech/"
 ok_ljspeech = os.path.exists(c.data_path)
@@ -2,10 +2,9 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
-from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
+from tests import get_device_id, get_tests_output_path, run_cli
+from TTS.config.shared_configs import BaseAudioConfig
+from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig

 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
 output_path = os.path.join(get_tests_output_path(), "train_outputs")

@@ -20,7 +19,7 @@ config = SpeakerEncoderConfig(
     print_step=1,
     save_step=1,
     print_eval=True,
-    audio=BaseAudioConfig(num_mels=40)
+    audio=BaseAudioConfig(num_mels=40),
 )
 config.audio.do_trim_silence = True
 config.audio.trim_db = 60

@@ -42,6 +41,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
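Many of the remaining test hunks only wrap a long `command_train` f-string in parentheses so it fits the formatter's line length; the string itself is unchanged. A tiny self-contained check of that claim, with placeholders standing in for the helpers imported from `tests`:

    get_device_id = lambda: 0                    # stand-in for tests.get_device_id
    continue_path = "/tmp/train_outputs/run-1"   # placeholder path

    wrapped = (
        f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
    )
    unwrapped = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
    assert wrapped == unwrapped                  # the re-wrap is purely cosmetic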
@@ -5,10 +5,10 @@ import numpy as np
 import torch

 from tests import get_tests_input_path
-from TTS.speaker_encoder.model import SpeakerEncoder
-from TTS.speaker_encoder.utils.io import save_checkpoint
-from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
 from TTS.config import load_config
+from TTS.speaker_encoder.model import SpeakerEncoder
+from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
+from TTS.speaker_encoder.utils.io import save_checkpoint
 from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.audio import AudioProcessor

@@ -1,7 +1,6 @@
 import os

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli


 def test_synthesize():
@@ -2,10 +2,10 @@ import os
 import unittest

 from tests import get_tests_input_path, get_tests_output_path
+from TTS.config import load_config
 from TTS.tts.utils.generic_utils import setup_model
 from TTS.tts.utils.io import save_checkpoint
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
-from TTS.config import load_config
 from TTS.utils.synthesizer import Synthesizer

@@ -15,7 +15,7 @@ class SynthesizerTest(unittest.TestCase):
         # pylint: disable=global-statement
         global symbols, phonemes
         config = load_config(os.path.join(get_tests_output_path(), "dummy_model_config.json"))
-        if config.has('characters') and config.characters:
+        if config.has("characters") and config.characters:
             symbols, phonemes = make_symbols(**config.characters.to_dict())

         num_chars = len(phonemes) if config.use_phonemes else len(symbols)

@@ -26,8 +26,8 @@ class SynthesizerTest(unittest.TestCase):
     def test_in_out(self):
         self._create_random_model()
         tts_root_path = get_tests_output_path()
-        tts_checkpoint = os.path.join(tts_root_path, 'checkpoint_10.pth.tar')
-        tts_config = os.path.join(tts_root_path, 'dummy_model_config.json')
+        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
+        tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
         synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
         synthesizer.tts("Better this test works!!")

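The test above exercises the public surface touched by the Synthesizer hunks: build it from a checkpoint plus config and call `tts()`. A hedged usage sketch; the paths are placeholders, and the two positional `None`s presumably stand for the vocoder checkpoint and config, as in the test:

    from TTS.utils.synthesizer import Synthesizer

    tts_checkpoint = "output/checkpoint_10.pth.tar"   # placeholder paths
    tts_config = "output/dummy_model_config.json"

    synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
    wav = synthesizer.tts("Better this test works!!")  # assumed to return the synthesized audio samples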
@@ -6,10 +6,10 @@ import torch
 from torch import nn, optim

 from tests import get_tests_input_path
+from TTS.tts.configs import Tacotron2Config
 from TTS.tts.layers.losses import MSELossMasked
 from TTS.tts.models.tacotron2 import Tacotron2
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import Tacotron2Config

 # pylint: disable=unused-variable

@@ -148,13 +148,7 @@ class TacotronGSTTrainTest(unittest.TestCase):

         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(
-            num_chars=24,
-            r=c.r,
-            num_speakers=5,
-            use_gst=True,
-            gst=c.gst
-        ).to(device)
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0

@@ -210,13 +204,7 @@ class TacotronGSTTrainTest(unittest.TestCase):

         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(
-            num_chars=24,
-            r=c.r,
-            num_speakers=5,
-            use_gst=True,
-            gst =c.gst
-        ).to(device)
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0

@@ -271,14 +259,9 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(
-            num_chars=24,
-            r=c.r,
-            num_speakers=5,
-            speaker_embedding_dim=55,
-            use_gst=True,
-            gst=c.gst
-        ).to(device)
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55, use_gst=True, gst=c.gst).to(
+            device
+        )
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
@@ -5,10 +5,9 @@ import numpy as np
 import tensorflow as tf
 import torch

+from TTS.tts.configs import Tacotron2Config
 from TTS.tts.tf.models.tacotron2 import Tacotron2
 from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
-from TTS.tts.configs import Tacotron2Config
-

 tf.get_logger().setLevel("INFO")

@@ -6,11 +6,10 @@ import torch
 from torch import nn, optim

 from tests import get_tests_input_path
+from TTS.tts.configs import TacotronConfig
 from TTS.tts.layers.losses import L1LossMasked
 from TTS.tts.models.tacotron import Tacotron
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import TacotronConfig
-

 # pylint: disable=unused-variable

@@ -4,9 +4,8 @@ import os
 # pylint: disable=wildcard-import
 # pylint: disable=unused-import
 from tests import get_tests_input_path, get_tests_path
-from TTS.tts.utils.text import *
 from TTS.tts.configs import TacotronConfig
-
+from TTS.tts.utils.text import *

 conf = TacotronConfig()

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import AlignTTSConfig

 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")

@@ -44,6 +43,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import GlowTTSConfig

 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")

@@ -45,6 +44,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import SpeedySpeechConfig

 config_path = os.path.join(get_tests_output_path(), "test_speedy_speech_config.json")

@@ -45,6 +44,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import Tacotron2Config

 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")

@@ -45,6 +44,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import TacotronConfig

 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")

@@ -44,6 +43,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import FullbandMelganConfig

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import HifiganConfig

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

@@ -30,13 +29,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,10 +2,9 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import MelganConfig
-

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
 output_path = os.path.join(get_tests_output_path(), "train_outputs")

@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import MultibandMelganConfig

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import ParallelWaveganConfig

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
@@ -5,9 +5,9 @@ from torch.utils.data import DataLoader

 from tests import get_tests_output_path, get_tests_path
 from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.configs import BaseGANVocoderConfig
 from TTS.vocoder.datasets.gan_dataset import GANDataset
 from TTS.vocoder.datasets.preprocess import load_wav_data
-from TTS.vocoder.configs import BaseGANVocoderConfig

 file_path = os.path.dirname(os.path.realpath(__file__))
 OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
@@ -3,8 +3,8 @@ import os
 import torch

 from tests import get_tests_input_path, get_tests_output_path, get_tests_path
-from TTS.utils.audio import AudioProcessor
 from TTS.config import BaseAudioConfig
+from TTS.utils.audio import AudioProcessor
 from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT

 TESTS_PATH = get_tests_path()
@@ -2,42 +2,40 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import WavegradConfig

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
 output_path = os.path.join(get_tests_output_path(), "train_outputs")

-config = WavegradConfig(batch_size=8,
-                        eval_batch_size=8,
-                        num_loader_workers=0,
-                        num_val_loader_workers=0,
-                        run_eval=True,
-                        test_delay_epochs=-1,
-                        epochs=1,
-                        seq_len=8192,
-                        eval_split_size=1,
-                        print_step=1,
-                        print_eval=True,
-                        data_path="tests/data/ljspeech",
-                        output_path=output_path,
-                        test_noise_schedule={
-                            "min_val": 1e-6,
-                            "max_val": 1e-2,
-                            "num_steps": 2
-                        })
+config = WavegradConfig(
+    batch_size=8,
+    eval_batch_size=8,
+    num_loader_workers=0,
+    num_val_loader_workers=0,
+    run_eval=True,
+    test_delay_epochs=-1,
+    epochs=1,
+    seq_len=8192,
+    eval_split_size=1,
+    print_step=1,
+    print_eval=True,
+    data_path="tests/data/ljspeech",
+    output_path=output_path,
+    test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2},
+)
 config.audio.do_trim_silence = True
 config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
-continue_path = max(glob.glob(os.path.join(output_path, "*/")),
-                    key=os.path.getmtime)
+continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
 command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --continue_path {continue_path} "
@@ -2,8 +2,7 @@ import glob
 import os
 import shutil

-from tests import get_tests_output_path, run_cli, get_device_id
-
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import WavernnConfig

 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

@@ -29,14 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)

 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
+)
 run_cli(command_train)

 # Find latest folder
-continue_path = max(glob.glob(os.path.join(output_path, "*/")),
-                    key=os.path.getmtime)
+continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)

 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)