style update

pull/476/head
Eren Gölge 2021-05-10 23:03:21 +02:00
parent 5aee30443f
commit 19fb1d743d
35 changed files with 160 additions and 152 deletions

View File

@ -16,16 +16,11 @@ from TTS.speaker_encoder.model import SpeakerEncoder
from TTS.speaker_encoder.utils.io import save_best_model, save_checkpoint
from TTS.speaker_encoder.utils.visual import plot_embeddings
from TTS.tts.datasets.preprocess import load_meta_data
from TTS.utils.arguments import init_training
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import (
    count_parameters,
    remove_experiment_folder,
    set_init_dict,
)
from TTS.utils.generic_utils import count_parameters, remove_experiment_folder, set_init_dict
from TTS.utils.radam import RAdam
from TTS.utils.training import NoamLR, check_update
from TTS.utils.arguments import init_training
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

View File

@ -1,6 +1,7 @@
import json
import os
import re
import yaml
from TTS.config.shared_configs import *
@ -34,7 +35,7 @@ def _search_configs(model_name):
def _process_model_name(config_dict):
    model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"]
    model_name = model_name.replace('_generator', '').replace('_discriminator', '')
    model_name = model_name.replace("_generator", "").replace("_discriminator", "")
    return model_name

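For context, the helper in the hunk above strips the GAN sub-model suffixes so config lookup can work from a single base model name. A minimal, hedged sketch of that behaviour (the import path and the "melgan_generator" input are assumptions for illustration only):

from TTS.config import _process_model_name  # assumed location, per the hunk above

print(_process_model_name({"generator_model": "melgan_generator"}))  # -> "melgan"
print(_process_model_name({"model": "tacotron2"}))  # -> "tacotron2"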
View File

@ -1,7 +1,9 @@
from coqpit import MISSING
from dataclasses import dataclass, field, asdict
from dataclasses import asdict, dataclass, field
from typing import List
from TTS.config.shared_configs import BaseTrainingConfig, BaseAudioConfig, BaseDatasetConfig
from coqpit import MISSING
from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig
@dataclass
@ -13,23 +15,27 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
    datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])
    # model params
    model_params: dict = field(default_factory=lambda: {
        "input_dim": 40,
        "proj_dim": 256,
        "lstm_dim": 768,
        "num_lstm_layers": 3,
        "use_lstm_with_projection": True
    })
    model_params: dict = field(
        default_factory=lambda: {
            "input_dim": 40,
            "proj_dim": 256,
            "lstm_dim": 768,
            "num_lstm_layers": 3,
            "use_lstm_with_projection": True,
        }
    )
    storage: dict = field(default_factory=lambda:{
        "sample_from_storage_p": 0.66, # the probability with which we'll sample from the DataSet in-memory storage
        "storage_size": 15, # the size of the in-memory storage with respect to a single batch
        "additive_noise": 1e-5 # add very small gaussian noise to the data in order to increase robustness
    })
    storage: dict = field(
        default_factory=lambda: {
            "sample_from_storage_p": 0.66,  # the probability with which we'll sample from the DataSet in-memory storage
            "storage_size": 15,  # the size of the in-memory storage with respect to a single batch
            "additive_noise": 1e-5,  # add very small gaussian noise to the data in order to increase robustness
        }
    )
    # training params
    max_train_step: int = 1000  # end training when number of training steps reaches this value.
    loss: str = 'angleproto'
    loss: str = "angleproto"
    grad_clip: float = 3.0
    lr: float = 0.0001
    lr_decay: bool = False
@ -51,4 +57,6 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
    def check_values(self):
        super().check_values()
        c = asdict(self)
        assert c['model_params']['input_dim'] == self.audio.num_mels, " [!] model input dimension must be equal to melspectrogram dimension."
        assert (
            c["model_params"]["input_dim"] == self.audio.num_mels
        ), " [!] model input dimension must be equal to melspectrogram dimension."

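The assertion in check_values above ties the speaker encoder's input size to the audio front-end. A short, hedged usage sketch (num_mels=40 mirrors the test config later in this commit; the mismatch case is illustrative):

from TTS.config.shared_configs import BaseAudioConfig
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig

config = SpeakerEncoderConfig(audio=BaseAudioConfig(num_mels=40))
config.check_values()  # passes: model_params["input_dim"] defaults to 40 == audio.num_mels
config.audio.num_mels = 80
# config.check_values() would now raise AssertionError until model_params["input_dim"] is set to 80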
View File

@ -9,6 +9,10 @@ def to_camel(text):
def setup_model(c):
    model = SpeakerEncoder(c.model_params["input_dim"], c.model_params["proj_dim"],
                           c.model_params["lstm_dim"], c.model_params["num_lstm_layers"])
    return model
    model = SpeakerEncoder(
        c.model_params["input_dim"],
        c.model_params["proj_dim"],
        c.model_params["lstm_dim"],
        c.model_params["num_lstm_layers"],
    )
    return model

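The reformatted factory above only unpacks model_params, so it is normally driven straight from the config class shown earlier in this commit. A hedged sketch, assuming this is the setup_model imported elsewhere here from TTS.speaker_encoder.utils.generic_utils:

from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
from TTS.speaker_encoder.utils.generic_utils import setup_model

c = SpeakerEncoderConfig()  # defaults: input_dim=40, proj_dim=256, lstm_dim=768, 3 LSTM layers
model = setup_model(c)  # builds a SpeakerEncoder from c.model_params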
View File

@ -1,5 +1,6 @@
import os
import datetime
import os
import torch
@ -19,8 +20,7 @@ def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
    torch.save(state, checkpoint_path)
def save_best_model(model, optimizer, model_loss, best_loss, out_path,
                    current_step):
def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_step):
    if model_loss < best_loss:
        new_state_dict = model.state_dict()
        state = {
@ -33,7 +33,6 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path,
        best_loss = model_loss
        bestmodel_path = "best_model.pth.tar"
        bestmodel_path = os.path.join(out_path, bestmodel_path)
        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(
            model_loss, bestmodel_path))
        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
        torch.save(state, bestmodel_path)
    return best_loss

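One detail worth keeping in mind about save_best_model above: it returns the (possibly updated) best loss, so callers keep the running minimum by reassigning it. A minimal, hedged sketch, assuming this hunk is TTS/speaker_encoder/utils/io.py as the imports elsewhere in this commit suggest; the loss value, out_path, and step are illustrative placeholders:

from TTS.speaker_encoder.model import SpeakerEncoder
from TTS.speaker_encoder.utils.io import save_best_model
from TTS.utils.radam import RAdam

model = SpeakerEncoder(40, 256, 768, 3)  # dims mirror the default model_params above
optimizer = RAdam(model.parameters(), lr=0.0001)
best_loss = float("inf")
best_loss = save_best_model(model, optimizer, 0.5, best_loss, "/tmp", 1)
# best_model.pth.tar is written only because 0.5 < inf; otherwise best_loss comes back unchanged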
View File

@ -41,7 +41,7 @@ class CharactersConfig(Coqpit):
    characters: str = None
    punctuations: str = None
    phonemes: str = None
    unique: bool = True # for backwards compatibility of models trained with char sets with duplicates
    unique: bool = True  # for backwards compatibility of models trained with char sets with duplicates
    def check_values(
        self,

View File

@ -6,9 +6,9 @@ from typing import Union
import numpy as np
import torch
from TTS.config import load_config
from TTS.speaker_encoder.utils.generic_utils import setup_model
from TTS.utils.audio import AudioProcessor
from TTS.config import load_config
def make_speakers_json_path(out_path):

View File

@ -7,10 +7,11 @@ import re
import shutil
import subprocess
import sys
import torch
from pathlib import Path
from typing import List
import torch
def get_cuda():
    use_cuda = torch.cuda.is_available()

View File

@ -8,8 +8,8 @@ from shutil import copyfile
import gdown
import requests
from TTS.utils.generic_utils import get_user_data_dir
from TTS.config import load_config
from TTS.utils.generic_utils import get_user_data_dir
class ModelManager(object):

View File

@ -5,6 +5,7 @@ import numpy as np
import pysbd
import torch
from TTS.config import load_config
from TTS.tts.utils.generic_utils import setup_model
from TTS.tts.utils.speakers import SpeakerManager
@ -13,7 +14,6 @@ from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.synthesis import synthesis, trim_silence
from TTS.tts.utils.text import make_symbols, phonemes, symbols
from TTS.utils.audio import AudioProcessor
from TTS.config import load_config
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator
@ -117,7 +117,7 @@ class Synthesizer(object):
        self.use_phonemes = self.tts_config.use_phonemes
        self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)
        if self.tts_config.has('characters') and self.tts_config.characters:
        if self.tts_config.has("characters") and self.tts_config.characters:
            symbols, phonemes = make_symbols(**self.tts_config.characters)
        if self.use_phonemes:

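The Synthesizer hunks above are style-only, so as a brief, hedged reminder of the call pattern they touch (paths are placeholders; the two trailing None arguments are the optional vocoder checkpoint and config, mirroring the synthesizer test later in this commit):

from TTS.utils.synthesizer import Synthesizer

synthesizer = Synthesizer("checkpoint_10.pth.tar", "dummy_model_config.json", None, None)
wav = synthesizer.tts("Better this test works!!")  # returns the synthesized waveform samples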
View File

@ -2,8 +2,8 @@ import os
import unittest
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
from TTS.utils.audio import AudioProcessor
from TTS.config import BaseAudioConfig
from TTS.utils.audio import AudioProcessor
TESTS_PATH = get_tests_path()
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")

View File

@ -6,10 +6,10 @@ import torch
from torch import optim
from tests import get_tests_input_path
from TTS.tts.configs import GlowTTSConfig
from TTS.tts.layers.losses import GlowTTSLoss
from TTS.tts.models.glow_tts import GlowTTS
from TTS.utils.audio import AudioProcessor
from TTS.tts.configs import GlowTTSConfig
# pylint: disable=unused-variable

View File

@ -7,10 +7,10 @@ import torch
from torch.utils.data import DataLoader
from tests import get_tests_input_path, get_tests_output_path
from TTS.tts.configs import BaseTTSConfig
from TTS.tts.datasets import TTSDataset
from TTS.tts.datasets.preprocess import ljspeech
from TTS.utils.audio import AudioProcessor
from TTS.tts.configs import BaseTTSConfig
# pylint: disable=unused-variable
@ -18,7 +18,7 @@ OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
os.makedirs(OUTPATH, exist_ok=True)
# create a dummy config for testing data loaders.
c = BaseTTSConfig(text_cleaner='english_cleaners', num_loader_workers=0, batch_size=2)
c = BaseTTSConfig(text_cleaner="english_cleaners", num_loader_workers=0, batch_size=2)
c.r = 5
c.data_path = "tests/data/ljspeech/"
ok_ljspeech = os.path.exists(c.data_path)

View File

@ -2,10 +2,9 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.config.shared_configs import BaseAudioConfig
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")
@ -20,7 +19,7 @@ config = SpeakerEncoderConfig(
    print_step=1,
    save_step=1,
    print_eval=True,
    audio=BaseAudioConfig(num_mels=40)
    audio=BaseAudioConfig(num_mels=40),
)
config.audio.do_trim_silence = True
config.audio.trim_db = 60
@ -42,6 +41,8 @@ run_cli(command_train)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -5,10 +5,10 @@ import numpy as np
import torch
from tests import get_tests_input_path
from TTS.speaker_encoder.model import SpeakerEncoder
from TTS.speaker_encoder.utils.io import save_checkpoint
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
from TTS.config import load_config
from TTS.speaker_encoder.model import SpeakerEncoder
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
from TTS.speaker_encoder.utils.io import save_checkpoint
from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.audio import AudioProcessor

View File

@ -1,7 +1,6 @@
import os
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
def test_synthesize():

View File

@ -2,10 +2,10 @@ import os
import unittest
from tests import get_tests_input_path, get_tests_output_path
from TTS.config import load_config
from TTS.tts.utils.generic_utils import setup_model
from TTS.tts.utils.io import save_checkpoint
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.config import load_config
from TTS.utils.synthesizer import Synthesizer
@ -15,7 +15,7 @@ class SynthesizerTest(unittest.TestCase):
        # pylint: disable=global-statement
        global symbols, phonemes
        config = load_config(os.path.join(get_tests_output_path(), "dummy_model_config.json"))
        if config.has('characters') and config.characters:
        if config.has("characters") and config.characters:
            symbols, phonemes = make_symbols(**config.characters.to_dict())
        num_chars = len(phonemes) if config.use_phonemes else len(symbols)
@ -26,8 +26,8 @@ class SynthesizerTest(unittest.TestCase):
    def test_in_out(self):
        self._create_random_model()
        tts_root_path = get_tests_output_path()
        tts_checkpoint = os.path.join(tts_root_path, 'checkpoint_10.pth.tar')
        tts_config = os.path.join(tts_root_path, 'dummy_model_config.json')
        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
        tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
        synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
        synthesizer.tts("Better this test works!!")

View File

@ -6,10 +6,10 @@ import torch
from torch import nn, optim
from tests import get_tests_input_path
from TTS.tts.configs import Tacotron2Config
from TTS.tts.layers.losses import MSELossMasked
from TTS.tts.models.tacotron2 import Tacotron2
from TTS.utils.audio import AudioProcessor
from TTS.tts.configs import Tacotron2Config
# pylint: disable=unused-variable
@ -148,13 +148,7 @@ class TacotronGSTTrainTest(unittest.TestCase):
        criterion = MSELossMasked(seq_len_norm=False).to(device)
        criterion_st = nn.BCEWithLogitsLoss().to(device)
        model = Tacotron2(
            num_chars=24,
            r=c.r,
            num_speakers=5,
            use_gst=True,
            gst=c.gst
        ).to(device)
        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
        model.train()
        model_ref = copy.deepcopy(model)
        count = 0
@ -210,13 +204,7 @@ class TacotronGSTTrainTest(unittest.TestCase):
        criterion = MSELossMasked(seq_len_norm=False).to(device)
        criterion_st = nn.BCEWithLogitsLoss().to(device)
        model = Tacotron2(
            num_chars=24,
            r=c.r,
            num_speakers=5,
            use_gst=True,
            gst =c.gst
        ).to(device)
        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
        model.train()
        model_ref = copy.deepcopy(model)
        count = 0
@ -271,14 +259,9 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
        stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
        criterion = MSELossMasked(seq_len_norm=False).to(device)
        criterion_st = nn.BCEWithLogitsLoss().to(device)
        model = Tacotron2(
            num_chars=24,
            r=c.r,
            num_speakers=5,
            speaker_embedding_dim=55,
            use_gst=True,
            gst=c.gst
        ).to(device)
        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55, use_gst=True, gst=c.gst).to(
            device
        )
        model.train()
        model_ref = copy.deepcopy(model)
        count = 0

View File

@ -5,10 +5,9 @@ import numpy as np
import tensorflow as tf
import torch
from TTS.tts.configs import Tacotron2Config
from TTS.tts.tf.models.tacotron2 import Tacotron2
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
from TTS.tts.configs import Tacotron2Config
tf.get_logger().setLevel("INFO")

View File

@ -6,11 +6,10 @@ import torch
from torch import nn, optim
from tests import get_tests_input_path
from TTS.tts.configs import TacotronConfig
from TTS.tts.layers.losses import L1LossMasked
from TTS.tts.models.tacotron import Tacotron
from TTS.utils.audio import AudioProcessor
from TTS.tts.configs import TacotronConfig
# pylint: disable=unused-variable

View File

@ -4,9 +4,8 @@ import os
# pylint: disable=wildcard-import
# pylint: disable=unused-import
from tests import get_tests_input_path, get_tests_path
from TTS.tts.utils.text import *
from TTS.tts.configs import TacotronConfig
from TTS.tts.utils.text import *
conf = TacotronConfig()

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs import AlignTTSConfig
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@ -44,6 +43,8 @@ run_cli(command_train)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs import GlowTTSConfig
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@ -45,6 +44,8 @@ run_cli(command_train)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs import SpeedySpeechConfig
config_path = os.path.join(get_tests_output_path(), "test_speedy_speech_config.json")
@ -45,6 +44,8 @@ run_cli(command_train)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs import Tacotron2Config
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@ -45,6 +44,8 @@ run_cli(command_train)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs import TacotronConfig
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@ -44,6 +43,8 @@ run_cli(command_train)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import FullbandMelganConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@ -29,13 +28,17 @@ config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import HifiganConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@ -30,13 +29,17 @@ config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,10 +2,9 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import MelganConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")
@ -29,13 +28,17 @@ config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import MultibandMelganConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@ -29,13 +28,17 @@ config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import ParallelWaveganConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@ -29,13 +28,17 @@ config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)

View File

@ -5,9 +5,9 @@ from torch.utils.data import DataLoader
from tests import get_tests_output_path, get_tests_path
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import BaseGANVocoderConfig
from TTS.vocoder.datasets.gan_dataset import GANDataset
from TTS.vocoder.datasets.preprocess import load_wav_data
from TTS.vocoder.configs import BaseGANVocoderConfig
file_path = os.path.dirname(os.path.realpath(__file__))
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")

View File

@ -3,8 +3,8 @@ import os
import torch
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
from TTS.utils.audio import AudioProcessor
from TTS.config import BaseAudioConfig
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT
TESTS_PATH = get_tests_path()

View File

@ -2,42 +2,40 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import WavegradConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")
config = WavegradConfig(batch_size=8,
                        eval_batch_size=8,
                        num_loader_workers=0,
                        num_val_loader_workers=0,
                        run_eval=True,
                        test_delay_epochs=-1,
                        epochs=1,
                        seq_len=8192,
                        eval_split_size=1,
                        print_step=1,
                        print_eval=True,
                        data_path="tests/data/ljspeech",
                        output_path=output_path,
                        test_noise_schedule={
                            "min_val": 1e-6,
                            "max_val": 1e-2,
                            "num_steps": 2
                        })
config = WavegradConfig(
    batch_size=8,
    eval_batch_size=8,
    num_loader_workers=0,
    num_val_loader_workers=0,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1,
    seq_len=8192,
    eval_split_size=1,
    print_step=1,
    print_eval=True,
    data_path="tests/data/ljspeech",
    output_path=output_path,
    test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2},
)
config.audio.do_trim_silence = True
config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")),
                    key=os.path.getmtime)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --continue_path {continue_path} "

View File

@ -2,8 +2,7 @@ import glob
import os
import shutil
from tests import get_tests_output_path, run_cli, get_device_id
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import WavernnConfig
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@ -29,14 +28,17 @@ config.audio.trim_db = 60
config.save_json(config_path)
# train the model for one epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
)
run_cli(command_train)
# Find latest folder
continue_path = max(glob.glob(os.path.join(output_path, "*/")),
                    key=os.path.getmtime)
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
# restore the model and continue training for one more epoch
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
)
run_cli(command_train)
shutil.rmtree(continue_path)