mirror of https://github.com/coqui-ai/TTS.git
Make style
parent
33b98e6cc3
commit
8622226f3f
|
@ -164,4 +164,5 @@ internal/*
|
|||
*_pitch.npy
|
||||
*_phoneme.npy
|
||||
wandb
|
||||
depot/*
|
||||
depot/*
|
||||
coqui_recipes/*
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import load_config, register_config
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import load_config, register_config
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models import setup_model
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import load_config, register_config
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import load_config, register_config
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
||||
from TTS.vocoder.models import setup_model
|
||||
|
|
|
@ -324,9 +324,9 @@ class BaseTTS(BaseModel):
|
|||
loader = DataLoader(
|
||||
dataset,
|
||||
batch_size=config.eval_batch_size if is_eval else config.batch_size,
|
||||
shuffle=False, # shuffle is done in the dataset.
|
||||
shuffle=False, # shuffle is done in the dataset.
|
||||
collate_fn=dataset.collate_fn,
|
||||
drop_last=True, # setting this False might cause issues in AMP training.
|
||||
drop_last=True, # setting this False might cause issues in AMP training.
|
||||
sampler=sampler,
|
||||
num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
|
||||
pin_memory=False,
|
||||
|
|
|
@ -994,9 +994,9 @@ class Vits(BaseTTS):
|
|||
print(" !! Error creating Test Sentence -", idx)
|
||||
return {"figures": test_figures, "audios": test_audios}
|
||||
|
||||
def test_log(self, outputs: dict, logger: "Logger", assets: dict, steps:int) -> None:
|
||||
logger.test_audios(steps, outputs['audios'], self.ap.sample_rate)
|
||||
logger.test_figures(steps, outputs['figures'])
|
||||
def test_log(self, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
|
||||
logger.test_audios(steps, outputs["audios"], self.ap.sample_rate)
|
||||
logger.test_figures(steps, outputs["figures"])
|
||||
|
||||
def get_optimizer(self) -> List:
|
||||
"""Initiate and return the GAN optimizers based on the config parameters.
|
||||
|
|
|
@ -11,6 +11,7 @@ def is_tool(name):
|
|||
|
||||
return which(name) is not None
|
||||
|
||||
|
||||
# priority: espeakng > espeak
|
||||
if is_tool("espeak-ng"):
|
||||
_DEF_ESPEAK_LIB = "espeak-ng"
|
||||
|
@ -116,7 +117,6 @@ class ESpeak(BasePhonemizer):
|
|||
# ^
|
||||
self.num_skip_chars = 1
|
||||
|
||||
|
||||
def auto_set_espeak_lib(self) -> None:
|
||||
if is_tool("espeak-ng"):
|
||||
self._ESPEAK_LIB = "espeak-ng"
|
||||
|
@ -163,7 +163,7 @@ class ESpeak(BasePhonemizer):
|
|||
phonemes = ""
|
||||
for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
|
||||
logging.debug("line: %s", repr(line))
|
||||
phonemes += line.decode("utf8").strip()[self.num_skip_chars:] # skip initial redundant characters
|
||||
phonemes += line.decode("utf8").strip()[self.num_skip_chars :] # skip initial redundant characters
|
||||
return phonemes.replace("_", separator)
|
||||
|
||||
def _phonemize(self, text, separator=None):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.tts.configs.align_tts_config import AlignTTSConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
|
||||
from TTS.tts.configs.fast_pitch_config import FastPitchConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models.forward_tts import ForwardTTS
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from TTS.tts.configs.fast_speech_config import FastSpeechConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models.forward_tts import ForwardTTS
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.vocoder.configs import HifiganConfig
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.vocoder.configs import MultibandMelganConfig
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from TTS.tts.configs.speedy_speech_config import SpeedySpeechConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models.forward_tts import ForwardTTS
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.tacotron2_config import Tacotron2Config
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.tacotron2_config import Tacotron2Config
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.vocoder.configs import UnivnetConfig
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.vits_config import VitsConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.vocoder.configs import WavegradConfig
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.vocoder.configs import WavernnConfig
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
import os
|
||||
from glob import glob
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.vits_config import VitsConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from TTS.tts.configs.fast_pitch_config import FastPitchConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models.forward_tts import ForwardTTS
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from TTS.tts.configs.fast_speech_config import FastSpeechConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models.forward_tts import ForwardTTS
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.glow_tts_config import GlowTTSConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config import BaseAudioConfig, BaseDatasetConfig
|
||||
from TTS.tts.configs.speedy_speech_config import SpeedySpeechConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.models.forward_tts import ForwardTTS
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.tacotron_config import TacotronConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.tacotron2_config import Tacotron2Config
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.tacotron2_config import Tacotron2Config
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
||||
from TTS.config.shared_configs import BaseAudioConfig
|
||||
from TTS.tts.configs.shared_configs import BaseDatasetConfig
|
||||
from TTS.tts.configs.vits_config import VitsConfig
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
|
@ -57,7 +58,7 @@ config = VitsConfig(
|
|||
print_step=25,
|
||||
print_eval=False,
|
||||
mixed_precision=True,
|
||||
max_text_len= 325, # change this if you have a larger VRAM than 16GB
|
||||
max_text_len=325, # change this if you have a larger VRAM than 16GB
|
||||
output_path=output_path,
|
||||
datasets=[dataset_config],
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue