From 56480360cf0aa0527e2010f7cb087998cf664cc5 Mon Sep 17 00:00:00 2001 From: Edresson Date: Sun, 19 Sep 2021 13:29:09 -0300 Subject: [PATCH] Update the VITS model docs --- TTS/tts/models/vits.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index a9078b26..334e4526 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -159,16 +159,18 @@ class VitsArgs(Coqpit): num_languages (int): Number of languages for the language embedding layer. Defaults to 0. - use_speaker_encoder_as_loss (bool): - + use_speaker_encoder_as_loss (bool): + Enable/Disable Speaker Consistency Loss (SCL). Defaults to False. - use_speaker_encoder_as_loss: bool = False - speaker_encoder_config_path: str = "" - speaker_encoder_model_path: str = "" + speaker_encoder_config_path (str): + Path to the speaker encoder config file, to use for SCL. Defaults to "". + + speaker_encoder_model_path (str): + Path to the speaker encoder checkpoint file, to use for SCL. Defaults to "". fine_tuning_mode (int): Fine tuning only the vocoder part of the model, while the rest will be frozen. Defaults to 0. - Mode 0: disabled; + Mode 0: Disabled; Mode 1: uses the distribution predicted by the encoder and It's recommended for TTS; Mode 2: uses the distribution predicted by the encoder and It's recommended for voice conversion. """