mirror of https://github.com/coqui-ai/TTS.git
Fix model tests (#2943)
parent
af62613c86
commit
623ea41634
|
@ -11,8 +11,9 @@
|
|||
],
|
||||
"default_vocoder": null,
|
||||
"commit": "e9a1953e",
|
||||
"license": "Coqui Community Model License",
|
||||
"contact": "info@coqui.ai"
|
||||
"license": "CPML",
|
||||
"contact": "info@coqui.ai",
|
||||
"tos_required": true
|
||||
},
|
||||
"your_tts": {
|
||||
"description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
|
||||
|
|
|
@ -41,6 +41,7 @@ def register_config(model_name: str) -> Coqpit:
|
|||
# TODO: fix this
|
||||
if model_name == "xtts":
|
||||
from TTS.tts.configs.xtts_config import XttsConfig
|
||||
|
||||
config_class = XttsConfig
|
||||
paths = ["TTS.tts.configs", "TTS.vocoder.configs", "TTS.encoder.configs", "TTS.vc.configs"]
|
||||
for path in paths:
|
||||
|
@ -96,7 +97,6 @@ def load_config(config_path: str) -> Coqpit:
|
|||
raise TypeError(f" [!] Unknown config file type {ext}")
|
||||
config_dict.update(data)
|
||||
model_name = _process_model_name(config_dict)
|
||||
breakpoint
|
||||
config_class = register_config(model_name.lower())
|
||||
config = config_class()
|
||||
config.from_dict(config_dict)
|
||||
|
|
|
@ -21,6 +21,7 @@ LICENSE_URLS = {
|
|||
"apache 2.0": "https://choosealicense.com/licenses/apache-2.0/",
|
||||
"apache2": "https://choosealicense.com/licenses/apache-2.0/",
|
||||
"cc-by-sa 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
|
||||
"cpml": "https://coqui.ai/cpml.txt"
|
||||
}
|
||||
|
||||
|
||||
|
@ -295,6 +296,29 @@ class ModelManager(object):
|
|||
model_item = self.set_model_url(model_item)
|
||||
return model_item, model_full_name, model
|
||||
|
||||
def ask_tos(self, model_full_path):
    """Ask the user to agree to the terms of service.

    If ``tos_agreed.txt`` is not yet present in ``model_full_path``, print the
    terms-of-service notice and read a y/n answer from standard input. On "y"
    the agreed statement is written to ``tos_agreed.txt``; any other answer
    raises.

    Args:
        model_full_path: Directory in which the ``tos_agreed.txt`` marker file
            is looked up and, on agreement, created.

    Raises:
        Exception: If the user does not answer "y".
    """
    # Single source of truth so the recorded text matches what the user was shown.
    statement = "I have read, understood and agreed to the Terms and Conditions."
    tos_path = os.path.join(model_full_path, "tos_agreed.txt")
    if os.path.exists(tos_path):
        # Agreement already recorded; nothing to ask.
        return
    print(" > You must agree to the terms of service to use this model.")
    print(" | > Please see the terms of service at https://coqui.ai/cpml.txt")
    print(f' | > "{statement}" - [y/n]')
    answer = input(" | | > ")
    # Tolerate surrounding whitespace and upper case ("Y", " y ").
    if answer.strip().lower() != "y":
        raise Exception("You must agree to the terms of service to use this model.")
    with open(tos_path, "w", encoding="utf-8") as f:
        f.write(statement)
|
||||
|
||||
def tos_agreed(self, model_item, model_full_path):
    """Check if the user has agreed to the terms of service.

    Args:
        model_item: Model description mapping; a truthy ``"tos_required"``
            entry marks the model as needing an agreement.
        model_full_path: Directory in which ``tos_agreed.txt`` is looked up.

    Returns:
        True when no agreement is required or the marker file exists,
        False otherwise.
    """
    if not model_item.get("tos_required"):
        # Missing or falsy flag: this model has no terms to accept.
        return True
    return os.path.exists(os.path.join(model_full_path, "tos_agreed.txt"))
|
||||
|
||||
def download_model(self, model_name):
|
||||
"""Download model files given the full model name.
|
||||
Model name is in the format
|
||||
|
@ -316,6 +340,9 @@ class ModelManager(object):
|
|||
print(f" > {model_name} is already downloaded.")
|
||||
else:
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
# handle TOS
|
||||
if not self.tos_agreed(model_item, output_path):
|
||||
self.ask_tos(output_path)
|
||||
print(f" > Downloading model to {output_path}")
|
||||
try:
|
||||
if "fairseq" in model_name:
|
||||
|
|
|
@ -338,7 +338,7 @@ class Synthesizer(nn.Module):
|
|||
|
||||
elif language_name and isinstance(language_name, str):
|
||||
try:
|
||||
language_id = self.tts_model.language_manager.name_to_id[language_id]
|
||||
language_id = self.tts_model.language_manager.name_to_id[language_name]
|
||||
except KeyError as e:
|
||||
raise ValueError(
|
||||
f" [!] Looks like you use a multi-lingual model. "
|
||||
|
|
|
@ -27,7 +27,7 @@ RUN_NAME = "YourTTS-CML-TTS"
|
|||
OUT_PATH = os.path.dirname(os.path.abspath(__file__)) # "/raid/coqui/Checkpoints/original-YourTTS/"
|
||||
|
||||
# If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here: https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
|
||||
RESTORE_PATH = "/raid/edresson/CML_YourTTS/checkpoints_yourtts_cml_tts_dataset/best_model.pth" # Download the checkpoint here: https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
|
||||
RESTORE_PATH = "/raid/edresson/CML_YourTTS/checkpoints_yourtts_cml_tts_dataset/best_model.pth" # Download the checkpoint here: https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
|
||||
|
||||
# This parameter is useful for debugging: it skips the training epochs and only runs the evaluation and produces the test sentences
|
||||
SKIP_TRAIN_EPOCH = False
|
||||
|
@ -47,7 +47,7 @@ MAX_AUDIO_LEN_IN_SECONDS = float("inf")
|
|||
CML_DATASET_PATH = "./datasets/CML-TTS-Dataset/"
|
||||
|
||||
|
||||
### Download LibriTTS dataset
|
||||
### Download LibriTTS dataset
|
||||
# It will automatically download the dataset. If you have problems, you can comment it out and manually download and extract the dataset. Download link: https://www.openslr.org/resources/60/train-clean-360.tar.gz
|
||||
LIBRITTS_DOWNLOAD_PATH = "./datasets/LibriTTS/"
|
||||
# Check if LibriTTS dataset is not already downloaded, if not download it
|
||||
|
@ -62,7 +62,7 @@ libritts_config = BaseDatasetConfig(
|
|||
meta_file_train="",
|
||||
meta_file_val="",
|
||||
path=os.path.join(LIBRITTS_DOWNLOAD_PATH, "train-clean-360/"),
|
||||
language="en"
|
||||
language="en",
|
||||
)
|
||||
|
||||
# init CML-TTS configs
|
||||
|
@ -71,8 +71,8 @@ pt_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_portuguese_v0.1/"),
|
||||
language="pt-br"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_portuguese_v0.1/"),
|
||||
language="pt-br",
|
||||
)
|
||||
|
||||
pl_config = BaseDatasetConfig(
|
||||
|
@ -80,8 +80,8 @@ pl_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_polish_v0.1/"),
|
||||
language="pl"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_polish_v0.1/"),
|
||||
language="pl",
|
||||
)
|
||||
|
||||
it_config = BaseDatasetConfig(
|
||||
|
@ -89,8 +89,8 @@ it_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_italian_v0.1/"),
|
||||
language="it"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_italian_v0.1/"),
|
||||
language="it",
|
||||
)
|
||||
|
||||
fr_config = BaseDatasetConfig(
|
||||
|
@ -98,8 +98,8 @@ fr_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_french_v0.1/"),
|
||||
language="fr"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_french_v0.1/"),
|
||||
language="fr",
|
||||
)
|
||||
|
||||
du_config = BaseDatasetConfig(
|
||||
|
@ -107,8 +107,8 @@ du_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_dutch_v0.1/"),
|
||||
language="du"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_dutch_v0.1/"),
|
||||
language="du",
|
||||
)
|
||||
|
||||
ge_config = BaseDatasetConfig(
|
||||
|
@ -116,8 +116,8 @@ ge_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_german_v0.1/"),
|
||||
language="ge"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_german_v0.1/"),
|
||||
language="ge",
|
||||
)
|
||||
|
||||
sp_config = BaseDatasetConfig(
|
||||
|
@ -125,8 +125,8 @@ sp_config = BaseDatasetConfig(
|
|||
dataset_name="cml_tts",
|
||||
meta_file_train="train.csv",
|
||||
meta_file_val="",
|
||||
path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_spanish_v0.1/"),
|
||||
language="sp"
|
||||
path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_spanish_v0.1/"),
|
||||
language="sp",
|
||||
)
|
||||
|
||||
# Add here all datasets configs Note: If you want to add new datasets, just add them here and it will automatically compute the speaker embeddings (d-vectors) for this new dataset :)
|
||||
|
@ -247,150 +247,55 @@ config = VitsConfig(
|
|||
max_audio_len=SAMPLE_RATE * MAX_AUDIO_LEN_IN_SECONDS,
|
||||
mixed_precision=False,
|
||||
test_sentences=[
|
||||
[
|
||||
"Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.",
|
||||
"9351",
|
||||
None,
|
||||
"pt-br"
|
||||
],
|
||||
[
|
||||
"Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.",
|
||||
"12249",
|
||||
None,
|
||||
"pt-br"
|
||||
],
|
||||
["Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.", "9351", None, "pt-br"],
|
||||
["Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.", "12249", None, "pt-br"],
|
||||
[
|
||||
"S\u00e3o necess\u00e1rios muitos anos de trabalho para ter sucesso da noite para o dia.",
|
||||
"2961",
|
||||
None,
|
||||
"pt-br"
|
||||
],
|
||||
[
|
||||
"You'll have the view of the top of the mountain that you climb.",
|
||||
"LTTS_6574",
|
||||
None,
|
||||
"en"
|
||||
],
|
||||
[
|
||||
"When you don\u2019t take any risks, you risk everything.",
|
||||
"LTTS_6206",
|
||||
None,
|
||||
"en"
|
||||
],
|
||||
[
|
||||
"Are necessary too many years of work to succeed overnight.",
|
||||
"LTTS_5717",
|
||||
None,
|
||||
"en"
|
||||
],
|
||||
[
|
||||
"Je hebt uitzicht op de top van de berg die je beklimt.",
|
||||
"960",
|
||||
None,
|
||||
"du"
|
||||
],
|
||||
[
|
||||
"Als je geen risico neemt, riskeer je alles.",
|
||||
"2450",
|
||||
None,
|
||||
"du"
|
||||
],
|
||||
[
|
||||
"Zijn te veel jaren werk nodig om van de ene op de andere dag te slagen.",
|
||||
"10984",
|
||||
None,
|
||||
"du"
|
||||
],
|
||||
[
|
||||
"Vous aurez la vue sur le sommet de la montagne que vous gravirez.",
|
||||
"6381",
|
||||
None,
|
||||
"fr"
|
||||
],
|
||||
[
|
||||
"Quand tu ne prends aucun risque, tu risques tout.",
|
||||
"2825",
|
||||
None,
|
||||
"fr"
|
||||
"pt-br",
|
||||
],
|
||||
["You'll have the view of the top of the mountain that you climb.", "LTTS_6574", None, "en"],
|
||||
["When you don\u2019t take any risks, you risk everything.", "LTTS_6206", None, "en"],
|
||||
["Are necessary too many years of work to succeed overnight.", "LTTS_5717", None, "en"],
|
||||
["Je hebt uitzicht op de top van de berg die je beklimt.", "960", None, "du"],
|
||||
["Als je geen risico neemt, riskeer je alles.", "2450", None, "du"],
|
||||
["Zijn te veel jaren werk nodig om van de ene op de andere dag te slagen.", "10984", None, "du"],
|
||||
["Vous aurez la vue sur le sommet de la montagne que vous gravirez.", "6381", None, "fr"],
|
||||
["Quand tu ne prends aucun risque, tu risques tout.", "2825", None, "fr"],
|
||||
[
|
||||
"Sont n\u00e9cessaires trop d'ann\u00e9es de travail pour r\u00e9ussir du jour au lendemain.",
|
||||
"1844",
|
||||
None,
|
||||
"fr"
|
||||
],
|
||||
[
|
||||
"Sie haben die Aussicht auf die Spitze des Berges, den Sie erklimmen.",
|
||||
"2314",
|
||||
None,
|
||||
"ge"
|
||||
],
|
||||
[
|
||||
"Wer nichts riskiert, riskiert alles.",
|
||||
"7483",
|
||||
None,
|
||||
"ge"
|
||||
],
|
||||
[
|
||||
"Es sind zu viele Jahre Arbeit notwendig, um \u00fcber Nacht erfolgreich zu sein.",
|
||||
"12461",
|
||||
None,
|
||||
"ge"
|
||||
],
|
||||
[
|
||||
"Avrai la vista della cima della montagna che sali.",
|
||||
"4998",
|
||||
None,
|
||||
"it"
|
||||
],
|
||||
[
|
||||
"Quando non corri alcun rischio, rischi tutto.",
|
||||
"6744",
|
||||
None,
|
||||
"it"
|
||||
],
|
||||
[
|
||||
"Are necessary too many years of work to succeed overnight.",
|
||||
"1157",
|
||||
None,
|
||||
"it"
|
||||
"fr",
|
||||
],
|
||||
["Sie haben die Aussicht auf die Spitze des Berges, den Sie erklimmen.", "2314", None, "ge"],
|
||||
["Wer nichts riskiert, riskiert alles.", "7483", None, "ge"],
|
||||
["Es sind zu viele Jahre Arbeit notwendig, um \u00fcber Nacht erfolgreich zu sein.", "12461", None, "ge"],
|
||||
["Avrai la vista della cima della montagna che sali.", "4998", None, "it"],
|
||||
["Quando non corri alcun rischio, rischi tutto.", "6744", None, "it"],
|
||||
["Are necessary too many years of work to succeed overnight.", "1157", None, "it"],
|
||||
[
|
||||
"B\u0119dziesz mie\u0107 widok na szczyt g\u00f3ry, na kt\u00f3r\u0105 si\u0119 wspinasz.",
|
||||
"7014",
|
||||
None,
|
||||
"pl"
|
||||
],
|
||||
[
|
||||
"Kiedy nie podejmujesz \u017cadnego ryzyka, ryzykujesz wszystko.",
|
||||
"3492",
|
||||
None,
|
||||
"pl"
|
||||
"pl",
|
||||
],
|
||||
["Kiedy nie podejmujesz \u017cadnego ryzyka, ryzykujesz wszystko.", "3492", None, "pl"],
|
||||
[
|
||||
"Potrzebne s\u0105 zbyt wiele lat pracy, aby odnie\u015b\u0107 sukces z dnia na dzie\u0144.",
|
||||
"1890",
|
||||
None,
|
||||
"pl"
|
||||
],
|
||||
[
|
||||
"Tendr\u00e1s la vista de la cima de la monta\u00f1a que subes",
|
||||
"101",
|
||||
None,
|
||||
"sp"
|
||||
],
|
||||
[
|
||||
"Cuando no te arriesgas, lo arriesgas todo.",
|
||||
"5922",
|
||||
None,
|
||||
"sp"
|
||||
"pl",
|
||||
],
|
||||
["Tendr\u00e1s la vista de la cima de la monta\u00f1a que subes", "101", None, "sp"],
|
||||
["Cuando no te arriesgas, lo arriesgas todo.", "5922", None, "sp"],
|
||||
[
|
||||
"Son necesarios demasiados a\u00f1os de trabajo para triunfar de la noche a la ma\u00f1ana.",
|
||||
"10246",
|
||||
None,
|
||||
"sp"
|
||||
]
|
||||
"sp",
|
||||
],
|
||||
],
|
||||
# Enable the weighted sampler
|
||||
use_weighted_sampler=True,
|
||||
|
@ -399,10 +304,10 @@ config = VitsConfig(
|
|||
weighted_sampler_attrs={"language": 1.0},
|
||||
weighted_sampler_multipliers={
|
||||
# "speaker_name": {
|
||||
# you can force the batching scheme to give a higher weight to a certain speaker and then this speaker will appears more frequently on the batch.
|
||||
# It will speedup the speaker adaptation process. Considering the CML train dataset and "new_speaker" as the speaker name of the speaker that you want to adapt.
|
||||
# The line above will make the balancer consider the "new_speaker" as 106 speakers so 1/4 of the number of speakers present on CML dataset.
|
||||
# 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
|
||||
# you can force the batching scheme to give a higher weight to a certain speaker and then this speaker will appears more frequently on the batch.
|
||||
# It will speedup the speaker adaptation process. Considering the CML train dataset and "new_speaker" as the speaker name of the speaker that you want to adapt.
|
||||
# The line above will make the balancer consider the "new_speaker" as 106 speakers so 1/4 of the number of speakers present on CML dataset.
|
||||
# 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
|
||||
# }
|
||||
},
|
||||
# It defines the Speaker Consistency Loss (SCL) α to 9 like the YourTTS paper
|
||||
|
@ -414,7 +319,7 @@ train_samples, eval_samples = load_tts_samples(
|
|||
config.datasets,
|
||||
eval_split=True,
|
||||
eval_split_max_size=config.eval_split_max_size,
|
||||
eval_split_size=config.eval_split_size
|
||||
eval_split_size=config.eval_split_size,
|
||||
)
|
||||
|
||||
# Init the model
|
||||
|
|
|
@ -10,12 +10,15 @@ from TTS.utils.generic_utils import get_user_data_dir
|
|||
from TTS.utils.manage import ModelManager
|
||||
|
||||
|
||||
MODELS_WITH_SEP_TESTS = ["bark", "xtts"]
|
||||
|
||||
|
||||
def run_models(offset=0, step=1):
|
||||
"""Check if all the models are downloadable and tts models run correctly."""
|
||||
print(" > Run synthesizer with all the models.")
|
||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False)
|
||||
model_names = [name for name in manager.list_models() if "bark" not in name]
|
||||
model_names = [name for name in manager.list_models() if name in MODELS_WITH_SEP_TESTS]
|
||||
for model_name in model_names[offset::step]:
|
||||
print(f"\n > Run - {model_name}")
|
||||
model_path, _, _ = manager.download_model(model_name)
|
||||
|
@ -63,20 +66,15 @@ def run_models(offset=0, step=1):
|
|||
manager.download_model(model_name)
|
||||
print(f" | > OK: {model_name}")
|
||||
|
||||
# folders = glob.glob(os.path.join(manager.output_prefix, "*"))
|
||||
# assert len(folders) == len(model_names) // step
|
||||
|
||||
|
||||
def test_models_offset_0_step_3():
|
||||
run_models(offset=0, step=3)
|
||||
|
||||
|
||||
def test_models_offset_1_step_3():
|
||||
run_models(offset=1, step=3)
|
||||
|
||||
|
||||
def test_models_offset_2_step_3():
|
||||
run_models(offset=2, step=3)
|
||||
def test_xtts():
    """Run the XTTS v1 model through the `tts` CLI with `yes` piped into it."""
    wav_out = os.path.join(get_tests_output_path(), "output.wav")
    reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
    # Assemble the shell command piece by piece; the joined text is what gets executed.
    command = "".join(
        [
            "yes | ",
            "tts --model_name tts_models/multilingual/multi-dataset/xtts_v1 ",
            f'--text "This is an example." --out_path "{wav_out}" --progress_bar False --use_cuda True ',
            f'--speaker_wav "{reference_wav}" --language_idx "en"',
        ]
    )
    run_cli(command)
|
||||
|
||||
|
||||
def test_bark():
|
||||
|
@ -84,7 +82,7 @@ def test_bark():
|
|||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
run_cli(
|
||||
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
|
||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
|
||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True'
|
||||
)
|
||||
|
||||
|
||||
|
@ -99,3 +97,17 @@ def test_voice_conversion():
|
|||
f"tts --model_name {model_name}"
|
||||
f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --progress_bar False"
|
||||
)
|
||||
|
||||
"""
|
||||
These are used to split tests into different actions on Github.
|
||||
"""
|
||||
def test_models_offset_0_step_3():
|
||||
run_models(offset=0, step=3)
|
||||
|
||||
|
||||
def test_models_offset_1_step_3():
|
||||
run_models(offset=1, step=3)
|
||||
|
||||
|
||||
def test_models_offset_2_step_3():
|
||||
run_models(offset=2, step=3)
|
Loading…
Reference in New Issue