Fix model tests (#2943)

2023-09-14 15:21:48 +02:00 · 2023-09-14 15:21:48 +02:00 · 623ea41634
parent af62613c86
commit 623ea41634
6 changed files with 106 additions and 161 deletions
--- a/TTS/.models.json
+++ b/TTS/.models.json
@ -11,8 +11,9 @@
                    ],
                    "default_vocoder": null,
                    "commit": "e9a1953e",
-                    "license": "Coqui Community Model License",
-                    "contact": "info@coqui.ai"
+                    "license": "CPML",
+                    "contact": "info@coqui.ai",
+                    "tos_required": true
                },
                "your_tts": {
                    "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
--- a/TTS/config/init.py
+++ b/TTS/config/init.py
@ -41,6 +41,7 @@ def register_config(model_name: str) -> Coqpit:
    # TODO: fix this
    if model_name == "xtts":
        from TTS.tts.configs.xtts_config import XttsConfig
+
        config_class = XttsConfig
    paths = ["TTS.tts.configs", "TTS.vocoder.configs", "TTS.encoder.configs", "TTS.vc.configs"]
    for path in paths:
@ -96,7 +97,6 @@ def load_config(config_path: str) -> Coqpit:
        raise TypeError(f" [!] Unknown config file type {ext}")
    config_dict.update(data)
    model_name = _process_model_name(config_dict)
-    breakpoint
    config_class = register_config(model_name.lower())
    config = config_class()
    config.from_dict(config_dict)
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@ -21,6 +21,7 @@ LICENSE_URLS = {
    "apache 2.0": "https://choosealicense.com/licenses/apache-2.0/",
    "apache2": "https://choosealicense.com/licenses/apache-2.0/",
    "cc-by-sa 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
+    "cpml": "https://coqui.ai/cpml.txt"
 }


@ -295,6 +296,29 @@ class ModelManager(object):
        model_item = self.set_model_url(model_item)
        return model_item, model_full_name, model

+    def ask_tos(self, model_full_path):
+        """Prompt for terms-of-service consent unless `tos_agreed.txt` exists in `model_full_path`; raise if declined."""
+        tos_path = os.path.join(model_full_path, "tos_agreed.txt")
+        if not os.path.exists(tos_path):
+            print(" > You must agree to the terms of service to use this model.")
+            print(" | > Please see the terms of service at https://coqui.ai/cpml.txt")
+            print(' | > "I have read, understood and agreed to the Terms and Conditions." - [y/n]')
+            answer = input(" | | > ").strip().lower()  # tolerate surrounding whitespace and an upper-case "Y"
+            if answer == "y":
+                with open(tos_path, "w", encoding="utf-8") as f:  # marker file: its presence records the consent
+                    f.write("I have read, understood and agreed to the Terms and Conditions.")
+            else:
+                raise Exception("You must agree to the terms of service to use this model.")
+
+    def tos_agreed(self, model_item, model_full_path):
+        """Return True when no TOS consent is needed or the consent marker file already exists."""
+        needs_consent = "tos_required" in model_item and model_item["tos_required"]
+        if not needs_consent:
+            # Entries without a truthy "tos_required" value never require consent.
+            return True
+        marker = os.path.join(model_full_path, "tos_agreed.txt")
+        return os.path.exists(marker)
+
    def download_model(self, model_name):
        """Download model files given the full model name.
        Model name is in the format
@ -316,6 +340,9 @@ class ModelManager(object):
            print(f" > {model_name} is already downloaded.")
        else:
            os.makedirs(output_path, exist_ok=True)
+            # handle TOS: ask for consent before downloading when the model requires it
+            if not self.tos_agreed(model_item, output_path):
+                self.ask_tos(output_path)
            print(f" > Downloading model to {output_path}")
            try:
                if "fairseq" in model_name:
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@ -338,7 +338,7 @@ class Synthesizer(nn.Module):

            elif language_name and isinstance(language_name, str):
                try:
-                    language_id = self.tts_model.language_manager.name_to_id[language_id]
+                    language_id = self.tts_model.language_manager.name_to_id[language_name]
                except KeyError as e:
                    raise ValueError(
                        f" [!] Looks like you use a multi-lingual model. "
--- a/recipes/multilingual/cml_yourtts/train_yourtts.py
+++ b/recipes/multilingual/cml_yourtts/train_yourtts.py
@ -27,7 +27,7 @@ RUN_NAME = "YourTTS-CML-TTS"
 OUT_PATH = os.path.dirname(os.path.abspath(__file__))  # "/raid/coqui/Checkpoints/original-YourTTS/"

 # If you want to do transfer learning and speedup your training you can set here the path to the CML-TTS available checkpoint that cam be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-RESTORE_PATH = "/raid/edresson/CML_YourTTS/checkpoints_yourtts_cml_tts_dataset/best_model.pth" # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
+RESTORE_PATH = "/raid/edresson/CML_YourTTS/checkpoints_yourtts_cml_tts_dataset/best_model.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p

 # This paramter is useful to debug, it skips the training epochs and just do the evaluation  and produce the test sentences
 SKIP_TRAIN_EPOCH = False
@ -47,7 +47,7 @@ MAX_AUDIO_LEN_IN_SECONDS = float("inf")
 CML_DATASET_PATH = "./datasets/CML-TTS-Dataset/"


-### Download LibriTTS dataset 
+### Download LibriTTS dataset
 # it will automatic download the dataset, if you have problems you can comment it and manually donwload and extract it ! Download link: https://www.openslr.org/resources/60/train-clean-360.tar.gz
 LIBRITTS_DOWNLOAD_PATH = "./datasets/LibriTTS/"
 # Check if LibriTTS dataset is not already downloaded, if not download it
@ -62,7 +62,7 @@ libritts_config = BaseDatasetConfig(
    meta_file_train="",
    meta_file_val="",
    path=os.path.join(LIBRITTS_DOWNLOAD_PATH, "train-clean-360/"),
-    language="en"
+    language="en",
 )

 # init CML-TTS configs
@ -71,8 +71,8 @@ pt_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_portuguese_v0.1/"),
-    language="pt-br"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_portuguese_v0.1/"),
+    language="pt-br",
 )

 pl_config = BaseDatasetConfig(
@ -80,8 +80,8 @@ pl_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_polish_v0.1/"),
-    language="pl"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_polish_v0.1/"),
+    language="pl",
 )

 it_config = BaseDatasetConfig(
@ -89,8 +89,8 @@ it_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_italian_v0.1/"),
-    language="it"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_italian_v0.1/"),
+    language="it",
 )

 fr_config = BaseDatasetConfig(
@ -98,8 +98,8 @@ fr_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_french_v0.1/"),
-    language="fr"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_french_v0.1/"),
+    language="fr",
 )

 du_config = BaseDatasetConfig(
@ -107,8 +107,8 @@ du_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_dutch_v0.1/"),
-    language="du"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_dutch_v0.1/"),
+    language="du",
 )

 ge_config = BaseDatasetConfig(
@ -116,8 +116,8 @@ ge_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_german_v0.1/"),
-    language="ge"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_german_v0.1/"),
+    language="ge",
 )

 sp_config = BaseDatasetConfig(
@ -125,8 +125,8 @@ sp_config = BaseDatasetConfig(
    dataset_name="cml_tts",
    meta_file_train="train.csv",
    meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_spanish_v0.1/"),
-    language="sp"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_spanish_v0.1/"),
+    language="sp",
 )

 # Add here all datasets configs Note: If you want to add new datasets, just add them here and it will automatically compute the speaker embeddings (d-vectors) for this new dataset :)
@ -247,150 +247,55 @@ config = VitsConfig(
    max_audio_len=SAMPLE_RATE * MAX_AUDIO_LEN_IN_SECONDS,
    mixed_precision=False,
    test_sentences=[
-        [
-            "Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.",
-            "9351",
-            None,
-            "pt-br"
-        ],
-        [
-            "Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.",
-            "12249",
-            None,
-            "pt-br"
-        ],
+        ["Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.", "9351", None, "pt-br"],
+        ["Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.", "12249", None, "pt-br"],
        [
            "S\u00e3o necess\u00e1rios muitos anos de trabalho para ter sucesso da noite para o dia.",
            "2961",
            None,
-            "pt-br"
-        ],
-        [
-            "You'll have the view of the top of the mountain that you climb.",
-            "LTTS_6574",
-            None,
-            "en"
-        ],
-        [
-            "When you don\u2019t take any risks, you risk everything.",
-            "LTTS_6206",
-            None,
-            "en"
-        ],
-        [
-            "Are necessary too many years of work to succeed overnight.",
-            "LTTS_5717",
-            None,
-            "en"
-        ],
-        [
-            "Je hebt uitzicht op de top van de berg die je beklimt.",
-            "960",
-            None,
-            "du"
-        ],
-        [
-            "Als je geen risico neemt, riskeer je alles.",
-            "2450",
-            None,
-            "du"
-        ],
-        [
-            "Zijn te veel jaren werk nodig om van de ene op de andere dag te slagen.",
-            "10984",
-            None,
-            "du"
-        ],
-        [
-            "Vous aurez la vue sur le sommet de la montagne que vous gravirez.",
-            "6381",
-            None,
-            "fr"
-        ],
-        [
-            "Quand tu ne prends aucun risque, tu risques tout.",
-            "2825",
-            None,
-            "fr"
+            "pt-br",
        ],
+        ["You'll have the view of the top of the mountain that you climb.", "LTTS_6574", None, "en"],
+        ["When you don\u2019t take any risks, you risk everything.", "LTTS_6206", None, "en"],
+        ["Are necessary too many years of work to succeed overnight.", "LTTS_5717", None, "en"],
+        ["Je hebt uitzicht op de top van de berg die je beklimt.", "960", None, "du"],
+        ["Als je geen risico neemt, riskeer je alles.", "2450", None, "du"],
+        ["Zijn te veel jaren werk nodig om van de ene op de andere dag te slagen.", "10984", None, "du"],
+        ["Vous aurez la vue sur le sommet de la montagne que vous gravirez.", "6381", None, "fr"],
+        ["Quand tu ne prends aucun risque, tu risques tout.", "2825", None, "fr"],
        [
            "Sont n\u00e9cessaires trop d'ann\u00e9es de travail pour r\u00e9ussir du jour au lendemain.",
            "1844",
            None,
-            "fr"
-        ],
-        [
-            "Sie haben die Aussicht auf die Spitze des Berges, den Sie erklimmen.",
-            "2314",
-            None,
-            "ge"
-        ],
-        [
-            "Wer nichts riskiert, riskiert alles.",
-            "7483",
-            None,
-            "ge"
-        ],
-        [
-            "Es sind zu viele Jahre Arbeit notwendig, um \u00fcber Nacht erfolgreich zu sein.",
-            "12461",
-            None,
-            "ge"
-        ],
-        [
-            "Avrai la vista della cima della montagna che sali.",
-            "4998",
-            None,
-            "it"
-        ],
-        [
-            "Quando non corri alcun rischio, rischi tutto.",
-            "6744",
-            None,
-            "it"
-        ],
-        [
-            "Are necessary too many years of work to succeed overnight.",
-            "1157",
-            None,
-            "it"
+            "fr",
        ],
+        ["Sie haben die Aussicht auf die Spitze des Berges, den Sie erklimmen.", "2314", None, "ge"],
+        ["Wer nichts riskiert, riskiert alles.", "7483", None, "ge"],
+        ["Es sind zu viele Jahre Arbeit notwendig, um \u00fcber Nacht erfolgreich zu sein.", "12461", None, "ge"],
+        ["Avrai la vista della cima della montagna che sali.", "4998", None, "it"],
+        ["Quando non corri alcun rischio, rischi tutto.", "6744", None, "it"],
+        ["Are necessary too many years of work to succeed overnight.", "1157", None, "it"],
        [
            "B\u0119dziesz mie\u0107 widok na szczyt g\u00f3ry, na kt\u00f3r\u0105 si\u0119 wspinasz.",
            "7014",
            None,
-            "pl"
-        ],
-        [
-            "Kiedy nie podejmujesz \u017cadnego ryzyka, ryzykujesz wszystko.",
-            "3492",
-            None,
-            "pl"
+            "pl",
        ],
+        ["Kiedy nie podejmujesz \u017cadnego ryzyka, ryzykujesz wszystko.", "3492", None, "pl"],
        [
            "Potrzebne s\u0105 zbyt wiele lat pracy, aby odnie\u015b\u0107 sukces z dnia na dzie\u0144.",
            "1890",
            None,
-            "pl"
-        ],
-        [
-            "Tendr\u00e1s la vista de la cima de la monta\u00f1a que subes",
-            "101",
-            None,
-            "sp"
-        ],
-        [
-            "Cuando no te arriesgas, lo arriesgas todo.",
-            "5922",
-            None,
-            "sp"
+            "pl",
        ],
+        ["Tendr\u00e1s la vista de la cima de la monta\u00f1a que subes", "101", None, "sp"],
+        ["Cuando no te arriesgas, lo arriesgas todo.", "5922", None, "sp"],
        [
            "Son necesarios demasiados a\u00f1os de trabajo para triunfar de la noche a la ma\u00f1ana.",
            "10246",
            None,
-            "sp"
-        ]
+            "sp",
+        ],
    ],
    # Enable the weighted sampler
    use_weighted_sampler=True,
@ -399,10 +304,10 @@ config = VitsConfig(
    weighted_sampler_attrs={"language": 1.0},
    weighted_sampler_multipliers={
        # "speaker_name": {
-            # you can force the batching scheme to give a higher weight to a certain speaker and then this speaker will appears more frequently on the batch.
-            # It will speedup the speaker adaptation process. Considering the CML train dataset and "new_speaker" as the speaker name of the speaker that you want to adapt.
-            # The line above will make the balancer consider the "new_speaker" as 106 speakers so 1/4 of the number of speakers present on CML dataset.
-            # 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
+        # You can force the batching scheme to give a higher weight to a certain speaker so that this speaker appears more frequently in the batch.
+        # This speeds up the speaker adaptation process. Consider the CML train dataset and "new_speaker" as the name of the speaker that you want to adapt.
+        # The line below makes the balancer treat "new_speaker" as 106 speakers, i.e. 1/4 of the number of speakers present in the CML dataset.
+        # 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
        # }
    },
    # It defines the Speaker Consistency Loss (SCL) α to 9 like the YourTTS paper
@ -414,7 +319,7 @@ train_samples, eval_samples = load_tts_samples(
    config.datasets,
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
-    eval_split_size=config.eval_split_size
+    eval_split_size=config.eval_split_size,
 )

 # Init the model
--- a/tests/zoo_tests/test_models.py
+++ b/tests/zoo_tests/test_models.py
@ -10,12 +10,15 @@ from TTS.utils.generic_utils import get_user_data_dir
 from TTS.utils.manage import ModelManager


+MODELS_WITH_SEP_TESTS = ["bark", "xtts"]
+
+
 def run_models(offset=0, step=1):
    """Check if all the models are downloadable and tts models run correctly."""
    print(" > Run synthesizer with all the models.")
    output_path = os.path.join(get_tests_output_path(), "output.wav")
    manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False)
-    model_names = [name for name in manager.list_models() if "bark" not in name]
+    model_names = [name for name in manager.list_models() if name in MODELS_WITH_SEP_TESTS]
    for model_name in model_names[offset::step]:
        print(f"\n > Run - {model_name}")
        model_path, _, _ = manager.download_model(model_name)
@ -63,20 +66,15 @@ def run_models(offset=0, step=1):
            manager.download_model(model_name)
        print(f" | > OK: {model_name}")

-    # folders = glob.glob(os.path.join(manager.output_prefix, "*"))
-    # assert len(folders) == len(model_names) // step

-
-def test_models_offset_0_step_3():
-    run_models(offset=0, step=3)
-
-
-def test_models_offset_1_step_3():
-    run_models(offset=1, step=3)
-
-
-def test_models_offset_2_step_3():
-    run_models(offset=2, step=3)
+def test_xtts():
+    """Run the `tts` CLI once with the xtts_v1 model, a reference speaker wav and English as the language."""
+    wav_out = os.path.join(get_tests_output_path(), "output.wav")
+    ref_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
+    # `yes |` streams "y" lines into stdin; presumably this answers the terms-of-service prompt - confirm.
+    model_args = "tts --model_name  tts_models/multilingual/multi-dataset/xtts_v1 "
+    text_args = f'--text "This is an example." --out_path "{wav_out}" --progress_bar False --use_cuda True '
+    run_cli("yes | " + model_args + text_args + f'--speaker_wav "{ref_wav}" --language_idx "en"')


 def test_bark():
@ -84,7 +82,7 @@ def test_bark():
    output_path = os.path.join(get_tests_output_path(), "output.wav")
    run_cli(
        f" tts --model_name  tts_models/multilingual/multi-dataset/bark "
-        f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
+        f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True'
    )


@ -99,3 +97,17 @@ def test_voice_conversion():
        f"tts --model_name  {model_name}"
        f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --progress_bar False"
    )
+
+"""
+These tests split the model runs into separate actions on GitHub.
+"""
+def test_models_offset_0_step_3():
+    run_models(0, 3)  # shard 0 of 3: run_models slices the model list as [0::3]
+
+
+def test_models_offset_1_step_3():
+    run_models(1, 3)  # shard 1 of 3: run_models slices the model list as [1::3]
+
+
+def test_models_offset_2_step_3():
+    run_models(2, 3)  # shard 2 of 3: run_models slices the model list as [2::3]