Fix XTTS v2.0 training recipe (#3154)

* Fix XTTS v2.0 training recipe
* Update XTTS v2 model hash
2023-11-07 10:16:44 -03:00 · 2023-11-07 10:16:44 -03:00 · cbdbc44e0f
parent 5e992d8704
commit cbdbc44e0f
2 changed files with 8 additions and 9 deletions
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -10,7 +10,7 @@
                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json",
                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/hash.md5"
                    ],
-                    "model_hash": "ae9e4b39e095fd5728fe7f7931eccoqui",
+                    "model_hash": "6a09d1ad43896f06041ed8195956c9698f13b6189dc80f1c74bdc2b8e8d15324",
                    "default_vocoder": null,
                    "commit": "480a6cdf7",
                    "license": "CPML",
--- a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py
+++ b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py
@@ -40,14 +40,13 @@ CHECKPOINTS_OUT_PATH = os.path.join(OUT_PATH, "XTTS_v2.0_original_model_files/")
 os.makedirs(CHECKPOINTS_OUT_PATH, exist_ok=True)


-# ToDo: update DVAE checkpoint
 # DVAE files
-DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/dvae.pth"
-MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/mel_stats.pth"
+DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/dvae.pth"
+MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/mel_stats.pth"

 # Set the path to the downloaded files
-DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, DVAE_CHECKPOINT_LINK.split("/")[-1])
-MEL_NORM_FILE = os.path.join(CHECKPOINTS_OUT_PATH, MEL_NORM_LINK.split("/")[-1])
+DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(DVAE_CHECKPOINT_LINK))
+MEL_NORM_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(MEL_NORM_LINK))

 # download DVAE files if needed
 if not os.path.isfile(DVAE_CHECKPOINT) or not os.path.isfile(MEL_NORM_FILE):
@@ -90,9 +89,9 @@ def main():
        dvae_checkpoint=DVAE_CHECKPOINT,
        xtts_checkpoint=XTTS_CHECKPOINT,  # checkpoint path of the model that you want to fine-tune
        tokenizer_file=TOKENIZER_FILE,
-        gpt_num_audio_tokens=1024,
-        gpt_start_audio_token=1025,
-        gpt_stop_audio_token=1026,
+        gpt_num_audio_tokens=1026,
+        gpt_start_audio_token=1024,
+        gpt_stop_audio_token=1025,
        gpt_use_masking_gt_prompt_approach=True,
        gpt_use_perceiver_resampler=True,
    )