diff --git a/.gitignore b/.gitignore
index 1829dd93..1d3ab8c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,4 +132,3 @@ notebooks/data/*
 TTS/tts/layers/glow_tts/monotonic_align/core.c
 .vscode-upload.json
 temp_build/*
-recipes/*
diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py
index 91213012..d3a54269 100644
--- a/TTS/tts/configs/tacotron_config.py
+++ b/TTS/tts/configs/tacotron_config.py
@@ -169,7 +169,8 @@ class TacotronConfig(BaseTTSConfig):
     postnet_ssim_alpha: float = 0.25
     ga_alpha: float = 5.0
 
-
     def check_values(self):
         if self.gradual_training:
-            assert self.gradual_training[0][1] == self.r, f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
\ No newline at end of file
+            assert (
+                self.gradual_training[0][1] == self.r
+            ), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
diff --git a/recipes/README.md b/recipes/README.md
new file mode 100644
index 00000000..041693a2
--- /dev/null
+++ b/recipes/README.md
@@ -0,0 +1,13 @@
+# πŸΈπŸ’¬ TTS Training Recipes
+
+TTS recipes are intended to host bash scripts that run all the necessary steps to train a TTS model on a particular dataset.
+
+Run each script from the root TTS folder as follows:
+
+```console
+$ bash ./recipes/<dataset>/<model>/run.sh
+```
+
+All the outputs are kept under the recipe directory unless you change the paths in the bash script.
+
+If you train a new model using TTS, feel free to share your training recipe to expand the list.
\ No newline at end of file
diff --git a/recipes/ljspeech/tacotron/run.sh b/recipes/ljspeech/tacotron2-DDC/run.sh
similarity index 78%
rename from recipes/ljspeech/tacotron/run.sh
rename to recipes/ljspeech/tacotron2-DDC/run.sh
index 9f5435db..eaa05b60 100644
--- a/recipes/ljspeech/tacotron/run.sh
+++ b/recipes/ljspeech/tacotron2-DDC/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# take the script's parent directory to prefix all the output paths.
 RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 echo $RUN_DIR
 # download LJSpeech dataset
@@ -12,10 +13,10 @@ tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv
 mv LJSpeech-1.1 $RUN_DIR/
 rm LJSpeech-1.1.tar.bz2
 # compute dataset mean and variance for normalization
-python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DCA.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/LJSpeech-1.1/wavs/
+python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DDC.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/LJSpeech-1.1/wavs/
 # training ....
 # change the GPU id if needed
 CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tacotron.py --config_path $RUN_DIR/tacotron2-DDC.json \
-    --output_path $RUN_DIR \
+    --coqpit.output_path $RUN_DIR \
     --coqpit.datasets.0.path $RUN_DIR/LJSpeech-1.1/ \
     --coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \
\ No newline at end of file
diff --git a/recipes/ljspeech/tacotron/tacotron2-DDC.json b/recipes/ljspeech/tacotron2-DDC/tacotron2-DDC.json
similarity index 95%
rename from recipes/ljspeech/tacotron/tacotron2-DDC.json
rename to recipes/ljspeech/tacotron2-DDC/tacotron2-DDC.json
index 0e290405..9cdbbd3b 100644
--- a/recipes/ljspeech/tacotron/tacotron2-DDC.json
+++ b/recipes/ljspeech/tacotron2-DDC/tacotron2-DDC.json
@@ -37,11 +37,10 @@
         "gst_num_style_tokens": 10
     },
     "model": "Tacotron2",
-    "run_name": "ljspeech-dcattn",
-    "run_description": "tacotron2 with dynamic convolution attention.",
+    "run_name": "ljspeech-ddc",
+    "run_description": "tacotron2 with double decoder consistency.",
     "batch_size": 64,
     "eval_batch_size": 16,
-    "r": 2,
     "mixed_precision": true,
     "loss_masking": true,
     "decoder_loss_alpha": 0.25,
@@ -69,6 +68,7 @@
     "double_decoder_consistency": true,
     "ddc_r": 6,
     "attention_norm": "sigmoid",
+    "r": 6,
    "gradual_training": [[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]],
     "stopnet": true,
     "separate_stopnet": true,
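
For reference, the assertion reformatted in `TTS/tts/configs/tacotron_config.py` is also the reason `"r": 6` is added to the recipe JSON: `check_values` requires the model-level `r` (the decoder reduction factor, i.e. frames predicted per decoder step) to equal the `r` of the first `gradual_training` entry `[first_step, r, batch_size]`. A minimal standalone sketch of that check, using the values from `tacotron2-DDC.json` above (not the library code itself):

```python
# Standalone illustration of the rule enforced by TacotronConfig.check_values:
# each gradual_training entry is [first_step, r, batch_size], and the first
# entry's r must match the model-level r. With the old "r": 2 this assert
# would fail, which is why the recipe config now sets "r": 6.
gradual_training = [[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]]
r = 6

assert (
    gradual_training[0][1] == r
), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {gradual_training[0][1]} vs {r}"
```

With the rename, the recipe is launched from the repository root as `bash ./recipes/ljspeech/tacotron2-DDC/run.sh`, following the pattern described in `recipes/README.md`.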