Update model test configs

pull/506/head
Eren Gölge 2021-06-18 13:24:48 +02:00
parent 98298ee671
commit fcfd95669a
9 changed files with 23 additions and 23 deletions

View File

@ -123,7 +123,7 @@
"text_cleaner": "english_cleaners",
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"batch_group_size": 0, //Number of batches to shuffle after bucketing.
"min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 300, // DATASET-RELATED: maximum text length
@ -140,8 +140,8 @@
// MULTI-SPEAKER and GST
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
"use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
"external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
"use_d_vector_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
"d_vector_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_d_vector_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
// DATASETS

View File

@ -115,7 +115,7 @@
"text_cleaner": "phoneme_cleaners",
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"batch_group_size": 0, //Number of batches to shuffle after bucketing.
"min_seq_len": 3, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 500, // DATASET-RELATED: maximum text length
@ -132,8 +132,8 @@
"phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
// MULTI-SPEAKER and GST
"use_external_speaker_embedding_file": false,
"external_speaker_embedding_file": null,
"use_d_vector_file": false,
"d_vector_file": null,
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
// DATASETS

View File

@ -120,7 +120,7 @@
"text_cleaner": "english_cleaners",
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"batch_group_size": 0, //Number of batches to shuffle after bucketing.
"min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 300, // DATASET-RELATED: maximum text length
@ -137,8 +137,8 @@
// MULTI-SPEAKER and GST
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
"use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
"external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
"use_d_vector_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
"d_vector_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_d_vector_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
// DATASETS

View File

@ -130,7 +130,7 @@
"text_cleaner": "phoneme_cleaners",
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"batch_group_size": 0, //Number of batches to shuffle after bucketing.
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 153, // DATASET-RELATED: maximum text length
@ -145,8 +145,8 @@
"phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
// MULTI-SPEAKER and GST
"use_external_speaker_embedding_file": false,
"external_speaker_embedding_file": null,
"use_d_vector_file": false,
"d_vector_file": null,
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
"use_gst": true, // use global style tokens
"gst": { // gst parameter if gst is enabled

View File

@ -130,7 +130,7 @@
"text_cleaner": "phoneme_cleaners",
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"batch_group_size": 0, //Number of batches to shuffle after bucketing.
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 153, // DATASET-RELATED: maximum text length
@ -145,8 +145,8 @@
"phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
// MULTI-SPEAKER and GST
"use_external_speaker_embedding_file": false,
"external_speaker_embedding_file": null,
"use_d_vector_file": false,
"d_vector_file": null,
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
"use_gst": true, // use global style tokens
"gst": { // gst parameter if gst is enabled

View File

@ -130,7 +130,7 @@
"text_cleaner": "phoneme_cleaners",
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"batch_group_size": 0, //Number of batches to shuffle after bucketing.
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 153, // DATASET-RELATED: maximum text length
@ -145,8 +145,8 @@
"phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
// MULTI-SPEAKER and GST
"use_external_speaker_embedding_file": false,
"external_speaker_embedding_file": null,
"use_d_vector_file": false,
"d_vector_file": null,
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
"use_gst": true, // use global style tokens
"gst": { // gst parameter if gst is enabled

View File

@ -157,7 +157,7 @@
// DATA LOADING
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"eval_split_size": 10,
// PATHS

View File

@ -88,7 +88,7 @@
// OPTIMIZER
"epochs": 1, // total number of epochs to train.
"clip_grad": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0
"grad_clip": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0
"lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
"lr_scheduler_params": {
"gamma": 0.5,
@ -107,7 +107,7 @@
// DATA LOADING
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"eval_split_size": 4,
// PATHS

View File

@ -55,7 +55,7 @@
"padding": 2, // pad the input for resnet to see wider input length
// GENERATOR - for backward compatibility
"generator_model": "WaveRNN",
"generator_model": "Wavernn",
// DATASET
//"use_gta": true, // use computed gta features from the tts model
@ -103,7 +103,7 @@
// DATA LOADING
"num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 0, // number of evaluation data loader processes.
"num_eval_loader_workers": 0, // number of evaluation data loader processes.
"eval_split_size": 10, // number of samples for testing
// PATHS