mirror of https://github.com/coqui-ai/TTS.git
load_meta_data changes
parent
1fad04e317
commit
64a01f584b
17
config.json
17
config.json
|
@ -65,10 +65,6 @@
|
|||
"run_eval": true,
|
||||
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
||||
"test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
|
||||
"data_path": "/home/erogol/Data/LJSpeech-1.1/", // DATASET-RELATED: can be overwritten from command argument
|
||||
"meta_file_train": "metadata_train.csv", // DATASET-RELATED: metafile for training dataloader.
|
||||
"meta_file_val": "metadata_val.csv", // DATASET-RELATED: metafile for evaluation dataloader.
|
||||
"dataset": "ljspeech", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
|
||||
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
|
||||
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
||||
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
|
||||
|
@ -80,6 +76,17 @@
|
|||
"text_cleaner": "phoneme_cleaners",
|
||||
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
|
||||
"style_wav_for_test": null, // path to style wav file to be used in TacotronGST inference.
|
||||
"use_gst": false // TACOTRON ONLY: use global style tokens
|
||||
"use_gst": false, // TACOTRON ONLY: use global style tokens
|
||||
|
||||
"datasets": // List of datasets. They are all merged and get different speaker_ids.
|
||||
[
|
||||
{
|
||||
"name": "ljspeech",
|
||||
"path": "/home/erogol/Data/LJSpeech-1.1/",
|
||||
"meta_file_train": "metadata_train.csv",
|
||||
"meta_file_val": "metadata_val.csv"
|
||||
}
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
|
|
14
train.py
14
train.py
|
@ -28,7 +28,7 @@ from TTS.utils.speakers import load_speaker_mapping, save_speaker_mapping, \
|
|||
from TTS.utils.synthesis import synthesis
|
||||
from TTS.utils.text.symbols import phonemes, symbols
|
||||
from TTS.utils.visual import plot_alignment, plot_spectrogram
|
||||
from TTS.datasets.preprocess import get_preprocessor_by_name
|
||||
from TTS.datasets.preprocess import load_meta_data
|
||||
from TTS.utils.radam import RAdam
|
||||
from TTS.utils.measures import alignment_diagonal_score
|
||||
|
||||
|
@ -46,17 +46,7 @@ def setup_loader(ap, is_val=False, verbose=False):
|
|||
global meta_data_train
|
||||
global meta_data_eval
|
||||
if "meta_data_train" not in globals():
|
||||
if c.meta_file_train is not None:
|
||||
meta_data_train = get_preprocessor_by_name(
|
||||
c.dataset)(c.data_path, c.meta_file_train)
|
||||
else:
|
||||
meta_data_train = get_preprocessor_by_name(c.dataset)(c.data_path)
|
||||
if "meta_data_eval" not in globals() and c.run_eval:
|
||||
if c.meta_file_val is not None:
|
||||
meta_data_eval = get_preprocessor_by_name(
|
||||
c.dataset)(c.data_path, c.meta_file_val)
|
||||
else:
|
||||
meta_data_eval, meta_data_train = split_dataset(meta_data_train)
|
||||
meta_data_train, meta_data_eval = load_meta_data(c.datasets)
|
||||
if is_val and not c.run_eval:
|
||||
loader = None
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue