load_meta_data changes

pull/10/head
Eren Golge 2019-09-30 15:03:18 +02:00
parent 1fad04e317
commit 64a01f584b
2 changed files with 14 additions and 17 deletions


@@ -65,10 +65,6 @@
     "run_eval": true,
     "test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
     "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
-    "data_path": "/home/erogol/Data/LJSpeech-1.1/", // DATASET-RELATED: can overwritten from command argument
-    "meta_file_train": "metadata_train.csv", // DATASET-RELATED: metafile for training dataloader.
-    "meta_file_val": "metadata_val.csv", // DATASET-RELATED: metafile for evaluation dataloader.
-    "dataset": "ljspeech", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
     "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 150, // DATASET-RELATED: maximum text length
     "output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
@@ -80,6 +76,17 @@
     "text_cleaner": "phoneme_cleaners",
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
     "style_wav_for_test": null, // path to style wav file to be used in TacotronGST inference.
-    "use_gst": false // TACOTRON ONLY: use global style tokens
+    "use_gst": false, // TACOTRON ONLY: use global style tokens
+    "datasets": // List of datasets. They are all merged and each one is assigned a different speaker_id.
+        [
+            {
+                "name": "ljspeech",
+                "path": "/home/erogol/Data/LJSpeech-1.1/",
+                "meta_file_train": "metadata_train.csv",
+                "meta_file_val": "metadata_val.csv"
+            }
+        ]
 }
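For illustration only (not part of this commit): a second entry could be appended to the new "datasets" list to train on several corpora at once, each receiving its own speaker_id. Everything in the second entry below is hypothetical; its "name" would have to match one of TTS.dataset.preprocessors.

    "datasets":
        [
            {
                "name": "ljspeech",
                "path": "/home/erogol/Data/LJSpeech-1.1/",
                "meta_file_train": "metadata_train.csv",
                "meta_file_val": "metadata_val.csv"
            },
            {
                "name": "my_second_dataset",                // hypothetical; must match a TTS.dataset.preprocessors name
                "path": "/home/erogol/Data/SecondDataset/", // hypothetical path
                "meta_file_train": "metadata_train.csv",    // hypothetical metafile
                "meta_file_val": "metadata_val.csv"
            }
        ]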


@@ -28,7 +28,7 @@ from TTS.utils.speakers import load_speaker_mapping, save_speaker_mapping, \
 from TTS.utils.synthesis import synthesis
 from TTS.utils.text.symbols import phonemes, symbols
 from TTS.utils.visual import plot_alignment, plot_spectrogram
-from TTS.datasets.preprocess import get_preprocessor_by_name
+from TTS.datasets.preprocess import load_meta_data
 from TTS.utils.radam import RAdam
 from TTS.utils.measures import alignment_diagonal_score
@@ -46,17 +46,7 @@ def setup_loader(ap, is_val=False, verbose=False):
     global meta_data_train
     global meta_data_eval
     if "meta_data_train" not in globals():
-        if c.meta_file_train is not None:
-            meta_data_train = get_preprocessor_by_name(
-                c.dataset)(c.data_path, c.meta_file_train)
-        else:
-            meta_data_train = get_preprocessor_by_name(c.dataset)(c.data_path)
-    if "meta_data_eval" not in globals() and c.run_eval:
-        if c.meta_file_val is not None:
-            meta_data_eval = get_preprocessor_by_name(
-                c.dataset)(c.data_path, c.meta_file_val)
-        else:
-            meta_data_eval, meta_data_train = split_dataset(meta_data_train)
+        meta_data_train, meta_data_eval = load_meta_data(c.datasets)
     if is_val and not c.run_eval:
         loader = None
     else:
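The body of load_meta_data is not shown in this diff. A minimal sketch of what it plausibly does, assuming it simply loops over c.datasets and dispatches to the same per-dataset preprocessors the removed block called directly (only the names taken from this diff are real; the rest is an assumption):

from TTS.datasets.preprocess import get_preprocessor_by_name


def load_meta_data(datasets):
    # Collect metadata items from every entry in c.datasets and concatenate
    # them, so all configured corpora are used together during training.
    meta_data_train_all = []
    meta_data_eval_all = []
    for dataset in datasets:
        preprocessor = get_preprocessor_by_name(dataset["name"])
        meta_data_train_all += preprocessor(dataset["path"],
                                            dataset["meta_file_train"])
        # Only build an eval set for datasets that ship a separate val metafile.
        if dataset.get("meta_file_val") is not None:
            meta_data_eval_all += preprocessor(dataset["path"],
                                               dataset["meta_file_val"])
    return meta_data_train_all, meta_data_eval_all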