From 1496f271dc23daaa6be9a4bf100f9e52d9bd7921 Mon Sep 17 00:00:00 2001 From: Edresson Date: Thu, 27 May 2021 00:45:18 -0300 Subject: [PATCH] update Compute embeddings script --- TTS/bin/compute_embeddings.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 410086de..dce9ea83 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -10,7 +10,7 @@ from TTS.speaker_encoder.utils.generic_utils import setup_model from TTS.tts.datasets.preprocess import load_meta_data from TTS.tts.utils.speakers import save_speaker_mapping from TTS.utils.audio import AudioProcessor -from TTS.utils.io import load_config +from TTS.config import load_config, BaseDatasetConfig parser = argparse.ArgumentParser( description='Compute embedding vectors for each wav file in a dataset. If "target_dataset" is defined, it generates "speakers.json" necessary for training a multi-speaker model.' @@ -44,7 +44,7 @@ sep = args.separator if args.target_dataset != "": # if target dataset is defined dataset_config = [ - {"name": args.target_dataset, "path": args.data_path, "meta_file_train": None, "meta_file_val": None}, + BaseDatasetConfig(name=args.target_dataset, path=args.data_path, meta_file_train=None, meta_file_val=None), ] wav_files, _ = load_meta_data(dataset_config, eval_split=False) output_files = [wav_file[1].replace(data_path, args.output_path).replace(".wav", ".npy") for wav_file in wav_files] @@ -106,6 +106,7 @@ for idx, wav_file in enumerate(tqdm(wav_files)): speaker_mapping[wav_file_name]["embedding"] = embedd.flatten().tolist() if args.target_dataset != "": - # save speaker_mapping if target dataset is defined - mapping_file_path = os.path.join(args.output_path, "speakers.json") - save_speaker_mapping(args.output_path, speaker_mapping) + if speaker_mapping: + # save speaker_mapping if target dataset is defined + mapping_file_path = os.path.join(args.output_path, "speakers.json") + save_speaker_mapping(args.output_path, speaker_mapping)