diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index e46e4a00..014ba4e8 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -37,8 +37,8 @@ def setup_loader(ap, r, verbose=False): enable_eos_bos=c.enable_eos_bos_chars, use_noise_augment=False, verbose=verbose, - speaker_id_mapping=speaker_manager.speaker_ids, - d_vector_mapping=speaker_manager.d_vectors if c.use_speaker_embedding and c.use_d_vector_file else None, + speaker_id_mapping=speaker_manager.speaker_ids if c.use_speaker_embedding else None, + d_vector_mapping=speaker_manager.d_vectors if c.use_d_vector_file else None, ) if c.use_phonemes and c.compute_input_seq_cache: @@ -235,13 +235,14 @@ def main(args): # pylint: disable=redefined-outer-name meta_data = meta_data_train + meta_data_eval # init speaker manager - if config.use_speaker_embedding: + if c.use_speaker_embedding: speaker_manager = SpeakerManager(data_items=meta_data) - elif config.use_d_vector_file: + elif c.use_d_vector_file: speaker_manager = SpeakerManager(d_vectors_file_path=c.d_vector_file) else: speaker_manager = None + # setup model model = setup_model(c) diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py index 1a9faf02..e28e9dec 100644 --- a/TTS/bin/train_tts.py +++ b/TTS/bin/train_tts.py @@ -64,7 +64,7 @@ def main(): train_samples=train_samples, eval_samples=eval_samples, training_assets={"audio_processor": ap}, - parse_command_line_args=True, + parse_command_line_args=False, ) trainer.fit()