stylewav for testing inference

pull/10/head
Thomas Werkmeister 2019-07-24 12:17:08 +02:00
parent b1657d70b1
commit 4a23354d3c
2 changed files with 5 additions and 2 deletions

View File

@ -77,6 +77,7 @@
"use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronunciation.
"phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
"text_cleaner": "phoneme_cleaners",
"use_speaker_embedding": false // whether to use additional embeddings for separate speakers
"use_speaker_embedding": false, // whether to use additional embeddings for separate speakers
"style_wav_for_test": null // path to wav for styling the inference tests when using GST
}

View File

@ -409,11 +409,13 @@ def evaluate(model, criterion, criterion_st, ap, current_step, epoch):
test_figures = {}
print(" | > Synthesizing test sentences")
speaker_id = 0 if c.use_speaker_embedding else None
style_wav = c.get("style_wav_for_test")
for idx, test_sentence in enumerate(test_sentences):
try:
wav, alignment, decoder_output, postnet_output, stop_tokens = synthesis(
model, test_sentence, c, use_cuda, ap,
speaker_id=speaker_id)
speaker_id=speaker_id,
style_wav=style_wav)
file_path = os.path.join(AUDIO_PATH, str(current_step))
os.makedirs(file_path, exist_ok=True)
file_path = os.path.join(file_path,