clearners update for special chars and conifg update

pull/10/head
erogol 2020-02-18 12:26:52 +01:00
parent 79a3684370
commit f707460886
2 changed files with 8 additions and 5 deletions

View File

@ -39,7 +39,7 @@
"batch_size": 2, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
"eval_batch_size":16,
"r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
"gradual_training": [[0, 7, 64], [1, 5, 64], [50000, 3, 32], [130000, 2, 32], [290000, 1, 32]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed.
"gradual_training": [[0, 7, 64], [2000, 5, 64], [35000, 3, 32], [70000, 2, 32], [140000, 1, 32]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed.
"loss_masking": true, // enable / disable loss masking against the sequence padding.
"grad_accum": 2, // if N > 1, enable gradient accumulation for N iterations. It is useful for low memory GPUs.
@ -93,10 +93,10 @@
"max_seq_len": 150, // DATASET-RELATED: maximum text length
// PATHS
"output_path": "/data4/rw/home/Trainings/",
"output_path": "/home/erogol/Models/",
// PHONEMES
"phoneme_cache_path": "mozilla_us_phonemes", // phoneme computation is slow, therefore, it caches results in the given folder.
"phoneme_cache_path": "mozilla_us_phonemes_2_1", // phoneme computation is slow, therefore, it caches results in the given folder.
"use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation.
"phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
@ -110,7 +110,7 @@
[
{
"name": "ljspeech",
"path": "/root/LJSpeech-1.1/",
"path": "/home/erogol/Data/LJSpeech-1.1/",
"meta_file_train": "metadata.csv",
"meta_file_val": null
}

View File

@ -85,7 +85,10 @@
" if use_cuda:\n",
" waveform = waveform.cpu()\n",
" waveform = waveform.numpy()\n",
" print(\" > Run-time: {}\".format(time.time() - t_1))\n",
" rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n",
" print(waveform.shape)\n",
" print(\" > Run-time: {}\".format(time.time() - t_1))\n",
" print(\" > Real-time factor: {}\".format(rtf))\n",
" if figures: \n",
" visualize(alignment, mel_postnet_spec, stop_tokens, text, ap.hop_length, CONFIG, ap._denormalize(mel_spec)) \n",
" IPython.display.display(Audio(waveform, rate=CONFIG.audio['sample_rate'], normalize=False)) \n",