mirror of https://github.com/coqui-ai/TTS.git
Plot specs and alignments for test sentences
parent 766e88700d
commit c72f309068

.compute (5 changes)
@@ -1,3 +1,6 @@
 #!/bin/bash
-source ../tmp/venv/bin/activate
+# source ../tmp/venv/bin/activate
+# ls /snakepit/jobs/650/keep/
+source /snakepit/jobs/650/keep/venv/bin/activate
+# source /snakepit/jobs/560/tmp/venv/bin/activate
 python train.py --config_path config.json --debug true

.install (6 changes)
@@ -1,4 +1,4 @@
 #!/bin/bash
-virtualenv -p python3 ../tmp/venv
-source ../tmp/venv/bin/activate
-python setup.py develop
+# virtualenv -p python3 ../tmp/venv
+# source ../tmp/venv/bin/activate
+# python setup.py develop

config.json (4 changes)
@@ -23,10 +23,10 @@
     "griffin_lim_iters": 60,
     "power": 1.5,

-    "num_loader_workers": 4,
+    "num_loader_workers": 14,

     "checkpoint": true,
-    "save_step": 376,
+    "save_step": 750,
     "print_step": 10,
     "run_eval": false,
     "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",

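For context, train.py consumes these values through attribute access (c.sample_rate, c.text_cleaner in the hunks below), so the config is loaded into an attribute-style object. A minimal sketch of such a loader, assuming only the config.json shown above; the AttrDict helper is illustrative, not the repo's actual loader:

    import json

    class AttrDict(dict):
        # Expose JSON keys as attributes so call sites can write c.save_step.
        def __getattr__(self, name):
            try:
                return self[name]
            except KeyError:
                raise AttributeError(name)

    with open("config.json") as f:
        c = AttrDict(json.load(f))

    assert c.num_loader_workers == 14  # raised from 4 by this commit
    assert c.save_step == 750          # raised from 376 by this commit
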
train.py (10 changes)
@@ -302,14 +302,20 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
     # test sentences
     ap.griffin_lim_iters = 60
     for idx, test_sentence in enumerate(test_sentences):
-        wav = synthesis(model, ap, test_sentence, use_cuda,
-                        c.text_cleaner)
+        wav, linear_out, alignments = synthesis(model, ap, test_sentence, use_cuda,
+                                                c.text_cleaner)
         try:
             wav_name = 'TestSentences/{}'.format(idx)
             tb.add_audio(wav_name, wav, current_step,
                          sample_rate=c.sample_rate)
         except:
             pass
+        align_img = alignments[0].data.cpu().numpy()
+        linear_spec = linear_output[0].data.cpu().numpy()
+        linear_spec = plot_spectrogram(linear_spec, ap)
+        align_img = plot_alignment(align_img)
+        tb.add_image('TestSentences/{}_GroundTruth'.format(idx), gt_spec, current_step)
+        tb.add_image('TestSentences/{}_Alignment'.format(idx), align_img, current_step)
     return avg_linear_loss
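Note that as committed, the added plotting lines read linear_output and gt_spec, which must come from earlier in evaluate (presumably the last validation batch), while the synthesized sentence's own spectrogram is the returned linear_out. A hedged sketch of the per-sentence logging this hunk is after, using only names from the diff plus a tensorboardX SummaryWriter as tb; the _Spectrogram tag is an illustrative renaming, not the commit's:

    # Sketch only: assumes plot_spectrogram/plot_alignment are the repo's
    # plotting helpers and tb is a tensorboardX SummaryWriter.
    for idx, test_sentence in enumerate(test_sentences):
        wav, linear_out, alignments = synthesis(model, ap, test_sentence, use_cuda,
                                                c.text_cleaner)
        try:
            tb.add_audio('TestSentences/{}'.format(idx), wav, current_step,
                         sample_rate=c.sample_rate)
        except Exception:
            pass  # bad audio (e.g. NaNs early in training) should not abort eval
        # Plot the synthesized sentence's own outputs rather than the last
        # validation batch's linear_output/gt_spec.
        align_img = plot_alignment(alignments[0].data.cpu().numpy())
        linear_spec = plot_spectrogram(linear_out, ap)
        tb.add_image('TestSentences/{}_Spectrogram'.format(idx), linear_spec, current_step)
        tb.add_image('TestSentences/{}_Alignment'.format(idx), align_img, current_step)
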
@@ -166,7 +166,7 @@ def synthesis(model, ap, text, use_cuda, text_cleaner):
     chars_var = torch.from_numpy(seq).unsqueeze(0)
     if use_cuda:
         chars_var = chars_var.cuda().long()
-    _, linear_out, _, _ = model.forward(chars_var)
+    _, linear_out, alignments, _ = model.forward(chars_var)
     linear_out = linear_out[0].data.cpu().numpy()
     wav = ap.inv_spectrogram(linear_out.T)
-    return wav
+    return wav, linear_out, alignments
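With the widened return signature, a caller gets the waveform plus the raw spectrogram and attention weights for plotting. A minimal usage sketch; a trained model, an AudioProcessor ap, and the loaded config c are assumed:

    # Hedged sketch of calling the updated synthesis().
    wav, linear_out, alignments = synthesis(model, ap, "Hello world.", True,
                                            c.text_cleaner)
    # wav        -> 1-D waveform, Griffin-Lim inversion of the linear spectrogram
    # linear_out -> numpy spectrogram (already moved off the GPU inside synthesis)
    # alignments -> attention matrices; alignments[0] is the first batch item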