visual updates for phonemes

pull/10/head
Eren Golge 2019-02-25 17:20:05 +01:00
parent a60c9ee47d
commit caae1af4f6
4 changed files with 17 additions and 13 deletions

View File

@ -182,4 +182,5 @@ def sequence_mask(sequence_length, max_len=None):
seq_range_expand = seq_range_expand.cuda()
seq_length_expand = (sequence_length.unsqueeze(1)
.expand_as(seq_range_expand))
# B x T_max
return seq_range_expand < seq_length_expand

View File

@ -3,7 +3,7 @@ import time
import librosa
import torch
import numpy as np
from .text import text_to_sequence, phoneme_to_sequence
from .text import text_to_sequence, phoneme_to_sequence, sequence_to_phoneme
from .visual import visualize
from matplotlib import pylab as plt
@ -11,8 +11,6 @@ from matplotlib import pylab as plt
def synthesis(m, s, CONFIG, use_cuda, ap):
""" Given the text, synthesising the audio """
text_cleaner = [CONFIG.text_cleaner]
# print(phoneme_to_sequence(s, text_cleaner))s
# print(sequence_to_phoneme(phoneme_to_sequence(s, text_cleaner)))
if CONFIG.use_phonemes:
seq = np.asarray(
phoneme_to_sequence(s, text_cleaner, CONFIG.phoneme_language),

View File

@ -52,7 +52,7 @@ def phoneme_to_sequence(text, cleaner_names, language):
for phoneme in phonemes.split('|'):
# print(word, ' -- ', phonemes_text)
sequence += _phoneme_to_sequence(phoneme)
# Aeepnd EOS char
# Append EOS char
sequence.append(_phonemes_to_id['~'])
return sequence

View File

@ -3,6 +3,7 @@ import librosa
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from utils.text import phoneme_to_sequence, sequence_to_phoneme
def plot_alignment(alignment, info=None):
@ -29,19 +30,22 @@ def plot_spectrogram(linear_output, audio):
return fig
def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spectrogram2=None):
if spectrogram2 is not None:
def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CONFIG, spectrogram=None):
if spectrogram is not None:
num_plot = 4
else:
num_plot = 3
label_fontsize = 16
plt.figure(figsize=(16, 32))
plt.figure(figsize=(16, 48))
plt.subplot(num_plot, 1, 1)
plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
if CONFIG.use_phonemes:
seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language)
text = sequence_to_phoneme(seq)
plt.yticks(range(len(text)), list(text))
plt.colorbar()
@ -50,17 +54,18 @@ def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spe
plt.plot(range(len(stop_tokens)), list(stop_tokens))
plt.subplot(num_plot, 1, 3)
librosa.display.specshow(spectrogram_postnet.T, sr=CONFIG.audio['sample_rate'],
hop_length=hop_length, x_axis="time", y_axis="linear")
plt.xlabel("Time", fontsize=label_fontsize)
plt.ylabel("Hz", fontsize=label_fontsize)
plt.tight_layout()
plt.colorbar()
if spectrogram is not None:
plt.subplot(num_plot, 1, 4)
librosa.display.specshow(spectrogram.T, sr=CONFIG.audio['sample_rate'],
hop_length=hop_length, x_axis="time", y_axis="linear")
plt.xlabel("Time", fontsize=label_fontsize)
plt.ylabel("Hz", fontsize=label_fontsize)
if spectrogram2 is not None:
plt.subplot(num_plot, 1, 4)
librosa.display.specshow(spectrogram2.T, sr=CONFIG.audio['sample_rate'],
hop_length=hop_length, x_axis="time", y_axis="linear")
plt.xlabel("Time", fontsize=label_fontsize)
plt.ylabel("Hz", fontsize=label_fontsize)
plt.tight_layout()
plt.colorbar()