TTS/utils/visual.py

67 lines
2.1 KiB
Python
Raw Normal View History

import numpy as np
2018-11-02 15:13:51 +00:00
import librosa
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
def plot_alignment(alignment, info=None):
    """Draw an alignment matrix as a heat map and return the figure.

    Args:
        alignment: 2-D array-like exposing ``.T``; it is transposed before
            being drawn. Presumably indexed (decoder step, encoder step),
            judging by the axis labels -- confirm against callers.
        info: Optional text appended, after a blank line, to the x-axis
            label.

    Returns:
        The matplotlib figure containing the plot. The figure is not closed
        here.
    """
    figure, axes = plt.subplots(figsize=(16, 10))
    heatmap = axes.imshow(
        alignment.T, aspect='auto', origin='lower', interpolation='none')
    figure.colorbar(heatmap, ax=axes)

    decoder_label = 'Decoder timestep'
    if info is not None:
        decoder_label = decoder_label + '\n\n' + info
    axes.set_xlabel(decoder_label)
    axes.set_ylabel('Encoder timestep')

    figure.tight_layout()
    return figure
def plot_spectrogram(linear_output, audio):
    """Plot a denormalized spectrogram as an image and return the figure.

    Args:
        linear_output: Spectrogram passed to ``audio._denormalize``; the
            result must expose ``.T`` (presumably frames x frequency bins --
            confirm against callers).
        audio: Object providing a ``_denormalize`` method.

    Returns:
        The matplotlib figure containing the image and its colorbar. The
        figure is not closed here.
    """
    denormalized = audio._denormalize(linear_output)

    figure = plt.figure(figsize=(16, 10))
    image = plt.imshow(denormalized.T, aspect="auto", origin="lower")
    figure.colorbar(image)
    figure.tight_layout()
    return figure
2018-11-02 15:13:51 +00:00
2018-11-13 11:10:40 +00:00
def _draw_spectrogram_panel(spectrogram, sample_rate, hop_length, label_fontsize):
    """Draw one spectrogram, its axis labels and a colorbar on the current axes."""
    # `librosa.display` is a submodule; a bare `import librosa` does not
    # guarantee it is loaded, so import it explicitly before use.
    import librosa.display

    librosa.display.specshow(spectrogram.T, sr=sample_rate,
                             hop_length=hop_length, x_axis="time", y_axis="linear")
    plt.xlabel("Time", fontsize=label_fontsize)
    plt.ylabel("Hz", fontsize=label_fontsize)
    # Attach the colorbar to the spectrogram that was just drawn.
    plt.colorbar()


def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spectrogram2=None):
    """Plot alignment, stop tokens and spectrogram(s) stacked in one figure.

    Creates a new pyplot figure with three rows (four when ``spectrogram2``
    is given): the alignment heat map, the stop-token curve, and one panel
    per spectrogram. Nothing is returned; the figure is left as pyplot's
    current figure.

    Args:
        alignment: 2-D array-like exposing ``.T``; transposed before drawing.
        spectrogram: Array-like exposing ``.T``; transposed and passed to
            ``librosa.display.specshow``.
        stop_tokens: Object supporting
            ``.squeeze().detach().to('cpu').numpy()`` (presumably a torch
            tensor -- confirm against callers).
        text: Sequence whose items label the y ticks of the alignment plot.
        hop_length: Hop length forwarded to ``specshow`` for the time axis.
        CONFIG: Object whose ``audio['sample_rate']`` gives the sample rate.
        spectrogram2: Optional second spectrogram drawn in a fourth row.
    """
    num_plot = 3 if spectrogram2 is None else 4
    label_fontsize = 16
    sample_rate = CONFIG.audio['sample_rate']

    plt.figure(figsize=(16, 32))

    # Row 1: alignment heat map. 'none' matches plot_alignment; passing None
    # would silently fall back to matplotlib's default interpolation.
    plt.subplot(num_plot, 1, 1)
    plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation='none')
    plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
    plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
    plt.yticks(range(len(text)), list(text))
    plt.colorbar()

    # Row 2: stop-token values, one point per entry.
    stop_tokens = stop_tokens.squeeze().detach().to('cpu').numpy()
    plt.subplot(num_plot, 1, 2)
    plt.plot(range(len(stop_tokens)), list(stop_tokens))

    # Row 3 (and 4): spectrogram panels, each with its own colorbar.
    plt.subplot(num_plot, 1, 3)
    _draw_spectrogram_panel(spectrogram, sample_rate, hop_length, label_fontsize)

    if spectrogram2 is not None:
        plt.subplot(num_plot, 1, 4)
        _draw_spectrogram_panel(spectrogram2, sample_rate, hop_length, label_fontsize)

    # Run the layout pass last so it accounts for every colorbar.
    plt.tight_layout()