mirror of https://github.com/coqui-ai/TTS.git
visual updates for phonemes
parent a60c9ee47d
commit caae1af4f6
@@ -182,4 +182,5 @@ def sequence_mask(sequence_length, max_len=None):
         seq_range_expand = seq_range_expand.cuda()
     seq_length_expand = (sequence_length.unsqueeze(1)
                          .expand_as(seq_range_expand))
+    # B x T_max
     return seq_range_expand < seq_length_expand
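For reference, a minimal standalone sketch of what this mask computes: a B x T_max boolean tensor that is True wherever a timestep falls inside the corresponding sequence. This is an illustrative re-implementation for clarity, not the patched file itself.

import torch

def sequence_mask_sketch(sequence_length, max_len=None):
    # B x T_max boolean mask: True where timestep t < length of sequence b.
    if max_len is None:
        max_len = int(sequence_length.max())
    seq_range = torch.arange(max_len, device=sequence_length.device)
    return seq_range.unsqueeze(0) < sequence_length.unsqueeze(1)

# lengths [2, 4] -> [[True, True, False, False],
#                    [True, True, True,  True ]]
print(sequence_mask_sketch(torch.tensor([2, 4])))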
@@ -3,7 +3,7 @@ import time
 import librosa
 import torch
 import numpy as np
-from .text import text_to_sequence, phoneme_to_sequence
+from .text import text_to_sequence, phoneme_to_sequence, sequence_to_phoneme
 from .visual import visualize
 from matplotlib import pylab as plt

@@ -11,8 +11,6 @@ from matplotlib import pylab as plt
 def synthesis(m, s, CONFIG, use_cuda, ap):
     """ Given the text, synthesising the audio """
     text_cleaner = [CONFIG.text_cleaner]
-    # print(phoneme_to_sequence(s, text_cleaner))s
-    # print(sequence_to_phoneme(phoneme_to_sequence(s, text_cleaner)))
     if CONFIG.use_phonemes:
         seq = np.asarray(
             phoneme_to_sequence(s, text_cleaner, CONFIG.phoneme_language),
@@ -52,7 +52,7 @@ def phoneme_to_sequence(text, cleaner_names, language):
     for phoneme in phonemes.split('|'):
         # print(word, ' -- ', phonemes_text)
         sequence += _phoneme_to_sequence(phoneme)
-    # Aeepnd EOS char
+    # Append EOS char
     sequence.append(_phonemes_to_id['~'])
     return sequence

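For orientation, a hedged usage sketch of the round trip these helpers provide (the removed debug prints above were checking exactly this). The cleaner name and language code below are placeholders for illustration, not values taken from the patch.

# Assumes the signatures shown in this diff:
#   phoneme_to_sequence(text, cleaner_names, language) -> list of int IDs (EOS '~' appended)
#   sequence_to_phoneme(sequence) -> phoneme string
from utils.text import phoneme_to_sequence, sequence_to_phoneme

cleaners = ["english_cleaners"]                               # placeholder cleaner name
seq = phoneme_to_sequence("hello world", cleaners, "en-us")   # "en-us" is a placeholder language code
print(seq)                        # list of phoneme IDs
print(sequence_to_phoneme(seq))   # phoneme string reconstructed from the IDs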
@@ -3,6 +3,7 @@ import librosa
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
+from utils.text import phoneme_to_sequence, sequence_to_phoneme


 def plot_alignment(alignment, info=None):
@@ -29,19 +30,22 @@ def plot_spectrogram(linear_output, audio):
     return fig


-def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spectrogram2=None):
-    if spectrogram2 is not None:
+def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CONFIG, spectrogram=None):
+    if spectrogram is not None:
         num_plot = 4
     else:
         num_plot = 3

     label_fontsize = 16
-    plt.figure(figsize=(16, 32))
+    plt.figure(figsize=(16, 48))

     plt.subplot(num_plot, 1, 1)
     plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
     plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
     plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
+    if CONFIG.use_phonemes:
+        seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language)
+        text = sequence_to_phoneme(seq)
     plt.yticks(range(len(text)), list(text))
     plt.colorbar()

@@ -50,17 +54,18 @@ def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spe
     plt.plot(range(len(stop_tokens)), list(stop_tokens))

     plt.subplot(num_plot, 1, 3)
-    librosa.display.specshow(spectrogram.T, sr=CONFIG.audio['sample_rate'],
+    librosa.display.specshow(spectrogram_postnet.T, sr=CONFIG.audio['sample_rate'],
                              hop_length=hop_length, x_axis="time", y_axis="linear")
     plt.xlabel("Time", fontsize=label_fontsize)
     plt.ylabel("Hz", fontsize=label_fontsize)
+    plt.tight_layout()
+    plt.colorbar()

-    if spectrogram2 is not None:
+    if spectrogram is not None:
         plt.subplot(num_plot, 1, 4)
-        librosa.display.specshow(spectrogram2.T, sr=CONFIG.audio['sample_rate'],
+        librosa.display.specshow(spectrogram.T, sr=CONFIG.audio['sample_rate'],
                                  hop_length=hop_length, x_axis="time", y_axis="linear")
         plt.xlabel("Time", fontsize=label_fontsize)
         plt.ylabel("Hz", fontsize=label_fontsize)
-
-    plt.tight_layout()
-    plt.colorbar()
+        plt.tight_layout()
+        plt.colorbar()
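To tie the pieces together, a hedged sketch of how the renamed visualize signature might be driven from a synthesis result. The output names and CONFIG fields used here are assumptions for illustration, not code from this commit.

# Hypothetical call site; assumes visualize(alignment, spectrogram_postnet, stop_tokens,
# text, hop_length, CONFIG, spectrogram=None) as defined in this diff.
from utils.visual import visualize

def plot_synthesis_result(outputs, text, CONFIG):
    # `outputs` is an assumed dict of numpy arrays produced by a synthesis step.
    visualize(
        alignment=outputs["alignment"],          # decoder-by-encoder attention weights (subplot 1)
        spectrogram_postnet=outputs["postnet"],  # postnet spectrogram, plotted in subplot 3
        stop_tokens=outputs["stop_tokens"],      # per-frame stop probabilities (subplot 2)
        text=text,                               # converted to phonemes inside if CONFIG.use_phonemes
        hop_length=CONFIG.audio["hop_length"],   # assumes hop_length lives in CONFIG.audio
        CONFIG=CONFIG,
        spectrogram=outputs.get("decoder"),      # optional pre-postnet spectrogram -> subplot 4
    )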