Merge branch 'tacotron-gst' of github.com:mozilla/TTS into multispeaker

2019-07-01 14:00:22 +02:00 · 2019-07-01 14:00:22 +02:00 · 04e452d8cb
parent 765597e983 464cc29756
commit 04e452d8cb
2 changed files with 6 additions and 5 deletions
--- a/layers/common_layers.py
+++ b/layers/common_layers.py
@@ -208,7 +208,7 @@ class Attention(nn.Module):
            _, n = prev_alpha.max(1)
            val, n2 = alpha.max(1)
            for b in range(alignment.shape[0]):
-                alpha[b, n[b] + 2:] = 0
+                alpha[b, n[b] + 3:] = 0
                alpha[b, :(n[b] - 1)] = 0  # ignore all previous states to prevent repetition.
                alpha[b, (n[b] - 2)] = 0.01 * val[b]  # smoothing factor for the prev step
        # compute attention weights
--- a/utils/audio.py
+++ b/utils/audio.py
@@ -230,12 +230,13 @@ class AudioProcessor(object):
        x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
        return x

-    def load_wav(self, filename, encode=False):
-        x, sr = sf.read(filename)
-        # x, sr = librosa.load(filename, sr=self.sample_rate)
+    def load_wav(self, filename, sr=None):
+        if sr is None:
+            x, sr = sf.read(filename)
+        else:
+            x, sr = librosa.load(filename, sr=sr)
        if self.do_trim_silence:
            x = self.trim_silence(x)
-        # sr, x = io.wavfile.read(filename)
        assert self.sample_rate == sr, "Expected sampling rate {} but file " \
                                       "{} has {}.".format(self.sample_rate,
                                                           filename,