diff --git a/layers/common_layers.py b/layers/common_layers.py index b6f72bc1..f7b8e7ed 100644 --- a/layers/common_layers.py +++ b/layers/common_layers.py @@ -208,7 +208,7 @@ class Attention(nn.Module): _, n = prev_alpha.max(1) val, n2 = alpha.max(1) for b in range(alignment.shape[0]): - alpha[b, n[b] + 2:] = 0 + alpha[b, n[b] + 3:] = 0 alpha[b, :(n[b] - 1)] = 0 # ignore all previous states to prevent repetition. alpha[b, (n[b] - 2)] = 0.01 * val[b] # smoothing factor for the prev step # compute attention weights diff --git a/utils/audio.py b/utils/audio.py index fb3edad3..e985dbf2 100644 --- a/utils/audio.py +++ b/utils/audio.py @@ -230,12 +230,13 @@ class AudioProcessor(object): x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1) return x - def load_wav(self, filename, encode=False): - x, sr = sf.read(filename) - # x, sr = librosa.load(filename, sr=self.sample_rate) + def load_wav(self, filename, sr=None): + if sr is None: + x, sr = sf.read(filename) + else: + x, sr = librosa.load(filename, sr=sr) if self.do_trim_silence: x = self.trim_silence(x) - # sr, x = io.wavfile.read(filename) assert self.sample_rate == sr, "Expected sampling rate {} but file " \ "{} has {}.".format(self.sample_rate, filename,