mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'tacotron-gst' of github.com:mozilla/TTS into multispeaker
commit 04e452d8cb
@@ -208,7 +208,7 @@ class Attention(nn.Module):
             _, n = prev_alpha.max(1)
             val, n2 = alpha.max(1)
             for b in range(alignment.shape[0]):
-                alpha[b, n[b] + 2:] = 0
+                alpha[b, n[b] + 3:] = 0
                 alpha[b, :(n[b] - 1)] = 0  # ignore all previous states to prevent repetition.
                 alpha[b, (n[b] - 2)] = 0.01 * val[b]  # smoothing factor for the prev step
         # compute attention weights
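The only change in this hunk widens the forward window from n[b] + 2 to n[b] + 3, so at inference time the attention peak may advance up to two encoder steps per decoder step instead of one. Below is a self-contained toy run of that masking logic; the shapes and random values are made up for illustration, and only the three masking lines mirror the patched code (indices are converted to plain ints for clarity).

import torch

# Toy example: batch of 2 utterances, 10 encoder steps,
# previous attention peaked at encoder step 4.
torch.manual_seed(0)
prev_alpha = torch.zeros(2, 10)
prev_alpha[:, 4] = 1.0
alpha = torch.rand(2, 10)                 # unconstrained candidate weights

_, n = prev_alpha.max(1)                  # previously attended positions
val, _ = alpha.max(1)                     # current peak values
for b in range(alpha.shape[0]):
    j = int(n[b])
    alpha[b, j + 3:] = 0                  # patched bound: at most two steps ahead
    alpha[b, :j - 1] = 0                  # ignore earlier states to prevent repetition
    alpha[b, j - 2] = 0.01 * val[b]       # small smoothing weight on the step before
alpha = alpha / alpha.sum(dim=1, keepdim=True)
print(alpha)                              # nonzero mass only around steps 2 through 6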
@@ -230,12 +230,13 @@ class AudioProcessor(object):
         x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
         return x

-    def load_wav(self, filename, encode=False):
-        x, sr = sf.read(filename)
-        # x, sr = librosa.load(filename, sr=self.sample_rate)
+    def load_wav(self, filename, sr=None):
+        if sr is None:
+            x, sr = sf.read(filename)
+        else:
+            x, sr = librosa.load(filename, sr=sr)
         if self.do_trim_silence:
             x = self.trim_silence(x)
-        # sr, x = io.wavfile.read(filename)
         assert self.sample_rate == sr, "Expected sampling rate {} but file " \
                                        "{} has {}.".format(self.sample_rate,
                                                            filename,
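This hunk lets callers request resampling on load: with sr left as None the file is read at its native rate via soundfile, otherwise it is decoded and resampled by librosa. A minimal standalone sketch of that behavior follows; expected_sr and trim are placeholder arguments standing in for the class attributes self.sample_rate and self.trim_silence, not part of the repository's API.

import librosa
import soundfile as sf

def load_wav(filename, sr=None, expected_sr=22050, trim=None):
    # Sketch of the patched loader as a free function (placeholder arguments).
    if sr is None:
        x, sr = sf.read(filename)              # native rate, no resampling
    else:
        x, sr = librosa.load(filename, sr=sr)  # decode and resample to `sr`
    if trim is not None:
        x = trim(x)                            # optional silence-trimming hook
    assert expected_sr == sr, "Expected sampling rate {} but file {} has {}.".format(
        expected_sr, filename, sr)
    return x

# x = load_wav("speech.wav")               # read as stored on disk
# x = load_wav("speech.wav", sr=22050)     # force 22050 Hz via librosa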