diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index f89d1ee5..618bc338 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -387,10 +387,9 @@ class AudioProcessor(object):
             x = self.sound_norm(x)
         return x
 
-    def save_wav(self, wav, path, sample_rate=None):
-        sample_rate = self.sample_rate if sample_rate is None else sample_rate
+    def save_wav(self, wav, path, sr=None):
         wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
-        scipy.io.wavfile.write(path, sample_rate, wav_norm.astype(np.int16))
+        scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
 
     @staticmethod
     def mulaw_encode(wav, qc):
diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py
index 2b1f43f7..44b0edfa 100644
--- a/TTS/vocoder/models/hifigan_generator.py
+++ b/TTS/vocoder/models/hifigan_generator.py
@@ -1,8 +1,8 @@
 import torch
 import torch.nn.functional as F
 import torch.nn as nn
-from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
+from torch.nn import Conv1d, ConvTranspose1d
+from torch.nn.utils import weight_norm, remove_weight_norm
 
 LRELU_SLOPE = 0.1
 
@@ -115,6 +115,7 @@ class HifiganGenerator(torch.nn.Module):
                  resblock_kernel_sizes, upsample_kernel_sizes, upsample_initial_channel,
                  upsample_factors):
         super().__init__()
+        self.inference_padding = 5
         self.num_kernels = len(resblock_kernel_sizes)
         self.num_upsamples = len(upsample_factors)
         self.conv_pre = weight_norm(