trim silence if enabled

pull/10/head
Eren Golge 2018-11-23 16:58:26 +01:00
parent 22dcc4f7d0
commit 0f0bde935c
2 changed files with 13 additions and 1 deletions

View File

@ -21,7 +21,8 @@
"max_norm": 1, // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
"clip_norm": true, // clip normalized values into the range.
"mel_fmin": null, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
"mel_fmax": null // maximum freq level for mel-spec. Tune for dataset!!
"mel_fmax": null, // maximum freq level for mel-spec. Tune for dataset!!
"do_trim_silence": true // enable trimming of slience of audio as you load it.
},
"embedding_size": 256,

View File

@ -26,6 +26,7 @@ class AudioProcessor(object):
mel_fmax=None,
clip_norm=True,
griffin_lim_iters=None,
do_trim_silence=False
**kwargs):
print(" > Setting up Audio Processor...")
@ -47,6 +48,7 @@ class AudioProcessor(object):
self.mel_fmax = mel_fmax
self.max_norm = 1.0 if max_norm is None else float(max_norm)
self.clip_norm = clip_norm
self.do_trim_silence = do_trim_silence
self.n_fft, self.hop_length, self.win_length = self._stft_parameters()
print(" | > Audio Processor attributes.")
members = vars(self)
@ -203,6 +205,13 @@ class AudioProcessor(object):
return x + hop_length
return len(wav)
def trim_silence(self, wav):
    """Crop 0.1 sec from both ends of ``wav``, then trim silent edges.

    The crop width is ``int(self.sample_rate * 0.1)`` samples on each
    side. The remaining signal is handed to ``librosa.effects.trim``
    with a 40 dB threshold, and only the trimmed signal (first element
    of its result) is returned.
    """
    edge = int(self.sample_rate * 0.1)
    cropped = wav[edge:-edge]
    trim_result = librosa.effects.trim(
        cropped, top_db=40, frame_length=1024, hop_length=256)
    # Element 0 is the trimmed audio; the rest of the result is unused here.
    return trim_result[0]
# WaveRNN repo specific functions
# def mulaw_encode(self, wav, qc):
# mu = qc - 1
@ -225,6 +234,8 @@ class AudioProcessor(object):
def load_wav(self, filename, encode=False):
    """Load an audio file at the processor's sample rate.

    Args:
        filename: path of the audio file passed to ``librosa.load``.
        encode: not used by this method; kept in the signature so
            existing callers that pass it keep working.

    Returns:
        The loaded waveform. When ``self.do_trim_silence`` is truthy the
        waveform is first passed through ``self.trim_silence``.
    """
    x, sr = librosa.load(filename, sr=self.sample_rate)
    if self.do_trim_silence:
        # trim_silence is a method of this class, so call it on self.
        # The earlier ``self.ap.trim_silence(x)`` relied on an ``ap``
        # attribute that this class does not appear to define, and so
        # failed as soon as trimming was enabled.
        x = self.trim_silence(x)
    # Sanity check that the loader returned the requested sample rate.
    assert self.sample_rate == sr
    return x