set silence trimming threshold in config

pull/10/head
root 2020-02-03 14:16:40 +01:00 committed by erogol
parent ca33336ae0
commit ffe9a32813
2 changed files with 4 additions and 1 deletions

View File

@ -24,6 +24,7 @@
"mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
"mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!!
"do_trim_silence": true // enable trimming of silence from audio as you load it. LJspeech (false), TWEB (false), Nancy (true)
"trim_db": 60, // threshold in dB for trimming silence. Set this according to your dataset.
},
// DISTRIBUTED TRAINING

View File

@ -24,6 +24,7 @@ class AudioProcessor(object):
clip_norm=True,
griffin_lim_iters=None,
do_trim_silence=False,
trim_db=60,
sound_norm=False,
**_):
@ -46,6 +47,7 @@ class AudioProcessor(object):
self.max_norm = 1.0 if max_norm is None else float(max_norm)
self.clip_norm = clip_norm
self.do_trim_silence = do_trim_silence
self.trim_db = trim_db
self.sound_norm = sound_norm
self.n_fft, self.hop_length, self.win_length = self._stft_parameters()
assert min_level_db != 0.0, " [!] min_level_db is 0"
@ -217,7 +219,7 @@ class AudioProcessor(object):
margin = int(self.sample_rate * 0.01)
wav = wav[margin:-margin]
return librosa.effects.trim(
wav, top_db=40, frame_length=self.win_length, hop_length=self.hop_length)[0]
wav, top_db=self.trim_db, frame_length=self.win_length, hop_length=self.hop_length)[0]
@staticmethod
def mulaw_encode(wav, qc):