mirror of https://github.com/coqui-ai/TTS.git
set silence trimming threshold in config
parent
ca33336ae0
commit
ffe9a32813
|
@ -24,6 +24,7 @@
|
|||
"mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
|
||||
"mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!!
|
||||
"do_trim_silence": true // enable trimming of silence of audio as you load it. LJspeech (false), TWEB (false), Nancy (true)
|
||||
"trim_db": 60, // threshold for trimming silence. Set this according to your dataset.
|
||||
},
|
||||
|
||||
// DISTRIBUTED TRAINING
|
||||
|
|
|
@ -24,6 +24,7 @@ class AudioProcessor(object):
|
|||
clip_norm=True,
|
||||
griffin_lim_iters=None,
|
||||
do_trim_silence=False,
|
||||
trim_db=60,
|
||||
sound_norm=False,
|
||||
**_):
|
||||
|
||||
|
@ -46,6 +47,7 @@ class AudioProcessor(object):
|
|||
self.max_norm = 1.0 if max_norm is None else float(max_norm)
|
||||
self.clip_norm = clip_norm
|
||||
self.do_trim_silence = do_trim_silence
|
||||
self.trim_db = trim_db
|
||||
self.sound_norm = sound_norm
|
||||
self.n_fft, self.hop_length, self.win_length = self._stft_parameters()
|
||||
assert min_level_db != 0.0, " [!] min_level_db is 0"
|
||||
|
@ -217,7 +219,7 @@ class AudioProcessor(object):
|
|||
margin = int(self.sample_rate * 0.01)
|
||||
wav = wav[margin:-margin]
|
||||
return librosa.effects.trim(
|
||||
wav, top_db=40, frame_length=self.win_length, hop_length=self.hop_length)[0]
|
||||
wav, top_db=self.trim_db, frame_length=self.win_length, hop_length=self.hop_length)[0]
|
||||
|
||||
@staticmethod
|
||||
def mulaw_encode(wav, qc):
|
||||
|
|
Loading…
Reference in New Issue