mirror of https://github.com/coqui-ai/TTS.git
set silence trimming threshold in config
parent
ca33336ae0
commit
ffe9a32813
|
@ -24,6 +24,7 @@
|
||||||
"mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
|
"mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
|
||||||
"mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!!
|
"mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!!
|
||||||
"do_trim_silence": true // enable trimming of silence of audio as you load it. LJspeech (false), TWEB (false), Nancy (true)
|
"do_trim_silence": true // enable trimming of silence of audio as you load it. LJspeech (false), TWEB (false), Nancy (true)
|
||||||
|
"trim_db": 60, // threshold for trimming silence. Set this according to your dataset.
|
||||||
},
|
},
|
||||||
|
|
||||||
// DISTRIBUTED TRAINING
|
// DISTRIBUTED TRAINING
|
||||||
|
|
|
@ -24,6 +24,7 @@ class AudioProcessor(object):
|
||||||
clip_norm=True,
|
clip_norm=True,
|
||||||
griffin_lim_iters=None,
|
griffin_lim_iters=None,
|
||||||
do_trim_silence=False,
|
do_trim_silence=False,
|
||||||
|
trim_db=60,
|
||||||
sound_norm=False,
|
sound_norm=False,
|
||||||
**_):
|
**_):
|
||||||
|
|
||||||
|
@ -46,6 +47,7 @@ class AudioProcessor(object):
|
||||||
self.max_norm = 1.0 if max_norm is None else float(max_norm)
|
self.max_norm = 1.0 if max_norm is None else float(max_norm)
|
||||||
self.clip_norm = clip_norm
|
self.clip_norm = clip_norm
|
||||||
self.do_trim_silence = do_trim_silence
|
self.do_trim_silence = do_trim_silence
|
||||||
|
self.trim_db = trim_db
|
||||||
self.sound_norm = sound_norm
|
self.sound_norm = sound_norm
|
||||||
self.n_fft, self.hop_length, self.win_length = self._stft_parameters()
|
self.n_fft, self.hop_length, self.win_length = self._stft_parameters()
|
||||||
assert min_level_db != 0.0, " [!] min_level_db is 0"
|
assert min_level_db != 0.0, " [!] min_level_db is 0"
|
||||||
|
@ -217,7 +219,7 @@ class AudioProcessor(object):
|
||||||
margin = int(self.sample_rate * 0.01)
|
margin = int(self.sample_rate * 0.01)
|
||||||
wav = wav[margin:-margin]
|
wav = wav[margin:-margin]
|
||||||
return librosa.effects.trim(
|
return librosa.effects.trim(
|
||||||
wav, top_db=40, frame_length=self.win_length, hop_length=self.hop_length)[0]
|
wav, top_db=self.trim_db, frame_length=self.win_length, hop_length=self.hop_length)[0]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def mulaw_encode(wav, qc):
|
def mulaw_encode(wav, qc):
|
||||||
|
|
Loading…
Reference in New Issue