Detect endpoint at first long silence

pull/2/head
Keith Ito 2017-09-26 09:40:07 -07:00
parent e61fa839f9
commit d88640b7c0
2 changed files with 11 additions and 1 deletions

View File

@@ -34,7 +34,7 @@ class Synthesizer:
}
wav = self.session.run(self.wav_output, feed_dict=feed_dict)
wav = audio.inv_preemphasis(wav)
wav, _ = effects.trim(wav)
wav = wav[:audio.find_endpoint(wav)]
out = io.BytesIO()
audio.save_wav(wav, out)
return out.getvalue()

View File

@@ -52,6 +52,16 @@ def melspectrogram(y):
return _normalize(S)
def find_endpoint(wav, threshold_db=-40, min_silence_sec=0.8):
    '''Find where the first sufficiently long silence starts in a waveform.

    A window of int(hparams.sample_rate * min_silence_sec) samples is slid
    over wav in steps of a quarter window. The first window whose peak
    magnitude is below the threshold marks the endpoint.

    Args:
      wav: 1-D array of audio samples (assumed to be a numpy array or
        array-like that supports slicing -- confirm against callers).
      threshold_db: silence threshold, converted to a linear amplitude with
        _db_to_amp before comparison.
      min_silence_sec: minimum silence duration; multiplied by
        hparams.sample_rate to get the window length in samples.

    Returns:
      An index into wav suitable for wav[:index]. If a silent window starting
      at x is found this is x + hop_length, i.e. one hop of the silence is
      kept. If no silent window is found, len(wav) is returned.
    '''
    window_length = int(hparams.sample_rate * min_silence_sec)
    # A window shorter than 4 samples would give a step of 0, which range()
    # rejects with ValueError; always advance by at least one sample.
    hop_length = max(1, int(window_length / 4))
    threshold = _db_to_amp(threshold_db)
    # The scan starts at hop_length, so the first hop_length samples are never
    # the start of a tested window.
    for x in range(hop_length, len(wav) - window_length, hop_length):
        # Compare the peak *magnitude*: audio samples are signed, so a loud
        # negative excursion must not be mistaken for silence.
        if np.max(np.abs(wav[x:x + window_length])) < threshold:
            return x + hop_length
    return len(wav)
def _griffin_lim(S):
'''librosa implementation of Griffin-Lim
Based on https://github.com/librosa/librosa/issues/434