Detect endpoint at first long silence

2017-09-26 09:40:07 -07:00 · 2017-09-26 09:40:07 -07:00 · d88640b7c0
parent e61fa839f9
commit d88640b7c0
2 changed files with 11 additions and 1 deletions
--- a/synthesizer.py
+++ b/synthesizer.py
@@ -34,7 +34,7 @@ class Synthesizer:
    }
    wav = self.session.run(self.wav_output, feed_dict=feed_dict)
    wav = audio.inv_preemphasis(wav)
-    wav, _ = effects.trim(wav)
+    wav = wav[:audio.find_endpoint(wav)]
    out = io.BytesIO()
    audio.save_wav(wav, out)
    return out.getvalue()
--- a/util/audio.py
+++ b/util/audio.py
@@ -52,6 +52,16 @@ def melspectrogram(y):
  return _normalize(S)


+def find_endpoint(wav, threshold_db=-40, min_silence_sec=0.8):
+  '''Finds the sample index at which to cut `wav`, at the first long silence.
+
+  Slides a window of `min_silence_sec` seconds (converted to samples with
+  hparams.sample_rate) over `wav` in steps of a quarter window. The first
+  window whose peak magnitude is below the threshold marks the endpoint.
+
+  Args:
+    wav: 1-D array of audio samples (presumably floats centered on zero; confirm
+      against the caller).
+    threshold_db: Level below which a window counts as silent. It is converted with
+      _db_to_amp, presumably to a linear amplitude.
+    min_silence_sec: Minimum duration of silence, in seconds, that ends the utterance.
+
+  Returns:
+    Start of the first silent window plus one hop, so a quarter window of the
+    silence is kept. Returns len(wav) if no silent window is found. The scan
+    starts one hop into the signal, so no window begins before that point.
+  '''
+  window_length = int(hparams.sample_rate * min_silence_sec)
+  # Clamp to 1 so a very short window cannot produce a zero step for range().
+  hop_length = max(1, window_length // 4)
+  threshold = _db_to_amp(threshold_db)
+  for x in range(hop_length, len(wav) - window_length, hop_length):
+    # Compare magnitudes: a plain np.max would treat a window of large
+    # negative-going samples as silence because they are below the threshold.
+    if np.max(np.abs(wav[x:x+window_length])) < threshold:
+      return x + hop_length
+  return len(wav)
+
+
 def _griffin_lim(S):
  '''librosa implementation of Griffin-Lim
  Based on https://github.com/librosa/librosa/issues/434