Trim leading and trailing silence

2018-04-02 13:02:25 -07:00 · 2018-04-02 13:02:25 -07:00 · 567bf6c62e
parent 1579386764
commit 567bf6c62e
1 changed file with 8 additions and 0 deletions
--- a/datasets/amy.py
+++ b/datasets/amy.py
@@ -1,8 +1,11 @@
 from concurrent.futures import ProcessPoolExecutor
 from functools import partial
 import glob
+import librosa
 import numpy as np
 import os
+
+from hparams import hparams
 from util import audio


@@ -32,6 +35,11 @@ def _process_utterance(out_dir, prompt_id, wav_path, text):
  # Load the audio to a numpy array:
  wav = audio.load_wav(wav_path)

+  # Trim leading and trailing silence:
+  margin = int(hparams.sample_rate * 0.1)
+  wav = wav[margin:-margin]
+  wav, _ = librosa.effects.trim(wav, top_db=40, frame_length=1024, hop_length=256)
+
  # Compute the linear-scale spectrogram from the wav:
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]