Merge pull request #5 from MycroftAI/amy-data

Trim leading and trailing silence
2018-04-02 13:04:32 -07:00 · 2018-04-02 13:04:32 -07:00 · c7efb3c208
parent 1579386764 567bf6c62e
commit c7efb3c208
1 changed file with 8 additions and 0 deletions
--- a/datasets/amy.py
+++ b/datasets/amy.py
@@ -1,8 +1,11 @@
 from concurrent.futures import ProcessPoolExecutor
 from functools import partial
 import glob
+import librosa
 import numpy as np
 import os
+
+from hparams import hparams
 from util import audio


@@ -32,6 +35,11 @@ def _process_utterance(out_dir, prompt_id, wav_path, text):
  # Load the audio to a numpy array:
  wav = audio.load_wav(wav_path)

+  # Trim leading and trailing silence:
+  margin = int(hparams.sample_rate * 0.1)
+  wav = wav[margin:-margin]
+  wav, _ = librosa.effects.trim(wav, top_db=40, frame_length=1024, hop_length=256)
+
  # Compute the linear-scale spectrogram from the wav:
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]