Trim leading and trailing silence

pull/5/head
Keith Ito 2018-04-02 13:02:25 -07:00
parent 1579386764
commit 567bf6c62e
1 changed file with 8 additions and 0 deletions

View File

@@ -1,8 +1,11 @@
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import glob
import librosa
import numpy as np
import os
from hparams import hparams
from util import audio
@@ -32,6 +35,11 @@ def _process_utterance(out_dir, prompt_id, wav_path, text):
# Load the audio to a numpy array:
wav = audio.load_wav(wav_path)
# Trim leading and trailing silence:
margin = int(hparams.sample_rate * 0.1)
wav = wav[margin:-margin]
wav, _ = librosa.effects.trim(wav, top_db=40, frame_length=1024, hop_length=256)
# Compute the linear-scale spectrogram from the wav:
spectrogram = audio.spectrogram(wav).astype(np.float32)
n_frames = spectrogram.shape[1]