Merge pull request #5 from MycroftAI/amy-data

Trim leading and trailing silence
danny
Keith Ito 2018-04-02 13:04:32 -07:00 committed by GitHub
commit c7efb3c208
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 8 additions and 0 deletions

View File

@@ -1,8 +1,11 @@
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import glob
import librosa
import numpy as np
import os
from hparams import hparams
from util import audio
@@ -32,6 +35,11 @@ def _process_utterance(out_dir, prompt_id, wav_path, text):
# Load the audio to a numpy array:
wav = audio.load_wav(wav_path)
# Trim leading and trailing silence:
margin = int(hparams.sample_rate * 0.1)
wav = wav[margin:-margin]
wav, _ = librosa.effects.trim(wav, top_db=40, frame_length=1024, hop_length=256)
# Compute the linear-scale spectrogram from the wav:
spectrogram = audio.spectrogram(wav).astype(np.float32)
n_frames = spectrogram.shape[1]