Merge pull request #5 from MycroftAI/amy-data

Trim leading and trailing silence
danny
Keith Ito 2018-04-02 13:04:32 -07:00 committed by GitHub
commit c7efb3c208
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 8 additions and 0 deletions

View File

@@ -1,8 +1,11 @@
from concurrent.futures import ProcessPoolExecutor from concurrent.futures import ProcessPoolExecutor
from functools import partial from functools import partial
import glob import glob
import librosa
import numpy as np import numpy as np
import os import os
from hparams import hparams
from util import audio from util import audio
@@ -32,6 +35,11 @@ def _process_utterance(out_dir, prompt_id, wav_path, text):
# Load the audio to a numpy array: # Load the audio to a numpy array:
wav = audio.load_wav(wav_path) wav = audio.load_wav(wav_path)
# Trim leading and trailing silence:
margin = int(hparams.sample_rate * 0.1)
wav = wav[margin:-margin]
wav, _ = librosa.effects.trim(wav, top_db=40, frame_length=1024, hop_length=256)
# Compute the linear-scale spectrogram from the wav: # Compute the linear-scale spectrogram from the wav:
spectrogram = audio.spectrogram(wav).astype(np.float32) spectrogram = audio.spectrogram(wav).astype(np.float32)
n_frames = spectrogram.shape[1] n_frames = spectrogram.shape[1]