mirror of https://github.com/MycroftAI/mimic2.git
commit
c7efb3c208
|
@ -1,8 +1,11 @@
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
from functools import partial
|
from functools import partial
|
||||||
import glob
|
import glob
|
||||||
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from hparams import hparams
|
||||||
from util import audio
|
from util import audio
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,6 +35,11 @@ def _process_utterance(out_dir, prompt_id, wav_path, text):
|
||||||
# Load the audio to a numpy array:
|
# Load the audio to a numpy array:
|
||||||
wav = audio.load_wav(wav_path)
|
wav = audio.load_wav(wav_path)
|
||||||
|
|
||||||
|
# Trim leading and trailing silence:
|
||||||
|
margin = int(hparams.sample_rate * 0.1)
|
||||||
|
wav = wav[margin:-margin]
|
||||||
|
wav, _ = librosa.effects.trim(wav, top_db=40, frame_length=1024, hop_length=256)
|
||||||
|
|
||||||
# Compute the linear-scale spectrogram from the wav:
|
# Compute the linear-scale spectrogram from the wav:
|
||||||
spectrogram = audio.spectrogram(wav).astype(np.float32)
|
spectrogram = audio.spectrogram(wav).astype(np.float32)
|
||||||
n_frames = spectrogram.shape[1]
|
n_frames = spectrogram.shape[1]
|
||||||
|
|
Loading…
Reference in New Issue