Fix vectorizing invalid files

pull/10/head
Matthew D. Scholefield 2018-03-21 15:20:37 -05:00
parent 39721720fe
commit befd296128
2 changed files with 9 additions and 2 deletions

View File

@ -13,6 +13,7 @@
# limitations under the License.
import json
from argparse import ArgumentParser
from contextlib import suppress
from hashlib import md5
from os.path import join, isfile, dirname
from typing import *
@ -178,8 +179,12 @@ class TrainData:
outputs = []
def add(filenames, output):
inputs.extend(load_vector(f, vectorizer) for f in filenames)
outputs.extend(np.array([output]) for _ in filenames)
for f in filenames:
try:
inputs.append(load_vector(f, vectorizer))
outputs.append(np.array([output]))
except ValueError:
print('Skipping invalid file:', f)
print('Loading wake-word...')
add(kw_files, 1.0)

View File

@ -26,6 +26,8 @@ inhibit_hop_t = 0.1
def vectorize_raw(audio: np.ndarray) -> np.ndarray:
    """Turns audio into feature vectors, without clipping for length

    Args:
        audio: Audio data to convert; must contain at least one element.

    Returns:
        The result of speechpy's ``mfcc`` called with the settings read
        from ``pr`` (sample rate, window/hop times, MFCC/filter/FFT sizes).

    Raises:
        ValueError: If ``audio`` is empty.
    """
    # Validate before the lazy speechpy import so that empty input fails
    # fast with the intended error, without paying for the import first.
    if len(audio) == 0:
        raise ValueError('Cannot vectorize empty audio!')

    # Imported lazily, as in the original, to keep module import light.
    from speechpy.feature import mfcc
    return mfcc(audio, pr.sample_rate, pr.window_t, pr.hop_t, pr.n_mfcc, pr.n_filt, pr.n_fft)