Refactor and source code standardization
parent
0883ffdeb7
commit
484a90f9bc
@@ -1,6 +1,6 @@
dist/
build/
cache/
.cache/
.idea/
__pycache__/
*.egg-info/
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

if ! [[ "$1" =~ .*\.net$ ]]; then
    echo "Usage: $0 <model>.net"
    exit 1
fi

[ -d .cache/precise-data ] || git clone https://github.com/mycroftai/precise-data .cache/precise-data
model_name=$(date +"${1%%net}%y-%m-%d")
precise/scripts/convert.py $1 -o ".cache/precise-data/$model_name.pb"
cp "$1" ".cache/precise-data/$model_name.net"
cp "$1.params" ".cache/precise-data/$model_name.net.params"
mv "$model_name.pb" "$model_name.pb.params" ".cache/precise-data/"
echo "Converted to .cache/precise-data/$model_name.*"
@@ -1,269 +0,0 @@
# Python 3
|
||||
# Copyright (c) 2017 Mycroft AI Inc.
|
||||
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from os.path import isfile
|
||||
from typing import *
|
||||
|
||||
import numpy as np
|
||||
|
||||
from precise.params import ListenerParams
|
||||
|
||||
pr = ListenerParams(window_t=0.1, hop_t=0.05, buffer_t=1.5,
|
||||
sample_rate=16000, sample_depth=2,
|
||||
n_mfcc=13, n_filt=20, n_fft=512)
|
||||
|
||||
lstm_units = 20
|
||||
inhibit_t = 0.4
|
||||
inhibit_dist_t = 1.0
|
||||
inhibit_hop_t = 0.1
|
||||
|
||||
|
||||
def create_parser(usage: str) -> ArgumentParser:
|
||||
"""
|
||||
Creates an argument parser from a condensed usage string in the format of:
|
||||
:pos_arg_name int
|
||||
This is the help message
|
||||
which can span multiple lines
|
||||
:-o --optional_arg str default_value
|
||||
The type can be any valid python type
|
||||
:-eo --extra-option
|
||||
This adds args.extra_option as a bool
|
||||
which is False by default
|
||||
"""
|
||||
first_line = [i for i in usage.split('\n') if i][0]
|
||||
indent = ' ' * (len(first_line) - len(first_line.lstrip(' ')))
|
||||
usage = usage.replace('\n' + indent, '\n')
|
||||
|
||||
defaults = {}
|
||||
description, *descriptors = usage.split('\n:')
|
||||
parser = ArgumentParser(description=description.strip())
|
||||
for descriptor in descriptors:
|
||||
try:
|
||||
options, *help = descriptor.split('\n')
|
||||
help = ' '.join(help).replace(' ', '')
|
||||
if options.count(' ') == 1:
|
||||
if options[0] == '-':
|
||||
short, long = options.split(' ')
|
||||
var_name = long.strip('-').replace('-', '_')
|
||||
parser.add_argument(short, long, dest=var_name, action='store_true', help=help)
|
||||
defaults[var_name] = False
|
||||
else:
|
||||
short, typ = options.split(' ')
|
||||
parser.add_argument(short, type=eval(typ), help=help)
|
||||
else:
|
||||
short, long, typ, default = options.split(' ')
|
||||
help += '. Default: ' + default
|
||||
default = '' if default == '-' else default
|
||||
parser.add_argument(short, long, type=eval(typ), default=default, help=help)
|
||||
except Exception as e:
|
||||
print(e.__class__.__name__ + ': ' + str(e))
|
||||
print('While parsing:')
|
||||
print(descriptor)
|
||||
exit(1)
|
||||
return parser
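For reference, a minimal sketch (not part of the commit; the argument names are invented) of how this condensed usage format behaves, the same format provided by the prettyparse package that this commit switches to:

usage = '''
    Train a model

    :model str
        Keras model file to use

    :-e --epochs int 10
        Number of epochs to train for

    :-d --debug
        Enable debug output
'''
parser = create_parser(usage)
args = parser.parse_args(['my_model.net', '-e', '20', '-d'])
print(args.model, args.epochs, args.debug)  # my_model.net 20 True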
|
||||
|
||||
|
||||
def buffer_to_audio(buffer: bytes) -> np.ndarray:
|
||||
"""Convert a raw mono audio byte string to numpy array of floats"""
|
||||
return np.fromstring(buffer, dtype='<i2').astype(np.float32, order='C') / 32768.0
|
||||
|
||||
|
||||
def inject_params(model_name: str) -> ListenerParams:
|
||||
params_file = model_name + '.params'
|
||||
try:
|
||||
global pr
|
||||
with open(params_file) as f:
|
||||
pr = ListenerParams(**json.load(f))
|
||||
except (OSError, ValueError, TypeError):
|
||||
print('Warning: Failed to load parameters from ' + params_file)
|
||||
return pr
|
||||
|
||||
|
||||
def save_params(model_name: str):
|
||||
with open(model_name + '.params', 'w') as f:
|
||||
json.dump(pr._asdict(), f)
|
||||
|
||||
|
||||
def vectorize_raw(audio: np.ndarray) -> np.ndarray:
|
||||
"""Turns audio into feature vectors, without clipping for length"""
|
||||
from speechpy.feature import mfcc
|
||||
return mfcc(audio, pr.sample_rate, pr.window_t, pr.hop_t, pr.n_mfcc, pr.n_filt, pr.n_fft)
|
||||
|
||||
|
||||
def vectorize(audio: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Args:
|
||||
audio: Audio verified to be of `sample_rate`
|
||||
|
||||
Returns:
|
||||
array<float>: Vector representation of audio
|
||||
"""
|
||||
if len(audio) > pr.max_samples:
|
||||
audio = audio[-pr.max_samples:]
|
||||
features = vectorize_raw(audio)
|
||||
if len(features) < pr.n_features:
|
||||
features = np.concatenate(
|
||||
[np.zeros((pr.n_features - len(features), len(features[0]))), features])
|
||||
if len(features) > pr.n_features:
|
||||
features = features[-pr.n_features:]
|
||||
|
||||
return features
|
||||
|
||||
|
||||
def vectorize_inhibit(audio: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Returns an array of inputs generated from the
|
||||
keyword audio that shouldn't cause an activation
|
||||
"""
|
||||
|
||||
def samp(x):
|
||||
return int(pr.sample_rate * x)
|
||||
|
||||
inputs = []
|
||||
for offset in range(samp(inhibit_t), samp(inhibit_dist_t), samp(inhibit_hop_t)):
|
||||
if len(audio) - offset < samp(pr.buffer_t / 2.):
|
||||
break
|
||||
inputs.append(vectorize(audio[:-offset]))
|
||||
return np.array(inputs) if inputs else np.empty((0, pr.n_features, pr.feature_size))
|
||||
|
||||
|
||||
def load_vector(name: str, vectorizer: Callable = vectorize) -> np.ndarray:
|
||||
"""Loads and caches a vector input from a wav or npy file"""
|
||||
import os
|
||||
|
||||
save_name = name if name.endswith('.npy') else os.path.join('cache', str(abs(hash(pr))),
|
||||
vectorizer.__name__ + '.' + name + '.npy')
|
||||
|
||||
if os.path.isfile(save_name):
|
||||
return np.load(save_name)
|
||||
|
||||
print('Loading ' + name + '...')
|
||||
os.makedirs(os.path.dirname(save_name), exist_ok=True)
|
||||
|
||||
vec = vectorizer(load_audio(name))
|
||||
np.save(save_name, vec)
|
||||
return vec
|
||||
|
||||
|
||||
def load_audio(file: Any) -> np.ndarray:
|
||||
"""
|
||||
Args:
|
||||
file: Audio filename or file object
|
||||
Returns:
|
||||
samples: Sample rate and audio samples from 0..1
|
||||
"""
|
||||
import wavio
|
||||
wav = wavio.read(file)
|
||||
if wav.data.dtype != np.int16:
|
||||
raise ValueError('Unsupported data type: ' + str(wav.data.dtype))
|
||||
if wav.rate != pr.sample_rate:
|
||||
raise ValueError('Unsupported sample rate: ' + str(wav.rate))
|
||||
|
||||
data = np.squeeze(wav.data)
|
||||
return data.astype(np.float32) / float(np.iinfo(data.dtype).max)
|
||||
|
||||
|
||||
def save_audio(filename: str, audio: np.ndarray):
|
||||
import wavio
|
||||
save_audio = (audio * np.iinfo(np.int16).max).astype(np.int16)
|
||||
wavio.write(filename, save_audio, pr.sample_rate, sampwidth=pr.sample_depth, scale='none')
|
||||
|
||||
|
||||
def glob_all(folder: str, filt: str) -> List[str]:
|
||||
"""Recursive glob"""
|
||||
import os
|
||||
import fnmatch
|
||||
matches = []
|
||||
for root, dirnames, filenames in os.walk(folder):
|
||||
for filename in fnmatch.filter(filenames, filt):
|
||||
matches.append(os.path.join(root, filename))
|
||||
return matches
|
||||
|
||||
|
||||
def find_wavs(folder: str) -> Tuple[List[str], List[str]]:
|
||||
"""Finds keyword and not-keyword wavs in folder"""
|
||||
return glob_all(folder + '/keyword', '*.wav'), glob_all(folder + '/not-keyword', '*.wav')
|
||||
|
||||
|
||||
def weighted_log_loss(yt, yp) -> Any:
|
||||
"""
|
||||
Binary crossentropy with a bias towards false negatives
|
||||
yt: Target
|
||||
yp: Prediction
|
||||
"""
|
||||
from keras import backend as K
|
||||
weight = 0.9 # [0..1] where 1 is inf bias
|
||||
|
||||
pos_loss = -(0 + yt) * K.log(0 + yp + K.epsilon())
|
||||
neg_loss = -(1 - yt) * K.log(1 - yp + K.epsilon())
|
||||
return weight * K.sum(neg_loss) + (1. - weight) * K.sum(pos_loss)
|
||||
|
||||
|
||||
def weighted_mse_loss(yt, yp) -> Any:
|
||||
from keras import backend as K
|
||||
weight = 0.9 # [0..1] where 1 is inf bias
|
||||
|
||||
total = K.sum(K.ones_like(yt))
|
||||
neg_loss = total * K.sum(K.square(yp * (1 - yt))) / K.sum(1 - yt)
|
||||
pos_loss = total * K.sum(K.square(1. - (yp * yt))) / K.sum(yt)
|
||||
|
||||
return weight * neg_loss + (1. - weight) * pos_loss
|
||||
|
||||
|
||||
def false_pos(yt, yp) -> Any:
|
||||
from keras import backend as K
|
||||
return K.sum(K.cast(yp * (1 - yt) > 0.5, 'float')) / K.sum(1 - yt)
|
||||
|
||||
|
||||
def false_neg(yt, yp) -> Any:
|
||||
from keras import backend as K
|
||||
return K.sum(K.cast((1 - yp) * (0 + yt) > 0.5, 'float')) / K.sum(0 + yt)
|
||||
|
||||
|
||||
def load_keras() -> Any:
|
||||
import keras
|
||||
keras.losses.weighted_log_loss = weighted_log_loss
|
||||
keras.metrics.false_pos = false_pos
|
||||
keras.metrics.false_neg = false_neg
|
||||
return keras
|
||||
|
||||
|
||||
def load_precise_model(model_name: str) -> Any:
|
||||
"""Loads a Keras model from file, handling custom loss function"""
|
||||
if not model_name.endswith('.net'):
|
||||
print('Warning: Unknown model type, ', model_name)
|
||||
|
||||
inject_params(model_name)
|
||||
return load_keras().models.load_model(model_name)
|
||||
|
||||
|
||||
def create_model(model_name: str, skip_acc: bool = False) -> Any:
|
||||
"""
|
||||
Load or create a precise model
|
||||
|
||||
Args:
|
||||
model_name: Name of model
|
||||
skip_acc: Whether to skip accuracy calculation while training
|
||||
|
||||
Returns:
|
||||
model: Loaded Keras model
|
||||
"""
|
||||
if isfile(model_name):
|
||||
print('Loading from ' + model_name + '...')
|
||||
model = load_precise_model(model_name)
|
||||
else:
|
||||
from keras.layers.core import Dense
|
||||
from keras.layers.recurrent import GRU
|
||||
from keras.models import Sequential
|
||||
|
||||
model = Sequential()
|
||||
model.add(GRU(lstm_units, activation='linear', input_shape=(pr.n_features, pr.feature_size),
|
||||
dropout=0.3, name='net'))
|
||||
model.add(Dense(1, activation='sigmoid'))
|
||||
|
||||
load_keras()
|
||||
metrics = ['accuracy', false_pos, false_neg]
|
||||
model.compile('rmsprop', weighted_log_loss, metrics=(not skip_acc) * metrics)
|
||||
return model
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
from typing import *
|
||||
|
||||
|
||||
def weighted_log_loss(yt, yp) -> Any:
|
||||
"""
|
||||
Binary crossentropy with a bias towards false negatives
|
||||
yt: Target
|
||||
yp: Prediction
|
||||
"""
|
||||
from keras import backend as K
|
||||
weight = 0.99 # [0..1] where 1 is inf bias
|
||||
|
||||
pos_loss = -(0 + yt) * K.log(0 + yp + K.epsilon())
|
||||
neg_loss = -(1 - yt) * K.log(1 - yp + K.epsilon())
|
||||
return weight * K.sum(neg_loss) + (1. - weight) * K.sum(pos_loss)
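A small NumPy sketch (not part of the commit) of the trade-off this weighting makes: with weight = 0.99, a confident false activation costs roughly a hundred times more than a confident missed activation, which is the bias towards false negatives described above.

import numpy as np

def weighted_log_loss_np(yt, yp, weight=0.99, eps=1e-7):
    # Mirrors the Keras loss above: neg_loss punishes false positives,
    # pos_loss punishes false negatives
    pos_loss = -yt * np.log(yp + eps)
    neg_loss = -(1 - yt) * np.log(1 - yp + eps)
    return weight * neg_loss.sum() + (1 - weight) * pos_loss.sum()

print(weighted_log_loss_np(np.array([0.0]), np.array([0.9])))  # ~2.28 (false positive)
print(weighted_log_loss_np(np.array([1.0]), np.array([0.1])))  # ~0.02 (false negative)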
|
||||
|
||||
|
||||
def weighted_mse_loss(yt, yp) -> Any:
|
||||
from keras import backend as K
|
||||
weight = 0.9 # [0..1] where 1 is inf bias
|
||||
|
||||
total = K.sum(K.ones_like(yt))
|
||||
neg_loss = total * K.sum(K.square(yp * (1 - yt))) / K.sum(1 - yt)
|
||||
pos_loss = total * K.sum(K.square(1. - (yp * yt))) / K.sum(yt)
|
||||
|
||||
return weight * neg_loss + (1. - weight) * pos_loss
|
||||
|
||||
|
||||
def false_pos(yt, yp) -> Any:
|
||||
from keras import backend as K
|
||||
return K.sum(K.cast(yp * (1 - yt) > 0.5, 'float')) / K.sum(1 - yt)
|
||||
|
||||
|
||||
def false_neg(yt, yp) -> Any:
|
||||
from keras import backend as K
|
||||
return K.sum(K.cast((1 - yp) * (0 + yt) > 0.5, 'float')) / K.sum(0 + yt)
|
||||
|
||||
|
||||
def load_keras() -> Any:
|
||||
import keras
|
||||
keras.losses.weighted_log_loss = weighted_log_loss
|
||||
keras.metrics.false_pos = false_pos
|
||||
keras.metrics.false_positives = false_pos
|
||||
keras.metrics.false_neg = false_neg
|
||||
return keras
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
from os.path import isfile
|
||||
from typing import *
|
||||
|
||||
from precise.functions import load_keras, false_pos, false_neg, weighted_log_loss
|
||||
from precise.params import inject_params
|
||||
|
||||
lstm_units = 20
|
||||
|
||||
|
||||
def load_precise_model(model_name: str) -> Any:
|
||||
"""Loads a Keras model from file, handling custom loss function"""
|
||||
if not model_name.endswith('.net'):
|
||||
print('Warning: Unknown model type, ', model_name)
|
||||
|
||||
inject_params(model_name)
|
||||
return load_keras().models.load_model(model_name)
|
||||
|
||||
|
||||
def create_model(model_name: str, skip_acc: bool = False) -> Any:
|
||||
"""
|
||||
Load or create a precise model
|
||||
|
||||
Args:
|
||||
model_name: Name of model
|
||||
skip_acc: Whether to skip accuracy calculation while training
|
||||
|
||||
Returns:
|
||||
model: Loaded Keras model
|
||||
"""
|
||||
if isfile(model_name):
|
||||
print('Loading from ' + model_name + '...')
|
||||
model = load_precise_model(model_name)
|
||||
else:
|
||||
from keras.layers.core import Dense
|
||||
from keras.layers.recurrent import GRU
|
||||
from keras.models import Sequential
|
||||
|
||||
model = Sequential()
|
||||
model.add(GRU(lstm_units, activation='linear', input_shape=(pr.n_features, pr.feature_size),
|
||||
dropout=0.3, name='net'))
|
||||
model.add(Dense(1, activation='sigmoid'))
|
||||
|
||||
load_keras()
|
||||
metrics = ['accuracy', false_pos, false_neg]
|
||||
model.compile('rmsprop', weighted_log_loss, metrics=(not skip_acc) * metrics)
|
||||
return model
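A brief usage sketch (not part of the commit; the model filename is hypothetical and Keras must be installed) of the helper above:

from precise.model import create_model
from precise.params import save_params

model = create_model('hey-computer.net')  # builds a fresh GRU(20) -> Dense(1) network if the file is absent
model.summary()
save_params('hey-computer.net')           # writes hey-computer.net.params next to the model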
|
||||
|
|
@ -4,10 +4,13 @@ from abc import abstractmethod, ABCMeta
|
|||
from importlib import import_module
|
||||
from os.path import splitext
|
||||
from typing import *
|
||||
from typing import BinaryIO
|
||||
|
||||
import numpy as np
|
||||
|
||||
from precise.common import buffer_to_audio, load_precise_model, inject_params
|
||||
from precise.util import buffer_to_audio
|
||||
from precise.model import load_precise_model
|
||||
from precise.params import inject_params
|
||||
|
||||
|
||||
class Runner(metaclass=ABCMeta):
|
||||
|
|
@ -15,7 +18,7 @@ class Runner(metaclass=ABCMeta):
|
|||
def run(self, inp: np.ndarray) -> float:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class TensorflowRunner(Runner):
|
||||
def __init__(self, model_name: str):
|
||||
if model_name.endswith('.net'):
|
||||
|
|
|
|||
|
|
@ -1,30 +1,66 @@
|
|||
# Python 3
|
||||
# Copyright (c) 2017 Mycroft AI Inc.
|
||||
|
||||
import json
|
||||
from collections import namedtuple
|
||||
from math import floor
|
||||
from typing import *
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _make_cls() -> type:
|
||||
def _create_listener_params():
|
||||
cls = namedtuple('ListenerParams',
|
||||
'window_t hop_t buffer_t sample_rate sample_depth n_mfcc n_filt n_fft')
|
||||
cls.buffer_samples = property(
|
||||
lambda s: s.hop_samples * (int(np.round(s.sample_rate * s.buffer_t)) // s.hop_samples)
|
||||
)
|
||||
cls.n_features = property(
|
||||
lambda s: 1 + int(floor((s.buffer_samples - s.window_samples) / s.hop_samples))
|
||||
)
|
||||
cls.window_samples = property(lambda s: int(s.sample_rate * s.window_t + 0.5))
|
||||
cls.hop_samples = property(lambda s: int(s.sample_rate * s.hop_t + 0.5))
|
||||
cls.max_samples = property(lambda s: int(s.buffer_t * s.sample_rate))
|
||||
cls.feature_size = property(lambda s: s.n_mfcc)
|
||||
|
||||
def add_prop(name: str, fn: Callable):
|
||||
setattr(cls, name, property(fn))
|
||||
|
||||
import numpy as np
|
||||
|
||||
add_prop('buffer_samples',
|
||||
lambda s: s.hop_samples * (int(np.round(s.sample_rate * s.buffer_t)) // s.hop_samples))
|
||||
add_prop('window_samples', lambda s: int(s.sample_rate * s.window_t + 0.5))
|
||||
add_prop('hop_samples', lambda s: int(s.sample_rate * s.hop_t + 0.5))
|
||||
|
||||
add_prop('n_features',
|
||||
lambda s: 1 + int(floor((s.buffer_samples - s.window_samples) / s.hop_samples)))
|
||||
add_prop('feature_size', lambda s: s.n_mfcc)
|
||||
add_prop('max_samples', lambda s: int(s.buffer_t * s.sample_rate))
|
||||
return cls
|
||||
|
||||
|
||||
ListenerParams = _make_cls()
|
||||
class Proxy:
|
||||
def __init__(self, obj):
|
||||
self.obj = obj
|
||||
|
||||
def __getattr__(self, item):
|
||||
return getattr(self.obj, item)
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
if key == 'obj':
|
||||
object.__setattr__(self, key, value)
|
||||
else:
|
||||
raise AttributeError('Cannot set attributes to proxy')
|
||||
|
||||
def __hash__(self):
|
||||
return self.obj.__hash__()
|
||||
|
||||
|
||||
ListenerParams = _create_listener_params()
|
||||
|
||||
# Reference to global listener parameters
|
||||
pr = Proxy(ListenerParams(
|
||||
window_t=0.1, hop_t=0.05, buffer_t=1.5, sample_rate=16000,
|
||||
sample_depth=2, n_mfcc=13, n_filt=20, n_fft=512
|
||||
))
|
||||
|
||||
|
||||
def inject_params(model_name: str) -> ListenerParams:
|
||||
"""Set the global listener params to a saved model"""
|
||||
params_file = model_name + '.params'
|
||||
try:
|
||||
with open(params_file) as f:
|
||||
pr.obj = ListenerParams(**json.load(f))
|
||||
except (OSError, ValueError, TypeError):
|
||||
print('Warning: Failed to load parameters from ' + params_file)
|
||||
return pr
|
||||
|
||||
|
||||
def save_params(model_name: str):
|
||||
"""Save current global listener params to a file"""
|
||||
with open(model_name + '.params', 'w') as f:
|
||||
json.dump(pr._asdict(), f)
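As a worked example (not part of the commit), the default parameters above give the following derived values, which is where the network's (pr.n_features, pr.feature_size) = (29, 13) input shape comes from:

from precise.params import pr

print(pr.window_samples)  # int(16000 * 0.1 + 0.5)            -> 1600
print(pr.hop_samples)     # int(16000 * 0.05 + 0.5)           -> 800
print(pr.buffer_samples)  # 800 * (round(16000 * 1.5) // 800) -> 24000
print(pr.n_features)      # 1 + (24000 - 1600) // 800         -> 29
print(pr.feature_size)    # n_mfcc                            -> 13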
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
|
||||
class PocketsphinxListener:
|
||||
def __init__(self, key_phrase, dict_file, hmm_folder, threshold=1e-90):
|
||||
from pocketsphinx import Decoder
|
||||
config = Decoder.default_config()
|
||||
config.set_string('-hmm', hmm_folder)
|
||||
config.set_string('-dict', dict_file)
|
||||
config.set_string('-keyphrase', key_phrase)
|
||||
config.set_float('-kws_threshold', float(threshold))
|
||||
config.set_float('-samprate', 16000)
|
||||
config.set_int('-nfft', 2048)
|
||||
config.set_string('-logfn', '/dev/null')
|
||||
self.key_phrase = key_phrase
|
||||
self.decoder = Decoder(config)
|
||||
|
||||
def transcribe(self, byte_data):
|
||||
self.decoder.start_utt()
|
||||
self.decoder.process_raw(byte_data, False, False)
|
||||
self.decoder.end_utt()
|
||||
return self.decoder.hyp()
|
||||
|
||||
def found_wake_word(self, frame_data):
|
||||
hyp = self.transcribe(frame_data + b'\0' * int(2 * 16000 * 0.01))
|
||||
return bool(hyp and self.key_phrase in hyp.hypstr.lower())
|
||||
|
|
@ -9,17 +9,17 @@ sys.path += ['.'] # noqa
|
|||
import os
|
||||
from os.path import split, isfile
|
||||
from shutil import copyfile
|
||||
from precise.common import create_parser
|
||||
from prettyparse import create_parser
|
||||
|
||||
usage = """
|
||||
Convert keyword model from Keras to TensorFlow
|
||||
|
||||
:model str
|
||||
Input Keras model (.net)
|
||||
|
||||
:-o --out str {model}.pb
|
||||
Custom output TensorFlow protobuf filename
|
||||
"""
|
||||
usage = '''
|
||||
Convert keyword model from Keras to TensorFlow
|
||||
|
||||
:model str
|
||||
Input Keras model (.net)
|
||||
|
||||
:-o --out str {model}.pb
|
||||
Custom output TensorFlow protobuf filename
|
||||
'''
|
||||
|
||||
|
||||
def convert(model_path: str, out_file: str):
|
||||
|
|
@ -33,7 +33,7 @@ def convert(model_path: str, out_file: str):
|
|||
print('Converting', model_path, 'to', out_file, '...')
|
||||
|
||||
import tensorflow as tf
|
||||
from precise.common import load_precise_model
|
||||
from precise.model import load_precise_model
|
||||
from keras import backend as K
|
||||
|
||||
out_dir, filename = split(out_file)
|
||||
|
|
@ -8,28 +8,29 @@ sys.path += ['.', 'runner'] # noqa
|
|||
from threading import Event
|
||||
from random import randint
|
||||
from os.path import join
|
||||
from subprocess import call
|
||||
from subprocess import Popen
|
||||
from prettyparse import create_parser
|
||||
import numpy as np
|
||||
|
||||
from precise.common import buffer_to_audio, save_audio, create_parser
|
||||
from precise.util import save_audio, buffer_to_audio
|
||||
from precise.network_runner import Listener
|
||||
from precise_runner import PreciseRunner
|
||||
from precise_runner.runner import ListenerEngine
|
||||
|
||||
usage = '''
|
||||
Run a model on microphone audio input
|
||||
|
||||
:model str
|
||||
Either Keras (.net) or Tensorflow (.pb) model to run
|
||||
|
||||
:-c --chunk-size int 2048
|
||||
Samples between inferences
|
||||
|
||||
:-s --save-dir str -
|
||||
Folder to save false positives
|
||||
|
||||
:-p --save-prefix str -
|
||||
Prefix for saved filenames
|
||||
Run a model on microphone audio input
|
||||
|
||||
:model str
|
||||
Either Keras (.net) or Tensorflow (.pb) model to run
|
||||
|
||||
:-c --chunk-size int 2048
|
||||
Samples between inferences
|
||||
|
||||
:-s --save-dir str -
|
||||
Folder to save false positives
|
||||
|
||||
:-p --save-prefix str -
|
||||
Prefix for saved filenames
|
||||
'''
|
||||
|
||||
session_id, chunk_num = '%03d' % randint(0, 999), 0
|
||||
|
|
@ -39,7 +40,7 @@ def main():
|
|||
args = create_parser(usage).parse_args()
|
||||
|
||||
def on_activation():
|
||||
call(['aplay', '-q', 'data/activate.wav'])
|
||||
Popen(['aplay', '-q', 'data/activate.wav'])
|
||||
if args.save_dir:
|
||||
global chunk_num
|
||||
nm = join(args.save_dir, args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
|
||||
|
|
@ -62,7 +63,7 @@ def main():
|
|||
|
||||
engine = ListenerEngine(listener)
|
||||
engine.get_prediction = get_prediction
|
||||
runner = PreciseRunner(engine, 1024, on_activation=on_activation, on_prediction=on_prediction)
|
||||
runner = PreciseRunner(engine, 3, on_activation=on_activation, on_prediction=on_prediction)
|
||||
runner.start()
|
||||
Event().wait() # Wait forever
|
||||
|
||||
|
|
@ -6,15 +6,21 @@ import sys
|
|||
sys.path += ['.'] # noqa
|
||||
|
||||
import os
|
||||
from precise.common import create_parser
|
||||
from prettyparse import create_parser
|
||||
|
||||
from precise.network_runner import Listener
|
||||
from precise import __version__
|
||||
|
||||
usage = '''
|
||||
stdin should be a stream of raw int16 audio, written in
|
||||
groups of CHUNK_SIZE samples. If no CHUNK_SIZE is given
|
||||
it will read until EOF. For every chunk, an inference
|
||||
will be given via stdout as a float string, one per line
|
||||
stdin should be a stream of raw int16 audio, written in
|
||||
groups of CHUNK_SIZE samples. If no CHUNK_SIZE is given
|
||||
it will read until EOF. For every chunk, an inference
|
||||
will be given via stdout as a float string, one per line
|
||||
|
||||
:model_name str
|
||||
Keras or Tensorflow model to read from
|
||||
|
||||
...
|
||||
'''
|
||||
|
||||
|
||||
|
|
@ -25,7 +31,6 @@ def main():
|
|||
|
||||
parser = create_parser(usage)
|
||||
parser.add_argument('-v', '--version', action='version', version=__version__)
|
||||
parser.add_argument('model_name')
|
||||
parser.add_argument('chunk_size', type=int, nargs='?', default=-1,
|
||||
help='Number of samples to read before making a prediction.'
|
||||
'Higher values are less computationally expensive')
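To illustrate the stdin/stdout protocol described in the usage text above, here is a rough sketch (the model and wav filenames are hypothetical, and the script path assumes the precise/scripts layout introduced by this commit):

import subprocess
import wave

CHUNK = 2048  # samples per inference
proc = subprocess.Popen(
    ['python3', 'precise/scripts/stream.py', 'hey-computer.net', str(CHUNK)],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE)

with wave.open('sample.wav', 'rb') as wf:  # expected to be 16000 Hz mono int16
    data = wf.readframes(CHUNK)
    while len(data) == 2 * CHUNK:  # 2 bytes per int16 sample
        proc.stdin.write(data)
        proc.stdin.flush()
        print(float(proc.stdout.readline()))  # one float prediction per chunk
        data = wf.readframes(CHUNK)
proc.stdin.close()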
|
||||
|
|
@ -5,20 +5,57 @@ import sys
|
|||
|
||||
sys.path += ['.'] # noqa
|
||||
|
||||
from precise.common import load_precise_model, inject_params, create_parser
|
||||
from prettyparse import create_parser
|
||||
from precise.params import inject_params
|
||||
from precise.model import load_precise_model
|
||||
from precise.train_data import TrainData
|
||||
|
||||
usage = '''
|
||||
Test a model against a dataset
|
||||
|
||||
:model str
|
||||
Keras model file (.net) to test
|
||||
|
||||
:-t --use-train
|
||||
Evaluate training data instead of test data
|
||||
Test a model against a dataset
|
||||
|
||||
:model str
|
||||
Keras model file (.net) to test
|
||||
|
||||
:-t --use-train
|
||||
Evaluate training data instead of test data
|
||||
|
||||
:-nf --no-filenames
|
||||
Don't print out the names of files that failed
|
||||
|
||||
...
|
||||
'''
|
||||
|
||||
|
||||
def show_stats(false_pos, false_neg, true_pos, true_neg, show_filenames):
|
||||
num_correct = len(true_pos) + len(true_neg)
|
||||
total = num_correct + len(false_pos) + len(false_neg)
|
||||
|
||||
def prc(a: int, b: int): # Rounded percent
|
||||
return round(100.0 * (b and a / b), 2)
|
||||
|
||||
if show_filenames:
|
||||
print('=== False Positives ===')
|
||||
for i in false_pos:
|
||||
print(i)
|
||||
print()
|
||||
print('=== False Negatives ===')
|
||||
for i in false_neg:
|
||||
print(i)
|
||||
print()
|
||||
print('=== Counts ===')
|
||||
print('False Positives:', len(false_pos))
|
||||
print('True Negatives:', len(true_neg))
|
||||
print('False Negatives:', len(false_neg))
|
||||
print('True Positives:', len(true_pos))
|
||||
print()
|
||||
print('=== Summary ===')
|
||||
print(num_correct, "out of", total)
|
||||
print(prc(num_correct, total), "%")
|
||||
print()
|
||||
print(prc(len(false_pos), len(false_pos) + len(true_neg)), "% false positives")
|
||||
print(prc(len(false_neg), len(false_neg) + len(true_pos)), "% false negatives")
|
||||
|
||||
|
||||
def main():
|
||||
args = TrainData.parse_args(create_parser(usage))
|
||||
|
||||
|
|
@ -42,27 +79,8 @@ def main():
|
|||
(False, False): true_neg
|
||||
}[prediction[0] > 0.5, target[0] > 0.5].append(name)
|
||||
|
||||
num_correct = len(true_pos) + len(true_neg)
|
||||
total = num_correct + len(false_pos) + len(false_neg)
|
||||
|
||||
def prc(a: int, b: int): # Rounded percent
|
||||
return round(100.0 * (b and a / b), 2)
|
||||
|
||||
print('Data:', data)
|
||||
print('=== False Positives ===')
|
||||
for i in false_pos:
|
||||
print(i)
|
||||
print()
|
||||
print('=== False Negatives ===')
|
||||
for i in false_neg:
|
||||
print(i)
|
||||
print()
|
||||
print('=== Summary ===')
|
||||
print(num_correct, "out of", total)
|
||||
print(prc(num_correct, total), "%")
|
||||
print()
|
||||
print(prc(len(false_pos), len(false_pos) + len(true_neg)), "% false positives")
|
||||
print(prc(len(false_neg), len(false_neg) + len(true_pos)), "% false negatives")
|
||||
show_stats(false_pos, false_neg, true_pos, true_neg, not args.no_filenames)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2017 Mycroft AI Inc.
|
||||
import wave
|
||||
from subprocess import check_output, PIPE
|
||||
from prettyparse import create_parser
|
||||
|
||||
from precise.pocketsphinx_listener import PocketsphinxListener
|
||||
from precise.scripts.test import show_stats
|
||||
from precise.train_data import TrainData
|
||||
|
||||
|
||||
usage = '''
|
||||
Test a dataset using Pocketsphinx
|
||||
|
||||
:key_phrase str
|
||||
Key phrase composed of words from dictionary
|
||||
|
||||
:dict_file str
|
||||
Filename of dictionary with word pronunciations
|
||||
|
||||
:hmm_folder str
|
||||
Folder containing hidden markov model
|
||||
|
||||
:-th --threshold str 1e-90
|
||||
Threshold for activations
|
||||
|
||||
:-t --use-train
|
||||
Evaluate training data instead of test data
|
||||
|
||||
:-nf --no-filenames
|
||||
Don't show the names of files that failed
|
||||
|
||||
...
|
||||
'''
|
||||
|
||||
|
||||
def eval_file(filename) -> float:
|
||||
transcription = check_output(['pocketsphinx_continuous', '-kws_threshold', '1e-20', '-keyphrase', 'hey my craft', '-infile', filename], stderr=PIPE)
|
||||
return float(bool(transcription) and not transcription.isspace())
|
||||
|
||||
|
||||
def main():
|
||||
args = TrainData.parse_args(create_parser(usage))
|
||||
data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
|
||||
print('Data:', data)
|
||||
|
||||
listener = PocketsphinxListener(args.key_phrase, args.dict_file, args.hmm_folder, args.threshold)
|
||||
|
||||
def run_test(filenames, name):
|
||||
print()
|
||||
print('===', name, '===')
|
||||
negatives, positives = [], []
|
||||
for filename in filenames:
|
||||
with wave.open(filename) as wf:
|
||||
frames = wf.readframes(wf.getnframes())
|
||||
out = listener.found_wake_word(frames)
|
||||
{False: negatives, True: positives}[out].append(filename)
|
||||
print('!' if out else '.', end='', flush=True)
|
||||
print()
|
||||
return negatives, positives
|
||||
|
||||
data_files = data.train_files if args.use_train else data.test_files
|
||||
false_neg, true_pos = run_test(data_files[0], 'Keyword')
|
||||
true_neg, false_pos = run_test(data_files[1], 'Not Keyword')
|
||||
|
||||
show_stats(false_pos, false_neg, true_pos, true_neg, not args.no_filenames)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -5,27 +5,32 @@ import sys
|
|||
|
||||
sys.path += ['.'] # noqa
|
||||
|
||||
from prettyparse import create_parser
|
||||
|
||||
from precise.train_data import TrainData
|
||||
from precise.common import inject_params, create_model, save_params, create_parser
|
||||
from precise.model import create_model
|
||||
from precise.params import inject_params, save_params
|
||||
|
||||
usage = '''
|
||||
Train a new model on a dataset
|
||||
|
||||
:model str
|
||||
Keras model file (.net) to load from and save to
|
||||
|
||||
:-e --epochs int 10
|
||||
Number of epochs to train model for
|
||||
|
||||
:-sb --save-best
|
||||
Only save the model each epoch if its stats improve
|
||||
|
||||
:-nv --no-validation
|
||||
Disable accuracy and validation calculation
|
||||
to improve speed during training
|
||||
|
||||
:-mm --metric-monitor str loss
|
||||
Metric used to determine when to save
|
||||
Train a new model on a dataset
|
||||
|
||||
:model str
|
||||
Keras model file (.net) to load from and save to
|
||||
|
||||
:-e --epochs int 10
|
||||
Number of epochs to train model for
|
||||
|
||||
:-sb --save-best
|
||||
Only save the model each epoch if its stats improve
|
||||
|
||||
:-nv --no-validation
|
||||
Disable accuracy and validation calculation
|
||||
to improve speed during training
|
||||
|
||||
:-mm --metric-monitor str loss
|
||||
Metric used to determine when to save
|
||||
|
||||
...
|
||||
'''
|
||||
|
||||
|
||||
|
|
@ -11,47 +11,52 @@ from random import random
|
|||
from glob import glob
|
||||
from os.path import basename, splitext, isfile, join
|
||||
from typing import *
|
||||
from prettyparse import create_parser
|
||||
|
||||
from precise.train_data import TrainData
|
||||
from precise.network_runner import Listener, KerasRunner
|
||||
from precise.common import create_model, load_audio, save_audio, inject_params, create_parser
|
||||
from precise.model import create_model
|
||||
from precise.params import inject_params
|
||||
from precise.util import load_audio, save_audio
|
||||
|
||||
usage = """
|
||||
Train a model to inhibit activation by
|
||||
marking false activations and retraining
|
||||
|
||||
:model str
|
||||
Keras <NAME>.net file to train
|
||||
|
||||
:-e --epochs int 1
|
||||
Number of epochs to train before continuing evaluation
|
||||
|
||||
:-ds --delay-samples int 10
|
||||
Number of timesteps of false activations to save before re-training
|
||||
|
||||
:-c --chunk-size int 2048
|
||||
Number of samples between testing the neural network
|
||||
|
||||
:-b --batch-size int 128
|
||||
Batch size used for training
|
||||
|
||||
:-sb --save-best
|
||||
Only save the model each epoch if its stats improve
|
||||
|
||||
:-mm --metric-monitor str loss
|
||||
Metric used to determine when to save
|
||||
|
||||
:-nv --no-validation
|
||||
Disable accuracy and validation calculation
|
||||
to improve speed during training
|
||||
|
||||
:-r --random-data-dir str data/random
|
||||
Directories with properly encoded wav files of
|
||||
random audio that should not cause an activation
|
||||
"""
|
||||
usage = '''
|
||||
Train a model to inhibit activation by
|
||||
marking false activations and retraining
|
||||
|
||||
:model str
|
||||
Keras <NAME>.net file to train
|
||||
|
||||
:-e --epochs int 1
|
||||
Number of epochs to train before continuing evaluation
|
||||
|
||||
:-ds --delay-samples int 10
|
||||
Number of timesteps of false activations to save before re-training
|
||||
|
||||
:-c --chunk-size int 2048
|
||||
Number of samples between testing the neural network
|
||||
|
||||
:-b --batch-size int 128
|
||||
Batch size used for training
|
||||
|
||||
:-sb --save-best
|
||||
Only save the model each epoch if its stats improve
|
||||
|
||||
:-mm --metric-monitor str loss
|
||||
Metric used to determine when to save
|
||||
|
||||
:-nv --no-validation
|
||||
Disable accuracy and validation calculation
|
||||
to improve speed during training
|
||||
|
||||
:-r --random-data-dir str data/random
|
||||
Directories with properly encoded wav files of
|
||||
random audio that should not cause an activation
|
||||
|
||||
...
|
||||
'''
|
||||
|
||||
|
||||
def chunk_audio(audio: np.ndarray, chunk_size: int) -> Generator[np.ndarray]:
|
||||
def chunk_audio(audio: np.ndarray, chunk_size: int) -> Generator[np.ndarray, None, None]:
|
||||
for i in range(chunk_size, len(audio), chunk_size):
|
||||
yield audio[i - chunk_size:i]
|
||||
|
||||
|
|
@ -102,7 +107,7 @@ class IncrementalTrainer:
|
|||
|
||||
def train_on_audio(self, fn: str):
|
||||
"""Run through a single audio file"""
|
||||
save_test = False
|
||||
save_test = random() > 0.8
|
||||
samples_since_train = 0
|
||||
audio = load_audio(fn)
|
||||
num_chunks = len(audio) // self.args.chunk_size
|
||||
|
|
@ -123,10 +128,8 @@ class IncrementalTrainer:
|
|||
print('Saved to:', name)
|
||||
elif samples_since_train > 0:
|
||||
samples_since_train = self.args.delay_samples
|
||||
else:
|
||||
save_test = random() > 0.8
|
||||
|
||||
if samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
|
||||
if not save_test and samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
|
||||
samples_since_train = 0
|
||||
self.retrain()
|
||||
|
||||
|
|
@ -9,7 +9,8 @@ from typing import *
|
|||
|
||||
import numpy as np
|
||||
|
||||
from precise.common import find_wavs, load_vector, vectorize_inhibit, vectorize
|
||||
from precise.util import find_wavs
|
||||
from precise.vectorization import load_vector, vectorize_inhibit, vectorize
|
||||
|
||||
|
||||
class TrainData:
|
||||
|
|
@ -72,7 +73,7 @@ class TrainData:
|
|||
"""Generate data with inhibitory inputs created from keyword samples"""
|
||||
|
||||
def loader(kws: list, nkws: list):
|
||||
from precise.common import pr
|
||||
from precise.params import pr
|
||||
inputs = np.empty((0, pr.n_features, pr.feature_size))
|
||||
outputs = np.zeros((len(kws), 1))
|
||||
for f in kws:
|
||||
|
|
@ -137,7 +138,7 @@ class TrainData:
|
|||
print('Loading not-keyword...')
|
||||
add(nkw_files, 0.0)
|
||||
|
||||
from precise.common import pr
|
||||
from precise.params import pr
|
||||
return (
|
||||
np.array(inputs) if inputs else np.empty((0, pr.n_features, pr.feature_size)),
|
||||
np.array(outputs) if outputs else np.empty((0, 1))
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
from typing import *
|
||||
|
||||
import numpy as np
|
||||
|
||||
from precise.params import pr
|
||||
|
||||
|
||||
def buffer_to_audio(buffer: bytes) -> np.ndarray:
|
||||
"""Convert a raw mono audio byte string to numpy array of floats"""
|
||||
return np.fromstring(buffer, dtype='<i2').astype(np.float32, order='C') / 32768.0
|
||||
|
||||
|
||||
def load_audio(file: Any) -> np.ndarray:
|
||||
"""
|
||||
Args:
|
||||
file: Audio filename or file object
|
||||
Returns:
|
||||
samples: Sample rate and audio samples from 0..1
|
||||
"""
|
||||
import wavio
|
||||
wav = wavio.read(file)
|
||||
if wav.data.dtype != np.int16:
|
||||
raise ValueError('Unsupported data type: ' + str(wav.data.dtype))
|
||||
if wav.rate != pr.sample_rate:
|
||||
raise ValueError('Unsupported sample rate: ' + str(wav.rate))
|
||||
|
||||
data = np.squeeze(wav.data)
|
||||
return data.astype(np.float32) / float(np.iinfo(data.dtype).max)
|
||||
|
||||
|
||||
def save_audio(filename: str, audio: np.ndarray):
|
||||
import wavio
|
||||
save_audio = (audio * np.iinfo(np.int16).max).astype(np.int16)
|
||||
wavio.write(filename, save_audio, pr.sample_rate, sampwidth=pr.sample_depth, scale='none')
|
||||
|
||||
|
||||
def glob_all(folder: str, filt: str) -> List[str]:
|
||||
"""Recursive glob"""
|
||||
import os
|
||||
import fnmatch
|
||||
matches = []
|
||||
for root, dirnames, filenames in os.walk(folder):
|
||||
for filename in fnmatch.filter(filenames, filt):
|
||||
matches.append(os.path.join(root, filename))
|
||||
return matches
|
||||
|
||||
|
||||
def find_wavs(folder: str) -> Tuple[List[str], List[str]]:
|
||||
"""Finds keyword and not-keyword wavs in folder"""
|
||||
return glob_all(folder + '/keyword', '*.wav'), glob_all(folder + '/not-keyword', '*.wav')
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
from typing import *
|
||||
|
||||
import numpy as np
|
||||
|
||||
from precise.util import load_audio
|
||||
from precise.params import pr
|
||||
|
||||
|
||||
inhibit_t = 0.4
|
||||
inhibit_dist_t = 1.0
|
||||
inhibit_hop_t = 0.1
|
||||
|
||||
|
||||
def vectorize_raw(audio: np.ndarray) -> np.ndarray:
|
||||
"""Turns audio into feature vectors, without clipping for length"""
|
||||
from speechpy.feature import mfcc
|
||||
return mfcc(audio, pr.sample_rate, pr.window_t, pr.hop_t, pr.n_mfcc, pr.n_filt, pr.n_fft)
|
||||
|
||||
|
||||
def vectorize(audio: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Args:
|
||||
audio: Audio verified to be of `sample_rate`
|
||||
|
||||
Returns:
|
||||
array<float>: Vector representation of audio
|
||||
"""
|
||||
if len(audio) > pr.max_samples:
|
||||
audio = audio[-pr.max_samples:]
|
||||
features = vectorize_raw(audio)
|
||||
if len(features) < pr.n_features:
|
||||
features = np.concatenate(
|
||||
[np.zeros((pr.n_features - len(features), len(features[0]))), features])
|
||||
if len(features) > pr.n_features:
|
||||
features = features[-pr.n_features:]
|
||||
|
||||
return features
|
||||
|
||||
|
||||
def vectorize_inhibit(audio: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Returns an array of inputs generated from the
|
||||
keyword audio that shouldn't cause an activation
|
||||
"""
|
||||
|
||||
def samp(x):
|
||||
return int(pr.sample_rate * x)
|
||||
|
||||
inputs = []
|
||||
for offset in range(samp(inhibit_t), samp(inhibit_dist_t), samp(inhibit_hop_t)):
|
||||
if len(audio) - offset < samp(pr.buffer_t / 2.):
|
||||
break
|
||||
inputs.append(vectorize(audio[:-offset]))
|
||||
return np.array(inputs) if inputs else np.empty((0, pr.n_features, pr.feature_size))
|
||||
|
||||
|
||||
def load_vector(name: str, vectorizer: Callable = vectorize) -> np.ndarray:
|
||||
"""Loads and caches a vector input from a wav or npy file"""
|
||||
import os
|
||||
|
||||
save_name = name if name.endswith('.npy') else os.path.join('.cache', str(abs(hash(pr))),
|
||||
vectorizer.__name__ + '.' + name + '.npy')
|
||||
|
||||
if os.path.isfile(save_name):
|
||||
return np.load(save_name)
|
||||
|
||||
print('Loading ' + name + '...')
|
||||
os.makedirs(os.path.dirname(save_name), exist_ok=True)
|
||||
|
||||
vec = vectorizer(load_audio(name))
|
||||
np.save(save_name, vec)
|
||||
return vec
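A minimal usage sketch (not part of the commit; the wav path is hypothetical and its rate must match pr.sample_rate) of the vectorization path above:

from precise.params import pr
from precise.util import load_audio
from precise.vectorization import vectorize

audio = load_audio('data/keyword/sample-001.wav')  # 16000 Hz mono int16 wav
features = vectorize(audio)
print(features.shape)  # (pr.n_features, pr.feature_size), i.e. (29, 13) with the defaults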
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2017 Mycroft AI Inc.
|
||||
|
||||
import sys
|
||||
|
||||
sys.path += ['.', 'runner'] # noqa
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from subprocess import Popen
|
||||
from precise_runner import PreciseRunner, PreciseEngine
|
||||
from threading import Event
|
||||
|
||||
|
||||
def main():
|
||||
parser = ArgumentParser('Implementation demo of precise-stream')
|
||||
parser.add_argument('model')
|
||||
args = parser.parse_args()
|
||||
|
||||
def on_prediction(prob):
|
||||
print('!' if prob > 0.5 else '.', end='', flush=True)
|
||||
|
||||
def on_activation():
|
||||
Popen(['aplay', '-q', 'data/activate.wav'])
|
||||
|
||||
engine = PreciseEngine('./precise/stream.py', args.model)
|
||||
PreciseRunner(engine, on_prediction=on_prediction, on_activation=on_activation, trigger_level=0).start()
|
||||
Event().wait() # Wait forever
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
|
@ -6,15 +6,40 @@ from subprocess import PIPE, Popen
|
|||
from threading import Thread
|
||||
|
||||
|
||||
class PreciseEngine:
|
||||
def __init__(self, exe_file, model_file, chunk_size=2048):
|
||||
self.exe_file = exe_file
|
||||
self.model_file = model_file
|
||||
class Engine:
|
||||
def __init__(self, chunk_size=1024):
|
||||
self.chunk_size = chunk_size
|
||||
|
||||
def start(self):
|
||||
pass
|
||||
|
||||
def stop(self):
|
||||
pass
|
||||
|
||||
def get_prediction(self, chunk):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class PreciseEngine(Engine):
|
||||
"""
|
||||
Wraps a binary precise executable
|
||||
|
||||
Args:
|
||||
exe_file (Union[str, list]): Either filename or list of arguments
|
||||
(ie. ['python', 'precise_stream.py'])
|
||||
model_file (str): Location to .pb model file to use (with .pb.params)
|
||||
chunk_size (int): Number of samples per prediction. Higher numbers
|
||||
decrease CPU usage but increase latency
|
||||
"""
|
||||
|
||||
def __init__(self, exe_file, model_file, chunk_size=1024):
|
||||
Engine.__init__(self, chunk_size)
|
||||
self.exe_args = exe_file if isinstance(exe_file, list) else [exe_file]
|
||||
self.model_file = model_file
|
||||
self.proc = None
|
||||
|
||||
def start(self):
|
||||
self.proc = Popen([self.exe_file, self.model_file, str(self.chunk_size)], stdin=PIPE,
|
||||
self.proc = Popen([*self.exe_args, self.model_file, str(self.chunk_size)], stdin=PIPE,
|
||||
stdout=PIPE)
|
||||
|
||||
def stop(self):
|
||||
|
|
@ -28,39 +53,48 @@ class PreciseEngine:
|
|||
return float(self.proc.stdout.readline())
|
||||
|
||||
|
||||
class ListenerEngine:
|
||||
def __init__(self, listener):
|
||||
self.start = lambda: None
|
||||
self.stop = lambda: None
|
||||
class ListenerEngine(Engine):
|
||||
def __init__(self, listener, chunk_size=1024):
|
||||
Engine.__init__(self, chunk_size)
|
||||
self.get_prediction = listener.update
|
||||
|
||||
|
||||
class PreciseRunner:
|
||||
"""
|
||||
Wrapper to use Precise
|
||||
Wrapper to use Precise. Example:
|
||||
>>> def on_act():
|
||||
... print('Activation!')
|
||||
...
|
||||
>>> p = PreciseRunner(PreciseEngine('./precise-stream'), on_activation=on_act)
|
||||
>>> p.start()
|
||||
>>> from time import sleep; sleep(10)
|
||||
>>> p.stop()
|
||||
|
||||
Args:
|
||||
exe_file (str): Location to precise-stream executable
|
||||
model (str): Location to .pb model file to use (with .pb.params)
|
||||
chunk_size (int): Number of samples per prediction. Higher numbers
|
||||
decrease CPU usage but increase latency
|
||||
engine (Engine): Object containing info on the binary engine
|
||||
trigger_level (int): Number of chunk activations needed to trigger on_activation
|
||||
Higher values add latency but reduce false positives
|
||||
sensitivity (float): From 0.0 to 1.0, relates to the network output level required
|
||||
to consider a chunk "active"
|
||||
stream (BinaryIO): Binary audio stream to read 16000 Hz 1 channel int16
|
||||
audio from. If not given, the microphone is used
|
||||
on_prediction: callback for every new prediction
|
||||
on_activation: callback for when the wake word is heard
|
||||
on_prediction (Callable): callback for every new prediction
|
||||
on_activation (Callable): callback for when the wake word is heard
|
||||
"""
|
||||
def __init__(self, engine, chunk_size=1024, stream=None,
|
||||
on_prediction=lambda x: None, on_activation=lambda: None, trigger_level=3):
|
||||
self.engine = engine
|
||||
self.pa = None
|
||||
self.chunk_size = chunk_size
|
||||
self.thread = None
|
||||
self.stream = stream
|
||||
|
||||
def __init__(self, engine, trigger_level=3, sensitivity=0.5, stream=None,
|
||||
on_prediction=lambda x: None, on_activation=lambda: None):
|
||||
self.engine = engine
|
||||
self.trigger_level = trigger_level
|
||||
self.sensitivity = sensitivity
|
||||
self.stream = stream
|
||||
self.on_prediction = on_prediction
|
||||
self.on_activation = on_activation
|
||||
self.chunk_size = engine.chunk_size
|
||||
|
||||
self.pa = None
|
||||
self.thread = None
|
||||
self.running = False
|
||||
self.trigger_level = trigger_level
|
||||
atexit.register(self.stop)
|
||||
|
||||
def start(self):
|
||||
|
|
@ -68,7 +102,9 @@ class PreciseRunner:
|
|||
if self.stream is None:
|
||||
from pyaudio import PyAudio, paInt16
|
||||
self.pa = PyAudio()
|
||||
self.stream = self.pa.open(16000, 1, paInt16, True, frames_per_buffer=self.chunk_size)
|
||||
self.stream = self.pa.open(
|
||||
16000, 1, paInt16, True, frames_per_buffer=self.chunk_size // 2
|
||||
)
|
||||
|
||||
self.engine.start()
|
||||
self.running = True
|
||||
|
|
@ -98,7 +134,7 @@ class PreciseRunner:
|
|||
prob = self.engine.get_prediction(chunk)
|
||||
self.on_prediction(prob)
|
||||
|
||||
if prob > 0.5 or activation < 0:
|
||||
if prob > 1 - self.sensitivity or activation < 0:
|
||||
activation += 1
|
||||
if activation > self.trigger_level:
|
||||
activation = -self.chunk_size // 50
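A short sketch (not part of the commit, and omitting the decay path not shown in this hunk) of how sensitivity and trigger_level interact in the loop above:

def count_activations(probs, sensitivity=0.5, trigger_level=3, chunk_size=1024):
    # A chunk counts as "active" when prob > 1 - sensitivity; once more than
    # trigger_level active chunks accumulate, the wake word fires and the
    # counter goes negative to suppress immediate re-triggering.
    activation, fired = 0, 0
    for prob in probs:
        if prob > 1 - sensitivity or activation < 0:
            activation += 1
            if activation > trigger_level:
                activation = -chunk_size // 50
                fired += 1
    return fired

print(count_activations([0.9, 0.8, 0.95, 0.9, 0.2]))  # -> 1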
setup.py (13 changes)
@@ -9,11 +9,11 @@ setup(
packages=find_packages(),
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'precise-train=precise.train:main',
|
||||
'precise-train-feedback=precise.train_feedback:main',
|
||||
'precise-stream=precise.stream:main',
|
||||
'precise-test=precise.test:main',
|
||||
'precise-convert=precise.convert:main'
|
||||
'precise-train=precise.scripts.train:main',
|
||||
'precise-train-feedback=precise.scripts.train_feedback:main',
|
||||
'precise-stream=precise.scripts.stream:main',
|
||||
'precise-test=precise.scripts.test:main',
|
||||
'precise-convert=precise.scripts.convert:main'
|
||||
]
|
||||
},
|
||||
install_requires=[
|
||||
|
|
@ -26,7 +26,8 @@ setup(
|
|||
'wavio',
|
||||
'typing',
|
||||
'dataset',
|
||||
# 'precise-runner' # Needs to get uploaded to PyPi first
|
||||
'prettyparse',
|
||||
'precise-runner'
|
||||
],
|
||||
|
||||
author='Matthew Scholefield',
|
||||
setup.sh (5 changes)
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
|
||||
|
||||
found_exe() {
|
||||
is_command() {
|
||||
hash "$1" 2>/dev/null
|
||||
}
|
||||
|
||||
|
|
@ -17,7 +17,7 @@ wait_for_apt() {
|
|||
|
||||
set -e
|
||||
|
||||
if found_exe apt-get; then
|
||||
if is_command apt-get; then
|
||||
wait_for_apt
|
||||
sudo apt-get install -y python3-pip libopenblas-dev python3-scipy cython libhdf5-dev python3-h5py portaudio19-dev
|
||||
fi
|
||||
|
|
@ -40,5 +40,6 @@ if ! $python -c 'import tensorflow' 2>/dev/null && [ "$arch" = "armv7l" ]; then
|
|||
rm tensorflow-1.1.0-cp34-cp34m-linux_armv7l.whl
|
||||
fi
|
||||
|
||||
$pip install -e runner/
|
||||
$pip install -e .