Refactor and source code standardization

pull/1/head
Matthew D. Scholefield 2018-02-20 23:42:04 -06:00
parent 0883ffdeb7
commit 484a90f9bc
22 changed files with 641 additions and 446 deletions

2
.gitignore vendored
View File

@ -1,6 +1,6 @@
dist/
build/
cache/
.cache/
.idea/
__pycache__/
*.egg-info/

14
export.sh Executable file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Export a trained Keras model (<model>.net) into the precise-data checkout
# kept under .cache/, together with a converted TensorFlow .pb file.

# The single argument must end in ".net"
if ! [[ "$1" =~ .*\.net$ ]]; then
    echo "Usage: $0 <model>.net"
    exit 1
fi

# Clone the data repository once; later runs reuse the existing checkout
[ -d .cache/precise-data ] || git clone https://github.com/mycroftai/precise-data .cache/precise-data

# Strip the trailing "net" (the dot is kept) and append the date: <model>.YY-MM-DD
model_name=$(date +"${1%%net}%y-%m-%d")

# "$1" is quoted so that model paths containing spaces or glob characters
# reach convert.py as a single argument
precise/scripts/convert.py "$1" -o ".cache/precise-data/$model_name.pb"
cp "$1" ".cache/precise-data/$model_name.net"
cp "$1.params" ".cache/precise-data/$model_name.net.params"
# NOTE(review): convert.py was already asked to write into .cache/precise-data/,
# so these files may not exist in the current directory - confirm this step is needed
mv "$model_name.pb" "$model_name.pb.params" ".cache/precise-data/"
echo "Converted to .cache/precise-data/$model_name.*"

View File

@ -1,269 +0,0 @@
# Python 3
# Copyright (c) 2017 Mycroft AI Inc.
import json
from argparse import ArgumentParser
from os.path import isfile
from typing import *
import numpy as np
from precise.params import ListenerParams
pr = ListenerParams(window_t=0.1, hop_t=0.05, buffer_t=1.5,
sample_rate=16000, sample_depth=2,
n_mfcc=13, n_filt=20, n_fft=512)
lstm_units = 20
inhibit_t = 0.4
inhibit_dist_t = 1.0
inhibit_hop_t = 0.1
def create_parser(usage: str) -> ArgumentParser:
"""
Creates an argument parser from a condensed usage string in the format of:
:pos_arg_name int
This is the help message
which can span multiple lines
:-o --optional_arg str default_value
The type can be any valid python type
:-eo --extra-option
This adds args.extra_option as a bool
which is False by default
"""
first_line = [i for i in usage.split('\n') if i][0]
indent = ' ' * (len(first_line) - len(first_line.lstrip(' ')))
usage = usage.replace('\n' + indent, '\n')
defaults = {}
description, *descriptors = usage.split('\n:')
parser = ArgumentParser(description=description.strip())
for descriptor in descriptors:
try:
options, *help = descriptor.split('\n')
help = ' '.join(help).replace(' ', '')
if options.count(' ') == 1:
if options[0] == '-':
short, long = options.split(' ')
var_name = long.strip('-').replace('-', '_')
parser.add_argument(short, long, dest=var_name, action='store_true', help=help)
defaults[var_name] = False
else:
short, typ = options.split(' ')
parser.add_argument(short, type=eval(typ), help=help)
else:
short, long, typ, default = options.split(' ')
help += '. Default: ' + default
default = '' if default == '-' else default
parser.add_argument(short, long, type=eval(typ), default=default, help=help)
except Exception as e:
print(e.__class__.__name__ + ': ' + str(e))
print('While parsing:')
print(descriptor)
exit(1)
return parser
def buffer_to_audio(buffer: bytes) -> np.ndarray:
"""Convert a raw mono audio byte string to numpy array of floats"""
return np.fromstring(buffer, dtype='<i2').astype(np.float32, order='C') / 32768.0
def inject_params(model_name: str) -> ListenerParams:
params_file = model_name + '.params'
try:
global pr
with open(params_file) as f:
pr = ListenerParams(**json.load(f))
except (OSError, ValueError, TypeError):
print('Warning: Failed to load parameters from ' + params_file)
return pr
def save_params(model_name: str):
with open(model_name + '.params', 'w') as f:
json.dump(pr._asdict(), f)
def vectorize_raw(audio: np.ndarray) -> np.ndarray:
"""Turns audio into feature vectors, without clipping for length"""
from speechpy.feature import mfcc
return mfcc(audio, pr.sample_rate, pr.window_t, pr.hop_t, pr.n_mfcc, pr.n_filt, pr.n_fft)
def vectorize(audio: np.ndarray) -> np.ndarray:
"""
Args:
audio: Audio verified to be of `sample_rate`
Returns:
array<float>: Vector representation of audio
"""
if len(audio) > pr.max_samples:
audio = audio[-pr.max_samples:]
features = vectorize_raw(audio)
if len(features) < pr.n_features:
features = np.concatenate(
[np.zeros((pr.n_features - len(features), len(features[0]))), features])
if len(features) > pr.n_features:
features = features[-pr.n_features:]
return features
def vectorize_inhibit(audio: np.ndarray) -> np.ndarray:
"""
Returns an array of inputs generated from the
keyword audio that shouldn't cause an activation
"""
def samp(x):
return int(pr.sample_rate * x)
inputs = []
for offset in range(samp(inhibit_t), samp(inhibit_dist_t), samp(inhibit_hop_t)):
if len(audio) - offset < samp(pr.buffer_t / 2.):
break
inputs.append(vectorize(audio[:-offset]))
return np.array(inputs) if inputs else np.empty((0, pr.n_features, pr.feature_size))
def load_vector(name: str, vectorizer: Callable = vectorize) -> np.ndarray:
"""Loads and caches a vector input from a wav or npy file"""
import os
save_name = name if name.endswith('.npy') else os.path.join('cache', str(abs(hash(pr))),
vectorizer.__name__ + '.' + name + '.npy')
if os.path.isfile(save_name):
return np.load(save_name)
print('Loading ' + name + '...')
os.makedirs(os.path.dirname(save_name), exist_ok=True)
vec = vectorizer(load_audio(name))
np.save(save_name, vec)
return vec
def load_audio(file: Any) -> np.ndarray:
"""
Args:
file: Audio filename or file object
Returns:
samples: Sample rate and audio samples from 0..1
"""
import wavio
wav = wavio.read(file)
if wav.data.dtype != np.int16:
raise ValueError('Unsupported data type: ' + str(wav.data.dtype))
if wav.rate != pr.sample_rate:
raise ValueError('Unsupported sample rate: ' + str(wav.rate))
data = np.squeeze(wav.data)
return data.astype(np.float32) / float(np.iinfo(data.dtype).max)
def save_audio(filename: str, audio: np.ndarray):
import wavio
save_audio = (audio * np.iinfo(np.int16).max).astype(np.int16)
wavio.write(filename, save_audio, pr.sample_rate, sampwidth=pr.sample_depth, scale='none')
def glob_all(folder: str, filt: str) -> List[str]:
"""Recursive glob"""
import os
import fnmatch
matches = []
for root, dirnames, filenames in os.walk(folder):
for filename in fnmatch.filter(filenames, filt):
matches.append(os.path.join(root, filename))
return matches
def find_wavs(folder: str) -> Tuple[List[str], List[str]]:
"""Finds keyword and not-keyword wavs in folder"""
return glob_all(folder + '/keyword', '*.wav'), glob_all(folder + '/not-keyword', '*.wav')
def weighted_log_loss(yt, yp) -> Any:
"""
Binary crossentropy with a bias towards false negatives
yt: Target
yp: Prediction
"""
from keras import backend as K
weight = 0.9 # [0..1] where 1 is inf bias
pos_loss = -(0 + yt) * K.log(0 + yp + K.epsilon())
neg_loss = -(1 - yt) * K.log(1 - yp + K.epsilon())
return weight * K.sum(neg_loss) + (1. - weight) * K.sum(pos_loss)
def weighted_mse_loss(yt, yp) -> Any:
from keras import backend as K
weight = 0.9 # [0..1] where 1 is inf bias
total = K.sum(K.ones_like(yt))
neg_loss = total * K.sum(K.square(yp * (1 - yt))) / K.sum(1 - yt)
pos_loss = total * K.sum(K.square(1. - (yp * yt))) / K.sum(yt)
return weight * neg_loss + (1. - weight) * pos_loss
def false_pos(yt, yp) -> Any:
from keras import backend as K
return K.sum(K.cast(yp * (1 - yt) > 0.5, 'float')) / K.sum(1 - yt)
def false_neg(yt, yp) -> Any:
from keras import backend as K
return K.sum(K.cast((1 - yp) * (0 + yt) > 0.5, 'float')) / K.sum(0 + yt)
def load_keras() -> Any:
import keras
keras.losses.weighted_log_loss = weighted_log_loss
keras.metrics.false_pos = false_pos
keras.metrics.false_neg = false_neg
return keras
def load_precise_model(model_name: str) -> Any:
"""Loads a Keras model from file, handling custom loss function"""
if not model_name.endswith('.net'):
print('Warning: Unknown model type, ', model_name)
inject_params(model_name)
return load_keras().models.load_model(model_name)
def create_model(model_name: str, skip_acc: bool = False) -> Any:
"""
Load or create a precise model
Args:
model_name: Name of model
skip_acc: Whether to skip accuracy calculation while training
Returns:
model: Loaded Keras model
"""
if isfile(model_name):
print('Loading from ' + model_name + '...')
model = load_precise_model(model_name)
else:
from keras.layers.core import Dense
from keras.layers.recurrent import GRU
from keras.models import Sequential
model = Sequential()
model.add(GRU(lstm_units, activation='linear', input_shape=(pr.n_features, pr.feature_size),
dropout=0.3, name='net'))
model.add(Dense(1, activation='sigmoid'))
load_keras()
metrics = ['accuracy', false_pos, false_neg]
model.compile('rmsprop', weighted_log_loss, metrics=(not skip_acc) * metrics)
return model

45
precise/functions.py Normal file
View File

@ -0,0 +1,45 @@
from typing import *
def weighted_log_loss(yt, yp) -> Any:
    """
    Binary crossentropy with a bias towards false negatives

    The summed log loss of the negative targets is scaled by `weight`
    and the summed log loss of the positive targets by `1 - weight`.

    yt: Target
    yp: Prediction
    """
    from keras import backend as K

    weight = 0.99  # [0..1] where 1 is inf bias
    eps = K.epsilon()  # Added inside log() so it is never evaluated at zero

    positive_term = K.sum(-(0 + yt) * K.log(0 + yp + eps))
    negative_term = K.sum(-(1 - yt) * K.log(1 - yp + eps))
    return weight * negative_term + (1. - weight) * positive_term
def weighted_mse_loss(yt, yp) -> Any:
    """
    Squared error computed separately per class and blended by `weight`

    The error over negative targets is scaled by `weight` and the error
    over positive targets by `1 - weight`. Each class error is the sum of
    squared errors of that class, divided by the class count and multiplied
    by the total number of targets.

    yt: Target (NOTE(review): the masking assumes values of 0 or 1 - confirm)
    yp: Prediction
    """
    from keras import backend as K

    weight = 0.9  # [0..1] where 1 is inf bias
    eps = K.epsilon()
    total = K.sum(K.ones_like(yt))

    # Negative targets: any non-zero prediction is an error
    neg_count = K.maximum(K.sum(1 - yt), eps)  # Avoids 0 / 0 without negatives
    neg_loss = total * K.sum(K.square(yp * (1 - yt))) / neg_count

    # Positive targets: the distance from 1 is the error. The mask is applied
    # to the difference itself; masking the prediction (1 - yp * yt) would make
    # every negative target contribute a constant 1.0 to this sum
    pos_count = K.maximum(K.sum(yt), eps)  # Avoids 0 / 0 without positives
    pos_loss = total * K.sum(K.square((1. - yp) * yt)) / pos_count

    return weight * neg_loss + (1. - weight) * pos_loss
def false_pos(yt, yp) -> Any:
    """
    Count of predictions above 0.5 after masking with `1 - yt`,
    divided by the sum of `1 - yt`

    yt: Target
    yp: Prediction
    """
    from keras import backend as K

    activated = K.cast(yp * (1 - yt) > 0.5, 'float')
    return K.sum(activated) / K.sum(1 - yt)
def false_neg(yt, yp) -> Any:
    """
    Count of inverted predictions (`1 - yp`) above 0.5 after masking
    with `yt`, divided by the sum of `yt`

    yt: Target
    yp: Prediction
    """
    from keras import backend as K

    missed = K.cast((1 - yp) * (0 + yt) > 0.5, 'float')
    return K.sum(missed) / K.sum(0 + yt)
def load_keras() -> Any:
    """
    Import keras and attach the custom loss and metric functions to
    `keras.losses` / `keras.metrics` under their names

    Returns:
        The imported keras module
    """
    import keras

    keras.losses.weighted_log_loss = weighted_log_loss
    # The false positive metric is registered under both spellings
    for alias in ('false_pos', 'false_positives'):
        setattr(keras.metrics, alias, false_pos)
    keras.metrics.false_neg = false_neg
    return keras

46
precise/model.py Normal file
View File

@ -0,0 +1,46 @@
from os.path import isfile
from typing import *
from precise.functions import load_keras, false_pos, false_neg, weighted_log_loss
from precise.params import inject_params
lstm_units = 20
def load_precise_model(model_name: str) -> Any:
    """
    Load a saved Keras model together with its listener parameters

    A warning is printed when the filename does not end in `.net`, but
    loading is attempted regardless.
    """
    recognized = model_name.endswith('.net')
    if not recognized:
        print('Warning: Unknown model type, ', model_name)

    # Parameters are injected before the model itself is loaded
    inject_params(model_name)
    keras = load_keras()
    return keras.models.load_model(model_name)
def create_model(model_name: str, skip_acc: bool = False) -> Any:
    """
    Load or create a precise model

    If `model_name` is an existing file it is loaded, otherwise a new
    network (a GRU layer followed by a single sigmoid unit) is built.
    In both cases the model is compiled before it is returned.

    Args:
        model_name: Name of model
        skip_acc: Whether to skip accuracy calculation while training

    Returns:
        model: Loaded Keras model
    """
    if isfile(model_name):
        print('Loading from ' + model_name + '...')
        model = load_precise_model(model_name)
    else:
        from keras.layers.core import Dense
        from keras.layers.recurrent import GRU
        from keras.models import Sequential
        # `pr` is not imported at module level in this file; without this
        # import building a new model fails with a NameError
        from precise.params import pr

        model = Sequential()
        model.add(GRU(lstm_units, activation='linear', input_shape=(pr.n_features, pr.feature_size),
                      dropout=0.3, name='net'))
        model.add(Dense(1, activation='sigmoid'))

    load_keras()
    # No metrics at all are computed when accuracy calculation is skipped
    metrics = [] if skip_acc else ['accuracy', false_pos, false_neg]
    model.compile('rmsprop', weighted_log_loss, metrics=metrics)
    return model

View File

@ -4,10 +4,13 @@ from abc import abstractmethod, ABCMeta
from importlib import import_module
from os.path import splitext
from typing import *
from typing import BinaryIO
import numpy as np
from precise.common import buffer_to_audio, load_precise_model, inject_params
from precise.util import buffer_to_audio
from precise.model import load_precise_model
from precise.params import inject_params
class Runner(metaclass=ABCMeta):
@ -15,7 +18,7 @@ class Runner(metaclass=ABCMeta):
def run(self, inp: np.ndarray) -> float:
pass
class TensorflowRunner(Runner):
def __init__(self, model_name: str):
if model_name.endswith('.net'):

View File

@ -1,30 +1,66 @@
# Python 3
# Copyright (c) 2017 Mycroft AI Inc.
import json
from collections import namedtuple
from math import floor
from typing import *
import numpy as np
def _make_cls() -> type:
def _create_listener_params():
cls = namedtuple('ListenerParams',
'window_t hop_t buffer_t sample_rate sample_depth n_mfcc n_filt n_fft')
cls.buffer_samples = property(
lambda s: s.hop_samples * (int(np.round(s.sample_rate * s.buffer_t)) // s.hop_samples)
)
cls.n_features = property(
lambda s: 1 + int(floor((s.buffer_samples - s.window_samples) / s.hop_samples))
)
cls.window_samples = property(lambda s: int(s.sample_rate * s.window_t + 0.5))
cls.hop_samples = property(lambda s: int(s.sample_rate * s.hop_t + 0.5))
cls.max_samples = property(lambda s: int(s.buffer_t * s.sample_rate))
cls.feature_size = property(lambda s: s.n_mfcc)
def add_prop(name: str, fn: Callable):
setattr(cls, name, property(fn))
import numpy as np
add_prop('buffer_samples',
lambda s: s.hop_samples * (int(np.round(s.sample_rate * s.buffer_t)) // s.hop_samples))
add_prop('window_samples', lambda s: int(s.sample_rate * s.window_t + 0.5))
add_prop('hop_samples', lambda s: int(s.sample_rate * s.hop_t + 0.5))
add_prop('n_features',
lambda s: 1 + int(floor((s.buffer_samples - s.window_samples) / s.hop_samples)))
add_prop('feature_size', lambda s: s.n_mfcc)
add_prop('max_samples', lambda s: int(s.buffer_t * s.sample_rate))
return cls
ListenerParams = _make_cls()
class Proxy:
    """
    Forwards attribute reads and hashing to a wrapped object

    The wrapped object lives in `obj`, which is the only attribute that
    may be assigned. Replacing `obj` changes what every holder of the
    proxy sees.
    """

    def __init__(self, obj):
        self.obj = obj

    def __getattr__(self, item):
        # Only reached when normal lookup fails. If `obj` itself is missing
        # (an instance created through __new__, as copy and pickle do),
        # reading self.obj below would call this method again without end
        if item == 'obj':
            raise AttributeError(item)
        return getattr(self.obj, item)

    def __setattr__(self, key, value):
        if key == 'obj':
            # Bypass this override to actually store the wrapped object
            object.__setattr__(self, key, value)
        else:
            raise AttributeError('Cannot set attributes to proxy')

    def __hash__(self):
        return self.obj.__hash__()
ListenerParams = _create_listener_params()
# Reference to global listener parameters
pr = Proxy(ListenerParams(
window_t=0.1, hop_t=0.05, buffer_t=1.5, sample_rate=16000,
sample_depth=2, n_mfcc=13, n_filt=20, n_fft=512
))
def inject_params(model_name: str) -> ListenerParams:
    """
    Point the global listener params at the ones saved next to a model

    Reads `<model_name>.params` as JSON. If the file cannot be read or
    its contents do not fit `ListenerParams`, a warning is printed and
    the current params stay in place.

    Returns:
        The global `pr` reference
    """
    params_file = model_name + '.params'
    try:
        with open(params_file) as f:
            loaded = json.load(f)
        pr.obj = ListenerParams(**loaded)
    except (OSError, ValueError, TypeError):
        print('Warning: Failed to load parameters from ' + params_file)
    return pr
def save_params(model_name: str):
    """Write the current global listener params as JSON to `<model_name>.params`"""
    params_file = model_name + '.params'
    with open(params_file, 'w') as f:
        f.write(json.dumps(pr._asdict()))

View File

@ -0,0 +1,25 @@
class PocketsphinxListener:
    """
    Key phrase detector backed by a pocketsphinx `Decoder`

    Args:
        key_phrase: Phrase to search for in the decoded audio
        dict_file: Passed to the decoder as `-dict`
        hmm_folder: Passed to the decoder as `-hmm`
        threshold: Passed to the decoder as `-kws_threshold` (converted to float)
    """
    SAMPLE_RATE = 16000  # Passed to the decoder as `-samprate`
    SAMPLE_WIDTH = 2  # presumably bytes per 16-bit sample - TODO confirm
    PADDING_T = 0.01  # Length of the zero padding appended before decoding

    def __init__(self, key_phrase, dict_file, hmm_folder, threshold=1e-90):
        import os
        from pocketsphinx import Decoder

        config = Decoder.default_config()
        config.set_string('-hmm', hmm_folder)
        config.set_string('-dict', dict_file)
        config.set_string('-keyphrase', key_phrase)
        config.set_float('-kws_threshold', float(threshold))
        config.set_float('-samprate', self.SAMPLE_RATE)
        config.set_int('-nfft', 2048)
        # os.devnull instead of a literal '/dev/null' keeps this portable
        config.set_string('-logfn', os.devnull)
        self.key_phrase = key_phrase
        self.decoder = Decoder(config)

    def transcribe(self, byte_data):
        """Decode `byte_data` as one utterance and return the decoder's hypothesis"""
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        """Return True if the key phrase occurs in the transcription of `frame_data`"""
        padding = b'\0' * int(self.SAMPLE_WIDTH * self.SAMPLE_RATE * self.PADDING_T)
        hyp = self.transcribe(frame_data + padding)
        # Both sides are lower-cased; comparing the raw key phrase against a
        # lower-cased hypothesis could never match a phrase with capitals
        return bool(hyp and self.key_phrase.lower() in hyp.hypstr.lower())

View File

@ -9,17 +9,17 @@ sys.path += ['.'] # noqa
import os
from os.path import split, isfile
from shutil import copyfile
from precise.common import create_parser
from prettyparse import create_parser
usage = """
Convert keyword model from Keras to TensorFlow
:model str
Input Keras model (.net)
:-o --out str {model}.pb
Custom output TensorFlow protobuf filename
"""
usage = '''
Convert keyword model from Keras to TensorFlow
:model str
Input Keras model (.net)
:-o --out str {model}.pb
Custom output TensorFlow protobuf filename
'''
def convert(model_path: str, out_file: str):
@ -33,7 +33,7 @@ def convert(model_path: str, out_file: str):
print('Converting', model_path, 'to', out_file, '...')
import tensorflow as tf
from precise.common import load_precise_model
from precise.model import load_precise_model
from keras import backend as K
out_dir, filename = split(out_file)

View File

@ -8,28 +8,29 @@ sys.path += ['.', 'runner'] # noqa
from threading import Event
from random import randint
from os.path import join
from subprocess import call
from subprocess import Popen
from prettyparse import create_parser
import numpy as np
from precise.common import buffer_to_audio, save_audio, create_parser
from precise.util import save_audio, buffer_to_audio
from precise.network_runner import Listener
from precise_runner import PreciseRunner
from precise_runner.runner import ListenerEngine
usage = '''
Run a model on microphone audio input
:model str
Either Keras (.net) or Tensorflow (.pb) model to run
:-c --chunk-size int 2048
Samples between inferences
:-s --save-dir str -
Folder to save false positives
:-p --save-prefix str -
Prefix for saved filenames
Run a model on microphone audio input
:model str
Either Keras (.net) or Tensorflow (.pb) model to run
:-c --chunk-size int 2048
Samples between inferences
:-s --save-dir str -
Folder to save false positives
:-p --save-prefix str -
Prefix for saved filenames
'''
session_id, chunk_num = '%03d' % randint(0, 999), 0
@ -39,7 +40,7 @@ def main():
args = create_parser(usage).parse_args()
def on_activation():
call(['aplay', '-q', 'data/activate.wav'])
Popen(['aplay', '-q', 'data/activate.wav'])
if args.save_dir:
global chunk_num
nm = join(args.save_dir, args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
@ -62,7 +63,7 @@ def main():
engine = ListenerEngine(listener)
engine.get_prediction = get_prediction
runner = PreciseRunner(engine, 1024, on_activation=on_activation, on_prediction=on_prediction)
runner = PreciseRunner(engine, 3, on_activation=on_activation, on_prediction=on_prediction)
runner.start()
Event().wait() # Wait forever

View File

@ -6,15 +6,21 @@ import sys
sys.path += ['.'] # noqa
import os
from precise.common import create_parser
from prettyparse import create_parser
from precise.network_runner import Listener
from precise import __version__
usage = '''
stdin should be a stream of raw int16 audio, written in
groups of CHUNK_SIZE samples. If no CHUNK_SIZE is given
it will read until EOF. For every chunk, an inference
will be given via stdout as a float string, one per line
stdin should be a stream of raw int16 audio, written in
groups of CHUNK_SIZE samples. If no CHUNK_SIZE is given
it will read until EOF. For every chunk, an inference
will be given via stdout as a float string, one per line
:model_name str
Keras or Tensorflow model to read from
...
'''
@ -25,7 +31,6 @@ def main():
parser = create_parser(usage)
parser.add_argument('-v', '--version', action='version', version=__version__)
parser.add_argument('model_name')
parser.add_argument('chunk_size', type=int, nargs='?', default=-1,
help='Number of samples to read before making a prediction.'
'Higher values are less computationally expensive')

View File

@ -5,20 +5,57 @@ import sys
sys.path += ['.'] # noqa
from precise.common import load_precise_model, inject_params, create_parser
from prettyparse import create_parser
from precise.params import inject_params
from precise.model import load_precise_model
from precise.train_data import TrainData
usage = '''
Test a model against a dataset
:model str
Keras model file (.net) to test
:-t --use-train
Evaluate training data instead of test data
Test a model against a dataset
:model str
Keras model file (.net) to test
:-t --use-train
Evaluate training data instead of test data
:-nf --no-filenames
Don't print out the names of files that failed
...
'''
def show_stats(false_pos, false_neg, true_pos, true_neg, show_filenames):
    """
    Print the results of a test run

    Args:
        false_pos: Names of the false positives
        false_neg: Names of the false negatives
        true_pos: Names of the true positives
        true_neg: Names of the true negatives
        show_filenames: Whether to list the false positives and negatives by name
    """
    def percent(part: int, whole: int):
        # A zero denominator gives 0.0 rather than a division error
        ratio = part / whole if whole else whole
        return round(100.0 * ratio, 2)

    n_fp, n_fn = len(false_pos), len(false_neg)
    n_tp, n_tn = len(true_pos), len(true_neg)
    num_correct = n_tp + n_tn
    total = num_correct + n_fp + n_fn

    if show_filenames:
        listings = (
            ('=== False Positives ===', false_pos),
            ('=== False Negatives ===', false_neg),
        )
        for title, names in listings:
            print(title)
            for name in names:
                print(name)
            print()

    print('=== Counts ===')
    counts = (
        ('False Positives:', n_fp),
        ('True Negatives:', n_tn),
        ('False Negatives:', n_fn),
        ('True Positives:', n_tp),
    )
    for label, count in counts:
        print(label, count)
    print()

    print('=== Summary ===')
    print(num_correct, "out of", total)
    print(percent(num_correct, total), "%")
    print()
    print(percent(n_fp, n_fp + n_tn), "% false positives")
    print(percent(n_fn, n_fn + n_tp), "% false negatives")
def main():
args = TrainData.parse_args(create_parser(usage))
@ -42,27 +79,8 @@ def main():
(False, False): true_neg
}[prediction[0] > 0.5, target[0] > 0.5].append(name)
num_correct = len(true_pos) + len(true_neg)
total = num_correct + len(false_pos) + len(false_neg)
def prc(a: int, b: int): # Rounded percent
return round(100.0 * (b and a / b), 2)
print('Data:', data)
print('=== False Positives ===')
for i in false_pos:
print(i)
print()
print('=== False Negatives ===')
for i in false_neg:
print(i)
print()
print('=== Summary ===')
print(num_correct, "out of", total)
print(prc(num_correct, total), "%")
print()
print(prc(len(false_pos), len(false_pos) + len(true_neg)), "% false positives")
print(prc(len(false_neg), len(false_neg) + len(true_pos)), "% false negatives")
show_stats(false_pos, false_neg, true_pos, true_neg, not args.no_filenames)
if __name__ == '__main__':

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python3
# Copyright (c) 2017 Mycroft AI Inc.
import wave
from subprocess import check_output, PIPE
from prettyparse import create_parser
from precise.pocketsphinx_listener import PocketsphinxListener
from precise.scripts.test import show_stats
from precise.train_data import TrainData
usage = '''
Test a dataset using Pocketsphinx
:key_phrase str
Key phrase composed of words from dictionary
:dict_file str
Filename of dictionary with word pronunciations
:hmm_folder str
Folder containing hidden markov model
:-th --threshold str 1e-90
Threshold for activations
:-t --use-train
Evaluate training data instead of test data
:-nf --no-filenames
Don't show the names of files that failed
...
'''
def eval_file(filename, key_phrase='hey my craft', threshold='1e-20') -> float:
    """
    Run the `pocketsphinx_continuous` executable on an audio file

    Args:
        filename: Passed to the executable as `-infile`
        key_phrase: Passed as `-keyphrase`. The default is the value that
            used to be hard-coded here
        threshold: Passed as `-kws_threshold`. The default is the value that
            used to be hard-coded here

    Returns:
        1.0 if the executable wrote anything other than whitespace to
        stdout, otherwise 0.0
    """
    command = [
        'pocketsphinx_continuous',
        '-kws_threshold', str(threshold),
        '-keyphrase', key_phrase,
        '-infile', filename,
    ]
    # stderr is captured and discarded so that it does not reach the terminal
    transcription = check_output(command, stderr=PIPE)
    return float(bool(transcription) and not transcription.isspace())
def main():
    """
    Run the Pocketsphinx listener over a dataset and print the statistics

    The keyword and not-keyword files of either the training or the test
    set are each passed through the listener; the resulting lists of
    filenames are handed to `show_stats`.
    """
    args = TrainData.parse_args(create_parser(usage))
    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    print('Data:', data)
    listener = PocketsphinxListener(args.key_phrase, args.dict_file, args.hmm_folder, args.threshold)

    def run_test(filenames, name):
        """Return (files without activation, files with activation)"""
        print()
        print('===', name, '===')
        negatives = []
        positives = []
        for filename in filenames:
            with wave.open(filename) as wf:
                frames = wf.readframes(wf.getnframes())
            out = listener.found_wake_word(frames)
            bucket = positives if out else negatives
            bucket.append(filename)
            # One progress character per file
            print('!' if out else '.', end='', flush=True)
        print()
        return negatives, positives

    if args.use_train:
        data_files = data.train_files
    else:
        data_files = data.test_files

    # An activation is correct on the keyword files and wrong on the others
    false_neg, true_pos = run_test(data_files[0], 'Keyword')
    true_neg, false_pos = run_test(data_files[1], 'Not Keyword')
    show_stats(false_pos, false_neg, true_pos, true_neg, not args.no_filenames)
if __name__ == '__main__':
main()

View File

@ -5,27 +5,32 @@ import sys
sys.path += ['.'] # noqa
from prettyparse import create_parser
from precise.train_data import TrainData
from precise.common import inject_params, create_model, save_params, create_parser
from precise.model import create_model
from precise.params import inject_params, save_params
usage = '''
Train a new model on a dataset
:model str
Keras model file (.net) to load from and save to
:-e --epochs int 10
Number of epochs to train model for
:-sb --save-best
Only save the model each epoch if its stats improve
:-nv --no-validation
Disable accuracy and validation calculation
to improve speed during training
:-mm --metric-monitor str loss
Metric used to determine when to save
Train a new model on a dataset
:model str
Keras model file (.net) to load from and save to
:-e --epochs int 10
Number of epochs to train model for
:-sb --save-best
Only save the model each epoch if its stats improve
:-nv --no-validation
Disable accuracy and validation calculation
to improve speed during training
:-mm --metric-monitor str loss
Metric used to determine when to save
...
'''

View File

@ -11,47 +11,52 @@ from random import random
from glob import glob
from os.path import basename, splitext, isfile, join
from typing import *
from prettyparse import create_parser
from precise.train_data import TrainData
from precise.network_runner import Listener, KerasRunner
from precise.common import create_model, load_audio, save_audio, inject_params, create_parser
from precise.model import create_model
from precise.params import inject_params
from precise.util import load_audio, save_audio
usage = """
Train a model to inhibit activation by
marking false activations and retraining
:model str
Keras <NAME>.net file to train
:-e --epochs int 1
Number of epochs to train before continuing evaluation
:-ds --delay-samples int 10
Number of timesteps of false activations to save before re-training
:-c --chunk-size int 2048
Number of samples between testing the neural network
:-b --batch-size int 128
Batch size used for training
:-sb --save-best
Only save the model each epoch if its stats improve
:-mm --metric-monitor str loss
Metric used to determine when to save
:-nv --no-validation
Disable accuracy and validation calculation
to improve speed during training
:-r --random-data-dir str data/random
Directories with properly encoded wav files of
random audio that should not cause an activation
"""
usage = '''
Train a model to inhibit activation by
marking false activations and retraining
:model str
Keras <NAME>.net file to train
:-e --epochs int 1
Number of epochs to train before continuing evaluation
:-ds --delay-samples int 10
Number of timesteps of false activations to save before re-training
:-c --chunk-size int 2048
Number of samples between testing the neural network
:-b --batch-size int 128
Batch size used for training
:-sb --save-best
Only save the model each epoch if its stats improve
:-mm --metric-monitor str loss
Metric used to determine when to save
:-nv --no-validation
Disable accuracy and validation calculation
to improve speed during training
:-r --random-data-dir str data/random
Directories with properly encoded wav files of
random audio that should not cause an activation
...
'''
def chunk_audio(audio: np.ndarray, chunk_size: int) -> Generator[np.ndarray]:
def chunk_audio(audio: np.ndarray, chunk_size: int) -> Generator[np.ndarray, None, None]:
for i in range(chunk_size, len(audio), chunk_size):
yield audio[i - chunk_size:i]
@ -102,7 +107,7 @@ class IncrementalTrainer:
def train_on_audio(self, fn: str):
"""Run through a single audio file"""
save_test = False
save_test = random() > 0.8
samples_since_train = 0
audio = load_audio(fn)
num_chunks = len(audio) // self.args.chunk_size
@ -123,10 +128,8 @@ class IncrementalTrainer:
print('Saved to:', name)
elif samples_since_train > 0:
samples_since_train = self.args.delay_samples
else:
save_test = random() > 0.8
if samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
if not save_test and samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
samples_since_train = 0
self.retrain()

View File

@ -9,7 +9,8 @@ from typing import *
import numpy as np
from precise.common import find_wavs, load_vector, vectorize_inhibit, vectorize
from precise.util import find_wavs
from precise.vectorization import load_vector, vectorize_inhibit, vectorize
class TrainData:
@ -72,7 +73,7 @@ class TrainData:
"""Generate data with inhibitory inputs created from keyword samples"""
def loader(kws: list, nkws: list):
from precise.common import pr
from precise.params import pr
inputs = np.empty((0, pr.n_features, pr.feature_size))
outputs = np.zeros((len(kws), 1))
for f in kws:
@ -137,7 +138,7 @@ class TrainData:
print('Loading not-keyword...')
add(nkw_files, 0.0)
from precise.common import pr
from precise.params import pr
return (
np.array(inputs) if inputs else np.empty((0, pr.n_features, pr.feature_size)),
np.array(outputs) if outputs else np.empty((0, 1))

50
precise/util.py Normal file
View File

@ -0,0 +1,50 @@
from typing import *
import numpy as np
from precise.params import pr
def buffer_to_audio(buffer: bytes) -> np.ndarray:
    """
    Convert a raw mono audio byte string to numpy array of floats

    The bytes are read as little-endian 16-bit integers and divided by
    32768.0, giving float32 values.
    """
    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
    # The read-only view it returns is copied by astype()
    samples = np.frombuffer(buffer, dtype='<i2')
    return samples.astype(np.float32, order='C') / 32768.0
def load_audio(file: Any) -> np.ndarray:
    """
    Read a wav file with wavio and return its samples as floats

    Args:
        file: Audio filename or file object

    Returns:
        samples: float32 audio samples, divided by the int16 maximum

    Raises:
        ValueError: If the data is not int16 or the sample rate differs
            from the global listener params
    """
    import wavio

    wav = wavio.read(file)
    dtype, rate = wav.data.dtype, wav.rate
    if dtype != np.int16:
        raise ValueError('Unsupported data type: ' + str(dtype))
    if rate != pr.sample_rate:
        raise ValueError('Unsupported sample rate: ' + str(rate))

    # Drop axes of length one
    samples = np.squeeze(wav.data)
    scale = float(np.iinfo(samples.dtype).max)
    return samples.astype(np.float32) / scale
def save_audio(filename: str, audio: np.ndarray):
    """
    Write float audio to a wav file

    The samples are multiplied by the int16 maximum and cast to int16,
    then written with the sample rate and sample width of the global
    listener params.
    """
    import wavio

    int16_max = np.iinfo(np.int16).max
    pcm = (audio * int16_max).astype(np.int16)
    wavio.write(filename, pcm, pr.sample_rate, sampwidth=pr.sample_depth, scale='none')
def glob_all(folder: str, filt: str) -> List[str]:
    """
    Recursive glob

    Walks `folder` and returns the paths of all files whose name matches
    the fnmatch pattern `filt`.
    """
    import fnmatch
    import os

    return [
        os.path.join(root, filename)
        for root, _dirnames, filenames in os.walk(folder)
        for filename in fnmatch.filter(filenames, filt)
    ]
def find_wavs(folder: str) -> Tuple[List[str], List[str]]:
    """
    Finds keyword and not-keyword wavs in folder

    Returns:
        The wav files found below `<folder>/keyword` and the wav files
        found below `<folder>/not-keyword`
    """
    keyword_wavs = glob_all(folder + '/keyword', '*.wav')
    not_keyword_wavs = glob_all(folder + '/not-keyword', '*.wav')
    return keyword_wavs, not_keyword_wavs

72
precise/vectorization.py Normal file
View File

@ -0,0 +1,72 @@
from typing import *
import numpy as np
from precise.util import load_audio
from precise.params import pr
inhibit_t = 0.4
inhibit_dist_t = 1.0
inhibit_hop_t = 0.1
def vectorize_raw(audio: np.ndarray) -> np.ndarray:
    """
    Turns audio into feature vectors, without clipping for length

    The features are speechpy MFCCs configured from the global listener params.
    """
    from speechpy.feature import mfcc

    mfcc_args = (pr.sample_rate, pr.window_t, pr.hop_t, pr.n_mfcc, pr.n_filt, pr.n_fft)
    return mfcc(audio, *mfcc_args)
def vectorize(audio: np.ndarray) -> np.ndarray:
    """
    Turn audio into exactly `pr.n_features` feature vectors

    Only the last `pr.max_samples` samples of the audio are used. If fewer
    than `pr.n_features` vectors result, rows of zeros are added in front;
    if more result, only the last `pr.n_features` are kept.

    Args:
        audio: Audio verified to be of `sample_rate`

    Returns:
        array<float>: Vector representation of audio
    """
    max_samples = pr.max_samples
    if len(audio) > max_samples:
        audio = audio[-max_samples:]

    features = vectorize_raw(audio)
    n_features = pr.n_features
    missing = n_features - len(features)
    if missing > 0:
        padding = np.zeros((missing, len(features[0])))
        features = np.concatenate([padding, features])
    elif missing < 0:
        features = features[-n_features:]
    return features
def vectorize_inhibit(audio: np.ndarray) -> np.ndarray:
    """
    Returns an array of inputs generated from the
    keyword audio that shouldn't cause an activation

    Each input is the vectorized audio with a growing number of samples
    removed from its end. Generation stops once less than half of
    `pr.buffer_t` worth of samples would remain.
    """
    def samp(x):
        # Multiplies by the sample rate and truncates to an int
        return int(pr.sample_rate * x)

    offsets = range(samp(inhibit_t), samp(inhibit_dist_t), samp(inhibit_hop_t))
    min_remaining = samp(pr.buffer_t / 2.)

    inputs = []
    for offset in offsets:
        if len(audio) - offset < min_remaining:
            break
        inputs.append(vectorize(audio[:-offset]))

    if not inputs:
        return np.empty((0, pr.n_features, pr.feature_size))
    return np.array(inputs)
def load_vector(name: str, vectorizer: Callable = vectorize) -> np.ndarray:
    """
    Loads and caches a vector input from a wav or npy file

    A name ending in `.npy` is used as the cache file itself. Otherwise
    the cache file is placed in `.cache/<hash of the listener params>/`
    and named after the vectorizer and `name`.
    """
    import os

    if name.endswith('.npy'):
        save_name = name
    else:
        cache_dir = os.path.join('.cache', str(abs(hash(pr))))
        save_name = os.path.join(cache_dir, vectorizer.__name__ + '.' + name + '.npy')

    if os.path.isfile(save_name):
        return np.load(save_name)

    # Not cached yet: vectorize the audio and store the result
    print('Loading ' + name + '...')
    os.makedirs(os.path.dirname(save_name), exist_ok=True)
    vec = vectorizer(load_audio(name))
    np.save(save_name, vec)
    return vec

32
runner/example.py Executable file
View File

@ -0,0 +1,32 @@
#!/usr/bin/env python3
# Copyright (c) 2017 Mycroft AI Inc.
import sys
sys.path += ['.', 'runner'] # noqa
from argparse import ArgumentParser
from subprocess import Popen
from precise_runner import PreciseRunner, PreciseEngine
from threading import Event
def main():
    """
    Start a PreciseRunner on the model given on the command line

    A '!' or '.' is printed for every prediction and a sound is played
    through `aplay` on every activation.
    """
    parser = ArgumentParser('Implementation demo of precise-stream')
    parser.add_argument('model')
    model_file = parser.parse_args().model

    def on_prediction(prob):
        symbol = '!' if prob > 0.5 else '.'
        print(symbol, end='', flush=True)

    def on_activation():
        # Popen does not wait for the sound to finish playing
        Popen(['aplay', '-q', 'data/activate.wav'])

    engine = PreciseEngine('./precise/stream.py', model_file)
    runner = PreciseRunner(engine, on_prediction=on_prediction, on_activation=on_activation, trigger_level=0)
    runner.start()
    Event().wait()  # Wait forever
if __name__ == '__main__':
main()

View File

@ -6,15 +6,40 @@ from subprocess import PIPE, Popen
from threading import Thread
class PreciseEngine:
def __init__(self, exe_file, model_file, chunk_size=2048):
self.exe_file = exe_file
self.model_file = model_file
class Engine:
    """
    Base class for prediction engines

    Args:
        chunk_size (int): Stored as `self.chunk_size`
    """

    def __init__(self, chunk_size=1024):
        self.chunk_size = chunk_size

    def start(self):
        """Does nothing by default"""

    def stop(self):
        """Does nothing by default"""

    def get_prediction(self, chunk):
        """Must be overridden; the base implementation always raises NotImplementedError"""
        raise NotImplementedError
class PreciseEngine(Engine):
"""
Wraps a binary precise executable
Args:
exe_file (Union[str, list]): Either filename or list of arguments
(ie. ['python', 'precise_stream.py'])
model_file (str): Location to .pb model file to use (with .pb.params)
chunk_size (int): Number of samples per prediction. Higher numbers
decrease CPU usage but increase latency
"""
def __init__(self, exe_file, model_file, chunk_size=1024):
    """Stores the launch arguments; the process handle stays None until one is assigned"""
    Engine.__init__(self, chunk_size)
    # Normalize to a list so a single filename and an argument list are handled alike
    if isinstance(exe_file, list):
        self.exe_args = exe_file
    else:
        self.exe_args = [exe_file]
    self.model_file = model_file
    self.proc = None
def start(self):
self.proc = Popen([self.exe_file, self.model_file, str(self.chunk_size)], stdin=PIPE,
self.proc = Popen([*self.exe_args, self.model_file, str(self.chunk_size)], stdin=PIPE,
stdout=PIPE)
def stop(self):
@ -28,39 +53,48 @@ class PreciseEngine:
return float(self.proc.stdout.readline())
class ListenerEngine:
def __init__(self, listener):
self.start = lambda: None
self.stop = lambda: None
class ListenerEngine(Engine):
    """
    Engine whose predictions come from a listener object

    Args:
        listener: Object whose `update` method is used as `get_prediction`
        chunk_size (int): Passed on to `Engine`
    """

    def __init__(self, listener, chunk_size=1024):
        super().__init__(chunk_size)
        # Predictions are delegated directly to the listener's bound method
        self.get_prediction = listener.update
class PreciseRunner:
"""
Wrapper to use Precise
Wrapper to use Precise. Example:
>>> def on_act():
... print('Activation!')
...
>>> p = PreciseRunner(PreciseEngine('./precise-stream'), on_activation=on_act)
>>> p.start()
>>> from time import sleep; sleep(10)
>>> p.stop()
Args:
exe_file (str): Location to precise-stream executable
model (str): Location to .pb model file to use (with .pb.params)
chunk_size (int): Number of samples per prediction. Higher numbers
decrease CPU usage but increase latency
engine (Engine): Object containing info on the binary engine
trigger_level (int): Number of chunk activations needed to trigger on_activation
Higher values add latency but reduce false positives
sensitivity (float): From 0.0 to 1.0, relates to the network output level required
to consider a chunk "active"
stream (BinaryIO): Binary audio stream to read 16000 Hz 1 channel int16
audio from. If not given, the microphone is used
on_prediction: callback for every new prediction
on_activation: callback for when the wake word is heard
on_prediction (Callable): callback for every new prediction
on_activation (Callable): callback for when the wake word is heard
"""
def __init__(self, engine, chunk_size=1024, stream=None,
on_prediction=lambda x: None, on_activation=lambda: None, trigger_level=3):
self.engine = engine
self.pa = None
self.chunk_size = chunk_size
self.thread = None
self.stream = stream
def __init__(self, engine, trigger_level=3, sensitivity=0.5, stream=None,
on_prediction=lambda x: None, on_activation=lambda: None):
self.engine = engine
self.trigger_level = trigger_level
self.sensitivity = sensitivity
self.stream = stream
self.on_prediction = on_prediction
self.on_activation = on_activation
self.chunk_size = engine.chunk_size
self.pa = None
self.thread = None
self.running = False
self.trigger_level = trigger_level
atexit.register(self.stop)
def start(self):
@ -68,7 +102,9 @@ class PreciseRunner:
if self.stream is None:
from pyaudio import PyAudio, paInt16
self.pa = PyAudio()
self.stream = self.pa.open(16000, 1, paInt16, True, frames_per_buffer=self.chunk_size)
self.stream = self.pa.open(
16000, 1, paInt16, True, frames_per_buffer=self.chunk_size // 2
)
self.engine.start()
self.running = True
@ -98,7 +134,7 @@ class PreciseRunner:
prob = self.engine.get_prediction(chunk)
self.on_prediction(prob)
if prob > 0.5 or activation < 0:
if prob > 1 - self.sensitivity or activation < 0:
activation += 1
if activation > self.trigger_level:
activation = -self.chunk_size // 50

View File

@ -9,11 +9,11 @@ setup(
packages=find_packages(),
entry_points={
'console_scripts': [
'precise-train=precise.train:main',
'precise-train-feedback=precise.train_feedback:main',
'precise-stream=precise.stream:main',
'precise-test=precise.test:main',
'precise-convert=precise.convert:main'
'precise-train=precise.scripts.train:main',
'precise-train-feedback=precise.scripts.train_feedback:main',
'precise-stream=precise.scripts.stream:main',
'precise-test=precise.scripts.test:main',
'precise-convert=precise.scripts.convert:main'
]
},
install_requires=[
@ -26,7 +26,8 @@ setup(
'wavio',
'typing',
'dataset',
# 'precise-runner' # Needs to get uploaded to PyPi first
'prettyparse',
'precise-runner'
],
author='Matthew Scholefield',

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash
found_exe() {
is_command() {
hash "$1" 2>/dev/null
}
@ -17,7 +17,7 @@ wait_for_apt() {
set -e
if found_exe apt-get; then
if is_command apt-get; then
wait_for_apt
sudo apt-get install -y python3-pip libopenblas-dev python3-scipy cython libhdf5-dev python3-h5py portaudio19-dev
fi
@ -40,5 +40,6 @@ if ! $python -c 'import tensorflow' 2>/dev/null && [ "$arch" = "armv7l" ]; then
rm tensorflow-1.1.0-cp34-cp34m-linux_armv7l.whl
fi
$pip install -e runner/
$pip install -e .