Merge remote-tracking branch 'refs/remotes/origin/master'

Conflicts:
	mycroft/client/speech/listener.py
	mycroft/client/speech/wakeword_recognizer.py
pull/60/head
Ryan Sipes 2016-05-27 11:40:24 -05:00
commit fac834cf4a
12 changed files with 372 additions and 416 deletions

View File

@ -20,21 +20,19 @@ import threading
import time
from Queue import Queue
import os
import pyee
import speech_recognition as sr
from speech_recognition import AudioData
from mycroft.client.speech import wakeword_recognizer
from mycroft.client.speech.local_recognizer import LocalRecognizer
from mycroft.client.speech.mic import MutableMicrophone, Recognizer
from mycroft.client.speech.recognizer_wrapper import (
from mycroft.client.speech.recognizer_wrapper import \
RemoteRecognizerWrapperFactory
)
from mycroft.client.speech.word_extractor import WordExtractor
from mycroft.configuration.config import ConfigurationManager
from mycroft.messagebus.message import Message
from mycroft.metrics import MetricsAggregator, Stopwatch
from mycroft.session import SessionManager
from mycroft.util import read_stripped_lines, CerberusAccessDenied
from mycroft.util import CerberusAccessDenied
from mycroft.util.log import getLogger
logger = getLogger(__name__)
@ -49,6 +47,7 @@ class AudioProducer(threading.Thread):
given a mic and a recognizer implementation, continuously listens to the
mic for potential speech chunks and pushes them onto the queue.
"""
def __init__(self, state, queue, mic, recognizer, emitter):
threading.Thread.__init__(self)
self.daemon = True
@ -75,110 +74,6 @@ class AudioProducer(threading.Thread):
self.emitter.emit("recognizer_loop:ioerror", ex)
class WakewordExtractor:
    """
    Locates the wake word inside a recorded utterance and splits the audio
    around it.

    The extractor repeatedly transcribes sub-segments of the WAV data with
    ``recognizer`` while halving a step size, moving the begin/end markers of
    ``self.range`` until the step drops below ``MAX_ERROR_SECONDS`` worth of
    bytes. The audio before and after that range can then be retrieved, each
    padded with ``SILENCE_SECONDS`` of silence.
    """
    MAX_ERROR_SECONDS = 0.02
    TRIM_SECONDS = 0.1
    # The seconds the safe end position is pushed back to ensure pocketsphinx
    # is consistent
    PUSH_BACK_SECONDS = 0.2
    # The seconds of silence padded where the wakeword was removed
    SILENCE_SECONDS = 0.2

    def __init__(self, audio_data, recognizer, metrics):
        """
        :param audio_data: object exposing ``sample_rate``, ``sample_width``
            and ``get_wav_data()``
        :param recognizer: object exposing ``transcribe(data, metrics=...)``
        :param metrics: metrics sink forwarded to ``recognizer.transcribe``
        """
        self.audio_data = audio_data
        self.recognizer = recognizer
        self.silence_data = self.__generate_silence(
            self.SILENCE_SECONDS, self.audio_data.sample_rate,
            self.audio_data.sample_width)
        self.wav_data = self.audio_data.get_wav_data()
        # Kept as a float so that repeated halving of the step stays exact
        self.AUDIO_SIZE = float(len(self.wav_data))
        self.range = self.Range(0, self.AUDIO_SIZE / 2)
        self.metrics = metrics

    class Range:
        """Mutable pair of byte offsets delimiting a span of the audio."""

        def __init__(self, begin, end):
            self.begin = begin
            self.end = end

        def get_marker(self, get_begin):
            """Return ``begin`` when ``get_begin`` is true, else ``end``."""
            if get_begin:
                return self.begin
            else:
                return self.end

        def add_to_marker(self, add_begin, value):
            """Add ``value`` to ``begin`` (``add_begin`` true) or ``end``."""
            if add_begin:
                self.begin += value
            else:
                self.end += value

        def narrow(self, value):
            """Shrink the span by ``value`` on both sides."""
            self.begin += value
            self.end -= value

    @staticmethod
    def __found_in_segment(name, byte_data, recognizer, metrics):
        """
        Return True when the transcription of ``byte_data`` contains ``name``.

        ``str.find`` must not be used as a boolean here: it returns 0 (falsy)
        for a match at the very start and -1 (truthy) when there is no match.
        """
        hypothesis = recognizer.transcribe(byte_data, metrics=metrics)
        return bool(hypothesis) and name in hypothesis.hypstr.lower()

    def audio_pos(self, raw_pos):
        """Round ``raw_pos`` to the nearest multiple of the sample width."""
        return int(self.audio_data.sample_width *
                   round(float(raw_pos)/self.audio_data.sample_width))

    def get_audio_segment(self, begin, end):
        """Return the WAV bytes between the sample-aligned offsets."""
        return self.wav_data[self.audio_pos(begin): self.audio_pos(end)]

    def __calculate_marker(self, use_begin, sign_if_found, range, delta):
        """
        Move one marker of ``range`` by successively halved steps.

        :param use_begin: True to move ``range.begin``, False for ``range.end``
        :param sign_if_found: direction (+1/-1) applied to the step when the
            wake word is found in the current segment; the opposite direction
            is applied otherwise
        :param range: the ``Range`` to adjust in place
        :param delta: initial step size in bytes
        :return: the final position of the adjusted marker
        """
        # Loop-invariant: smallest step (in bytes) worth refining further
        threshold = (self.MAX_ERROR_SECONDS * self.audio_data.sample_rate *
                     self.audio_data.sample_width)
        while 2 * delta >= threshold:
            byte_data = self.get_audio_segment(range.begin, range.end)
            found = self.__found_in_segment(
                "mycroft", byte_data, self.recognizer, self.metrics)
            sign = sign_if_found if found else -sign_if_found
            range.add_to_marker(use_begin, delta * sign)
            delta /= 2
        return range.get_marker(use_begin)

    def calculate_range(self):
        """Compute ``self.range`` so that it brackets the wake word."""
        delta = self.AUDIO_SIZE / 4
        self.range.end = self.__calculate_marker(
            False, -1, self.Range(0, self.AUDIO_SIZE / 2), delta)

        # Ensures the end position is well past the wakeword part of the audio
        pos_end_safe = min(
            self.AUDIO_SIZE, self.range.end + self.PUSH_BACK_SECONDS *
            self.audio_data.sample_rate * self.audio_data.sample_width)
        delta = pos_end_safe / 4
        begin = pos_end_safe / 2
        self.range.begin = self.__calculate_marker(
            True, 1, self.Range(begin, pos_end_safe), delta)
        self.range.narrow(self.TRIM_SECONDS * self.audio_data.sample_rate *
                          self.audio_data.sample_width)

    @staticmethod
    def __generate_silence(seconds, sample_rate, sample_width):
        """Return ``seconds`` of zero-valued audio as a byte string."""
        # A bytes literal is identical to '\0' on Python 2 and keeps the
        # concatenation with audio bytes valid on Python 3.
        return b'\0'*int(seconds * sample_rate * sample_width)

    def get_audio_data_before(self):
        """Return the audio preceding the wake word, followed by silence."""
        byte_data = self.get_audio_segment(
            0, self.range.begin) + self.silence_data
        return AudioData(
            byte_data, self.audio_data.sample_rate,
            self.audio_data.sample_width)

    def get_audio_data_after(self):
        """Return silence followed by the audio after the wake word."""
        byte_data = self.silence_data + self.get_audio_segment(
            self.range.end, self.AUDIO_SIZE)
        return AudioData(
            byte_data, self.audio_data.sample_rate,
            self.audio_data.sample_width)
class AudioConsumer(threading.Thread):
"""
AudioConsumer
@ -188,137 +83,135 @@ class AudioConsumer(threading.Thread):
# In seconds, the minimum audio size to be sent to remote STT
MIN_AUDIO_SIZE = 1.0
def __init__(
self, state, queue, emitter, wakeup_recognizer,
wakeword_recognizer, wrapped_remote_recognizer, wakeup_prefixes,
wakeup_words):
def __init__(self, state, queue, emitter, wakeup_recognizer,
mycroft_recognizer, remote_recognizer):
threading.Thread.__init__(self)
self.daemon = True
self.queue = queue
self.state = state
self.emitter = emitter
self.wakeup_recognizer = wakeup_recognizer
self.ww_recognizer = wakeword_recognizer
self.wrapped_remote_recognizer = wrapped_remote_recognizer
self.wakeup_prefixes = wakeup_prefixes
self.wakeup_words = wakeup_words
self.mycroft_recognizer = mycroft_recognizer
self.remote_recognizer = remote_recognizer
self.metrics = MetricsAggregator()
def run(self):
while self.state.running:
self.try_consume_audio()
self.read_audio()
@staticmethod
def _audio_length(audio):
return float(
len(audio.frame_data))/(audio.sample_rate*audio.sample_width)
return float(len(audio.frame_data)) / (
audio.sample_rate * audio.sample_width)
def try_consume_audio(self):
def read_audio(self):
timer = Stopwatch()
hyp = None
audio = self.queue.get()
self.metrics.timer(
"mycroft.recognizer.audio.length_s", self._audio_length(audio))
self.metrics.timer("mycroft.recognizer.audio.length_s",
self._audio_length(audio))
self.queue.task_done()
timer.start()
if self.state.sleeping:
hyp = self.wakeup_recognizer.transcribe(
audio.get_wav_data(), metrics=self.metrics)
if hyp and hyp.hypstr:
logger.debug("sleeping recognition: " + hyp.hypstr)
if hyp and hyp.hypstr.lower().find("wake up") >= 0:
SessionManager.touch()
self.state.sleeping = False
self.__speak("I'm awake.") # TODO: Localization
self.metrics.increment("mycroft.wakeup")
self.process_wake_up(audio)
elif self.state.skip_wakeword:
self.process_skip_wake_word(audio)
else:
if not self.state.skip_wakeword:
hyp = self.ww_recognizer.transcribe(
audio.get_wav_data(), metrics=self.metrics)
self.process_wake_word(audio, timer)
if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
extractor = WakewordExtractor(
audio, self.ww_recognizer, self.metrics)
timer.lap()
extractor.calculate_range()
self.metrics.timer(
"mycroft.recognizer.extractor.time_s", timer.lap())
audio_before = extractor.get_audio_data_before()
self.metrics.timer(
"mycroft.recognizer.audio_extracted.length_s",
self._audio_length(audio_before))
audio_after = extractor.get_audio_data_after()
self.metrics.timer(
"mycroft.recognizer.audio_extracted.length_s",
self._audio_length(audio_after))
SessionManager.touch()
payload = {
'utterance': hyp.hypstr,
'session': SessionManager.get().session_id,
'pos_begin': int(extractor.range.begin),
'pos_end': int(extractor.range.end)
}
self.emitter.emit("recognizer_loop:wakeword", payload)
try:
self.transcribe([audio_before, audio_after])
except sr.UnknownValueError:
self.__speak("Go ahead")
self.state.skip_wakeword = True
self.metrics.increment("mycroft.wakeword")
elif self.state.skip_wakeword:
SessionManager.touch()
try:
self.transcribe([audio])
except sr.UnknownValueError:
logger.warn(
"Speech Recognition could not understand audio")
self.__speak("Sorry, I didn't catch that.")
self.metrics.increment("mycroft.recognizer.error")
self.state.skip_wakeword = False
else:
self.metrics.clear()
self.metrics.flush()
def process_wake_up(self, audio):
if self.wakeup_recognizer.is_recognized(audio.frame_data,
self.metrics):
SessionManager.touch()
self.state.sleeping = False
self.__speak("I'm awake.") # TODO: Localization
self.metrics.increment("mycroft.wakeup")
def process_wake_word(self, audio, timer):
hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
self.metrics)
if self.mycroft_recognizer.contains(hyp):
extractor = WordExtractor(audio, self.mycroft_recognizer,
self.metrics)
timer.lap()
extractor.calculate_range()
self.metrics.timer("mycroft.recognizer.extractor.time_s",
timer.lap())
audio_before = extractor.get_audio_data_before()
self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
self._audio_length(audio_before))
audio_after = extractor.get_audio_data_after()
self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
self._audio_length(audio_after))
SessionManager.touch()
payload = {
'utterance': hyp.hypstr,
'session': SessionManager.get().session_id,
'pos_begin': extractor.begin,
'pos_end': extractor.end
}
self.emitter.emit("recognizer_loop:wakeword", payload)
try:
self.transcribe([audio_before, audio_after])
except sr.UnknownValueError:
self.__speak("Go ahead")
self.state.skip_wakeword = True
self.metrics.increment("mycroft.wakeword")
def process_skip_wake_word(self, audio):
SessionManager.touch()
try:
self.transcribe([audio])
except sr.UnknownValueError:
logger.warn("Speech Recognition could not understand audio")
self.__speak("Sorry, I didn't catch that.")
self.metrics.increment("mycroft.recognizer.error")
self.state.skip_wakeword = False
def __speak(self, utterance):
"""
Speak commands should be asynchronous to avoid filling up the
portaudio buffer.
Speak commands should be asynchronous to avoid filling up the portaudio
buffer.
:param utterance:
:return:
"""
def target():
self.emitter.emit(
"speak",
Message("speak",
metadata={'utterance': utterance,
'session': SessionManager.get().session_id}))
payload = {
'utterance': utterance,
'session': SessionManager.get().session_id
}
self.emitter.emit("speak", Message("speak", metadata=payload))
threading.Thread(target=target).start()
def _create_remote_stt_runnable(self, audio, utterances):
def runnable():
try:
text = self.wrapped_remote_recognizer.transcribe(
audio, metrics=self.metrics).lower()
text = self.remote_recognizer.transcribe(
audio, metrics=self.metrics).lower()
except sr.UnknownValueError:
pass
except sr.RequestError as e:
logger.error(
"Could not request results from Speech Recognition "
"service; {0}".format(e))
"Could not request results from Speech Recognition "
"service; {0}".format(e))
except CerberusAccessDenied as e:
logger.error("AccessDenied from Cerberus proxy.")
self.__speak(
"Your device is not registered yet. To start pairing, "
"login at cerberus.mycroft.ai")
"Your device is not registered yet. To start pairing, "
"login at cerberus.mycroft.ai")
utterances.append("pair my device")
else:
logger.debug("STT: " + text)
if text.strip() != '':
utterances.append(text)
return runnable
def transcribe(self, audio_segments):
@ -360,20 +253,15 @@ class RecognizerLoop(pyee.EventEmitter):
device_index=None,
lang=core_config.get('lang')):
pyee.EventEmitter.__init__(self)
self.microphone = MutableMicrophone(
sample_rate=sample_rate, device_index=device_index)
self.microphone = MutableMicrophone(sample_rate=sample_rate,
device_index=device_index)
# FIXME - channels are not being used
self.microphone.CHANNELS = channels
self.ww_recognizer = wakeword_recognizer.create_recognizer(
samprate=sample_rate, lang=lang)
self.wakeup_recognizer = wakeword_recognizer.create_recognizer(
samprate=sample_rate, lang=lang,
keyphrase="wake up mycroft") # TODO - localization
self.mycroft_recognizer = LocalRecognizer(sample_rate, lang)
# TODO - localization
self.wakeup_recognizer = LocalRecognizer(sample_rate, lang, "wake up")
self.remote_recognizer = Recognizer()
basedir = os.path.dirname(__file__)
self.wakeup_words = read_stripped_lines(os.path.join(
basedir, 'model', lang, 'WakeUpWord.voc'))
self.wakeup_prefixes = read_stripped_lines(
os.path.join(basedir, 'model', lang, 'PrefixWakeUp.voc'))
self.state = RecognizerLoopState()
def start_async(self):
@ -388,11 +276,9 @@ class RecognizerLoop(pyee.EventEmitter):
queue,
self,
self.wakeup_recognizer,
self.ww_recognizer,
self.mycroft_recognizer,
RemoteRecognizerWrapperFactory.wrap_recognizer(
self.remote_recognizer),
self.wakeup_prefixes,
self.wakeup_words).start()
self.remote_recognizer)).start()
def stop(self):
self.state.running = False

View File

@ -0,0 +1,63 @@
# Copyright 2016 Mycroft AI, Inc.
#
# This file is part of Mycroft Core.
#
# Mycroft Core is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mycroft Core is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
import time
import os
from pocketsphinx.pocketsphinx import *
__author__ = 'seanfitz, jdorleans'
BASEDIR = os.path.dirname(os.path.abspath(__file__))
class LocalRecognizer(object):
    """
    Key-phrase recognizer built on a ``Decoder`` (star-imported from
    pocketsphinx by this module).

    The decoder is configured with the acoustic model and dictionary found
    under ``model/<lang>`` next to this file and with ``key_phrase`` as the
    phrase to spot.
    """

    def __init__(self, sample_rate=16000, lang="en-us", key_phrase="mycroft"):
        """
        :param sample_rate: sample rate handed to the decoder (``-samprate``)
        :param lang: name of the model sub-directory to load
        :param key_phrase: phrase the decoder should spot
        """
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.configure()

    def configure(self):
        """Build ``self.decoder`` from the current attributes."""
        model_dir = os.path.join(BASEDIR, 'model', self.lang)
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_dir, 'hmm'))
        config.set_string('-dict',
                          os.path.join(model_dir, 'mycroft-en-us.dict'))
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float('1e-45'))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        # os.devnull is '/dev/null' on POSIX and stays valid elsewhere
        config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(config)

    def transcribe(self, byte_data, metrics=None):
        """
        Decode ``byte_data`` as a single utterance.

        :param byte_data: raw audio bytes passed to ``process_raw``
        :param metrics: optional object with ``timer(name, value)``; when
            truthy, the decoding time in seconds is reported to it
        :return: whatever ``self.decoder.hyp()`` returns
        """
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        """Return True when the key phrase is heard in ``byte_data``."""
        return self.contains(self.transcribe(byte_data, metrics))

    def contains(self, hypothesis):
        """
        Return True when ``hypothesis`` mentions the key phrase.

        Always returns a real ``bool`` (a falsy hypothesis yields False) and
        compares case-insensitively on both sides, so a key phrase containing
        upper-case letters can still match.
        """
        if not hypothesis:
            return False
        return self.key_phrase.lower() in hypothesis.hypstr.lower()

View File

@ -1,4 +0,0 @@
hey
hay
okay
ok

View File

@ -1,7 +0,0 @@
minecraft
microsoft
mycroft
micro
my friend
my brother
mike ross

View File

@ -1,4 +1,8 @@
hey HH EY
ok OW K EY
okay OW K EY
alright AO L R AY T
allright AA L R AY T
mycroft M AY K R AO F T
up AH P
wake W EY K

View File

@ -1,78 +0,0 @@
# Copyright 2016 Mycroft AI, Inc.
#
# This file is part of Mycroft Core.
#
# Mycroft Core is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mycroft Core is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
from mycroft.metrics import Stopwatch
import os
from pocketsphinx import Decoder
from cmath import exp, pi
__author__ = 'seanfitz'
BASEDIR = os.path.dirname(os.path.abspath(__file__))
def fft(x):
    """
    Recursive radix-2 fast Fourier transform.

    Intended to clean audio data, which must be converted to an array of
    IEEE floats first. The even/odd split is only correct when ``len(x)``
    is a power of two; other lengths are not validated.

    :param x: sequence of numbers
    :return: ``x`` itself when it has at most one element, otherwise a list
        of complex coefficients
    """
    n = len(x)
    if n <= 1:
        return x
    # Floor division and range() behave the same on Python 2 and Python 3
    half = n // 2
    even = fft(x[0::2])
    odd = fft(x[1::2])
    twiddled = [exp(-2j*pi*k/n)*odd[k] for k in range(half)]
    return [even[k] + twiddled[k] for k in range(half)] + \
           [even[k] - twiddled[k] for k in range(half)]
class Recognizer(object):
    """
    Thin wrapper that runs a single utterance through a decoder object
    exposing ``start_utt``, ``process_raw``, ``end_utt`` and ``hyp``.
    """

    def __init__(self, local_recognizer):
        self.local_recognizer = local_recognizer

    def transcribe(self, wav_data, metrics=None):
        """
        Decode ``wav_data`` and return the decoder's hypothesis.

        When ``metrics`` is truthy the elapsed time reported by the stopwatch
        is sent to ``metrics.timer`` under ``mycroft.stt.local.time_s``.
        """
        decoder = self.local_recognizer
        stopwatch = Stopwatch()
        stopwatch.start()

        decoder.start_utt()
        decoder.process_raw(wav_data, False, False)
        decoder.end_utt()

        if metrics:
            elapsed = stopwatch.stop()
            metrics.timer("mycroft.stt.local.time_s", elapsed)
        return decoder.hyp()
def create_recognizer(samprate=16000, lang="en-us", keyphrase="hey mycroft"):
    """
    Build a ``Recognizer`` around a freshly configured ``Decoder``.

    :param samprate: sample rate given to the decoder (``-samprate``)
    :param lang: name of the model sub-directory under ``BASEDIR/model``
    :param keyphrase: phrase the decoder is configured to spot
    :return: a ``Recognizer`` wrapping the new decoder
    """
    model_dir = os.path.join(BASEDIR, 'model', lang)
    hmm_path = os.path.join(model_dir, 'hmm')
    dict_path = os.path.join(model_dir, 'mycroft-en-us.dict')

    settings = Decoder.default_config()
    settings.set_string('-hmm', hmm_path)
    settings.set_string('-dict', dict_path)
    settings.set_string('-keyphrase', keyphrase)
    settings.set_float('-kws_threshold', float('1e-45'))
    settings.set_float('-samprate', samprate)
    settings.set_int('-nfft', 2048)
    settings.set_string('-logfn', '/dev/null')
    return Recognizer(Decoder(settings))

View File

@ -0,0 +1,76 @@
# Copyright 2016 Mycroft AI, Inc.
#
# This file is part of Mycroft Core.
#
# Mycroft Core is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mycroft Core is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
from speech_recognition import AudioData
__author__ = 'jdorleans'
class WordExtractor:
    """
    Finds the span of ``audio.frame_data`` in which ``recognizer`` still
    recognizes its word, and exposes the audio before and after that span.

    ``begin`` and ``end`` are byte offsets into ``audio.frame_data``. They are
    moved by successively halved steps until the step is no larger than
    ``PRECISION_RATE`` of the audio size.
    """
    SILENCE_SECS = 0.1
    PRECISION_RATE = 0.01

    def __init__(self, audio, recognizer, metrics):
        """
        :param audio: object exposing ``frame_data``, ``sample_rate`` and
            ``sample_width``
        :param recognizer: object exposing ``is_recognized(segment, metrics)``
        :param metrics: metrics sink forwarded to ``is_recognized``
        """
        self.audio = audio
        self.recognizer = recognizer
        self.audio_size = len(self.audio.frame_data)
        self.delta = self.__align(self.audio_size // 2)
        self.begin = 0
        self.end = self.audio_size
        self.precision = int(self.audio_size * self.PRECISION_RATE)
        self.silence_data = self.create_silence(self.SILENCE_SECS,
                                                self.audio.sample_rate,
                                                self.audio.sample_width)
        self.metrics = metrics

    def __align(self, offset):
        """Round ``offset`` down to a whole number of samples."""
        # Slicing frame_data at an offset that is not a multiple of the
        # sample width would split samples and hand garbage to the recognizer.
        return offset - offset % self.audio.sample_width

    def __add(self, is_begin, value):
        """Add ``value`` to ``begin`` (``is_begin`` true) or to ``end``."""
        if is_begin:
            self.begin += value
        else:
            self.end += value

    def __calculate_marker(self, is_begin):
        """
        Move one marker towards the word for as long as it stays recognized.

        Each step tentatively moves the marker inwards; the move is undone
        when the remaining segment is no longer recognized. The step is
        halved (and kept sample-aligned) after every attempt.
        """
        dt = self.delta
        sign = 1 if is_begin else -1

        while dt > self.precision:
            self.__add(is_begin, dt * sign)
            segment = self.audio.frame_data[self.begin:self.end]
            found = self.recognizer.is_recognized(segment, self.metrics)
            if not found:
                self.__add(is_begin, dt * -sign)
            dt = self.__align(dt // 2)

    def calculate_range(self):
        """Compute ``end`` first, then ``begin``."""
        self.__calculate_marker(False)
        self.__calculate_marker(True)

    @staticmethod
    def create_silence(seconds, sample_rate, sample_width):
        """Return ``seconds`` of zero-valued audio as a byte string."""
        # A bytes literal is identical to '\0' on Python 2 and keeps the
        # concatenation with frame_data valid on Python 3.
        return b'\0' * int(seconds * sample_rate * sample_width)

    def get_audio_data_before(self):
        """Return the audio preceding ``begin``, followed by silence."""
        byte_data = self.audio.frame_data[0:self.begin] + self.silence_data
        return AudioData(byte_data, self.audio.sample_rate,
                         self.audio.sample_width)

    def get_audio_data_after(self):
        """Return silence followed by the audio after ``end``."""
        byte_data = self.silence_data + self.audio.frame_data[
                                        self.end:self.audio_size]
        return AudioData(byte_data, self.audio.sample_rate,
                         self.audio.sample_width)

View File

@ -1,171 +1,188 @@
from Queue import Queue
from os.path import dirname, join
# Copyright 2016 Mycroft AI, Inc.
#
# This file is part of Mycroft Core.
#
# Mycroft Core is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mycroft Core is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
import unittest
from Queue import Queue
from os.path import dirname, join
from speech_recognition import WavFile, AudioData
from mycroft.client.speech.listener import (
WakewordExtractor,
AudioConsumer,
RecognizerLoop
)
from mycroft.client.speech.listener import AudioConsumer, RecognizerLoop
from mycroft.client.speech.recognizer_wrapper import (
RemoteRecognizerWrapperFactory
)
__author__ = 'seanfitz'
class MockRecognizer(object):
def __init__(self, transcription=None):
def __init__(self):
self.transcriptions = []
def recognize_google(self, audio, key=None, language=None, show_all=False):
return self.tanscriptions.pop(0)
return self.transcriptions.pop(0)
def set_transcriptions(self, transcriptions):
self.tanscriptions = transcriptions
self.transcriptions = transcriptions
class AudioConsumerTest(unittest.TestCase):
"""
AudioConsumerTest
"""
def setUp(self):
self.loop = RecognizerLoop()
self.queue = Queue()
self.recognizer = MockRecognizer()
self.consumer = AudioConsumer(
self.loop.state,
self.queue,
self.loop,
self.loop.wakeup_recognizer,
self.loop.ww_recognizer,
RemoteRecognizerWrapperFactory.wrap_recognizer(
self.recognizer, 'google'),
self.loop.wakeup_prefixes,
self.loop.wakeup_words)
self.loop.state,
self.queue,
self.loop,
self.loop.wakeup_recognizer,
self.loop.mycroft_recognizer,
RemoteRecognizerWrapperFactory.wrap_recognizer(
self.recognizer, 'google'))
def __create_sample_from_test_file(self, sample_name):
root_dir = dirname(dirname(dirname(__file__)))
filename = join(
root_dir, 'test', 'client', 'data', sample_name + '.wav')
root_dir, 'test', 'client', 'data', sample_name + '.wav')
wavfile = WavFile(filename)
with wavfile as source:
return AudioData(
source.stream.read(), wavfile.SAMPLE_RATE,
wavfile.SAMPLE_WIDTH)
source.stream.read(), wavfile.SAMPLE_RATE,
wavfile.SAMPLE_WIDTH)
def test_audio_pos_front_back(self):
audio = self.__create_sample_from_test_file('mycroft_in_utterance')
def test_word_extraction(self):
"""
This is intended to test the extraction of the word: ``mycroft``.
The values for ``ideal_begin`` and ``ideal_end`` were found using an
audio tool like Audacity and they represent a sample value position of
the audio. ``tolerance`` is an acceptable margin of error for the distance
between the ideal and actual values found by the ``WordExtractor``
"""
audio = self.__create_sample_from_test_file('weather_mycroft')
self.queue.put(audio)
TRUE_POS_BEGIN = 69857 + int(
WakewordExtractor.TRIM_SECONDS * audio.sample_rate *
audio.sample_width)
TRUE_POS_END = 89138 - int(
WakewordExtractor.TRIM_SECONDS * audio.sample_rate *
audio.sample_width)
TOLERANCE_RANGE_FRAMES = (
WakewordExtractor.MAX_ERROR_SECONDS * audio.sample_rate *
audio.sample_width)
tolerance = 4000
ideal_begin = 70000
ideal_end = 92000
monitor = {}
self.recognizer.set_transcriptions(
["what's the weather next week", ""])
self.recognizer.set_transcriptions(["what's the weather next week"])
def wakeword_callback(message):
monitor['pos_begin'] = message.get('pos_begin')
monitor['pos_end'] = message.get('pos_end')
self.loop.once('recognizer_loop:wakeword', wakeword_callback)
self.consumer.try_consume_audio()
self.consumer.read_audio()
pos_begin = monitor.get('pos_begin')
self.assertIsNotNone(pos_begin)
diff = abs(pos_begin - TRUE_POS_BEGIN)
actual_begin = monitor.get('pos_begin')
self.assertIsNotNone(actual_begin)
diff = abs(actual_begin - ideal_begin)
self.assertTrue(
diff <= TOLERANCE_RANGE_FRAMES,
str(diff) + " is not less than " + str(TOLERANCE_RANGE_FRAMES))
diff <= tolerance,
str(diff) + " is not less than " + str(tolerance))
pos_end = monitor.get('pos_end')
self.assertIsNotNone(pos_end)
diff = abs(pos_end - TRUE_POS_END)
actual_end = monitor.get('pos_end')
self.assertIsNotNone(actual_end)
diff = abs(actual_end - ideal_end)
self.assertTrue(
diff <= TOLERANCE_RANGE_FRAMES,
str(diff) + " is not less than " + str(TOLERANCE_RANGE_FRAMES))
diff <= tolerance,
str(diff) + " is not less than " + str(tolerance))
def test_wakeword_in_beginning(self):
self.queue.put(self.__create_sample_from_test_file('mycroft'))
self.queue.put(self.__create_sample_from_test_file('weather_mycroft'))
self.recognizer.set_transcriptions(["what's the weather next week"])
monitor = {}
self.recognizer.set_transcriptions([
"what's the weather next week", ""])
def callback(message):
monitor['utterances'] = message.get('utterances')
self.loop.once('recognizer_loop:utterance', callback)
self.consumer.try_consume_audio()
self.consumer.read_audio()
utterances = monitor.get('utterances')
self.assertIsNotNone(utterances)
self.assertTrue(len(utterances) == 1)
self.assertEquals("what's the weather next week", utterances[0])
def test_wakeword_in_phrase(self):
def test_wakeword(self):
self.queue.put(self.__create_sample_from_test_file('mycroft'))
self.recognizer.set_transcriptions(["silence"])
monitor = {}
self.recognizer.set_transcriptions([
"he can do other stuff too", "what's the weather in cincinnati"])
def callback(message):
monitor['utterances'] = message.get('utterances')
self.loop.once('recognizer_loop:utterance', callback)
self.consumer.try_consume_audio()
self.consumer.read_audio()
utterances = monitor.get('utterances')
self.assertIsNotNone(utterances)
self.assertTrue(len(utterances) == 2)
self.assertEquals("he can do other stuff too", utterances[0])
self.assertEquals("what's the weather in cincinnati", utterances[1])
self.assertTrue(len(utterances) == 1)
self.assertEquals("silence", utterances[0])
def test_call_and_response(self):
def test_ignore_wakeword_when_sleeping(self):
self.queue.put(self.__create_sample_from_test_file('mycroft'))
self.recognizer.set_transcriptions(["not detected"])
self.loop.sleep()
monitor = {}
self.recognizer.set_transcriptions(["mycroft", ""])
def wakeword_callback(message):
monitor['wakeword'] = message.get('utterance')
self.loop.once('recognizer_loop:wakeword', wakeword_callback)
self.consumer.read_audio()
self.assertIsNone(monitor.get('wakeword'))
self.assertTrue(self.loop.state.sleeping)
def test_wakeup(self):
self.queue.put(self.__create_sample_from_test_file('mycroft_wakeup'))
self.loop.sleep()
self.consumer.read_audio()
self.assertFalse(self.loop.state.sleeping)
def test_call_and_response(self):
self.queue.put(self.__create_sample_from_test_file('mycroft'))
self.recognizer.set_transcriptions(["silence"])
monitor = {}
def wakeword_callback(message):
monitor['wakeword'] = message.get('utterance')
self.loop.once('recognizer_loop:wakeword', wakeword_callback)
self.consumer.read_audio()
self.assertIsNotNone(monitor.get('wakeword'))
self.queue.put(self.__create_sample_from_test_file('weather_mycroft'))
self.recognizer.set_transcriptions(["what's the weather next week"])
def utterance_callback(message):
monitor['utterances'] = message.get('utterances')
self.loop.once('recognizer_loop:wakeword', wakeword_callback)
self.consumer.try_consume_audio()
self.assertIsNotNone(monitor.get('wakeword'))
self.queue.put(self.__create_sample_from_test_file('mycroft'))
self.recognizer.set_transcriptions(
["what's the weather next week", ""])
self.loop.once('recognizer_loop:utterance', utterance_callback)
self.consumer.try_consume_audio()
self.consumer.read_audio()
utterances = monitor.get('utterances')
self.assertIsNotNone(utterances)
self.assertTrue(len(utterances) == 1)
self.assertEquals("what's the weather next week", utterances[0])
def test_ignore_wakeword_when_sleeping(self):
self.queue.put(self.__create_sample_from_test_file('mycroft'))
self.loop.sleep()
monitor = {}
self.recognizer.set_transcriptions(["", ""])
def wakeword_callback(message):
monitor['wakeword'] = message.get('utterance')
self.loop.once('recognizer_loop:wakeword', wakeword_callback)
self.consumer.try_consume_audio()
self.assertIsNone(monitor.get('wakeword'))
self.assertTrue(self.loop.state.sleeping)

Binary file not shown.

View File

@ -0,0 +1,31 @@
import unittest
import os
from speech_recognition import WavFile
from mycroft.client.speech.local_recognizer import LocalRecognizer
__author__ = 'seanfitz'
DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data")
class LocalRecognizerTest(unittest.TestCase):
    """
    Checks that a default ``LocalRecognizer`` hears "mycroft" in the sample
    recordings stored under ``DATA_DIR``.
    """

    def setUp(self):
        self.recognizer = LocalRecognizer()

    def testRecognizerWrapper(self):
        # Short samples: the wake word with and without a leading "hey"
        for wav_name in ("hey_mycroft.wav", "mycroft.wav"):
            wav = WavFile(os.path.join(DATA_DIR, wav_name))
            with wav as opened:
                result = self.recognizer.transcribe(opened.stream.read())
                assert "mycroft" in result.hypstr.lower()

    def testRecognitionInLongerUtterance(self):
        # The wake word embedded in a longer recording
        wav = WavFile(os.path.join(DATA_DIR, "weather_mycroft.wav"))
        with wav as opened:
            result = self.recognizer.transcribe(opened.stream.read())
            assert "mycroft" in result.hypstr.lower()

View File

@ -1,32 +0,0 @@
from speech_recognition import WavFile
import os
from mycroft.client.speech import wakeword_recognizer
import unittest
__author__ = 'seanfitz'
DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data")
class WakewordRecognizerTest(unittest.TestCase):
    """
    Checks that the recognizer built by ``create_recognizer()`` transcribes
    the sample recordings under ``DATA_DIR`` as "hey mycroft".
    """

    def setUp(self):
        self.ww_recognizer = wakeword_recognizer.create_recognizer()

    def testRecognizerWrapper(self):
        # Both short samples are expected to yield the full key phrase
        for wav_name in ("hey_mycroft.wav", "mycroft.wav"):
            wav = WavFile(os.path.join(DATA_DIR, wav_name))
            with wav as opened:
                result = self.ww_recognizer.transcribe(opened.stream.read())
                assert result.hypstr.lower() == "hey mycroft"

    def testRecognitionInLongerUtterance(self):
        # The key phrase embedded in a longer recording
        wav = WavFile(os.path.join(DATA_DIR, "mycroft_in_utterance.wav"))
        with wav as opened:
            result = self.ww_recognizer.transcribe(opened.stream.read())
            assert result.hypstr.lower() == "hey mycroft"