mycroft-core/mycroft/client/speech/recognizer_wrapper.py

131 lines
4.2 KiB
Python

import json
import requests
from speech_recognition import UnknownValueError
from mycroft.configuration.config import ConfigurationManager
from mycroft.identity import IdentityManager
from mycroft.metrics import Stopwatch
from mycroft.util import CerberusAccessDenied
from mycroft.util.log import getLogger
from mycroft.util.setup_base import get_version
__author__ = 'seanfitz'
log = getLogger("RecognizerWrapper")
config = ConfigurationManager.get_config().get('speech_client')
class GoogleRecognizerWrapper(object):
def __init__(self, recognizer):
self.recognizer = recognizer
def transcribe(
self, audio, language="en-US", show_all=False, metrics=None):
key = config.get('goog_api_key')
return self.recognizer.recognize_google(
audio, key=key, language=language, show_all=show_all)
class WitRecognizerWrapper(object):
def __init__(self, recognizer):
self.recognizer = recognizer
def transcribe(
self, audio, language="en-US", show_all=False, metrics=None):
assert language == "en-US", \
"language must be default, language parameter not supported."
key = config.get('wit_api_key')
return self.recognizer.recognize_wit(audio, key, show_all=show_all)
class IBMRecognizerWrapper(object):
def __init__(self, recognizer):
self.recognizer = recognizer
def transcribe(
self, audio, language="en-US", show_all=False, metrics=None):
username = config.get('ibm_username')
password = config.get('ibm_password')
return self.recognizer.recognize_ibm(
audio, username, password, language=language, show_all=show_all)
class CerberusGoogleProxy(object):
def __init__(self, _):
self.version = get_version()
def transcribe(
self, audio, language="en-US", show_all=False, metrics=None):
timer = Stopwatch()
timer.start()
identity = IdentityManager().get()
headers = {}
if identity.token:
headers['Authorization'] = 'Bearer %s:%s' % (
identity.device_id, identity.token)
response = requests.post(config.get("proxy_host") +
"/stt/google_v2?language=%s&version=%s"
% (language, self.version),
audio.get_flac_data(),
headers=headers)
if metrics:
t = timer.stop()
metrics.timer("mycroft.cerberus.proxy.client.time_s", t)
metrics.timer("mycroft.stt.remote.time_s", t)
if response.status_code == 401:
raise CerberusAccessDenied()
try:
actual_result = response.json()
except:
raise UnknownValueError()
log.info("STT JSON: " + json.dumps(actual_result))
if show_all:
return actual_result
# return the best guess
if "alternative" not in actual_result:
raise UnknownValueError()
alternatives = actual_result["alternative"]
if len([alt for alt in alternatives if alt.get('confidence')]) > 0:
# if there is at least one element with confidence, force it to
# the front
alternatives.sort(
key=lambda e: e.get('confidence', 0.0), reverse=True)
for entry in alternatives:
if "transcript" in entry:
return entry["transcript"]
if len(alternatives) > 0:
log.error(
"Found %d entries, but none with a transcript." % len(
alternatives))
# no transcriptions available
raise UnknownValueError()
RECOGNIZER_IMPLS = {
'google': GoogleRecognizerWrapper,
'google_proxy': CerberusGoogleProxy,
'wit': WitRecognizerWrapper,
'ibm': IBMRecognizerWrapper
}
class RemoteRecognizerWrapperFactory(object):
@staticmethod
def wrap_recognizer(recognizer, impl=config.get('recognizer_impl')):
if impl not in RECOGNIZER_IMPLS.keys():
raise NotImplementedError("%s recognizer not implemented." % impl)
impl_class = RECOGNIZER_IMPLS.get(impl)
return impl_class(recognizer)