Initial Google Streaming STT implementation off of PR 2149 with dev logs scattered about

pull/2160/head
David Wagner 2019-06-10 17:32:38 -05:00
parent 7bbfe0c9fb
commit 16248ee313
2 changed files with 102 additions and 1 deletions

View File

@ -74,6 +74,7 @@ class AudioProducer(Thread):
self.recognizer.adjust_for_ambient_noise(source)
while self.state.running:
try:
LOG.info('WAGNER listen')
audio = self.recognizer.listen(source, self.emitter,
self.stream_handler)
self.queue.put((AUDIO_DATA, audio))
@ -132,6 +133,8 @@ class AudioConsumer(Thread):
tag, data = message
LOG.info('WAGNER MESSAGE TAG {}'.format(tag))
if tag == AUDIO_DATA:
if self.state.sleeping:
self.wake_up(data)
@ -144,7 +147,7 @@ class AudioConsumer(Thread):
elif tag == STREAM_STOP:
self.stt.stream_stop()
else:
LOG.error("Unknown audio queue type %r" % audio)
LOG.error("Unknown audio queue type %r" % tag)
# TODO: Localization
def wake_up(self, audio):

View File

@ -20,6 +20,9 @@ from speech_recognition import Recognizer
from queue import Queue
from threading import Thread
from google.cloud import speech
from google.oauth2 import service_account
from mycroft.api import STTApi
from mycroft.configuration import Configuration
from mycroft.util.log import LOG
@ -240,6 +243,100 @@ class DeepSpeechStreamServerSTT(DeepSpeechServerSTT):
self.stream.start()
class GoogleStreamThread(Thread):
def __init__(self, queue, lang, client, streaming_config):
super().__init__()
LOG.info('WAGNER Thread init')
self.lang = lang
self.client = client
self.streaming_config = streaming_config
self.queue = queue
self.response = None
self.text = ''
def _get_data(self):
LOG.info('WAGNER Thread get_data init')
while True:
d = self.queue.get()
if d is None:
break
LOG.info('WAGNER Thread get_data yield d')
yield d
self.queue.task_done()
def run(self):
LOG.info('WAGNER RUN!')
audio = self._get_data()
req = (speech.types.StreamingRecognizeRequest(audio_content=x) for x in audio)
responses = self.client.streaming_recognize(self.streaming_config, req)
for res in responses:
LOG.info('WAGNER -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=')
LOG.info(res)
LOG.info('WAGNER -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=')
if res.results and res.results[0].is_final:
self.text = res.results[0].alternatives[0].transcript
class GoogleCloudStreamingSTT(GoogleJsonSTT):
def __init__(self):
super(GoogleCloudStreamingSTT, self).__init__()
# override language with module specific language selection
self.lang = self.config.get('lang') or self.lang
self.stream = None
self.can_stream = True
credentials = service_account.Credentials.from_service_account_info(
self.credential.get('json')
)
self.client = speech.SpeechClient(credentials=credentials)
recognition_config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code=self.lang,
model='command_and_search',
max_alternatives=1,
)
self.streaming_config = speech.types.StreamingRecognitionConfig(
config=recognition_config,
interim_results=True,
single_utterance=True,
)
def execute(self, audio, language=None):
#if self.stream is None:
# return super().execute(audio, language)
return self.stream_stop()
def stream_stop(self):
LOG.info('WAGNER STOP')
if self.stream is not None:
self.queue.put(None)
self.stream.join()
text = self.stream.text
self.stream = None
self.queue = None
if not text:
return None
return text
return None
def stream_data(self, data):
LOG.info('WAGNER data! {} {}'.format(type(data), len(data)))
self.queue.put(data)
def stream_start(self, language=None):
LOG.info('WAGNER START')
self.lang = language or self.lang
self.stream_stop()
self.queue = Queue()
self.stream = GoogleStreamThread(self.queue, self.lang, self.client, self.streaming_config)
self.stream.start()
class KaldiSTT(STT):
def __init__(self):
super(KaldiSTT, self).__init__()
@ -301,6 +398,7 @@ class STTFactory:
"mycroft": MycroftSTT,
"google": GoogleSTT,
"google_cloud": GoogleCloudSTT,
"google_cloud_streaming": GoogleCloudStreamingSTT,
"wit": WITSTT,
"ibm": IBMSTT,
"kaldi": KaldiSTT,