Move listen trigger to last chunk of sentence

If rendering a chunk of a sentence takes too long, the audio queue
may run dry and trigger listening before the sentence has finished
playing.

This moves the listening trigger so it fires only after the last chunk.
Branch: pull/2351/head
Author: Åke Forslund
Date:   2019-10-04 08:00:06 +02:00
Parent: 15233f8929
Commit: 29db163a78

2 changed files with 25 additions and 22 deletions
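The idea behind the fix: each chunk of a split sentence is paired with a
listen flag that is True only for the final chunk, so the microphone opens
when the last piece of audio has played instead of whenever the audio queue
first drains. A minimal, runnable sketch of that tagging pattern (the helper
name tag_last_chunk is illustrative, not part of the commit):

    def tag_last_chunk(chunks, listen):
        """Pair each chunk with the listen flag; only the last one keeps it."""
        return [(chunks[i], listen if i == len(chunks) - 1 else False)
                for i in range(len(chunks))]

    for chunk, listen in tag_last_chunk(['One.', 'Two.', 'Three.'], True):
        print(chunk, listen)  # One. False / Two. False / Three. True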

View File

@@ -35,11 +35,6 @@ mimic_fallback_obj = None
 _last_stop_signal = 0
 
 
-def _start_listener(_):
-    """Force Mycroft to start listening (as if 'Hey Mycroft' was spoken)."""
-    bus.emit(Message('mycroft.mic.listen'))
-
-
 def handle_speak(event):
     """Handle "speak" message
@@ -60,11 +55,7 @@ def handle_speak(event):
     stopwatch = Stopwatch()
     stopwatch.start()
     utterance = event.data['utterance']
-    if event.data.get('expect_response', False):
-        # When expect_response is requested, the listener will be restarted
-        # at the end of the next bit of spoken audio.
-        bus.once('recognizer_loop:audio_output_end', _start_listener)
-
+    listen = event.data.get('expect_response', False)
     # This is a bit of a hack for Picroft. The analog audio on a Pi blocks
     # for 30 seconds fairly often, so we don't want to break on periods
     # (decreasing the chance of encountering the block). But we will
@@ -82,7 +73,10 @@
         utterance = re.sub(r'\b([A-za-z][\.])(\s+)', r'\g<1>', utterance)
         chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                           utterance)
-        for chunk in chunks:
+        # Apply the listen flag to the last chunk, set the rest to False
+        chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
+                  for i in range(len(chunks))]
+        for chunk, listen in chunks:
             # Check if something has aborted the speech
             if (_last_stop_signal > start or
                     check_for_signal('buttonPress')):
@@ -90,7 +84,7 @@
                 tts.playback.clear()
                 break
             try:
-                mute_and_speak(chunk, ident)
+                mute_and_speak(chunk, ident, listen)
             except KeyboardInterrupt:
                 raise
             except Exception:
@@ -103,7 +97,7 @@
                                                'tts': tts.__class__.__name__})
 
 
-def mute_and_speak(utterance, ident):
+def mute_and_speak(utterance, ident, listen=False):
     """Mute mic and start speaking the utterance using selected tts backend.
 
     Arguments:
@@ -125,7 +119,7 @@ def mute_and_speak(utterance, ident):
 
     LOG.info("Speak: " + utterance)
     try:
-        tts.execute(utterance, ident)
+        tts.execute(utterance, ident, listen)
     except RemoteTTSTimeoutException as e:
         LOG.error(e)
         mimic_fallback_tts(utterance, ident)
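Taken together, the hunks above drop the one-shot _start_listener handler and
instead compute a listen flag from expect_response once, then pass it down
with each chunk. A simplified, runnable mock of the new flow (mute_and_speak
is a stub here; the real function hands the text to the TTS backend):

    import re

    def mute_and_speak(utterance, ident, listen=False):
        # Stub for illustration; the real function invokes tts.execute().
        print('speak:', utterance, '| listen afterwards:', listen)

    def handle_speak(event_data, ident='demo'):
        listen = event_data.get('expect_response', False)
        chunks = re.split(r'(?<=[.;?])\s', event_data['utterance'])
        # Apply the listen flag to the last chunk, set the rest to False
        chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
                  for i in range(len(chunks))]
        for chunk, chunk_listen in chunks:
            mute_and_speak(chunk, ident, chunk_listen)

    handle_speak({'utterance': 'First part. Second part.',
                  'expect_response': True})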

View File

@@ -19,7 +19,7 @@ import random
 import re
 from abc import ABCMeta, abstractmethod
 from threading import Thread
-from time import time
+from time import time, sleep
 
 import os.path
 from os.path import dirname, exists, isdir, join
@@ -83,7 +83,8 @@ class PlaybackThread(Thread):
         """Thread main loop. get audio and viseme data from queue and play."""
         while not self._terminated:
             try:
-                snd_type, data, visemes, ident = self.queue.get(timeout=2)
+                snd_type, data, visemes, ident, listen = \
+                    self.queue.get(timeout=2)
                 self.blink(0.5)
                 if not self._processing_queue:
                     self._processing_queue = True
@@ -111,7 +112,7 @@
             except Exception as e:
                 LOG.exception(e)
                 if self._processing_queue:
-                    self.tts.end_audio()
+                    self.tts.end_audio(listen)
                     self._processing_queue = False
 
     def show_visemes(self, pairs):
@@ -196,7 +197,7 @@ class TTS(metaclass=ABCMeta):
         # Create signals informing start of speech
         self.bus.emit(Message("recognizer_loop:audio_output_start"))
 
-    def end_audio(self):
+    def end_audio(self, listen):
         """Helper function for child classes to call in execute().
 
         Sends the recognizer_loop:audio_output_end message, indicating
@@ -205,6 +206,8 @@
         """
         self.bus.emit(Message("recognizer_loop:audio_output_end"))
+        if listen:
+            self.bus.emit(Message('mycroft.mic.listen'))
 
         # Clean the cache as needed
         cache_dir = mycroft.util.get_cache_directory("tts/" + self.tts_name)
         mycroft.util.curate_cache(cache_dir, min_free_percent=100)
@@ -287,15 +290,17 @@
         """
         return [sentence]
 
-    def execute(self, sentence, ident=None):
+    def execute(self, sentence, ident=None, listen=False):
         """Convert sentence to speech, preprocessing out unsupported ssml
 
         The method caches results if possible using the hash of the
         sentence.
 
-        Args:
+        Arguments:
             sentence: Sentence to be spoken
             ident: Id reference to current interaction
+            listen: True if listen should be triggered at the end
+                of the utterance.
         """
         sentence = self.validate_ssml(sentence)
 
@@ -307,7 +312,11 @@
                                           self.spellings[word.lower()])
 
         chunks = self._preprocess_sentence(sentence)
-        for sentence in chunks:
+        # Apply the listen flag to the last chunk, set the rest to False
+        chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
+                  for i in range(len(chunks))]
+
+        for sentence, l in chunks:
             key = str(hashlib.md5(
                 sentence.encode('utf-8', 'ignore')).hexdigest())
             wav_file = os.path.join(
@@ -323,7 +332,7 @@
                     self.save_phonemes(key, phonemes)
 
             vis = self.viseme(phonemes) if phonemes else None
-            self.queue.put((self.audio_ext, wav_file, vis, ident))
+            self.queue.put((self.audio_ext, wav_file, vis, ident, l))
 
     def viseme(self, phonemes):
         """Create visemes from phonemes. Needs to be implemented for all