diff --git a/mycroft/audio/speech.py b/mycroft/audio/speech.py index d8460345cb..0fdab3ff29 100644 --- a/mycroft/audio/speech.py +++ b/mycroft/audio/speech.py @@ -35,11 +35,6 @@ mimic_fallback_obj = None _last_stop_signal = 0 -def _start_listener(_): - """Force Mycroft to start listening (as if 'Hey Mycroft' was spoken).""" - bus.emit(Message('mycroft.mic.listen')) - - def handle_speak(event): """Handle "speak" message @@ -60,11 +55,7 @@ def handle_speak(event): stopwatch = Stopwatch() stopwatch.start() utterance = event.data['utterance'] - if event.data.get('expect_response', False): - # When expect_response is requested, the listener will be restarted - # at the end of the next bit of spoken audio. - bus.once('recognizer_loop:audio_output_end', _start_listener) - + listen = event.data.get('expect_response', False) # This is a bit of a hack for Picroft. The analog audio on a Pi blocks # for 30 seconds fairly often, so we don't want to break on periods # (decreasing the chance of encountering the block). But we will @@ -82,7 +73,10 @@ def handle_speak(event): utterance = re.sub(r'\b([A-za-z][\.])(\s+)', r'\g<1>', utterance) chunks = re.split(r'(? start or check_for_signal('buttonPress')): @@ -90,7 +84,7 @@ def handle_speak(event): tts.playback.clear() break try: - mute_and_speak(chunk, ident) + mute_and_speak(chunk, ident, listen) except KeyboardInterrupt: raise except Exception: @@ -103,7 +97,7 @@ def handle_speak(event): 'tts': tts.__class__.__name__}) -def mute_and_speak(utterance, ident): +def mute_and_speak(utterance, ident, listen=False): """Mute mic and start speaking the utterance using selected tts backend. Arguments: @@ -125,7 +119,7 @@ def mute_and_speak(utterance, ident): LOG.info("Speak: " + utterance) try: - tts.execute(utterance, ident) + tts.execute(utterance, ident, listen) except RemoteTTSTimeoutException as e: LOG.error(e) mimic_fallback_tts(utterance, ident) diff --git a/mycroft/tts/__init__.py b/mycroft/tts/__init__.py index c049091d55..123cc9a9d9 100644 --- a/mycroft/tts/__init__.py +++ b/mycroft/tts/__init__.py @@ -19,7 +19,7 @@ import random import re from abc import ABCMeta, abstractmethod from threading import Thread -from time import time +from time import time, sleep import os.path from os.path import dirname, exists, isdir, join @@ -83,7 +83,8 @@ class PlaybackThread(Thread): """Thread main loop. get audio and viseme data from queue and play.""" while not self._terminated: try: - snd_type, data, visemes, ident = self.queue.get(timeout=2) + snd_type, data, visemes, ident, listen = \ + self.queue.get(timeout=2) self.blink(0.5) if not self._processing_queue: self._processing_queue = True @@ -111,7 +112,7 @@ class PlaybackThread(Thread): except Exception as e: LOG.exception(e) if self._processing_queue: - self.tts.end_audio() + self.tts.end_audio(listen) self._processing_queue = False def show_visemes(self, pairs): @@ -196,7 +197,7 @@ class TTS(metaclass=ABCMeta): # Create signals informing start of speech self.bus.emit(Message("recognizer_loop:audio_output_start")) - def end_audio(self): + def end_audio(self, listen): """Helper function for child classes to call in execute(). Sends the recognizer_loop:audio_output_end message, indicating @@ -205,6 +206,8 @@ class TTS(metaclass=ABCMeta): """ self.bus.emit(Message("recognizer_loop:audio_output_end")) + if listen: + self.bus.emit(Message('mycroft.mic.listen')) # Clean the cache as needed cache_dir = mycroft.util.get_cache_directory("tts/" + self.tts_name) mycroft.util.curate_cache(cache_dir, min_free_percent=100) @@ -287,15 +290,17 @@ class TTS(metaclass=ABCMeta): """ return [sentence] - def execute(self, sentence, ident=None): + def execute(self, sentence, ident=None, listen=False): """Convert sentence to speech, preprocessing out unsupported ssml The method caches results if possible using the hash of the sentence. - Args: + Arguments: sentence: Sentence to be spoken ident: Id reference to current interaction + listen: True if listen should be triggered at the end + of the utterance. """ sentence = self.validate_ssml(sentence) @@ -307,7 +312,11 @@ class TTS(metaclass=ABCMeta): self.spellings[word.lower()]) chunks = self._preprocess_sentence(sentence) - for sentence in chunks: + # Apply the listen flag to the last chunk, set the rest to False + chunks = [(chunks[i], listen if i == len(chunks) - 1 else False) + for i in range(len(chunks))] + + for sentence, l in chunks: key = str(hashlib.md5( sentence.encode('utf-8', 'ignore')).hexdigest()) wav_file = os.path.join( @@ -323,7 +332,7 @@ class TTS(metaclass=ABCMeta): self.save_phonemes(key, phonemes) vis = self.viseme(phonemes) if phonemes else None - self.queue.put((self.audio_ext, wav_file, vis, ident)) + self.queue.put((self.audio_ext, wav_file, vis, ident, l)) def viseme(self, phonemes): """Create visemes from phonemes. Needs to be implemented for all