TTS playback queue singleton (#3055)

* fix issues when remote excepts out

* Remove explicit clear_cache from MimicTTS

* Updates for using singleton TTS playback thread

- Cache curation is called on all TTS instances registered as using the thread
- Begin audio and end audio are handled by the playback thread
- Further changes from self.playback to TTS.playback for consistency

* Remove redundant try/except

* Consolidate general and TTS-specific sentence splitting

This performs all sentence-splitting at the same stage. This fixes a
subtle issue where a TTS splits a sentence into chunks and throws an
error on only one of those chunks. The fallback would generate a
sentence for the original un-chunked sentence, possibly saying the same
parts twice.

This also pre-compiles the regexes used to speed things up a bit.

Co-authored-by: Ken <ken.smith@mycroft.ai>
pull/2881/head
Åke 2022-03-02 01:29:57 +01:00 committed by GitHub
parent 36620af703
commit e7ddd51256
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 139 additions and 47 deletions

View File

@ -74,13 +74,7 @@ def handle_speak(event):
# so we likely will want to get rid of this when not running on Mimic
if (config.get('enclosure', {}).get('platform') != "picroft" and
len(re.findall('<[^>]*>', utterance)) == 0):
# Remove any whitespace present after the period,
# if a character (only alpha) ends with a period
# ex: A. Lincoln -> A.Lincoln
# so that we don't split at the period
utterance = re.sub(r'\b([A-za-z][\.])(\s+)', r'\g<1>', utterance)
chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
utterance)
chunks = tts.preprocess_utterance(utterance)
# Apply the listen flag to the last chunk, set the rest to False
chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
for i in range(len(chunks))]
@ -116,10 +110,9 @@ def mute_and_speak(utterance, ident, listen=False):
# update TTS object if configuration has changed
if tts_hash != hash(str(config.get('tts', ''))):
global tts
# Stop tts playback thread
tts.playback.stop()
tts.playback.join()
# Create new tts instance
if tts:
tts.playback.detach_tts(tts)
tts = TTSFactory.create()
tts.init(bus)
tts_hash = hash(str(config.get('tts', '')))

View File

@ -130,8 +130,6 @@ class Mimic(TTS):
)
self.default_binary = get_mimic_binary()
self.clear_cache()
# Download subscriber voices if needed
self.subscriber_voices = get_subscriber_voices()
self.is_subscriber = DeviceApi().is_subscriber

View File

@ -39,22 +39,43 @@ from mycroft.util.plugins import load_plugin
from queue import Queue, Empty
from .cache import hash_sentence, TextToSpeechCache
# Private deep copy of the process environment with PULSE_PROP set to
# 'media.role=phone'; working on a copy leaves os.environ itself untouched.
# NOTE(review): presumably passed to the audio playback processes so that
# PulseAudio treats TTS output as phone-role audio - confirm at call sites.
_TTS_ENV = deepcopy(os.environ)
_TTS_ENV['PULSE_PROP'] = 'media.role=phone'
# Placeholder playback-queue entry made of five None values, matching the
# five-element tuples that are put on the playback queue.
EMPTY_PLAYBACK_QUEUE_TUPLE = (None, None, None, None, None)
# Matches any SSML/XML style tag, ex: "<speak>" or "</prosody>".
SSML_TAGS = re.compile(r'<[^>]*>')

# Matches a single-letter word followed by a period and whitespace
# (ex: the "A. " in "A. Lincoln").  The class must be [A-Za-z]: the range
# A-z would also match the ASCII punctuation located between "Z" and "a"
# ([, \, ], ^, _ and `).
WHITESPACE_AFTER_PERIOD = re.compile(r'\b([A-Za-z][\.])(\s+)')

# Matches the whitespace ending a sentence: a whitespace character preceded
# by ".", ";" or "?", unless what precedes it looks like a dotted
# abbreviation (ex: "e.g.") or a capitalised two-letter title (ex: "Mr.").
SENTENCE_DELIMITERS = re.compile(
    r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s'
)


def default_preprocess_utterance(utterance):
    """Default method for preprocessing Mycroft utterances for TTS.

    Whitespace following a single-letter initial (ex: "A. Lincoln" ->
    "A.Lincoln") is removed first so the initial does not end a sentence,
    then the utterance is split at the sentence delimiters.

    Args:
        utterance (str): Input utterance

    Returns:
        [str]: list of preprocessed sentences
    """
    utterance = WHITESPACE_AFTER_PERIOD.sub(r'\g<1>', utterance)
    return SENTENCE_DELIMITERS.split(utterance)
class PlaybackThread(Thread):
"""Thread class for playing back tts audio and sending
viseme data to enclosure.
"""
def __init__(self, queue):
super(PlaybackThread, self).__init__()
self.queue = queue
self.tts = []
self.bus = None
self._terminated = False
self._processing_queue = False
self.enclosure = None
@ -66,7 +87,28 @@ class PlaybackThread(Thread):
self.pulse_env = None
def init(self, tts):
    """DEPRECATED! Init the TTS Playback thread.

    Kept for backwards compatibility; registers the TTS through
    attach_tts() and takes the bus from the TTS through set_bus().

    TODO: 22.02 Remove this

    Args:
        tts (TTS): TTS instance to attach, its ``bus`` attribute is used
            as bus for the playback thread.
    """
    # self.tts has to stay the list of attached TTS objects, so the TTS is
    # registered with attach_tts() and never assigned to self.tts directly.
    self.attach_tts(tts)
    self.set_bus(tts.bus)
def set_bus(self, bus):
    """Store the messagebus client the playback thread emits on.

    Args:
        bus (MycroftBusClient): bus client
    """
    self.bus = bus
def attach_tts(self, tts):
    """Add TTS to be cache checked.

    Attaching is idempotent: a TTS that is already attached is not added
    a second time, so a single detach_tts() call always removes it
    completely and its cache is only checked once per speech sequence.

    Args:
        tts: TTS instance to register with the playback thread
    """
    if tts not in self.tts:
        self.tts.append(tts)
def detach_tts(self, tts):
    """Remove TTS from cache check.

    Detaching a TTS that isn't attached (for example one that was created
    but never initialized) is a no-op instead of raising ValueError.

    Args:
        tts: TTS instance to unregister from the playback thread
    """
    if tts in self.tts:
        self.tts.remove(tts)
def clear_queue(self):
"""Remove all pending playbacks."""
@ -90,7 +132,7 @@ class PlaybackThread(Thread):
the loop then wait for the playback process to finish before starting
checking the next position in queue.
If the queue is empty the tts.end_audio() is called possibly triggering
If the queue is empty the end_audio() is called possibly triggering
listening.
"""
while not self._terminated:
@ -100,7 +142,7 @@ class PlaybackThread(Thread):
self.blink(0.5)
if not self._processing_queue:
self._processing_queue = True
self.tts.begin_audio()
self.begin_audio()
stopwatch = Stopwatch()
with stopwatch:
@ -116,7 +158,7 @@ class PlaybackThread(Thread):
report_timing(ident, 'speech_playback', stopwatch)
if self.queue.empty():
self.tts.end_audio(listen)
self.end_audio(listen)
self._processing_queue = False
self.blink(0.2)
except Empty:
@ -124,9 +166,42 @@ class PlaybackThread(Thread):
except Exception as e:
LOG.exception(e)
if self._processing_queue:
self.tts.end_audio(listen)
self.end_audio(listen)
self._processing_queue = False
def begin_audio(self):
    """Perform beginning of speech actions.

    Emits "recognizer_loop:audio_output_start" on the bus. If no bus has
    been attached yet only a warning is logged.
    """
    if not self.bus:
        LOG.warning("Speech started before bus was attached.")
        return
    # Create signals informing start of speech
    self.bus.emit(Message("recognizer_loop:audio_output_start"))
def end_audio(self, listen):
    """Perform end of speech output actions.

    Will inform the system that speech has ended and trigger the TTS's
    cache checks. Listening will be triggered if requested. If no bus has
    been attached yet only a warning is logged.

    Args:
        listen (bool): True if listening event should be emitted
    """
    if not self.bus:
        LOG.warning("Speech ended before bus was attached.")
        return

    # Send end of speech signals to the system
    self.bus.emit(Message("recognizer_loop:audio_output_end"))
    if listen:
        self.bus.emit(Message('mycroft.mic.listen'))

    # Clear cache for all attached tts objects
    # This is basically the only safe time
    for tts in self.tts:
        try:
            tts.cache.curate()
        except Exception:
            # A failing cache check of one TTS must not prevent the checks
            # of the remaining ones or the clearing of the signal below.
            # This method is also called from the playback loop's
            # exception handler, where a raised error would not be caught.
            LOG.exception("Failed to curate the TTS cache")

    # This check will clear the filesystem IPC "signal"
    check_for_signal("isSpeaking")
def show_visemes(self, pairs):
"""Send viseme data to enclosure
@ -167,6 +242,8 @@ class TTS(metaclass=ABCMeta):
phonetic_spelling (bool): Whether to spell certain words phonetically
ssml_tags (list): Supported ssml properties. Ex. ['speak', 'prosody']
"""
queue = None
playback = None
def __init__(self, lang, config, validator, audio_ext='wav',
phonetic_spelling=True, ssml_tags=None):
@ -183,9 +260,12 @@ class TTS(metaclass=ABCMeta):
self.filename = get_temp_path('tts.wav')
self.enclosure = None
random.seed()
self.queue = Queue()
self.playback = PlaybackThread(self.queue)
self.playback.start()
if TTS.queue is None:
TTS.queue = Queue()
TTS.playback = PlaybackThread(TTS.queue)
TTS.playback.start()
self.spellings = self.load_spellings()
self.tts_name = type(self).__name__
self.cache = TextToSpeechCache(
@ -252,9 +332,10 @@ class TTS(metaclass=ABCMeta):
bus: Mycroft messagebus connection
"""
self.bus = bus
self.playback.init(self)
TTS.playback.set_bus(bus)
TTS.playback.attach_tts(self)
self.enclosure = EnclosureAPI(self.bus)
self.playback.enclosure = self.enclosure
TTS.playback.enclosure = self.enclosure
def get_tts(self, sentence, wav_file):
"""Abstract method that a tts implementation needs to implement.
@ -306,7 +387,7 @@ class TTS(metaclass=ABCMeta):
return self.remove_ssml(utterance)
# find ssml tags in string
tags = re.findall('<[^>]*>', utterance)
tags = SSML_TAGS.findall(utterance)
for tag in tags:
if any(supported in tag for supported in self.ssml_tags):
@ -318,6 +399,21 @@ class TTS(metaclass=ABCMeta):
# return text with supported ssml tags only
return utterance.replace(" ", " ")
def preprocess_utterance(self, utterance):
    """Preprocess utterance into list of chunks suitable for the TTS.

    The general sentence splitting of default_preprocess_utterance() is
    applied first, then every resulting sentence is passed through the
    TTS specific _preprocess_sentence() and the pieces are collected in
    order.

    Args:
        utterance (str): utterance to split

    Returns:
        list: chunks in the order they should be spoken
    """
    sentences = default_preprocess_utterance(utterance)
    return [
        piece
        for sentence in sentences
        for piece in self._preprocess_sentence(sentence)
    ]
def _preprocess_sentence(self, sentence):
"""Default preprocessing is no preprocessing.
@ -347,13 +443,7 @@ class TTS(metaclass=ABCMeta):
sentence = self.validate_ssml(sentence)
create_signal("isSpeaking")
try:
self._execute(sentence, ident, listen)
except Exception:
# If an error occurs end the audio sequence through an empty entry
self.queue.put(EMPTY_PLAYBACK_QUEUE_TUPLE)
# Re-raise to allow the Exception to be handled externally as well.
raise
self._execute(sentence, ident, listen)
def _execute(self, sentence, ident, listen):
if self.phonetic_spelling:
@ -362,6 +452,8 @@ class TTS(metaclass=ABCMeta):
sentence = sentence.replace(word,
self.spellings[word.lower()])
# TODO: 22.02 This is no longer needed and can be removed
# Just kept for compatibility for now
chunks = self._preprocess_sentence(sentence)
# Apply the listen flag to the last chunk, set the rest to False
chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
@ -409,7 +501,7 @@ class TTS(metaclass=ABCMeta):
audio_file, phoneme_file
)
viseme = self.viseme(phonemes) if phonemes else None
self.queue.put(
TTS.queue.put(
(self.audio_ext, str(audio_file.path), viseme, ident, l)
)
@ -489,10 +581,6 @@ class TTS(metaclass=ABCMeta):
LOG.debug("Failed to read .PHO from cache")
return None
def __del__(self):
    """Stop the playback thread and wait for it to finish."""
    # NOTE(review): self.playback may resolve to the class-level playback
    # thread shared through TTS.playback; stopping it here would then end
    # playback for every TTS instance - confirm this is intended.
    playback_thread = self.playback
    playback_thread.stop()
    playback_thread.join()
class TTSValidator(metaclass=ABCMeta):
"""TTS Validator abstract class to be implemented by all TTS engines.
@ -500,7 +588,6 @@ class TTSValidator(metaclass=ABCMeta):
It exposes and implements ``validate(tts)`` function as a template to
validate the TTS engines.
"""
def __init__(self, tts):
self.tts = tts

View File

@ -23,6 +23,7 @@ from os.path import exists
import mycroft.audio.speech as speech
from mycroft.messagebus import Message
from mycroft.tts.tts import default_preprocess_utterance
from mycroft.tts.remote_tts import RemoteTTSTimeoutException
"""Tests for speech dispatch service."""
@ -36,6 +37,8 @@ def setup_mocks(config_mock, tts_factory_mock):
config_mock.get.return_value = {}
tts_factory_mock.create.return_value = tts_mock
tts_mock.preprocess_utterance.side_effect = default_preprocess_utterance
config_mock.reset_mock()
tts_factory_mock.reset_mock()
tts_mock.reset_mock()

View File

@ -12,6 +12,14 @@ mock_audio = "/tmp/mock_path"
mock_viseme = mock.Mock(name='viseme')
class MsgTypeCheck:
    """Equality helper matching any object with a given ``msg_type``.

    Intended for mock call assertions where only the type of the emitted
    message matters, not its data or context.

    Args:
        msg_type (str): message type an object must have to compare equal
    """

    def __init__(self, msg_type):
        self.msg_type = msg_type

    def __eq__(self, other):
        # Objects without a msg_type can't be judged here; returning
        # NotImplemented lets Python fall back to its default comparison
        # (unequal) instead of raising AttributeError.
        if not hasattr(other, 'msg_type'):
            return NotImplemented
        return self.msg_type == other.msg_type

    def __repr__(self):
        # Readable output in failed assertion messages
        return '{}({!r})'.format(type(self).__name__, self.msg_type)
class MockTTS(mycroft.tts.TTS):
def __init__(self, lang, config, validator, audio_ext='wav',
phonetic_spelling=True, ssml_tags=None):
@ -58,17 +66,20 @@ class TestPlaybackThread(unittest.TestCase):
# Test wav data
wav_mock = mock.Mock(name='wav_data')
queue.put(('wav', wav_mock, None, 0, False))
time.sleep(0.2)
mock_tts.begin_audio.called_with()
time.sleep(0.3)
mock_play_wav.assert_called_with(wav_mock, environment=None)
mock_tts.end_audio.assert_called_with(False)
mock_tts.bus.emit.assert_called_with(
MsgTypeCheck('recognizer_loop:audio_output_end')
)
# Test mp3 data and trigger listening True
mp3_mock = mock.Mock(name='mp3_data')
queue.put(('mp3', mp3_mock, None, 0, True))
time.sleep(0.2)
mock_play_mp3.assert_called_with(mp3_mock, environment=None)
mock_tts.end_audio.assert_called_with(True)
mock_tts.bus.emit.assert_called_with(
MsgTypeCheck('mycroft.mic.listen')
)
self.assertFalse(playback.enclosure.get.called)
# Test sending visemes
@ -92,7 +103,7 @@ class TestTTS(unittest.TestCase):
tts.init(bus_mock)
self.assertTrue(tts.bus is bus_mock)
tts.queue = mock.Mock()
mycroft.tts.TTS.queue = mock.Mock()
with mock.patch('mycroft.tts.tts.open') as mock_open:
tts.cache.temporary_cache_dir = Path('/tmp/dummy')
tts.execute('Oh no, not again', 42)
@ -100,7 +111,7 @@ class TestTTS(unittest.TestCase):
'Oh no, not again',
'/tmp/dummy/8da7f22aeb16bc3846ad07b644d59359.wav'
)
tts.queue.put.assert_called_with(
mycroft.tts.TTS.queue.put.assert_called_with(
(
'wav',
mock_audio,
@ -117,7 +128,7 @@ class TestTTS(unittest.TestCase):
tts.init(bus_mock)
self.assertTrue(tts.bus is bus_mock)
tts.queue = mock.Mock()
mycroft.tts.TTS.queue = mock.Mock()
with mock.patch('mycroft.tts.tts.open') as mock_open:
tts.cache.temporary_cache_dir = Path('/tmp/dummy')
tts.execute('Oh no, not again', 42)
@ -125,7 +136,7 @@ class TestTTS(unittest.TestCase):
'Oh no, not again',
'/tmp/dummy/8da7f22aeb16bc3846ad07b644d59359.wav'
)
tts.queue.put.assert_called_with(
mycroft.tts.TTS.queue.put.assert_called_with(
(
'wav',
mock_audio,