TTS playback queue singleton (#3055)
* Fix issues when the remote TTS raises an exception
* Remove explicit clear_cache from MimicTTS
* Updates for using a singleton TTS playback thread
  - cache curation is called on all TTS's registered as using the thread
  - begin audio and end audio are handled by the playback thread
  - further changes from self.playback to TTS.playback for consistency
* Remove redundant try/except
* Consolidate general and TTS-specific sentence splitting

  This performs all sentence splitting at the same stage. It fixes a subtle
  issue where a TTS splits a sentence into chunks and throws an error on only
  one of those chunks: the fallback would then generate speech for the whole
  un-chunked sentence, possibly saying the same parts twice. This also
  pre-compiles the regexes used, to speed things up a bit.

Co-authored-by: Ken <ken.smith@mycroft.ai>
parent 36620af703
commit e7ddd51256
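The core of the change is visible in the tts.py hunks below: the playback queue and PlaybackThread stop being per-instance attributes and become class-level members of TTS, shared by every engine instance. A minimal sketch of the pattern, heavily simplified from mycroft/tts/tts.py (audio playback, visemes and error handling are omitted, and daemon=True exists only in this sketch):

    from queue import Queue
    from threading import Thread


    class PlaybackThread(Thread):
        """Plays queued audio and notifies every attached TTS engine."""

        def __init__(self, queue):
            super().__init__(daemon=True)
            self.queue = queue
            self.tts = []        # every TTS instance sharing this thread
            self.bus = None

        def set_bus(self, bus):
            self.bus = bus

        def attach_tts(self, tts):
            """Register an engine so its cache is curated after playback."""
            self.tts.append(tts)

        def detach_tts(self, tts):
            """Unregister an engine, e.g. when the TTS configuration changes."""
            self.tts.remove(tts)

        def run(self):
            # The real thread pops (audio type, audio file, viseme, ident,
            # listen) tuples, plays them and emits the begin/end audio
            # messages on the bus; here we only drain the queue.
            while True:
                self.queue.get()


    class TTS:
        # Class-level singletons, shared by Mimic, Mimic2, plugins, ...
        queue = None
        playback = None

        def __init__(self):
            if TTS.queue is None:
                TTS.queue = Queue()
                TTS.playback = PlaybackThread(TTS.queue)
                TTS.playback.start()

        def init(self, bus):
            self.bus = bus
            TTS.playback.set_bus(bus)
            TTS.playback.attach_tts(self)

A second engine created later finds TTS.queue already set and simply attaches itself, so swapping engines never spawns a second playback thread.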
@@ -74,13 +74,7 @@ def handle_speak(event):
    # so we likely will want to get rid of this when not running on Mimic
    if (config.get('enclosure', {}).get('platform') != "picroft" and
            len(re.findall('<[^>]*>', utterance)) == 0):
        # Remove any whitespace present after the period,
        # if a character (only alpha) ends with a period
        # ex: A. Lincoln -> A.Lincoln
        # so that we don't split at the period
        utterance = re.sub(r'\b([A-za-z][\.])(\s+)', r'\g<1>', utterance)
        chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                          utterance)
        chunks = tts.preprocess_utterance(utterance)
        # Apply the listen flag to the last chunk, set the rest to False
        chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
                  for i in range(len(chunks))]
@@ -116,10 +110,9 @@ def mute_and_speak(utterance, ident, listen=False):
    # update TTS object if configuration has changed
    if tts_hash != hash(str(config.get('tts', ''))):
        global tts
        # Stop tts playback thread
        tts.playback.stop()
        tts.playback.join()
        # Create new tts instance
        if tts:
            tts.playback.detach_tts(tts)
        tts = TTSFactory.create()
        tts.init(bus)
        tts_hash = hash(str(config.get('tts', '')))
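The mute_and_speak hunk above no longer stops and joins the playback thread when the TTS configuration changes; it only detaches the old engine and attaches the newly created one. A hedged sketch of that behaviour as a standalone helper (swap_tts_engine and create_engine are illustrative names only; the real code in mycroft/audio/speech.py works on module globals and TTSFactory.create()):

    def swap_tts_engine(old_tts, bus, create_engine):
        # The shared TTS.playback thread keeps running throughout the swap.
        if old_tts:
            # Stop post-playback cache checks for the old engine.
            old_tts.playback.detach_tts(old_tts)
        new_tts = create_engine()   # e.g. TTSFactory.create()
        new_tts.init(bus)           # attaches the new engine to TTS.playback
        return new_tts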
@@ -130,8 +130,6 @@ class Mimic(TTS):
        )
        self.default_binary = get_mimic_binary()

        self.clear_cache()

        # Download subscriber voices if needed
        self.subscriber_voices = get_subscriber_voices()
        self.is_subscriber = DeviceApi().is_subscriber
@@ -39,22 +39,43 @@ from mycroft.util.plugins import load_plugin
from queue import Queue, Empty
from .cache import hash_sentence, TextToSpeechCache


_TTS_ENV = deepcopy(os.environ)
_TTS_ENV['PULSE_PROP'] = 'media.role=phone'


EMPTY_PLAYBACK_QUEUE_TUPLE = (None, None, None, None, None)

SSML_TAGS = re.compile(r'<[^>]*>')
WHITESPACE_AFTER_PERIOD = re.compile(r'\b([A-za-z][\.])(\s+)')
SENTENCE_DELIMITERS = re.compile(
    r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s'
)


def default_preprocess_utterance(utterance):
    """Default method for preprocessing Mycroft utterances for TTS.

    Args:
        utteance (str): Input utterance

    Returns:
        [str]: list of preprocessed sentences
    """

    utterance = WHITESPACE_AFTER_PERIOD.sub(r'\g<1>', utterance)
    chunks = SENTENCE_DELIMITERS.split(utterance)
    return chunks


class PlaybackThread(Thread):
    """Thread class for playing back tts audio and sending
    viseme data to enclosure.
    """

    def __init__(self, queue):
        super(PlaybackThread, self).__init__()
        self.queue = queue
        self.tts = []
        self.bus = None

        self._terminated = False
        self._processing_queue = False
        self.enclosure = None
@@ -66,7 +87,28 @@ class PlaybackThread(Thread):
        self.pulse_env = None

    def init(self, tts):
        self.tts = tts
        """DEPRECATED! Init the TTS Playback thread.

        TODO: 22.02 Remove this
        """
        self.attach_tts(tts)
        self.set_bus(tts.bus)

    def set_bus(self, bus):
        """Provide bus instance to the TTS Playback thread.

        Args:
            bus (MycroftBusClient): bus client
        """
        self.bus = bus

    def attach_tts(self, tts):
        """Add TTS to be cache checked."""
        self.tts.append(tts)

    def detach_tts(self, tts):
        """Remove TTS from cache check."""
        self.tts.remove(tts)

    def clear_queue(self):
        """Remove all pending playbacks."""
@@ -90,7 +132,7 @@
        the loop then wait for the playback process to finish before starting
        checking the next position in queue.

        If the queue is empty the tts.end_audio() is called possibly triggering
        If the queue is empty the end_audio() is called possibly triggering
        listening.
        """
        while not self._terminated:
@@ -100,7 +142,7 @@
                self.blink(0.5)
                if not self._processing_queue:
                    self._processing_queue = True
                    self.tts.begin_audio()
                    self.begin_audio()

                stopwatch = Stopwatch()
                with stopwatch:
@@ -116,7 +158,7 @@
                report_timing(ident, 'speech_playback', stopwatch)

                if self.queue.empty():
                    self.tts.end_audio(listen)
                    self.end_audio(listen)
                    self._processing_queue = False
                    self.blink(0.2)
            except Empty:
@@ -124,9 +166,42 @@
            except Exception as e:
                LOG.exception(e)
                if self._processing_queue:
                    self.tts.end_audio(listen)
                    self.end_audio(listen)
                    self._processing_queue = False

    def begin_audio(self):
        """Perform befining of speech actions."""
        # Create signals informing start of speech
        if self.bus:
            self.bus.emit(Message("recognizer_loop:audio_output_start"))
        else:
            LOG.warning("Speech started before bus was attached.")

    def end_audio(self, listen):
        """Perform end of speech output actions.

        Will inform the system that speech has ended and trigger the TTS's
        cache checks. Listening will be triggered if requested.

        Args:
            listen (bool): True if listening event should be emitted
        """
        if self.bus:
            # Send end of speech signals to the system
            self.bus.emit(Message("recognizer_loop:audio_output_end"))
            if listen:
                self.bus.emit(Message('mycroft.mic.listen'))

            # Clear cache for all attached tts objects
            # This is basically the only safe time
            for tts in self.tts:
                tts.cache.curate()

            # This check will clear the filesystem IPC "signal"
            check_for_signal("isSpeaking")
        else:
            LOG.warning("Speech started before bus was attached.")

    def show_visemes(self, pairs):
        """Send viseme data to enclosure
@@ -167,6 +242,8 @@ class TTS(metaclass=ABCMeta):
        phonetic_spelling (bool): Whether to spell certain words phonetically
        ssml_tags (list): Supported ssml properties. Ex. ['speak', 'prosody']
    """
    queue = None
    playback = None

    def __init__(self, lang, config, validator, audio_ext='wav',
                 phonetic_spelling=True, ssml_tags=None):
@@ -183,9 +260,12 @@
        self.filename = get_temp_path('tts.wav')
        self.enclosure = None
        random.seed()
        self.queue = Queue()
        self.playback = PlaybackThread(self.queue)
        self.playback.start()

        if TTS.queue is None:
            TTS.queue = Queue()
            TTS.playback = PlaybackThread(TTS.queue)
            TTS.playback.start()

        self.spellings = self.load_spellings()
        self.tts_name = type(self).__name__
        self.cache = TextToSpeechCache(
@@ -252,9 +332,10 @@
            bus: Mycroft messagebus connection
        """
        self.bus = bus
        self.playback.init(self)
        TTS.playback.set_bus(bus)
        TTS.playback.attach_tts(self)
        self.enclosure = EnclosureAPI(self.bus)
        self.playback.enclosure = self.enclosure
        TTS.playback.enclosure = self.enclosure

    def get_tts(self, sentence, wav_file):
        """Abstract method that a tts implementation needs to implement.
@@ -306,7 +387,7 @@
            return self.remove_ssml(utterance)

        # find ssml tags in string
        tags = re.findall('<[^>]*>', utterance)
        tags = SSML_TAGS.findall(utterance)

        for tag in tags:
            if any(supported in tag for supported in self.ssml_tags):
@@ -318,6 +399,21 @@
        # return text with supported ssml tags only
        return utterance.replace("  ", " ")

    def preprocess_utterance(self, utterance):
        """Preprocess utterance into list of chunks suitable for the TTS.

        Perform general chunking and TTS specific chunking.
        """
        # Remove any whitespace present after the period,
        # if a character (only alpha) ends with a period
        # ex: A. Lincoln -> A.Lincoln
        # so that we don't split at the period
        chunks = default_preprocess_utterance(utterance)
        result = []
        for chunk in chunks:
            result += self._preprocess_sentence(chunk)
        return result

    def _preprocess_sentence(self, sentence):
        """Default preprocessing is no preprocessing.
@@ -347,13 +443,7 @@
        sentence = self.validate_ssml(sentence)

        create_signal("isSpeaking")
        try:
            self._execute(sentence, ident, listen)
        except Exception:
            # If an error occurs end the audio sequence through an empty entry
            self.queue.put(EMPTY_PLAYBACK_QUEUE_TUPLE)
            # Re-raise to allow the Exception to be handled externally as well.
            raise
        self._execute(sentence, ident, listen)

    def _execute(self, sentence, ident, listen):
        if self.phonetic_spelling:
@@ -362,6 +452,8 @@
                    sentence = sentence.replace(word,
                                                self.spellings[word.lower()])

        # TODO: 22.02 This is no longer needed and can be removed
        # Just kept for compatibility for now
        chunks = self._preprocess_sentence(sentence)
        # Apply the listen flag to the last chunk, set the rest to False
        chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
@@ -409,7 +501,7 @@
                audio_file, phoneme_file
            )
            viseme = self.viseme(phonemes) if phonemes else None
            self.queue.put(
            TTS.queue.put(
                (self.audio_ext, str(audio_file.path), viseme, ident, l)
            )
@@ -489,10 +581,6 @@
            LOG.debug("Failed to read .PHO from cache")
            return None

    def __del__(self):
        self.playback.stop()
        self.playback.join()


class TTSValidator(metaclass=ABCMeta):
    """TTS Validator abstract class to be implemented by all TTS engines.
@@ -500,7 +588,6 @@ class TTSValidator(metaclass=ABCMeta):
    It exposes and implements ``validate(tts)`` function as a template to
    validate the TTS engines.
    """

    def __init__(self, tts):
        self.tts = tts
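The tts.py hunks above consolidate sentence splitting: default_preprocess_utterance performs the general pass with pre-compiled regexes, and TTS.preprocess_utterance then applies any engine-specific splitting from _preprocess_sentence to each chunk, so an error in one chunk no longer causes the fallback to re-speak the whole un-chunked utterance. A rough, self-contained sketch of how the two stages compose (regexes copied from the diff; BaseTTS and MyTTS are illustrative stand-ins, not the real classes):

    import re

    WHITESPACE_AFTER_PERIOD = re.compile(r'\b([A-za-z][\.])(\s+)')
    SENTENCE_DELIMITERS = re.compile(
        r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s'
    )


    def default_preprocess_utterance(utterance):
        # Keep abbreviations such as "A. Lincoln" together, then split on
        # sentence boundaries (., ; and ? followed by whitespace).
        utterance = WHITESPACE_AFTER_PERIOD.sub(r'\g<1>', utterance)
        return SENTENCE_DELIMITERS.split(utterance)


    class BaseTTS:
        def _preprocess_sentence(self, sentence):
            # Default: no engine-specific splitting.
            return [sentence]

        def preprocess_utterance(self, utterance):
            result = []
            for chunk in default_preprocess_utterance(utterance):
                result += self._preprocess_sentence(chunk)
            return result


    class MyTTS(BaseTTS):
        def _preprocess_sentence(self, sentence):
            # Hypothetical engine-specific pass, e.g. splitting on commas.
            return [part.strip() for part in sentence.split(',') if part.strip()]


    print(MyTTS().preprocess_utterance('A. Lincoln spoke well. Hello, world; bye.'))
    # -> ['A.Lincoln spoke well.', 'Hello', 'world;', 'bye.']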
@@ -23,6 +23,7 @@ from os.path import exists

import mycroft.audio.speech as speech
from mycroft.messagebus import Message
from mycroft.tts.tts import default_preprocess_utterance
from mycroft.tts.remote_tts import RemoteTTSTimeoutException

"""Tests for speech dispatch service."""
@@ -36,6 +37,8 @@ def setup_mocks(config_mock, tts_factory_mock):
    config_mock.get.return_value = {}

    tts_factory_mock.create.return_value = tts_mock

    tts_mock.preprocess_utterance.side_effect = default_preprocess_utterance
    config_mock.reset_mock()
    tts_factory_mock.reset_mock()
    tts_mock.reset_mock()
@@ -12,6 +12,14 @@ mock_audio = "/tmp/mock_path"
mock_viseme = mock.Mock(name='viseme')


class MsgTypeCheck:
    def __init__(self, msg_type):
        self.msg_type = msg_type

    def __eq__(self, other):
        return self.msg_type == other.msg_type


class MockTTS(mycroft.tts.TTS):
    def __init__(self, lang, config, validator, audio_ext='wav',
                 phonetic_spelling=True, ssml_tags=None):
@@ -58,17 +66,20 @@ class TestPlaybackThread(unittest.TestCase):
        # Test wav data
        wav_mock = mock.Mock(name='wav_data')
        queue.put(('wav', wav_mock, None, 0, False))
        time.sleep(0.2)
        mock_tts.begin_audio.called_with()
        time.sleep(0.3)
        mock_play_wav.assert_called_with(wav_mock, environment=None)
        mock_tts.end_audio.assert_called_with(False)
        mock_tts.bus.emit.assert_called_with(
            MsgTypeCheck('recognizer_loop:audio_output_end')
        )

        # Test mp3 data and trigger listening True
        mp3_mock = mock.Mock(name='mp3_data')
        queue.put(('mp3', mp3_mock, None, 0, True))
        time.sleep(0.2)
        mock_play_mp3.assert_called_with(mp3_mock, environment=None)
        mock_tts.end_audio.assert_called_with(True)
        mock_tts.bus.emit.assert_called_with(
            MsgTypeCheck('mycroft.mic.listen')
        )
        self.assertFalse(playback.enclosure.get.called)

        # Test sending visemes
@@ -92,7 +103,7 @@ class TestTTS(unittest.TestCase):
        tts.init(bus_mock)
        self.assertTrue(tts.bus is bus_mock)

        tts.queue = mock.Mock()
        mycroft.tts.TTS.queue = mock.Mock()
        with mock.patch('mycroft.tts.tts.open') as mock_open:
            tts.cache.temporary_cache_dir = Path('/tmp/dummy')
            tts.execute('Oh no, not again', 42)
@@ -100,7 +111,7 @@
            'Oh no, not again',
            '/tmp/dummy/8da7f22aeb16bc3846ad07b644d59359.wav'
        )
        tts.queue.put.assert_called_with(
        mycroft.tts.TTS.queue.put.assert_called_with(
            (
                'wav',
                mock_audio,
@@ -117,7 +128,7 @@ class TestTTS(unittest.TestCase):
        tts.init(bus_mock)
        self.assertTrue(tts.bus is bus_mock)

        tts.queue = mock.Mock()
        mycroft.tts.TTS.queue = mock.Mock()
        with mock.patch('mycroft.tts.tts.open') as mock_open:
            tts.cache.temporary_cache_dir = Path('/tmp/dummy')
            tts.execute('Oh no, not again', 42)
@@ -125,7 +136,7 @@
            'Oh no, not again',
            '/tmp/dummy/8da7f22aeb16bc3846ad07b644d59359.wav'
        )
        tts.queue.put.assert_called_with(
        mycroft.tts.TTS.queue.put.assert_called_with(
            (
                'wav',
                mock_audio,
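The MsgTypeCheck helper added to the tests above relies on how mock compares call arguments: assert_called_with uses ==, so a matcher that implements __eq__ can check only the message type and ignore the payload. A small self-contained illustration (the Message class here is a stand-in for mycroft.messagebus.Message):

    from unittest import mock


    class Message:
        # Stand-in for mycroft.messagebus.Message; only msg_type matters here.
        def __init__(self, msg_type, data=None):
            self.msg_type = msg_type
            self.data = data or {}


    class MsgTypeCheck:
        def __init__(self, msg_type):
            self.msg_type = msg_type

        def __eq__(self, other):
            # Compare only the message type, ignoring any data payload.
            return self.msg_type == other.msg_type


    bus = mock.Mock()
    bus.emit(Message('mycroft.mic.listen', {'utterances': []}))
    # Passes even though the payload differs, because equality only checks
    # the message type.
    bus.emit.assert_called_with(MsgTypeCheck('mycroft.mic.listen'))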