Merge pull request #804 from forslund/feature/issue-803

TTS playback thread
pull/885/merge
Åke 2017-07-06 12:15:48 +02:00 committed by GitHub
commit 7bab1d1e21
5 changed files with 206 additions and 88 deletions

View File

@ -284,6 +284,7 @@ class RecognizerLoop(EventEmitter):
except KeyboardInterrupt as e:
LOG.error(e)
self.stop()
raise # Re-raise KeyboardInterrupt
def reload(self):
"""

View File

@ -75,6 +75,10 @@ def mute_and_speak(utterance):
lock.acquire()
# update TTS object if configuration has changed
if tts_hash != hash(str(config.get('tts', ''))):
# Stop tts playback thread
tts.playback.stop()
tts.playback.join()
# Create new tts instance
tts = TTSFactory.create()
tts.init(ws)
tts_hash = hash(str(config.get('tts', '')))
@ -124,6 +128,8 @@ def handle_speak(event):
for chunk in chunks:
try:
mute_and_speak(chunk)
except KeyboardInterrupt:
raise
except:
logger.error('Error in mute_and_speak', exc_info=True)
if _last_stop_signal > start or check_for_signal('buttonPress'):
@ -157,6 +163,7 @@ def handle_mic_unmute(event):
def handle_stop(event):
global _last_stop_signal
_last_stop_signal = time.time()
tts.playback.clear_queue()
stop_speaking()
@ -212,8 +219,9 @@ def main():
try:
loop.run()
except KeyboardInterrupt, e:
tts.playback.stop()
tts.playback.join()
logger.exception(e)
event_thread.exit()
sys.exit()

View File

@ -17,17 +17,104 @@
import hashlib
import os
import os.path
import random
from abc import ABCMeta, abstractmethod
from os.path import dirname, exists, isdir
from Queue import Queue, Empty
from threading import Thread
from time import time, sleep

import mycroft.util
from mycroft.client.enclosure.api import EnclosureAPI
from mycroft.configuration import ConfigurationManager
from mycroft.messagebus.client.ws import WebsocketClient
from mycroft.util import play_wav, play_mp3, check_for_signal
from mycroft.util.log import getLogger
__author__ = 'jdorleans'
LOGGER = getLogger(__name__)
class PlaybackThread(Thread):
    """
        Thread class for playing back tts audio and sending
        viseme data to enclosure.

        Each queue item is a (snd_type, data, visimes) tuple: snd_type is
        'wav' or 'mp3', data is handed to play_wav/play_mp3 and visimes is
        an optional list of (code, duration) pairs.
    """

    def __init__(self, queue):
        """
            Args:
                queue: queue object the playback items are read from
        """
        super(PlaybackThread, self).__init__()
        self.queue = queue
        self._terminated = False
        # Defined up front so clear_queue() and blink() are safe to call
        # before the first playback and before an enclosure is assigned.
        self.p = None
        self.enclosure = None

    def clear_queue(self):
        """
            Remove all pending playbacks and terminate the current one.
        """
        # Drain without blocking: an empty()-then-get() pair can hang if
        # the playback thread takes the last item in between the two calls.
        while True:
            try:
                self.queue.get_nowait()
            except Empty:
                break
        if self.p is not None:
            try:
                self.p.terminate()
            except Exception:
                # Best effort only, stopping playback must never raise.
                LOGGER.debug("Could not terminate playback process",
                             exc_info=True)

    def run(self):
        """
            Thread main loop. Get audio and viseme data from queue
            and play.
        """
        while not self._terminated:
            try:
                item = self.queue.get(timeout=2)
            except Empty:
                # Nothing queued; loop around to re-check _terminated.
                continue
            try:
                snd_type, data, visimes = item
                self._play(snd_type, data, visimes)
            except Exception:
                # Keep the thread alive, but leave a trace of the failure
                # instead of dropping it silently.
                LOGGER.exception("Error during TTS playback")

    def _play(self, snd_type, data, visimes):
        """ Play a single queue item and drive the enclosure meanwhile. """
        self.blink(0.5)
        if snd_type == 'wav':
            self.p = play_wav(data)
        elif snd_type == 'mp3':
            self.p = play_mp3(data)
        else:
            # Don't fall through and act on a process from an earlier item.
            LOGGER.error("Unsupported sound type: %s", snd_type)
            return

        if visimes:
            if self.show_visimes(visimes):
                # Interrupted: drop everything that is still waiting.
                self.clear_queue()
        else:
            self.p.communicate()
        self.blink(0.2)

    def show_visimes(self, pairs):
        """
            Send viseme data to enclosure

            Args:
                pairs(list): Viseme and timing pair

            Returns:
                True if the 'stoppingTTS' or 'buttonPress' signal was
                seen before all pairs were shown, otherwise False.
        """
        start = time()
        for code, duration in pairs:
            if check_for_signal('stoppingTTS', -1):
                return True
            if check_for_signal('buttonPress'):
                return True
            if self.enclosure:
                self.enclosure.mouth_viseme(code)
            # duration is compared against the time elapsed since the
            # first pair, so only the remainder is slept.
            delta = time() - start
            if delta < duration:
                sleep(duration - delta)
        return False

    def blink(self, rate=1.0):
        """ Blink mycroft's eyes """
        if self.enclosure and random.random() < rate:
            self.enclosure.eyes_blink("b")

    def stop(self):
        """ Stop thread """
        self._terminated = True
        self.clear_queue()
class TTS(object):
"""
TTS abstract class to be implemented by all TTS engines.
@ -45,28 +132,114 @@ class TTS(object):
self.validator = validator
self.enclosure = None
random.seed()
self.queue = Queue()
self.playback = PlaybackThread(self.queue)
self.playback.start()
self.clear_cache()
def init(self, ws):
    """
        Attach a connection to this TTS instance.

        Stores ws, wraps it in an EnclosureAPI and hands that same
        enclosure object to the playback thread.

        Args:
            ws: connection object passed on to EnclosureAPI
    """
    self.ws = ws
    enclosure = EnclosureAPI(ws)
    self.enclosure = enclosure
    self.playback.enclosure = enclosure
@abstractmethod
def execute(self, sentence):
''' This performs TTS, blocking until audio completes
def get_tts(self, sentence, wav_file):
"""
Abstract method that a tts implementation needs to implement.
Should get data from tts.
This performs the TTS sequence. Upon completion, the sentence will
have been spoken. Optionally, the TTS engine may have sent visemes
to the enclosure by the TTS engine.
Args:
sentence(str): Sentence to synthesize
wav_file(str): output file
Args:
sentence (str): Words to be spoken
'''
# TODO: Move caching support from mimic_tts to here for all TTS
Returns: (wav_file, phoneme) tuple
"""
pass
def blink(self, rate=1.0):
    """
        Possibly ask the enclosure to blink.

        Does nothing when no enclosure is set. Otherwise a random number
        is drawn and the blink is sent when it is below rate.

        Args:
            rate(float): threshold the random draw is compared against
    """
    if not self.enclosure:
        return
    if random.random() < rate:
        self.enclosure.eyes_blink("b")
def execute(self, sentence):
    """
        Convert sentence to speech and queue it for playback.

        The method caches results if possible using the hash of the
        sentence: when the audio file for that hash already exists it
        is reused, otherwise get_tts() is asked to produce it.

        Args:
            sentence: Sentence to be spoken
    """
    key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
    cache_dir = mycroft.util.get_cache_directory("tts")
    # self.type is a bare extension such as 'wav' or 'mp3', so the dot
    # has to be added here to get "<hash>.wav" rather than "<hash>wav".
    wav_file = os.path.join(cache_dir, key + '.' + self.type)

    if os.path.exists(wav_file):
        LOGGER.debug("TTS cache hit")
        phonemes = self.load_phonemes(key)
    else:
        wav_file, phonemes = self.get_tts(sentence, wav_file)
        if phonemes:
            self.save_phonemes(key, phonemes)

    # phonemes is None for backends without phoneme output and for cache
    # hits whose phoneme file could not be loaded; don't pass that to a
    # visime() override that expects a string.
    visimes = self.visime(phonemes) if phonemes else None
    self.queue.put((self.type, wav_file, visimes))
def visime(self, phonemes):
    """
        Turn phoneme data into viseme data.

        This default implementation produces nothing; a tts backend
        that supports visemes provides its own version.

        Args:
            phonemes(str): String with phoneme data

        Returns:
            None
    """
    return None
def clear_cache(self):
    """
        Remove all cached files.

        Only regular files directly inside the tts cache directory are
        deleted; anything else found there is left in place.
    """
    # Look the directory up once instead of on every loop iteration.
    cache_dir = mycroft.util.get_cache_directory("tts")
    if not os.path.exists(cache_dir):
        return
    for name in os.listdir(cache_dir):
        file_path = os.path.join(cache_dir, name)
        if os.path.isfile(file_path):
            os.unlink(file_path)
def save_phonemes(self, key, phonemes):
    """
        Cache phonemes

        Writing is best effort: a failure is logged at debug level and
        otherwise ignored.

        Args:
            key: Hash key for the sentence
            phonemes: phoneme string to save
    """
    cache_dir = mycroft.util.get_cache_directory("tts")
    # Clean out the cache as needed
    mycroft.util.curate_cache(cache_dir)

    pho_file = os.path.join(cache_dir, key + ".pho")
    try:
        with open(pho_file, "w") as cachefile:
            cachefile.write(phonemes)
    except Exception:
        # Not a bare except, so interpreter-exit exceptions still
        # propagate; exc_info keeps the reason for the failure.
        LOGGER.debug("Failed to write .PHO to cache", exc_info=True)
def load_phonemes(self, key):
    """
        Load phonemes from cache file.

        Args:
            key: Key identifying phoneme cache

        Returns:
            The stripped file content, or None when the file does not
            exist or could not be read.
    """
    pho_file = os.path.join(mycroft.util.get_cache_directory("tts"),
                            key + ".pho")
    if not os.path.exists(pho_file):
        return None
    try:
        with open(pho_file, "r") as cachefile:
            return cachefile.read().strip()
    except Exception:
        # Not a bare except, so interpreter-exit exceptions still
        # propagate; exc_info keeps the reason for the failure.
        LOGGER.debug("Failed to read .PHO from cache", exc_info=True)
    return None
def __del__(self):
    """ Stop the playback thread and wait for it to finish. """
    playback = self.playback
    playback.stop()
    playback.join()
class TTSValidator(object):

View File

@ -19,7 +19,6 @@
from gtts import gTTS
from mycroft.tts import TTS, TTSValidator
from mycroft.util import play_mp3
__author__ = 'jdorleans'
@ -27,12 +26,12 @@ __author__ = 'jdorleans'
class GoogleTTS(TTS):
def __init__(self, lang, voice):
super(GoogleTTS, self).__init__(lang, voice, GoogleTTSValidator(self))
self.type = 'mp3'
def execute(self, sentence):
def get_tts(self, sentence, wav_file):
tts = gTTS(sentence, self.lang)
tts.save(self.filename)
p = play_mp3(self.filename)
p.communicate() # Wait for termination
tts.save(wav_file)
return (wav_file, None) # No phonemes
class GoogleTTSValidator(TTSValidator):

View File

@ -16,7 +16,6 @@
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
import subprocess
import hashlib
import os
import os.path
from time import time, sleep
@ -46,6 +45,7 @@ class Mimic(TTS):
super(Mimic, self).__init__(lang, voice, MimicValidator(self))
self.init_args()
self.clear_cache()
self.type = 'wav'
def init_args(self):
self.args = [BIN, '-voice', self.voice, '-psdur']
@ -53,85 +53,22 @@ class Mimic(TTS):
if stretch:
self.args += ['--setf', 'duration_stretch=' + stretch]
def get_tts(self, sentence):
key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
key + ".wav")
if os.path.exists(wav_file):
phonemes = self.load_phonemes(key)
if phonemes:
# Using cached value
LOGGER.debug("TTS cache hit")
return wav_file, phonemes
def get_tts(self, sentence, wav_file):
# Generate WAV and phonemes
phonemes = subprocess.check_output(self.args + ['-o', wav_file,
'-t', sentence])
self.save_phonemes(key, phonemes)
return wav_file, phonemes
def save_phonemes(self, key, phonemes):
    """
        Write the phoneme string for key to the tts cache.

        Writing is best effort: a failure is logged at debug level and
        otherwise ignored.

        Args:
            key: Hash key for the sentence
            phonemes: phoneme string to save
    """
    cache_dir = mycroft.util.get_cache_directory("tts")
    # Clean out the cache as needed
    mycroft.util.curate_cache(cache_dir)

    pho_file = os.path.join(cache_dir, key + ".pho")
    try:
        with open(pho_file, "w") as cachefile:
            cachefile.write(phonemes)
    except Exception:
        # Not a bare except, so interpreter-exit exceptions still
        # propagate; exc_info keeps the reason for the failure.
        LOGGER.debug("Failed to write .PHO to cache", exc_info=True)
def load_phonemes(self, key):
    """
        Read the cached phoneme string for key.

        Args:
            key: Hash key for the sentence

        Returns:
            The stripped file content, or None when the file does not
            exist or could not be read.
    """
    pho_file = os.path.join(mycroft.util.get_cache_directory("tts"),
                            key + ".pho")
    if not os.path.exists(pho_file):
        return None
    try:
        with open(pho_file, "r") as cachefile:
            return cachefile.read().strip()
    except Exception:
        # Not a bare except, so interpreter-exit exceptions still
        # propagate; exc_info keeps the reason for the failure.
        LOGGER.debug("Failed to read .PHO from cache", exc_info=True)
    return None
def execute(self, sentence):
    """
        Speak sentence.

        Gets the wav file and phoneme data from get_tts(), starts
        playback with play_wav, passes the phonemes to visime() and then
        calls communicate() on the object play_wav returned. blink() is
        called before playback starts and again at the end.

        Args:
            sentence: Sentence to be spoken
    """
    audio_path, phoneme_data = self.get_tts(sentence)
    self.blink(0.5)
    player = mycroft.util.play_wav(audio_path)
    self.visime(phoneme_data)
    player.communicate()
    self.blink(0.2)
def visime(self, output):
visimes = []
start = time()
pairs = output.split(" ")
for pair in pairs:
if mycroft.util.check_for_signal('buttonPress'):
return
if mycroft.util.check_for_signal('stoppingTTS', -1):
return
pho_dur = pair.split(":") # phoneme:duration
if len(pho_dur) == 2:
code = VISIMES.get(pho_dur[0], '4')
duration = float(pho_dur[1])
delta = time() - start
if delta < duration:
if self.enclosure:
self.enclosure.mouth_viseme(code)
sleep(duration - delta)
def clear_cache(self):
""" Remove all cached files. """
if not os.path.exists(mycroft.util.get_cache_directory('tts')):
return
for f in os.listdir(mycroft.util.get_cache_directory("tts")):
file_path = os.path.join(mycroft.util.get_cache_directory("tts"),
f)
if os.path.isfile(file_path):
os.unlink(file_path)
visimes.append((VISIMES.get(pho_dur[0], '4'),
float(pho_dur[1])))
return visimes
class MimicValidator(TTSValidator):