Merge pull request #804 from forslund/feature/issue-803

TTS playback thread
2017-07-06 12:15:48 +02:00 · 2017-07-06 12:15:48 +02:00 · 7bab1d1e21
parent dbec69c457 f714a5a882
commit 7bab1d1e21
5 changed files with 206 additions and 88 deletions
--- a/mycroft/client/speech/listener.py
+++ b/mycroft/client/speech/listener.py
@ -284,6 +284,7 @@ class RecognizerLoop(EventEmitter):
            except KeyboardInterrupt as e:
                LOG.error(e)
                self.stop()
+                raise  # Re-raise KeyboardInterrupt

    def reload(self):
        """
--- a/mycroft/client/speech/main.py
+++ b/mycroft/client/speech/main.py
@ -75,6 +75,10 @@ def mute_and_speak(utterance):
    lock.acquire()
    # update TTS object if configuration has changed
    if tts_hash != hash(str(config.get('tts', ''))):
+        # Stop tts playback thread
+        tts.playback.stop()
+        tts.playback.join()
+        # Create new tts instance
        tts = TTSFactory.create()
        tts.init(ws)
        tts_hash = hash(str(config.get('tts', '')))
@ -124,6 +128,8 @@ def handle_speak(event):
        for chunk in chunks:
            try:
                mute_and_speak(chunk)
+            except KeyboardInterrupt:
+                raise
            except:
                logger.error('Error in mute_and_speak', exc_info=True)
            if _last_stop_signal > start or check_for_signal('buttonPress'):
@ -157,6 +163,7 @@ def handle_mic_unmute(event):
 def handle_stop(event):
    global _last_stop_signal
    _last_stop_signal = time.time()
+    tts.playback.clear_queue()
    stop_speaking()


@ -212,8 +219,9 @@ def main():
    try:
        loop.run()
    except KeyboardInterrupt, e:
+        tts.playback.stop()
+        tts.playback.join()
        logger.exception(e)
-        event_thread.exit()
        sys.exit()


--- a/mycroft/tts/__init__.py
+++ b/mycroft/tts/__init__.py
@ -17,17 +17,104 @@
 import random
 from abc import ABCMeta, abstractmethod
 from os.path import dirname, exists, isdir
+from threading import Thread
+from Queue import Queue
+from time import time, sleep
+import os
+import os.path
+import hashlib

 from mycroft.client.enclosure.api import EnclosureAPI
 from mycroft.configuration import ConfigurationManager
 from mycroft.messagebus.client.ws import WebsocketClient
 from mycroft.util.log import getLogger
+from mycroft.util import play_wav, play_mp3, check_for_signal
+import mycroft.util

 __author__ = 'jdorleans'

 LOGGER = getLogger(__name__)


+class PlaybackThread(Thread):
+    """
+        Thread class for playing back tts audio and sending
+        viseme data to enclosure.
+
+        Items on the queue are (snd_type, data, visimes) tuples where
+        snd_type is 'wav' or 'mp3', data is handed to play_wav/play_mp3
+        and visimes is a list of (code, duration) pairs or None.
+    """
+
+    def __init__(self, queue):
+        super(PlaybackThread, self).__init__()
+        self.queue = queue
+        self._terminated = False
+        # Defined up front so run() and clear_queue() never hit an
+        # AttributeError when they execute before the first playback or
+        # before the owner has assigned an enclosure.
+        self.p = None
+        self.enclosure = None
+
+    def clear_queue(self):
+        """
+            Remove all pending playbacks and terminate the current one.
+        """
+        # Local import, the module itself only imports the Queue class.
+        from Queue import Empty
+
+        # Drain without blocking. This method is called from other threads
+        # while run() is consuming, so a blocking get() after an empty()
+        # check could wait forever if run() takes the last item in between.
+        try:
+            while True:
+                self.queue.get_nowait()
+        except Empty:
+            pass
+
+        process = self.p
+        if process is not None:
+            try:
+                process.terminate()
+            except Exception:
+                # Best effort, the player may already have exited.
+                pass
+
+    def run(self):
+        """
+            Thread main loop. Get audio and viseme data from queue
+            and play.
+        """
+        from Queue import Empty
+
+        while not self._terminated:
+            try:
+                snd_type, data, visimes = self.queue.get(timeout=2)
+            except Empty:
+                # Nothing queued, loop around to re-check the stop flag.
+                continue
+
+            try:
+                self.blink(0.5)
+                if snd_type == 'wav':
+                    self.p = play_wav(data)
+                elif snd_type == 'mp3':
+                    self.p = play_mp3(data)
+                else:
+                    LOGGER.error('Unknown sound type: ' + str(snd_type))
+                    continue
+
+                if visimes and self.show_visimes(visimes):
+                    # Interrupted: drop pending items and stop the player.
+                    self.clear_queue()
+                # Always wait for the player to exit so the next item
+                # does not start on top of audio that is still playing.
+                self.p.communicate()
+                self.blink(0.2)
+            except Exception:
+                # Keep the thread alive but leave a trace of the failure.
+                LOGGER.error('Error in playback thread', exc_info=True)
+
+    def show_visimes(self, pairs):
+        """
+            Send viseme data to enclosure
+
+            Each duration is compared with the time elapsed since the
+            start of the list, the method sleeps until that offset has
+            been reached before moving on to the next pair.
+
+            Args:
+                pairs(list): Viseme and timing pair
+
+            Returns:
+                True if the 'stoppingTTS' or 'buttonPress' signal was
+                detected, False if all pairs were shown.
+        """
+        start = time()
+        for code, duration in pairs:
+            if check_for_signal('stoppingTTS', -1):
+                return True
+            if check_for_signal('buttonPress'):
+                return True
+            if self.enclosure:
+                self.enclosure.mouth_viseme(code)
+            delta = time() - start
+            if delta < duration:
+                sleep(duration - delta)
+        return False
+
+    def blink(self, rate=1.0):
+        """
+            Blink mycroft's eyes
+
+            Args:
+                rate(float): probability (0.0 - 1.0) that a blink is sent
+        """
+        if self.enclosure and random.random() < rate:
+            self.enclosure.eyes_blink("b")
+
+    def stop(self):
+        """ Stop thread, dropping pending playbacks and the current one """
+        self._terminated = True
+        self.clear_queue()
+
+
 class TTS(object):
    """
    TTS abstract class to be implemented by all TTS engines.
@ -45,28 +132,114 @@ class TTS(object):
        self.validator = validator
        self.enclosure = None
        random.seed()
+        self.queue = Queue()
+        self.playback = PlaybackThread(self.queue)
+        self.playback.start()
+        self.clear_cache()

    def init(self, ws):
        self.ws = ws
        self.enclosure = EnclosureAPI(self.ws)
+        self.playback.enclosure = self.enclosure

-    @abstractmethod
-    def execute(self, sentence):
-        ''' This performs TTS, blocking until audio completes
+    def get_tts(self, sentence, wav_file):
+        """
+            Abstract method that a tts implementation needs to implement.
+            Should get data from tts.

-        This performs the TTS sequence.  Upon completion, the sentence will
-        have been spoken.   Optionally, the TTS engine may have sent visemes
-        to the enclosure by the TTS engine.
+            Args:
+                sentence(str): Sentence to synthesize
+                wav_file(str): output file

-        Args:
-            sentence (str): Words to be spoken
-        '''
-        # TODO: Move caching support from mimic_tts to here for all TTS
+            Returns: (wav_file, phoneme) tuple
+        """
        pass

-    def blink(self, rate=1.0):
-        if self.enclosure and random.random() < rate:
-            self.enclosure.eyes_blink("b")
+    def execute(self, sentence):
+        """
+            Convert sentence to speech.
+
+            The method caches results if possible using the hash of the
+            sentence. The audio is not played here, it is put on the queue
+            together with the viseme data.
+
+            Args:
+                sentence:   Sentence to be spoken
+        """
+        key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
+        # self.type holds the bare extension ('wav', 'mp3'), add the dot so
+        # the cached file gets a proper file name suffix.
+        wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
+                                key + '.' + self.type)
+
+        if os.path.exists(wav_file):
+            LOGGER.debug("TTS cache hit")
+            phonemes = self.load_phonemes(key)
+        else:
+            wav_file, phonemes = self.get_tts(sentence, wav_file)
+            if phonemes:
+                self.save_phonemes(key, phonemes)
+
+        # Phonemes can be missing (backend without phoneme output or an
+        # unreadable cache entry). visime() implementations may expect a
+        # string, so only convert when there is something to convert.
+        visimes = self.visime(phonemes) if phonemes else None
+        self.queue.put((self.type, wav_file, visimes))
+
+    def visime(self, phonemes):
+        """
+            Create visemes from phonemes. Intended to be overridden by
+            tts backends, this base implementation produces nothing.
+
+            Args:
+                phonemes(str): String with phoneme data
+
+            Returns:
+                None
+        """
+        return None
+
+    def clear_cache(self):
+        """ Remove all files found directly in the tts cache directory. """
+        # Nothing to do when the cache directory does not exist
+        if not os.path.exists(mycroft.util.get_cache_directory('tts')):
+            return
+        for f in os.listdir(mycroft.util.get_cache_directory("tts")):
+            file_path = os.path.join(mycroft.util.get_cache_directory("tts"),
+                                     f)
+            # Only regular files are unlinked, other entries are kept
+            if os.path.isfile(file_path):
+                os.unlink(file_path)
+
+    def save_phonemes(self, key, phonemes):
+        """
+            Cache phonemes in a "<key>.pho" file in the tts cache
+            directory. Failing to write the file is only logged.
+
+            Args:
+                key:        Hash key for the sentence
+                phonemes:   phoneme string to save
+        """
+        # Clean out the cache as needed
+        cache_dir = mycroft.util.get_cache_directory("tts")
+        mycroft.util.curate_cache(cache_dir)
+
+        pho_file = os.path.join(cache_dir, key + ".pho")
+        try:
+            with open(pho_file, "w") as cachefile:
+                cachefile.write(phonemes)
+        except:
+            # Best effort: any error is logged at debug level and ignored
+            LOGGER.debug("Failed to write .PHO to cache")
+            pass
+
+    def load_phonemes(self, key):
+        """
+            Load phonemes from cache file.
+
+            Args:
+                key:    Key identifying phoneme cache
+
+            Returns:
+                The stripped content of the "<key>.pho" file, or None if
+                the file does not exist or could not be read.
+        """
+        pho_file = os.path.join(mycroft.util.get_cache_directory("tts"),
+                                key+".pho")
+        if os.path.exists(pho_file):
+            try:
+                with open(pho_file, "r") as cachefile:
+                    phonemes = cachefile.read().strip()
+                return phonemes
+            except:
+                # Any read error is logged and treated as a cache miss
+                LOGGER.debug("Failed to read .PHO from cache")
+        return None
+
+    def __del__(self):
+        """ Stop the playback thread and wait for it to finish. """
+        self.playback.stop()
+        self.playback.join()


 class TTSValidator(object):
--- a/mycroft/tts/google_tts.py
+++ b/mycroft/tts/google_tts.py
@ -19,7 +19,6 @@
 from gtts import gTTS

 from mycroft.tts import TTS, TTSValidator
-from mycroft.util import play_mp3

 __author__ = 'jdorleans'

@ -27,12 +26,12 @@ __author__ = 'jdorleans'
 class GoogleTTS(TTS):
    def __init__(self, lang, voice):
        super(GoogleTTS, self).__init__(lang, voice, GoogleTTSValidator(self))
+        self.type = 'mp3'

-    def execute(self, sentence):
+    def get_tts(self, sentence, wav_file):
        tts = gTTS(sentence, self.lang)
-        tts.save(self.filename)
-        p = play_mp3(self.filename)
-        p.communicate()  # Wait for termination
+        tts.save(wav_file)
+        return (wav_file, None)  # No phonemes


 class GoogleTTSValidator(TTSValidator):
--- a/mycroft/tts/mimic_tts.py
+++ b/mycroft/tts/mimic_tts.py
@ -16,7 +16,6 @@
 # along with Mycroft Core.  If not, see <http://www.gnu.org/licenses/>.

 import subprocess
-import hashlib
 import os
 import os.path
 from time import time, sleep
@ -46,6 +45,7 @@ class Mimic(TTS):
        super(Mimic, self).__init__(lang, voice, MimicValidator(self))
        self.init_args()
        self.clear_cache()
+        self.type = 'wav'

    def init_args(self):
        self.args = [BIN, '-voice', self.voice, '-psdur']
@ -53,85 +53,22 @@ class Mimic(TTS):
        if stretch:
            self.args += ['--setf', 'duration_stretch=' + stretch]

-    def get_tts(self, sentence):
-        key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
-        wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
-                                key + ".wav")
-
-        if os.path.exists(wav_file):
-            phonemes = self.load_phonemes(key)
-            if phonemes:
-                # Using cached value
-                LOGGER.debug("TTS cache hit")
-                return wav_file, phonemes
-
+    def get_tts(self, sentence, wav_file):
        # Generate WAV and phonemes
        phonemes = subprocess.check_output(self.args + ['-o', wav_file,
                                                        '-t', sentence])
-        self.save_phonemes(key, phonemes)
        return wav_file, phonemes

-    def save_phonemes(self, key, phonemes):
-        # Clean out the cache as needed
-        cache_dir = mycroft.util.get_cache_directory("tts")
-        mycroft.util.curate_cache(cache_dir)
-
-        pho_file = os.path.join(cache_dir, key+".pho")
-        try:
-            with open(pho_file, "w") as cachefile:
-                cachefile.write(phonemes)
-        except:
-            LOGGER.debug("Failed to write .PHO to cache")
-            pass
-
-    def load_phonemes(self, key):
-        pho_file = os.path.join(mycroft.util.get_cache_directory("tts"),
-                                key+".pho")
-        if os.path.exists(pho_file):
-            try:
-                with open(pho_file, "r") as cachefile:
-                    phonemes = cachefile.read().strip()
-                return phonemes
-            except:
-                LOGGER.debug("Failed to read .PHO from cache")
-        return None
-
-    def execute(self, sentence):
-        wav_file, phonemes = self.get_tts(sentence)
-
-        self.blink(0.5)
-        process = mycroft.util.play_wav(wav_file)
-        self.visime(phonemes)
-        process.communicate()
-        self.blink(0.2)
-
    def visime(self, output):
+        visimes = []
        start = time()
        pairs = output.split(" ")
        for pair in pairs:
-            if mycroft.util.check_for_signal('buttonPress'):
-                return
-            if mycroft.util.check_for_signal('stoppingTTS', -1):
-                return
            pho_dur = pair.split(":")  # phoneme:duration
            if len(pho_dur) == 2:
-                code = VISIMES.get(pho_dur[0], '4')
-                duration = float(pho_dur[1])
-                delta = time() - start
-                if delta < duration:
-                    if self.enclosure:
-                        self.enclosure.mouth_viseme(code)
-                    sleep(duration - delta)
-
-    def clear_cache(self):
-        """ Remove all cached files. """
-        if not os.path.exists(mycroft.util.get_cache_directory('tts')):
-            return
-        for f in os.listdir(mycroft.util.get_cache_directory("tts")):
-            file_path = os.path.join(mycroft.util.get_cache_directory("tts"),
-                                     f)
-            if os.path.isfile(file_path):
-                os.unlink(file_path)
+                visimes.append((VISIMES.get(pho_dur[0], '4'),
+                                float(pho_dur[1])))
+        return visimes


 class MimicValidator(TTSValidator):