mycroft-core/mycroft/audio/speech.py

# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import re

from threading import Lock
from mycroft.configuration import Configuration
from mycroft.tts import TTSFactory
from mycroft.util import create_signal, check_for_signal
from mycroft.util.log import LOG
from mycroft.metrics import report_metric, Stopwatch

# Messagebus connection shared by every handler in this module; set by init()
ws = None  # TODO:18.02 - Rename to "messagebus"
# Configuration object returned by Configuration.get(); set by init()
config = None
# Active TTS backend created through TTSFactory.create()
tts = None
# Fingerprint of the 'tts' configuration the current backend belongs to;
# mute_and_speak() compares it with the live configuration to spot changes
tts_hash = None
# Held by handle_speak() while a complete utterance is being spoken
lock = Lock()

# time.time() of the most recent stop handled by handle_stop() (0 = none yet)
_last_stop_signal = 0


def _start_listener(message):
    """
        Make Mycroft begin listening, the same as saying 'Hey Mycroft'.

        Args:
            message: the triggering messagebus message (not inspected)
    """
    signal_name = 'startListening'
    create_signal(signal_name)


def handle_speak(event):
    """
        Speak the utterance carried by a "speak" message.

        The text is taken from event.data['utterance'].  Unless the
        configured enclosure platform is "picroft", the text is split into
        sentence sized chunks that are spoken one after another; a stop
        recorded after speaking started, or a 'buttonPress' signal, ends the
        sequence early.  A 'timing' metric is reported afterwards.

        Args:
            event: "speak" message; event.data must hold 'utterance' and may
                   hold 'expect_response'
    """
    global _last_stop_signal

    # NOTE: this binds a function-local name, the module level ``config``
    # is left untouched by the assignment.
    config = Configuration.get()
    Configuration.init(ws)

    with lock:
        stopwatch = Stopwatch()
        stopwatch.start()
        utterance = event.data['utterance']

        wants_reply = event.data.get('expect_response', False)
        if wants_reply:
            # When expect_response is requested, the listener will be
            # restarted at the end of the next bit of spoken audio.
            ws.once('recognizer_loop:audio_output_end', _start_listener)

        # This is a bit of a hack for Picroft.  The analog audio on a Pi
        # blocks for 30 seconds fairly often, so the utterance is not broken
        # on periods there (decreasing the chance of encountering the
        # block).  Other installs keep the split since it gives the user
        # feedback faster on longer phrases.
        #
        # TODO: Remove or make an option?  This is really a hack, anyway,
        # so we likely will want to get rid of this when not running on Mimic
        platform = config.get('enclosure', {}).get('platform')
        if platform == "picroft":
            mute_and_speak(utterance)
        else:
            start = time.time()
            sentences = re.split(
                r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', utterance)
            for sentence in sentences:
                try:
                    mute_and_speak(sentence)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    LOG.error('Error in mute_and_speak', exc_info=True)
                # Leave the remaining sentences unspoken once a stop came in
                stopped = _last_stop_signal > start
                if stopped or check_for_signal('buttonPress'):
                    break

        stopwatch.stop()

    # Reported after the lock has been released
    metric = {'id': 'unknown',
              'system': 'speech',
              'utterance': utterance,
              'start_time': stopwatch.timestamp,
              'time': stopwatch.time}
    report_metric('timing', metric)


def mute_and_speak(utterance):
    """
        Speak the utterance with the currently selected tts backend.

        Before speaking, the stored tts fingerprint is compared with the
        one derived from the module level configuration.  On a mismatch the
        running playback thread is stopped and joined and a fresh backend is
        created and initialised.

        Args:
            utterance: The sentence to be spoken
    """
    global tts, tts_hash

    def _config_fingerprint():
        # Hash of the stringified 'tts' section of the configuration
        return hash(str(config.get('tts', '')))

    # Swap the TTS object when the configuration no longer matches
    if tts_hash != _config_fingerprint():
        # Shut down the playback thread of the outgoing backend
        tts.playback.stop()
        tts.playback.join()
        # Bring up the replacement backend
        tts = TTSFactory.create()
        tts.init(ws)
        tts_hash = _config_fingerprint()

    LOG.info("Speak: " + utterance)
    tts.execute(utterance)


def handle_stop(event):
    """
        React to a stop message.

        Nothing happens unless the "isSpeaking" signal is found.  Otherwise
        the time of the stop is recorded and the playback queue and pending
        visemes of the tts backend are cleared.

        Args:
            event: the stop message (not inspected)
    """
    global _last_stop_signal

    if not check_for_signal("isSpeaking", -1):
        return

    # handle_speak() compares this timestamp with its own start time
    _last_stop_signal = time.time()
    tts.playback.clear_queue()
    tts.playback.clear_visimes()


def init(websocket):
    """
        Start speech related handlers

        Stores the messagebus connection and configuration in the module
        level globals, registers the stop/speak/listen handlers and creates
        the initial tts backend.

        Args:
            websocket: messagebus connection the handlers are registered on;
                       kept as the module level ``ws``
    """

    global ws
    global tts
    global tts_hash
    global config

    ws = websocket
    Configuration.init(ws)
    config = Configuration.get()
    ws.on('mycroft.stop', handle_stop)
    ws.on('mycroft.audio.speech.stop', handle_stop)
    ws.on('speak', handle_speak)
    ws.on('mycroft.mic.listen', _start_listener)

    tts = TTSFactory.create()
    tts.init(ws)
    # Store the same fingerprint that mute_and_speak() compares against.
    # Keeping the raw config value here would never equal that hash, which
    # forced a needless stop/join/recreate of the backend on the very first
    # utterance.
    tts_hash = hash(str(config.get('tts', '')))


def shutdown():
    """
        Stop the tts playback thread and wait for it to finish.

        Does nothing when no tts backend is set.
    """
    if not tts:
        return
    tts.playback.stop()
    tts.playback.join()
Change to Apache 2.0 license from GPLv3.0 This commit officially switches the mycroft-core repository from GPLv3.0 licensing to Apache 2.0. All dependencies on GPL'ed code have been removed and we have contacted all previous contributors with still-existing code in the repository to agree to this change. Going forward, all contributors will sign a Contributor License Agreement (CLA) by visiting https://mycroft.ai/cla, then they will be included in the Mycroft Project's overall Contributor list, found at: https://github.com/MycroftAI/contributors. This cleanly protects the project, the contributor and all who use the technology to build upon. Further discussion can be found at this blog post: https://mycroft.ai/blog/right-license/ This commit also removes all __author__="" from the code. These lines are painful to maintain and the etiquette surrounding their maintenance is unclear. Do you remove a name from the list if the last line of code they wrote gets replaced? Etc. Now all contributors are publicly acknowledged in the aforementioned repo, and actual authorship is maintained by Github in a much more effective and elegant way! Finally, a few references to "Mycroft AI" were changed to the correct legal entity name "Mycroft AI Inc." ==== Fixed Issues ==== #403 Update License.md and file headers to Apache 2.0 #400 Update LICENSE.md ==== Documentation Notes ==== Deprecated the ScheduledSkill and ScheduledCRUDSkill classes. These capabilities have been superseded by the more flexible MycroftSkill class methods schedule_event(), schedule_repeating_event(), update_event(), and cancel_event(). 2017-10-04 06:28:44 +00:00			`# Copyright 2017 Mycroft AI Inc.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`#`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`import time`
			`import re`

Correct stop_speaking() to handle threaded tts OK ==== Tech Notes ==== Now the stop_speaking() method sends a signal to the speech module to stop speech instead of killing all aplay/paplay instances 2017-10-08 19:03:02 +00:00			`from threading import Lock`
Replace ConfigurationManger with Configuration 2017-09-23 12:13:50 +00:00			`from mycroft.configuration import Configuration`
Optimize imports Remove unused imports and group local vs external alphabetically 2017-09-18 19:14:21 +00:00			`from mycroft.tts import TTSFactory`
Only stop speech is there's any ==== Fixed Issues ==== If stop speech happened while no speaking was happening the following utterance was lost. 2017-10-17 13:02:14 +00:00			`from mycroft.util import create_signal, check_for_signal`
Optimize imports Remove unused imports and group local vs external alphabetically 2017-09-18 19:14:21 +00:00			`from mycroft.util.log import LOG`
Send timing metrics for various subsystems ==== Tech Notes ==== STT, intent handling, intent fallbacks, skill handlers are now timed and tied together with a ident (consistent through the chain so the flow from STT until completion of the skill handler can be follewed. TTS execution time is also measured, right now this is not tied into the ident due to the nature of the speech. The report is always called "timing" and always contain the following fields: - id: Identifier grouping the metrics into interactions - system: Which part (STT, intent service, skill handler, etc) - start_time: timestamp for when the action started - time: how long it took to execute the action The different system adds their own specific information, for example the intent_service adds the intent_type, i.e. which handler was matched. ==== Protocol Notes ==== mycroft.skills.loaded is sent togheter with skill id and skill name whenever a skill is loaded. This is used in the intent_service to convert from id to skill name when reporting 2017-12-21 00:05:14 +00:00			`from mycroft.metrics import report_metric, Stopwatch`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00
Add 'mycroft.mic.listen' messagebus command Add support for 'mycroft.mic.listen' on the messagebus to trigger the system to listen for STT processing. This can be posted on the messagebus by outside systems, such as a physical or GUI Listen button. 2017-11-22 04:45:12 +00:00			`ws = None # TODO:18.02 - Rename to "messagebus"`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`config = None`
			`tts = None`
			`tts_hash = None`
			`lock = Lock()`

			`_last_stop_signal = 0`


Add 'mycroft.mic.listen' messagebus command Add support for 'mycroft.mic.listen' on the messagebus to trigger the system to listen for STT processing. This can be posted on the messagebus by outside systems, such as a physical or GUI Listen button. 2017-11-22 04:45:12 +00:00			`def _start_listener(message):`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`"""`
Add 'mycroft.mic.listen' messagebus command Add support for 'mycroft.mic.listen' on the messagebus to trigger the system to listen for STT processing. This can be posted on the messagebus by outside systems, such as a physical or GUI Listen button. 2017-11-22 04:45:12 +00:00			`Force Mycroft to start listening (as if 'Hey Mycroft' was spoken)`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`"""`
			`create_signal('startListening')`


			`def handle_speak(event):`
			`"""`
			`Handle "speak" message`
			`"""`
Replace ConfigurationManger with Configuration 2017-09-23 12:13:50 +00:00			`config = Configuration.get()`
			`Configuration.init(ws)`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`global _last_stop_signal`

Fix lock around speech. the lock could be taken by a waiting thread between sentences in a multi-sentenced utterance. This locking method will allow the entire utterance to be synthezised before handling next. 2017-12-07 19:34:32 +00:00			`with lock:`
Send timing metrics for various subsystems ==== Tech Notes ==== STT, intent handling, intent fallbacks, skill handlers are now timed and tied together with a ident (consistent through the chain so the flow from STT until completion of the skill handler can be follewed. TTS execution time is also measured, right now this is not tied into the ident due to the nature of the speech. The report is always called "timing" and always contain the following fields: - id: Identifier grouping the metrics into interactions - system: Which part (STT, intent service, skill handler, etc) - start_time: timestamp for when the action started - time: how long it took to execute the action The different system adds their own specific information, for example the intent_service adds the intent_type, i.e. which handler was matched. ==== Protocol Notes ==== mycroft.skills.loaded is sent togheter with skill id and skill name whenever a skill is loaded. This is used in the intent_service to convert from id to skill name when reporting 2017-12-21 00:05:14 +00:00			`stopwatch = Stopwatch()`
			`stopwatch.start()`
Fix lock around speech. the lock could be taken by a waiting thread between sentences in a multi-sentenced utterance. This locking method will allow the entire utterance to be synthezised before handling next. 2017-12-07 19:34:32 +00:00			`utterance = event.data['utterance']`
			`if event.data.get('expect_response', False):`
			`# When expect_response is requested, the listener will be restarted`
			`# at the end of the next bit of spoken audio.`
			`ws.once('recognizer_loop:audio_output_end', _start_listener)`

			`# This is a bit of a hack for Picroft. The analog audio on a Pi blocks`
			`# for 30 seconds fairly often, so we don't want to break on periods`
			`# (decreasing the chance of encountering the block). But we will`
			`# keep the split for non-Picroft installs since it give user feedback`
			`# faster on longer phrases.`
			`#`
			`# TODO: Remove or make an option? This is really a hack, anyway,`
			`# so we likely will want to get rid of this when not running on Mimic`
			`if not config.get('enclosure', {}).get('platform') == "picroft":`
			`start = time.time()`
			`chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s',`
			`utterance)`
			`for chunk in chunks:`
			`try:`
			`mute_and_speak(chunk)`
			`except KeyboardInterrupt:`
			`raise`
Fix bare except, as pointed out by pep8 bot 2017-12-07 19:43:43 +00:00			`except Exception:`
Fix lock around speech. the lock could be taken by a waiting thread between sentences in a multi-sentenced utterance. This locking method will allow the entire utterance to be synthezised before handling next. 2017-12-07 19:34:32 +00:00			`LOG.error('Error in mute_and_speak', exc_info=True)`
			`if (_last_stop_signal > start or`
			`check_for_signal('buttonPress')):`
			`break`
			`else:`
			`mute_and_speak(utterance)`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00
Send timing metrics for various subsystems ==== Tech Notes ==== STT, intent handling, intent fallbacks, skill handlers are now timed and tied together with a ident (consistent through the chain so the flow from STT until completion of the skill handler can be follewed. TTS execution time is also measured, right now this is not tied into the ident due to the nature of the speech. The report is always called "timing" and always contain the following fields: - id: Identifier grouping the metrics into interactions - system: Which part (STT, intent service, skill handler, etc) - start_time: timestamp for when the action started - time: how long it took to execute the action The different system adds their own specific information, for example the intent_service adds the intent_type, i.e. which handler was matched. ==== Protocol Notes ==== mycroft.skills.loaded is sent togheter with skill id and skill name whenever a skill is loaded. This is used in the intent_service to convert from id to skill name when reporting 2017-12-21 00:05:14 +00:00			`stopwatch.stop()`
			`report_metric('timing',`
			`{'id': 'unknown',`
			`'system': 'speech',`
			`'utterance': utterance,`
			`'start_time': stopwatch.timestamp,`
			`'time': stopwatch.time})`

Move text to speech to audio service 2017-06-15 12:47:44 +00:00
			`def mute_and_speak(utterance):`
			`"""`
			`Mute mic and start speaking the utterance using selected tts backend.`

			`Args:`
			`utterance: The sentence to be spoken`
			`"""`
			`global tts_hash`

			`# update TTS object if configuration has changed`
			`if tts_hash != hash(str(config.get('tts', ''))):`
			`global tts`
			`# Stop tts playback thread`
			`tts.playback.stop()`
			`tts.playback.join()`
			`# Create new tts instance`
			`tts = TTSFactory.create()`
			`tts.init(ws)`
			`tts_hash = hash(str(config.get('tts', '')))`

Add new LOG class 2017-09-18 18:55:58 +00:00			`LOG.info("Speak: " + utterance)`
Fix lock around speech. the lock could be taken by a waiting thread between sentences in a multi-sentenced utterance. This locking method will allow the entire utterance to be synthezised before handling next. 2017-12-07 19:34:32 +00:00			`tts.execute(utterance)`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00

			`def handle_stop(event):`
			`"""`
			`handle stop message`
			`"""`
			`global _last_stop_signal`
Only stop speech is there's any ==== Fixed Issues ==== If stop speech happened while no speaking was happening the following utterance was lost. 2017-10-17 13:02:14 +00:00			`if check_for_signal("isSpeaking", -1):`
			`_last_stop_signal = time.time()`
			`tts.playback.clear_queue()`
			`tts.playback.clear_visimes()`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00

			`def init(websocket):`
			`"""`
			`Start speach related handlers`
			`"""`

			`global ws`
			`global tts`
			`global tts_hash`
			`global config`

			`ws = websocket`
Replace ConfigurationManger with Configuration 2017-09-23 12:13:50 +00:00			`Configuration.init(ws)`
			`config = Configuration.get()`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`ws.on('mycroft.stop', handle_stop)`
Correct stop_speaking() to handle threaded tts OK ==== Tech Notes ==== Now the stop_speaking() method sends a signal to the speech module to stop speech instead of killing all aplay/paplay instances 2017-10-08 19:03:02 +00:00			`ws.on('mycroft.audio.speech.stop', handle_stop)`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00			`ws.on('speak', handle_speak)`
Add 'mycroft.mic.listen' messagebus command Add support for 'mycroft.mic.listen' on the messagebus to trigger the system to listen for STT processing. This can be posted on the messagebus by outside systems, such as a physical or GUI Listen button. 2017-11-22 04:45:12 +00:00			`ws.on('mycroft.mic.listen', _start_listener)`
Move text to speech to audio service 2017-06-15 12:47:44 +00:00
			`tts = TTSFactory.create()`
			`tts.init(ws)`
			`tts_hash = config.get('tts')`
Fix rebase issues introduced with audio subsystem util/signal.py add missing import time move tts thread shutdown/clear to audio/speech.py 2017-08-02 21:04:55 +00:00

			`def shutdown():`
			`global tts`
			`if tts:`
			`tts.playback.stop()`
			`tts.playback.join()`