mycroft-core/test/integrationtests/skills/skill_tester.py

699 lines
24 KiB
Python

# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""The module execute a test of one skill intent.
Using a mocked message bus this module is responsible for sending utterences
and testing that the intent is called.
The module runner can test:
That the expected intent in the skill is activated
That the expected parameters are extracted from the utterance
That Mycroft contexts are set or removed
That the skill speak the intended answer
The content of any message exchanged between the skill and the mycroft core
To set up a test the test runner can
Send an utterance, as the user would normally speak
Set up and remove context
Set up a custom timeout for the test runner, to allow for skills that runs
for a very long time
"""
from queue import Queue, Empty
from copy import copy
import json
import time
import os
import re
import ast
from os.path import join, isdir, basename
from pyee import EventEmitter
from numbers import Number
from mycroft.messagebus.message import Message
from mycroft.skills.core import MycroftSkill, FallbackSkill
from mycroft.skills.skill_loader import SkillLoader
from mycroft.configuration import Configuration
from mycroft.util.log import LOG
from logging import StreamHandler
from io import StringIO
from contextlib import contextmanager
from .colors import color
from .rules import (intent_type_check, play_query_check, question_check,
expected_data_check, expected_dialog_check,
changed_context_check)
# Name of the module file that marks a directory as a skill
MainModule = '__init__'

# Default number of seconds a test case may run before it times out.
# NOTE(review): the name keeps its original spelling ("EVALUAITON") since
# it is referenced elsewhere in this module.
DEFAULT_EVALUAITON_TIMEOUT = 30

# Set a configuration value to allow skills to check if they're in a test
Configuration.get()['test_env'] = True
class SkillTestError(Exception):
    """Raised when a skill test cannot be set up or run to completion."""
@contextmanager
def temporary_handler(log, handler):
    """Context manager to replace the default logger with a temporary logger.

    The original handler is restored when the block exits, even if the
    managed block raises an exception.

    Args:
        log (LOG): mycroft LOG object
        handler (logging.Handler): Handler object to use
    """
    old_handler = log.handler
    log.handler = handler
    try:
        yield
    finally:
        # Restore the original handler even on error; otherwise all later
        # log output would keep flowing to the temporary handler.
        log.handler = old_handler
def create_skill_descriptor(skill_path):
    """Wrap a skill directory path in a descriptor dict."""
    return dict(path=skill_path)
def get_skills(skills_folder):
    """Find skills in the skill folder or sub folders.

    Recursive traversal into subfolders stops when a __init__.py file
    is discovered.

    Args:
        skills_folder: Folder to start a search for skills __init__.py
                       files

    Returns:
        list: skill descriptors, sorted by skill directory name
    """
    skills = []

    def _collect(folder):
        if not isdir(folder):
            return
        # List the directory only once instead of twice per level
        entries = os.listdir(folder)
        if MainModule + ".py" in entries:
            # A folder containing __init__.py is a skill root; stop here
            skills.append(create_skill_descriptor(folder))
            return
        for entry in entries:
            _collect(join(folder, entry))

    _collect(skills_folder)
    return sorted(skills, key=lambda p: basename(p['path']))
def load_skills(emitter, skills_root):
    """Load all skills found under skills_root and attach them to emitter.

    Args:
        emitter: The emitter the skills should use
        skills_root: Directory of the skills __init__.py

    Returns:
        tuple: (list of loaded skills, dict with logs for each skill)
    """
    # Import hoisted out of the loop; Python caches the module anyway
    from mycroft.util.log import LOG as skills_log

    loaded = []
    load_logs = {}
    for descriptor in get_skills(skills_root):
        skill_path = descriptor["path"]
        # Catch everything logged while this skill loads so the output can
        # be shown later if the skill failed to come up
        capture = StringIO()
        with temporary_handler(skills_log, StreamHandler(capture)):
            loader = SkillLoader(emitter, skill_path)
            loader.skill_id = 'test-' + basename(skill_path)
            loader.load()
            loaded.append(loader.instance)

        # The skill's logger was created while the temporary handler was
        # active; recreate it so it uses the normal handler again
        if loader.instance:
            loader.instance.log = LOG.create_logger(loader.instance.name)

        load_logs[skill_path] = capture.getvalue()
    return loaded, load_logs
def unload_skills(skills):
    """Invoke default_shutdown() on every loaded skill, in order."""
    for skill in skills:
        skill.default_shutdown()
class InterceptEmitter(object):
    """Bus emitter that can mirror traffic onto an inspection queue.

    This class intercepts and allows emitting events between the
    skill_tester and the skill being tested.  While a test is running,
    every emitted message is also put on ``self.q`` (when set) so the
    test runner can examine the traffic one message at a time.
    """

    def __init__(self):
        self.emitter = EventEmitter()
        self.q = None

    def on(self, event, f):
        """Register a handler, printing each registration."""
        # run all events
        print("Event: ", event)
        self.emitter.on(event, f)

    def emit(self, event, *args, **kwargs):
        """Pass a Message to the bus, mirroring it to the queue first."""
        name = event.msg_type
        if self.q:
            self.q.put(event)
        self.emitter.emit(name, event, *args, **kwargs)

    def wait_for_response(self, event, reply_type=None, *args, **kwargs):
        """Simple single thread implementation of wait_for_response."""
        expected = reply_type or event.msg_type + '.response'
        captured = None

        def _capture(msg):
            nonlocal captured
            captured = msg

        # Handlers run synchronously here, so any reply produced while
        # emitting is captured before we return
        self.emitter.once(expected, _capture)
        self.emitter.emit(event.msg_type, event)
        return captured

    def once(self, event, f):
        self.emitter.once(event, f)

    def remove(self, event_name, func):
        # Intentionally a no-op in the mock
        pass

    def remove_all_listeners(self, event_name):
        # Intentionally a no-op in the mock
        pass
class MockSkillsLoader(object):
    """Load skills onto an InterceptEmitter together with an intent service."""

    def __init__(self, skills_root):
        self.load_log = None
        self.skills_root = skills_root
        self.emitter = InterceptEmitter()
        from mycroft.skills.intent_service import IntentService
        self.ih = IntentService(self.emitter)
        self.skills = None
        self.emitter.on(
            'mycroft.skills.fallback',
            FallbackSkill.make_intent_failure_handler(self.emitter))

        def _decline_converse(message):
            # Reply with result=False to any converse request
            self.emitter.emit(Message(
                'skill.converse.response',
                dict(result=False,
                     skill_id=message.data.get('skill_id', ''))))

        self.emitter.on('skill.converse.request', _decline_converse)

    def load_skills(self):
        """Load the skills and train padatious; return the raw emitter."""
        skills, self.load_log = load_skills(self.emitter, self.skills_root)
        self.skills = list(filter(None, skills))
        self.ih.padatious_service.train(
            Message('', data=dict(single_thread=True)))
        return self.emitter.emitter  # kick out the underlying emitter

    def unload_skills(self):
        unload_skills(self.skills)
def load_test_case_file(test_case_file):
    """Load a test case to run.

    Args:
        test_case_file: path of the json test case file

    Returns:
        dict: the parsed test case
    """
    bar = "=" * 20
    print("")
    print(color.HEADER + bar + " RUNNING TEST " + bar + color.RESET)
    print('Test file: ', test_case_file)
    with open(test_case_file, 'r') as f:
        case = json.load(f)
    print('Test:', json.dumps(case, indent=4, sort_keys=False))
    return case
class SkillTest(object):
    """Executor for a single skill test case.

    This class is instantiated for each skill being tested. It holds the
    data needed for the test, and contains the methods doing the test
    """

    def __init__(self, skill, test_case_file, emitter, test_status=None):
        """
        Args:
            skill: path (root_dir) of the skill under test
            test_case_file: path of the json test case file
            emitter: bus emitter connected to the loaded skills
            test_status: optional per-skill intent coverage tracker
        """
        self.skill = skill
        self.test_case_file = test_case_file
        self.emitter = emitter
        # NOTE(review): binds the dict builtin; appears unused in this class
        self.dict = dict
        self.output_file = None
        self.returned_intent = False
        self.test_status = test_status
        self.failure_msg = None  # holds the first failed rule on failure
        self.end_of_skill = False  # set when handler.complete is received

    def run(self, loader):
        """ Execute the test

        Run a test for a skill. The skill, test_case_file and emitter is
        already set up in the __init__ method.

        This method does all the preparation and cleanup and calls
        self.execute_test() to perform the actual test.

        Args:
            loader: skills loader holding loaded skills and their load logs

        Returns:
            bool: Test results -- only True if all passed

        Raises:
            SkillTestError: if the skill under test failed to load
        """
        self.end_of_skill = False  # Reset to false at beginning of test

        # Find the loaded skill instance matching the path under test
        s = [s for s in loader.skills if s and s.root_dir == self.skill]
        if s:
            s = s[0]
        else:
            # The skill wasn't loaded, print the load log for the skill
            if self.skill in loader.load_log:
                print('\n {} Captured Logs from loading {}'.format('=' * 15,
                                                                   '=' * 15))
                print(loader.load_log.pop(self.skill))

            raise SkillTestError('Skill couldn\'t be loaded')

        orig_get_response = s.get_response
        original_settings = s.settings
        try:
            return self.execute_test(s)
        finally:
            # Restore whatever execute_test may have replaced on the skill
            s.get_response = orig_get_response
            s.settings = original_settings

    def send_play_query(self, s, test_case):
        """Emit an event triggering a check for playback possibilities."""
        play_query = test_case['play_query']
        print('PLAY QUERY', color.USER_UTT + play_query + color.RESET)
        self.emitter.emit('play:query', Message('play:query:',
                                                {'phrase': play_query}))

    def send_play_start(self, s, test_case):
        """Emit an event starting playback from the skill."""
        print('PLAY START')
        callback_data = test_case['play_start']
        # Attach the id of the skill under test to the callback data
        callback_data['skill_id'] = s.skill_id
        self.emitter.emit('play:start',
                          Message('play:start', callback_data))

    def send_question(self, test_case):
        """Emit a Question to the loaded skills."""
        print("QUESTION: {}".format(test_case['question']))
        callback_data = {'phrase': test_case['question']}
        self.emitter.emit('question:query',
                          Message('question:query', data=callback_data))

    def send_utterance(self, test_case):
        """Emit an utterance to the loaded skills."""
        utt = test_case['utterance']
        print("UTTERANCE:", color.USER_UTT + utt + color.RESET)
        self.emitter.emit('recognizer_loop:utterance',
                          Message('recognizer_loop:utterance',
                                  {'utterances': [utt]}))

    def apply_test_settings(self, s, test_case):
        """Replace the skills settings with settings from the test_case."""
        s.settings = copy(test_case['settings'])
        print(color.YELLOW, 'will run test with custom settings:',
              '\n{}'.format(s.settings), color.RESET)

    def setup_get_response(self, s, test_case):
        """Setup interception of get_response calls.

        The replacement speaks the prompt and returns the next canned
        response from the test case instead of waiting for user input.
        """
        def get_response(dialog='', data=None, announcement='',
                         validator=None, on_fail=None, num_retries=-1):
            data = data or {}
            utt = announcement or s.dialog_renderer.render(dialog, data)
            print(color.MYCROFT + ">> " + utt + color.RESET)
            s.speak(utt)

            # Responses are consumed in the order listed in the test case
            response = test_case['responses'].pop(0)
            print("SENDING RESPONSE:",
                  color.USER_UTT + response + color.RESET)
            return response

        s.get_response = get_response

    def remove_context(self, s, cxt):
        """remove an adapt context.

        Args:
            s: skill to operate on
            cxt: context name (str) or list of context names
        """
        if isinstance(cxt, list):
            for x in cxt:
                MycroftSkill.remove_context(s, x)
        else:
            MycroftSkill.remove_context(s, cxt)

    def set_context(self, s, cxt):
        """Set an adapt context.

        Args:
            s: skill to operate on
            cxt: dict mapping context keys to values
        """
        for key, value in cxt.items():
            MycroftSkill.set_context(s, key, value)

    def send_test_input(self, s, test_case):
        """Emit an utterance, just like the STT engine does. This sends the
        provided text to the skill engine for intent matching and it then
        invokes the skill.

        It also handles some special cases for common play skills and common
        query skills.

        Raises:
            SkillTestError: if the test case contains no input field
        """
        if 'utterance' in test_case:
            self.send_utterance(test_case)
        elif 'play_query' in test_case:
            self.send_play_query(s, test_case)
        elif 'play_start' in test_case:
            self.send_play_start(s, test_case)
        elif 'question' in test_case:
            self.send_question(test_case)
        else:
            raise SkillTestError('No input provided in test case')

    def execute_test(self, s):
        """ Execute test case.

        Args:
            s (MycroftSkill): mycroft skill to test

        Returns:
            (bool) True if the test succeeded completely.
        """
        test_case = load_test_case_file(self.test_case_file)

        if 'settings' in test_case:
            self.apply_test_settings(s, test_case)

        if 'responses' in test_case:
            self.setup_get_response(s, test_case)

        # If we keep track of test status for the entire skill, then
        # get all intents from the skill, and mark current intent
        # tested
        if self.test_status:
            self.test_status.append_intent(s)
            if 'intent_type' in test_case:
                self.test_status.set_tested(test_case['intent_type'])

        evaluation_rule = EvaluationRule(test_case, s)

        # Set up queue for emitted events. Because
        # the evaluation method expects events to be received in convoy,
        # and be handled one by one. We cant make assumptions about threading
        # in the core or the skill
        q = Queue()
        s.bus.q = q

        # Set up context before calling intent
        # This option makes it possible to better isolate (reduce dependance)
        # between test_cases
        cxt = test_case.get('remove_context', None)
        if cxt:
            self.remove_context(s, cxt)

        cxt = test_case.get('set_context', None)
        if cxt:
            self.set_context(s, cxt)

        self.send_test_input(s, test_case)

        # Wait up to X seconds for the test_case to complete
        timeout = self.get_timeout(test_case)

        while not evaluation_rule.all_succeeded():
            # Process the queue until a skill handler sends a complete message
            if self.check_queue(q, evaluation_rule) or time.time() > timeout:
                break

        self.shutdown_emitter(s)

        # Report test result if failed
        return self.results(evaluation_rule)

    def get_timeout(self, test_case):
        """Find any timeout specified in test case.

        If no timeout is specified return the default.

        Returns:
            float: absolute time (epoch seconds) when the test times out
        """
        if (test_case.get('evaluation_timeout', None) and
                isinstance(test_case['evaluation_timeout'], int)):
            return time.time() + int(test_case.get('evaluation_timeout'))
        else:
            return time.time() + DEFAULT_EVALUAITON_TIMEOUT

    def check_queue(self, q, evaluation_rule):
        """Check the queue for events.

        Each received event is evaluated against the rules; the queue poll
        waits at most one second.

        If event indicating skill completion is found returns True, else False.
        """
        try:
            event = q.get(timeout=1)
            # Expose a normalized message type for the rules to match on
            if ':' in event.msg_type:
                event.data['__type__'] = event.msg_type.split(':')[1]
            else:
                event.data['__type__'] = event.msg_type

            evaluation_rule.evaluate(event.data)
            if event.msg_type == 'mycroft.skill.handler.complete':
                self.end_of_skill = True
        except Empty:
            pass

        # Only report completion once the queue has been fully drained
        if q.empty() and self.end_of_skill:
            return True
        else:
            return False

    def shutdown_emitter(self, s):
        """Shutdown the skill connection to the bus."""
        # Stop emiter from sending on queue
        s.bus.q = None

        # remove the skill which is not responding
        self.emitter.remove_all_listeners('speak')
        self.emitter.remove_all_listeners('mycroft.skill.handler.complete')

    def results(self, evaluation_rule):
        """Display and report the results.

        Returns:
            bool: True if all rules succeeded
        """
        if not evaluation_rule.all_succeeded():
            self.failure_msg = str(evaluation_rule.get_failure())
            print(color.FAIL + "Evaluation failed" + color.RESET)
            print(color.FAIL + "Failure:", self.failure_msg + color.RESET)
            return False
        return True
# Messages that should not print debug info when they are evaluated
# against the rules (high-volume internal chatter)
HIDDEN_MESSAGES = ['skill.converse.request', 'skill.converse.response',
                   'gui.page.show', 'gui.value.set']
class EvaluationRule:
    """
    This class initially converts the test_case json file to internal rule
    format, which is stored throughout the testcase run. All Messages on
    the event bus can be evaluated against the rules (test_case)

    This approach makes it easier to add new tests, since Message and rule
    traversal is already set up for the internal rule format.
    The test writer can use the internal rule format directly in the
    test_case using the assert keyword, which allows for more
    powerful/individual test cases than the standard dictionary

    Internally a rule is a list: [operator, field, expected] or a compound
    ['and'|'or', rule, rule, ...]. A rule that has been fulfilled gets the
    string 'succeeded' appended to it in place.
    """

    def __init__(self, test_case, skill=None):
        """ Convert test_case read from file to internal rule format

        Args:
            test_case: The loaded test case
            skill: optional skill to test, used to fetch dialogs
        """
        self.rule = []

        # Rules about the matched intent are grouped under a single 'and'
        _x = ['and']
        if 'utterance' in test_case and 'intent_type' in test_case:
            intent_type = str(test_case['intent_type'])
            _x.append(intent_type_check(intent_type))

            # Check for adapt intent info
            if test_case.get('intent', None):
                for item in test_case['intent'].items():
                    _x.append(['equal', str(item[0]), str(item[1])])

        if 'play_query_match' in test_case:
            match = test_case['play_query_match']
            phrase = match.get('phrase', test_case.get('play_query'))
            self.rule.append(play_query_check(skill, match, phrase))
        elif 'expected_answer' in test_case:
            question = test_case['question']
            expected_answer = test_case['expected_answer']
            self.rule.append(question_check(skill, question, expected_answer))

        # Check for expected data structure
        if test_case.get('expected_data'):
            expected_items = test_case['expected_data'].items()
            self.rule.append(expected_data_check(expected_items))

        # Only add the intent rule group if any intent rules were collected
        if _x != ['and']:
            self.rule.append(_x)

        # Add rules from expected_response
        # Accepts a string or a list of multiple strings
        if isinstance(test_case.get('expected_response', None), str):
            self.rule.append(['match', 'utterance',
                              str(test_case['expected_response'])])
        elif isinstance(test_case.get('expected_response', None), list):
            texts = test_case['expected_response']
            rules = [['match', 'utterance', str(r)] for r in texts]
            self.rule.append(['or'] + rules)

        # Add rules from expected_dialog
        # Accepts dialog (without ".dialog"), the same way as self.speak_dialog
        # as a string or a list of dialogs
        if test_case.get('expected_dialog', None):
            if not skill:
                print(color.FAIL +
                      'Skill is missing, can\'t run expected_dialog test' +
                      color.RESET)
            else:
                expected_dialog = test_case['expected_dialog']
                self.rule.append(['or'] +
                                 expected_dialog_check(expected_dialog,
                                                       skill))

        if test_case.get('changed_context', None):
            ctx = test_case['changed_context']
            for c in changed_context_check(ctx):
                self.rule.append(c)

        # literal_eval parses literal rule lists only, never arbitrary code
        if test_case.get('assert', None):
            for _x in ast.literal_eval(test_case['assert']):
                self.rule.append(_x)

        print("Rule created ", self.rule)

    def evaluate(self, msg):
        """ Main entry for evaluating a message against the rules.

        The rules are prepared in the __init__
        This method is usually called several times with different
        messages using the same rule set. Each call contributing
        to fulfilling all the rules

        Args:
            msg: The message event to evaluate
        """
        if msg.get('__type__', '') not in HIDDEN_MESSAGES:
            print("\nEvaluating message: ", msg)
        for r in self.rule:
            self._partial_evaluate(r, msg)

    def _get_field_value(self, rule, msg):
        """Fetch a (possibly nested) field value from a message dict.

        Args:
            rule: field name, or list of keys for a nested lookup
            msg: message data dict

        Returns:
            The field value, or None when any key on the path is missing.
            NOTE(review): a falsy intermediate value (0, '', ...) also
            stops the traversal.
        """
        if isinstance(rule, list):
            value = msg.get(rule[0], None)
            if len(rule) > 1 and value:
                for field in rule[1:]:
                    value = value.get(field, None)
                    if not value:
                        break
        else:
            value = msg.get(rule, None)

        return value

    def _partial_evaluate(self, rule, msg):
        """ Evaluate the message against a part of the rules

        Recursive over rules

        Args:
            rule: A rule or a part of the rules to be broken down further
            msg: The message event being evaluated

        Returns:
            Bool: True if a partial evaluation succeeded
        """
        if 'succeeded' in rule:  # Rule has already succeeded, test not needed
            return True

        # The operator checks below fall through; exactly one matches
        # rule[0], and any failing check returns False immediately
        if rule[0] == 'equal':
            if self._get_field_value(rule[1], msg) != rule[2]:
                return False

        if rule[0] == 'lt':
            if not isinstance(self._get_field_value(rule[1], msg), Number):
                return False
            if self._get_field_value(rule[1], msg) >= rule[2]:
                return False

        if rule[0] == 'gt':
            if not isinstance(self._get_field_value(rule[1], msg), Number):
                return False
            if self._get_field_value(rule[1], msg) <= rule[2]:
                return False

        if rule[0] == 'notEqual':
            if self._get_field_value(rule[1], msg) == rule[2]:
                return False

        if rule[0] == 'endsWith':
            if not (self._get_field_value(rule[1], msg) and
                    self._get_field_value(rule[1], msg).endswith(rule[2])):
                return False

        if rule[0] == 'exists':
            if not self._get_field_value(rule[1], msg):
                return False

        if rule[0] == 'match':
            if not (self._get_field_value(rule[1], msg) and
                    re.match(rule[2], self._get_field_value(rule[1], msg))):
                return False

        if rule[0] == 'and':
            for i in rule[1:]:
                if not self._partial_evaluate(i, msg):
                    return False

        if rule[0] == 'or':
            for i in rule[1:]:
                if self._partial_evaluate(i, msg):
                    break
            else:
                return False

        # No check failed: tag the rule as fulfilled (in place)
        rule.append('succeeded')
        return True

    def get_failure(self):
        """ Get the first rule which has not succeeded

        Returns:
            str: The failed rule
        """
        for x in self.rule:
            if x[-1] != 'succeeded':
                return x
        return None

    def all_succeeded(self):
        """ Test if all rules succeeded

        Returns:
            bool: True if all rules succeeded
        """
        # A rule is fulfilled when its last element is the 'succeeded' tag
        return len([x for x in self.rule if x[-1] != 'succeeded']) == 0