Merge pull request #1422 from forslund/feature/munge-keywords

Make keywords per skill to fix overlapping keyword names
2018-02-15 09:57:03 +01:00 · 2018-02-15 09:57:03 +01:00 · ed6ab224c9
parent 53919d9ea6 23302b60d9
commit ed6ab224c9
3 changed files with 217 additions and 89 deletions
--- a/mycroft/skills/core.py
+++ b/mycroft/skills/core.py
@ -24,8 +24,7 @@ import inspect
 import abc
 import re
 from adapt.intent import Intent, IntentBuilder
-from os import listdir
-from os.path import join, abspath, dirname, splitext, basename, exists
+from os.path import join, abspath, dirname, basename, exists
 from threading import Event

 from mycroft.api import DeviceApi
@ -36,6 +35,8 @@ from mycroft.filesystem import FileSystemAccess
 from mycroft.messagebus.message import Message
 from mycroft.metrics import report_metric, report_timing, Stopwatch
 from mycroft.skills.settings import SkillSettings
+from mycroft.skills.skill_data import (load_vocabulary, load_regex, to_letters,
+                                       munge_intent_parser)
 from mycroft.util import resolve_resource_file
 from mycroft.util.log import LOG
 # python 2+3 compatibility
@ -57,60 +58,20 @@ def dig_for_message():
            return l['message']


-def load_vocab_from_file(path, vocab_type, emitter):
+def unmunge_message(message, skill_id):
+    """Restore message keywords by removing the Letterified skill ID.
+
+    Args:
+        message (Message): Intent result message
+        skill_id (int): skill identifier
+
+    Returns:
+        Message without clear keywords
    """
-        Load mycroft vocabulary from file. and send it on the message bus for
-        the intent handler.
-
-        Args:
-            path:       path to vocabulary file (*.voc)
-            vocab_type: keyword name
-            emitter:    emitter to access the message bus
-    """
-    if path.endswith('.voc'):
-        with open(path, 'r') as voc_file:
-            for line in voc_file.readlines():
-                parts = line.strip().split("|")
-                entity = parts[0]
-
-                emitter.emit(Message("register_vocab", {
-                    'start': entity, 'end': vocab_type
-                }))
-                for alias in parts[1:]:
-                    emitter.emit(Message("register_vocab", {
-                        'start': alias, 'end': vocab_type, 'alias_of': entity
-                    }))
-
-
-def load_regex_from_file(path, emitter):
-    """
-        Load regex from file and send it on the message bus for
-        the intent handler.
-
-        Args:
-            path:       path to vocabulary file (*.voc)
-            emitter:    emitter to access the message bus
-    """
-    if path.endswith('.rx'):
-        with open(path, 'r') as reg_file:
-            for line in reg_file.readlines():
-                re.compile(line.strip())
-                emitter.emit(
-                    Message("register_vocab", {'regex': line.strip()}))
-
-
-def load_vocabulary(basedir, emitter):
-    for vocab_type in listdir(basedir):
-        if vocab_type.endswith(".voc"):
-            load_vocab_from_file(
-                join(basedir, vocab_type), splitext(vocab_type)[0], emitter)
-
-
-def load_regex(basedir, emitter):
-    for regex_type in listdir(basedir):
-        if regex_type.endswith(".rx"):
-            load_regex_from_file(
-                join(basedir, regex_type), emitter)
+    for key in message.data:
+        new_key = key.replace(to_letters(skill_id), '')
+        message.data[new_key] = message.data.pop(key)
+    return message


 def open_intent_envelope(message):
@ -622,14 +583,16 @@ class MycroftSkill(object):
                    if need_self:
                        # When registring from decorator self is required
                        if len(getargspec(handler).args) == 2:
-                            handler(self, message)
+                            handler(self, unmunge_message(message,
+                                                          self.skill_id))
                        elif len(getargspec(handler).args) == 1:
-                            handler(self)
+                            handler(unmunge_message(message, self.skill_id))
                        elif len(getargspec(handler).args) == 0:
                            # Zero may indicate multiple decorators, trying the
                            # usual call signatures
                            try:
-                                handler(self, message)
+                                handler(self, unmunge_message(message,
+                                                              self.skill_id))
                            except TypeError:
                                handler(self)
                        else:
@ -638,7 +601,7 @@ class MycroftSkill(object):
                            raise TypeError
                    else:
                        if len(getargspec(handler).args) == 2:
-                            handler(message)
+                            handler(unmunge_message(message, self.skill_id))
                        elif len(getargspec(handler).args) == 1:
                            handler()
                        else:
@ -718,7 +681,7 @@ class MycroftSkill(object):

        # Default to the handler's function name if none given
        name = intent_parser.name or handler.__name__
-        intent_parser.name = str(self.skill_id) + ':' + name
+        munge_intent_parser(intent_parser, name, self.skill_id)
        self.emitter.emit(Message("register_intent", intent_parser.__dict__))
        self.registered_intents.append((name, intent_parser))
        self.add_event(intent_parser.name, handler, need_self)
@ -814,6 +777,7 @@ class MycroftSkill(object):
            raise ValueError('context should be a string')
        if not isinstance(word, basestring):
            raise ValueError('word should be a string')
+        context = to_letters(self.skill_id) + context
        self.emitter.emit(Message('add_context',
                                  {'context': context, 'word': word}))

@ -892,12 +856,12 @@ class MycroftSkill(object):
    def load_vocab_files(self, vocab_dir):
        self.vocab_dir = vocab_dir
        if exists(vocab_dir):
-            load_vocabulary(vocab_dir, self.emitter)
+            load_vocabulary(vocab_dir, self.emitter, self.skill_id)
        else:
            LOG.debug('No vocab loaded, ' + vocab_dir + ' does not exist')

    def load_regex_files(self, regex_dir):
-        load_regex(regex_dir, self.emitter)
+        load_regex(regex_dir, self.emitter, self.skill_id)

    def __handle_stop(self, event):
        """
--- a/mycroft/skills/skill_data.py
+++ b/mycroft/skills/skill_data.py
@ -0,0 +1,163 @@
+# Copyright 2018 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Module containing methods needed to load skill
+data such as dialogs, intents and regular expressions.
+"""
+
+from os import listdir
+from os.path import splitext, join
+import re
+
+from mycroft.messagebus.message import Message
+
+
+def load_vocab_from_file(path, vocab_type, emitter):
+    """Load Mycroft vocabulary from file
+    The vocab is sent to the intent handler using the message bus
+
+    Args:
+        path:           path to vocabulary file (*.voc)
+        vocab_type:     keyword name
+        emitter:        emitter to access the message bus
+        skill_id(str):  skill id
+    """
+    if path.endswith('.voc'):
+        with open(path, 'r') as voc_file:
+            for line in voc_file.readlines():
+                parts = line.strip().split("|")
+                entity = parts[0]
+                emitter.emit(Message("register_vocab", {
+                    'start': entity, 'end': vocab_type
+                }))
+                for alias in parts[1:]:
+                    emitter.emit(Message("register_vocab", {
+                        'start': alias, 'end': vocab_type, 'alias_of': entity
+                    }))
+
+
+def load_regex_from_file(path, emitter, skill_id):
+    """Load regex from file
+    The regex is sent to the intent handler using the message bus
+
+    Args:
+        path:       path to vocabulary file (*.voc)
+        emitter:    emitter to access the message bus
+    """
+    if path.endswith('.rx'):
+        with open(path, 'r') as reg_file:
+            for line in reg_file.readlines():
+                re.compile(munge_regex(line.strip(), skill_id))
+                emitter.emit(
+                    Message("register_vocab",
+                            {'regex': munge_regex(line.strip(), skill_id)}))
+
+
+def load_vocabulary(basedir, emitter, skill_id):
+    """Load vocabulary from all files in the specified directory.
+
+    Args:
+        basedir (str): path of directory to load from
+        emitter (messagebus emitter): websocket used to send the vocab to
+                                      the intent service
+        skill_id: skill the data belongs to
+    """
+    for vocab_file in listdir(basedir):
+        if vocab_file.endswith(".voc"):
+            vocab_type = to_letters(skill_id) + splitext(vocab_file)[0]
+            load_vocab_from_file(
+                join(basedir, vocab_file), vocab_type, emitter)
+
+
+def load_regex(basedir, emitter, skill_id):
+    """Load regex from all files in the specified directory.
+
+    Args:
+        basedir (str): path of directory to load from
+        emitter (messagebus emitter): websocket used to send the vocab to
+                                      the intent service
+        skill_id (int): skill identifier
+    """
+    for regex_type in listdir(basedir):
+        if regex_type.endswith(".rx"):
+            load_regex_from_file(
+                join(basedir, regex_type), emitter, skill_id)
+
+
+def to_letters(number):
+    """Convert number to string of letters.
+
+    0 -> A, 1 -> B, etc.
+
+    Args:
+        number (int): number to be converted
+    Returns:
+        (str) String of letters
+    """
+    ret = ''
+    for n in str(number).strip('-'):
+        ret += chr(65 + int(n))
+    return ret
+
+
+def munge_regex(regex, skill_id):
+    """Insert skill id as letters into match groups.
+
+    Args:
+        regex (str): regex string
+        skill_id (int): skill identifier
+    Returns:
+        (str) munged regex
+    """
+    base = '(?P<' + to_letters(skill_id)
+    return base.join(regex.split('(?P<'))
+
+
+def munge_intent_parser(intent_parser, name, skill_id):
+    """Rename intent keywords to make them skill exclusive
+    This gives the intent parser an exclusive name in the
+    format <skill_id>:<name>.  The keywords are given unique
+    names in the format <Skill id as letters><Intent name>.
+
+    Args:
+        intent_parser: (IntentParser) object to update
+        name: (str) Skill name
+        skill_id: (int) skill identifier
+    """
+    # Munge parser name
+    intent_parser.name = str(skill_id) + ':' + name
+
+    # Munge keywords
+    skill_id = to_letters(skill_id)
+    # Munge required keyword
+    reqs = []
+    for i in intent_parser.requires:
+        kw = (skill_id + i[0], skill_id + i[0])
+        reqs.append(kw)
+    intent_parser.requires = reqs
+
+    # Munge optional keywords
+    opts = []
+    for i in intent_parser.optional:
+        kw = (skill_id + i[0], skill_id + i[0])
+        opts.append(kw)
+    intent_parser.optional = opts
+
+    # Munge at_least_one keywords
+    at_least_one = []
+    for i in intent_parser.at_least_one:
+        element = [skill_id + e for e in i]
+        at_least_one.append(tuple(element))
+    intent_parser.at_least_one = at_least_one
--- a/test/unittests/skills/core.py
+++ b/test/unittests/skills/core.py
@ -24,9 +24,10 @@ from datetime import datetime

 from mycroft.configuration import Configuration
 from mycroft.messagebus.message import Message
-from mycroft.skills.core import load_regex_from_file, load_regex, \
-    load_vocab_from_file, load_vocabulary, MycroftSkill, \
-    load_skill, create_skill_descriptor, open_intent_envelope
+from mycroft.skills.skill_data import load_regex_from_file, load_regex, \
+    load_vocab_from_file, load_vocabulary
+from mycroft.skills.core import MycroftSkill, load_skill, \
+    create_skill_descriptor, open_intent_envelope


 class MockEmitter(object):
@ -68,17 +69,17 @@ class MycroftSkillTest(unittest.TestCase):

    def check_regex_from_file(self, filename, result_list=None):
        result_list = result_list or []
-        load_regex_from_file(join(self.regex_path, filename), self.emitter)
+        load_regex_from_file(join(self.regex_path, filename), self.emitter, 0)
        self.check_emitter(result_list)

    def check_vocab(self, path, result_list=None):
        result_list = result_list or []
-        load_vocabulary(path, self.emitter)
+        load_vocabulary(path, self.emitter, 0)
        self.check_emitter(result_list)

    def check_regex(self, path, result_list=None):
        result_list = result_list or []
-        load_regex(path, self.emitter)
+        load_regex(path, self.emitter, 0)
        self.check_emitter(result_list)

    def check_emitter(self, result_list):
@ -90,12 +91,12 @@ class MycroftSkillTest(unittest.TestCase):

    def test_load_regex_from_file_single(self):
        self.check_regex_from_file('valid/single.rx',
-                                   [{'regex': '(?P<SingleTest>.*)'}])
+                                   [{'regex': '(?P<ASingleTest>.*)'}])

    def test_load_regex_from_file_multiple(self):
        self.check_regex_from_file('valid/multiple.rx',
-                                   [{'regex': '(?P<MultipleTest1>.*)'},
-                                    {'regex': '(?P<MultipleTest2>.*)'}])
+                                   [{'regex': '(?P<AMultipleTest1>.*)'},
+                                    {'regex': '(?P<AMultipleTest2>.*)'}])

    def test_load_regex_from_file_none(self):
        self.check_regex_from_file('invalid/none.rx')
@ -115,9 +116,9 @@ class MycroftSkillTest(unittest.TestCase):

    def test_load_regex_full(self):
        self.check_regex(join(self.regex_path, 'valid'),
-                         [{'regex': '(?P<MultipleTest1>.*)'},
-                          {'regex': '(?P<MultipleTest2>.*)'},
-                          {'regex': '(?P<SingleTest>.*)'}])
+                         [{'regex': '(?P<AMultipleTest1>.*)'},
+                          {'regex': '(?P<AMultipleTest2>.*)'},
+                          {'regex': '(?P<ASingleTest>.*)'}])

    def test_load_regex_empty(self):
        self.check_regex(join(dirname(__file__),
@ -165,17 +166,17 @@ class MycroftSkillTest(unittest.TestCase):

    def test_load_vocab_full(self):
        self.check_vocab(join(self.vocab_path, 'valid'),
-                         [{'start': 'test', 'end': 'single'},
-                          {'start': 'water', 'end': 'singlealias'},
-                          {'start': 'watering', 'end': 'singlealias',
+                         [{'start': 'test', 'end': 'Asingle'},
+                          {'start': 'water', 'end': 'Asinglealias'},
+                          {'start': 'watering', 'end': 'Asinglealias',
                           'alias_of': 'water'},
-                          {'start': 'animal', 'end': 'multiple'},
-                          {'start': 'animals', 'end': 'multiple'},
-                          {'start': 'chair', 'end': 'multiplealias'},
-                          {'start': 'chairs', 'end': 'multiplealias',
+                          {'start': 'animal', 'end': 'Amultiple'},
+                          {'start': 'animals', 'end': 'Amultiple'},
+                          {'start': 'chair', 'end': 'Amultiplealias'},
+                          {'start': 'chairs', 'end': 'Amultiplealias',
                           'alias_of': 'chair'},
-                          {'start': 'table', 'end': 'multiplealias'},
-                          {'start': 'tables', 'end': 'multiplealias',
+                          {'start': 'table', 'end': 'Amultiplealias'},
+                          {'start': 'tables', 'end': 'Amultiplealias',
                           'alias_of': 'table'}])

    def test_load_vocab_empty(self):
@ -219,7 +220,7 @@ class MycroftSkillTest(unittest.TestCase):
        expected = [{'at_least_one': [],
                     'name': '0:a',
                     'optional': [],
-                     'requires': [('Keyword', 'Keyword')]}]
+                     'requires': [('AKeyword', 'AKeyword')]}]
        self.check_register_intent(expected)

        # Test register IntentBuilder object
@ -229,7 +230,7 @@ class MycroftSkillTest(unittest.TestCase):
        expected = [{'at_least_one': [],
                     'name': '0:a',
                     'optional': [],
-                     'requires': [('Keyword', 'Keyword')]}]
+                     'requires': [('AKeyword', 'AKeyword')]}]

        self.check_register_intent(expected)

@ -290,7 +291,7 @@ class MycroftSkillTest(unittest.TestCase):
        expected = [{'at_least_one': [],
                     'name': '0:a',
                     'optional': [],
-                     'requires': [('Keyword', 'Keyword')]},
+                     'requires': [('AKeyword', 'AKeyword')]},
                    {
                     'file_name': join(dirname(__file__), 'intent_file',
                                       'test.intent'),
@ -320,17 +321,17 @@ class MycroftSkillTest(unittest.TestCase):
        s.bind(self.emitter)
        # No context content
        s.set_context('TurtlePower')
-        expected = [{'context': 'TurtlePower', 'word': ''}]
+        expected = [{'context': 'ATurtlePower', 'word': ''}]
        check_set_context(expected)

        # context with content
        s.set_context('Technodrome', 'Shredder')
-        expected = [{'context': 'Technodrome', 'word': 'Shredder'}]
+        expected = [{'context': 'ATechnodrome', 'word': 'Shredder'}]
        check_set_context(expected)

        # UTF-8 context
        s.set_context(u'Smörgåsbord€15')
-        expected = [{'context': u'Smörgåsbord€15', 'word': ''}]
+        expected = [{'context': u'ASmörgåsbord€15', 'word': ''}]
        check_set_context(expected)

        self.emitter.reset()