From 51ed5cd8101cff2be64c56f1fa424e333d20d1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Tue, 12 Dec 2017 20:49:10 +0100 Subject: [PATCH 1/4] Munge keywords for intents Convert keyword names to unique names by prepending the keyword with a letter string derived from the unique skill id. This commit modifies required keywords, optional keywords, one_of keywords and regex matches. This also munges the context keyword when that is sent to match the intent correctly --- mycroft/skills/core.py | 51 +++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/mycroft/skills/core.py b/mycroft/skills/core.py index 2b4a4d1d6a..2a2183fd63 100644 --- a/mycroft/skills/core.py +++ b/mycroft/skills/core.py @@ -63,16 +63,16 @@ def load_vocab_from_file(path, vocab_type, emitter): the intent handler. Args: - path: path to vocabulary file (*.voc) - vocab_type: keyword name - emitter: emitter to access the message bus + path: path to vocabulary file (*.voc) + vocab_type: keyword name + emitter: emitter to access the message bus + skill_id(str): skill id """ if path.endswith('.voc'): with open(path, 'r') as voc_file: for line in voc_file.readlines(): parts = line.strip().split("|") entity = parts[0] - emitter.emit(Message("register_vocab", { 'start': entity, 'end': vocab_type })) @@ -99,13 +99,31 @@ def load_regex_from_file(path, emitter): Message("register_vocab", {'regex': line.strip()})) -def load_vocabulary(basedir, emitter): - for vocab_type in listdir(basedir): - if vocab_type.endswith(".voc"): +def load_vocabulary(basedir, emitter, skill_id): + for vocab_file in listdir(basedir): + if vocab_file.endswith(".voc"): + vocab_type = str(skill_id) + ':' + splitext(vocab_file)[0] load_vocab_from_file( - join(basedir, vocab_type), splitext(vocab_type)[0], emitter) + join(basedir, vocab_file), vocab_type, emitter) +def unmunge_message(message, skill_id): + for key in message.data: + new_key = key.replace(str(skill_id) + ':', '') + message.data[new_key] = message.data.pop(key) + return message + +def munge_intent_parser(intent_parser, name, skill_id): + skill_id = str(skill_id) + intent_parser.name = skill_id + ':' + name + reqs = [] + for i in intent_parser.requires: + kw = (skill_id + ':' + i[0], skill_id + ':' + i[0]) + reqs.append(kw) + + intent_parser.requires = reqs + + def load_regex(basedir, emitter): for regex_type in listdir(basedir): if regex_type.endswith(".rx"): @@ -620,14 +638,16 @@ class MycroftSkill(object): if need_self: # When registring from decorator self is required if len(getargspec(handler).args) == 2: - handler(self, message) + handler(self, unmunge_message(message, + self.skill_id)) elif len(getargspec(handler).args) == 1: - handler(self) + handler(unmunge_message(message, self.skill_id)) elif len(getargspec(handler).args) == 0: # Zero may indicate multiple decorators, trying the # usual call signatures try: - handler(self, message) + handler(self, unmunge_message(message, + self.skill_id)) except TypeError: handler(self) else: @@ -636,7 +656,7 @@ class MycroftSkill(object): raise TypeError else: if len(getargspec(handler).args) == 2: - handler(message) + handler(unmunge_message(message, self.skill_id)) elif len(getargspec(handler).args) == 1: handler() else: @@ -704,7 +724,7 @@ class MycroftSkill(object): # Default to the handler's function name if none given name = intent_parser.name or handler.__name__ - intent_parser.name = str(self.skill_id) + ':' + name + munge_intent_parser(intent_parser, name, self.skill_id) self.emitter.emit(Message("register_intent", intent_parser.__dict__)) self.registered_intents.append((name, intent_parser)) self.add_event(intent_parser.name, handler, need_self) @@ -800,6 +820,7 @@ class MycroftSkill(object): raise ValueError('context should be a string') if not isinstance(word, basestring): raise ValueError('word should be a string') + context = to_letters(self.skill_id) + context self.emitter.emit(Message('add_context', {'context': context, 'word': word})) @@ -878,12 +899,12 @@ class MycroftSkill(object): def load_vocab_files(self, vocab_dir): self.vocab_dir = vocab_dir if exists(vocab_dir): - load_vocabulary(vocab_dir, self.emitter) + load_vocabulary(vocab_dir, self.emitter, self.skill_id) else: LOG.debug('No vocab loaded, ' + vocab_dir + ' does not exist') def load_regex_files(self, regex_dir): - load_regex(regex_dir, self.emitter) + load_regex(regex_dir, self.emitter, self.skill_id) def __handle_stop(self, event): """ From 378d3535723a088360637c2b9ae774029b5cef6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Wed, 14 Feb 2018 14:41:12 +0100 Subject: [PATCH 2/4] Move data loading helpers to separate file. All methods relating to loading vocabulary, dialog and regular expressions has grown quite large. To make the core functionallity of the skills more readable these are moved to the new module skill_data. Additional method documentation has been addedi as well. --- mycroft/skills/core.py | 83 +++---------------- mycroft/skills/skill_data.py | 150 +++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 70 deletions(-) create mode 100644 mycroft/skills/skill_data.py diff --git a/mycroft/skills/core.py b/mycroft/skills/core.py index 2a2183fd63..a16c3f6798 100644 --- a/mycroft/skills/core.py +++ b/mycroft/skills/core.py @@ -24,8 +24,7 @@ import inspect import abc import re from adapt.intent import Intent, IntentBuilder -from os import listdir -from os.path import join, abspath, dirname, splitext, basename, exists +from os.path import join, abspath, dirname, basename, exists from threading import Event from mycroft.api import DeviceApi @@ -36,6 +35,8 @@ from mycroft.filesystem import FileSystemAccess from mycroft.messagebus.message import Message from mycroft.metrics import report_metric, report_timing, Stopwatch from mycroft.skills.settings import SkillSettings +from mycroft.skills.skill_data import (load_vocabulary, load_regex, to_letters, + munge_intent_parser) from mycroft.util import resolve_resource_file from mycroft.util.log import LOG # python 2+3 compatibility @@ -57,79 +58,21 @@ def dig_for_message(): return l['message'] -def load_vocab_from_file(path, vocab_type, emitter): - """ - Load mycroft vocabulary from file. and send it on the message bus for - the intent handler. - - Args: - path: path to vocabulary file (*.voc) - vocab_type: keyword name - emitter: emitter to access the message bus - skill_id(str): skill id - """ - if path.endswith('.voc'): - with open(path, 'r') as voc_file: - for line in voc_file.readlines(): - parts = line.strip().split("|") - entity = parts[0] - emitter.emit(Message("register_vocab", { - 'start': entity, 'end': vocab_type - })) - for alias in parts[1:]: - emitter.emit(Message("register_vocab", { - 'start': alias, 'end': vocab_type, 'alias_of': entity - })) - - -def load_regex_from_file(path, emitter): - """ - Load regex from file and send it on the message bus for - the intent handler. - - Args: - path: path to vocabulary file (*.voc) - emitter: emitter to access the message bus - """ - if path.endswith('.rx'): - with open(path, 'r') as reg_file: - for line in reg_file.readlines(): - re.compile(line.strip()) - emitter.emit( - Message("register_vocab", {'regex': line.strip()})) - - -def load_vocabulary(basedir, emitter, skill_id): - for vocab_file in listdir(basedir): - if vocab_file.endswith(".voc"): - vocab_type = str(skill_id) + ':' + splitext(vocab_file)[0] - load_vocab_from_file( - join(basedir, vocab_file), vocab_type, emitter) - - def unmunge_message(message, skill_id): + """Restore message keywords by removing the Letterified skill ID. + + Args: + message (Message): Intent result message + skill_id (int): skill identifier + + Returns: + Message without clear keywords + """ for key in message.data: - new_key = key.replace(str(skill_id) + ':', '') + new_key = key.replace(to_letters(skill_id), '') message.data[new_key] = message.data.pop(key) return message -def munge_intent_parser(intent_parser, name, skill_id): - skill_id = str(skill_id) - intent_parser.name = skill_id + ':' + name - reqs = [] - for i in intent_parser.requires: - kw = (skill_id + ':' + i[0], skill_id + ':' + i[0]) - reqs.append(kw) - - intent_parser.requires = reqs - - -def load_regex(basedir, emitter): - for regex_type in listdir(basedir): - if regex_type.endswith(".rx"): - load_regex_from_file( - join(basedir, regex_type), emitter) - def open_intent_envelope(message): """ Convert dictionary received over messagebus to Intent. """ diff --git a/mycroft/skills/skill_data.py b/mycroft/skills/skill_data.py new file mode 100644 index 0000000000..7610553610 --- /dev/null +++ b/mycroft/skills/skill_data.py @@ -0,0 +1,150 @@ +"""Module containing methods needed to load skill +data such as dialogs, intents and regular expressions. +""" + +from os import listdir +from os.path import splitext, join +import re + +from mycroft.messagebus.message import Message + + +def load_vocab_from_file(path, vocab_type, emitter): + """ + Load mycroft vocabulary from file. and send it on the message bus for + the intent handler. + + Args: + path: path to vocabulary file (*.voc) + vocab_type: keyword name + emitter: emitter to access the message bus + skill_id(str): skill id + """ + if path.endswith('.voc'): + with open(path, 'r') as voc_file: + for line in voc_file.readlines(): + parts = line.strip().split("|") + entity = parts[0] + emitter.emit(Message("register_vocab", { + 'start': entity, 'end': vocab_type + })) + for alias in parts[1:]: + emitter.emit(Message("register_vocab", { + 'start': alias, 'end': vocab_type, 'alias_of': entity + })) + + +def load_regex_from_file(path, emitter, skill_id): + """ + Load regex from file and send it on the message bus for + the intent handler. + + Args: + path: path to vocabulary file (*.voc) + emitter: emitter to access the message bus + """ + if path.endswith('.rx'): + with open(path, 'r') as reg_file: + for line in reg_file.readlines(): + re.compile(munge_regex(line.strip(), skill_id)) + emitter.emit( + Message("register_vocab", + {'regex': munge_regex(line.strip(), skill_id)})) + + +def load_vocabulary(basedir, emitter, skill_id): + """Load vocabulary from all files in the specified directory. + + Args: + basedir (str): path of directory to load from + emitter (messagebus emitter): websocket used to send the vocab to + the intent service + skill_id: skill the data belongs to + """ + for vocab_file in listdir(basedir): + if vocab_file.endswith(".voc"): + vocab_type = to_letters(skill_id) + splitext(vocab_file)[0] + load_vocab_from_file( + join(basedir, vocab_file), vocab_type, emitter) + + +def load_regex(basedir, emitter, skill_id): + """Load regex from all files in the specified directory. + + Args: + basedir (str): path of directory to load from + emitter (messagebus emitter): websocket used to send the vocab to + the intent service + skill_id: (int) skill identifier + """ + for regex_type in listdir(basedir): + if regex_type.endswith(".rx"): + load_regex_from_file( + join(basedir, regex_type), emitter, skill_id) + + +def to_letters(number): + """Convert number to string of letters. + + 0 -> A, 1 -> B, etc. + + Args: + number (int): number to be converted + Returns: + (str) String of letters + """ + ret = '' + for n in str(number).strip('-'): + ret += chr(65 + int(n)) + return ret + + +def munge_regex(regex, skill_id): + """Insert skill id as letters into match groups. + + Args: + regex (str): regex string + skill_id (int): skill identifier + Returns: + (str) munged regex + """ + base = '(?P<' + to_letters(skill_id) + return base.join(regex.split('(?P<')) + + +def munge_intent_parser(intent_parser, name, skill_id): + """Rename the intent keywords to make them skill exclusive and gives the + intent parser an exclusive name. + The format of the intent parser name is :. + The format of the keywords is . + + Args: + intent_parser: (IntentParser) object to update + name: (str) Skill name + skill_id: (int) skill identifier + """ + # Munge parser name + intent_parser.name = str(skill_id) + ':' + name + + # Munge keywords + skill_id = to_letters(skill_id) + # Munge required keyword + reqs = [] + for i in intent_parser.requires: + kw = (skill_id + i[0], skill_id + i[0]) + reqs.append(kw) + intent_parser.requires = reqs + + # Munge optional keywords + opts = [] + for i in intent_parser.optional: + kw = (skill_id + i[0], skill_id + i[0]) + opts.append(kw) + intent_parser.optional = opts + + # Munge at_least_one keywords + at_least_one = [] + for i in intent_parser.at_least_one: + element = [skill_id + e for e in i] + at_least_one.append(tuple(element)) + intent_parser.at_least_one = at_least_one From 364dce2c660c398dd6ba7c4f71532522cb9161f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Wed, 14 Feb 2018 15:16:41 +0100 Subject: [PATCH 3/4] Update test cases to handle munged values --- test/unittests/skills/core.py | 55 ++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/test/unittests/skills/core.py b/test/unittests/skills/core.py index d6df09dcd9..d4cbbee8e0 100644 --- a/test/unittests/skills/core.py +++ b/test/unittests/skills/core.py @@ -23,9 +23,10 @@ from re import error from mycroft.configuration import Configuration from mycroft.messagebus.message import Message -from mycroft.skills.core import load_regex_from_file, load_regex, \ - load_vocab_from_file, load_vocabulary, MycroftSkill, \ - load_skill, create_skill_descriptor, open_intent_envelope +from mycroft.skills.skill_data import load_regex_from_file, load_regex, \ + load_vocab_from_file, load_vocabulary +from mycroft.skills.core import MycroftSkill, load_skill, \ + create_skill_descriptor, open_intent_envelope class MockEmitter(object): @@ -67,17 +68,17 @@ class MycroftSkillTest(unittest.TestCase): def check_regex_from_file(self, filename, result_list=None): result_list = result_list or [] - load_regex_from_file(join(self.regex_path, filename), self.emitter) + load_regex_from_file(join(self.regex_path, filename), self.emitter, 0) self.check_emitter(result_list) def check_vocab(self, path, result_list=None): result_list = result_list or [] - load_vocabulary(path, self.emitter) + load_vocabulary(path, self.emitter, 0) self.check_emitter(result_list) def check_regex(self, path, result_list=None): result_list = result_list or [] - load_regex(path, self.emitter) + load_regex(path, self.emitter, 0) self.check_emitter(result_list) def check_emitter(self, result_list): @@ -89,12 +90,12 @@ class MycroftSkillTest(unittest.TestCase): def test_load_regex_from_file_single(self): self.check_regex_from_file('valid/single.rx', - [{'regex': '(?P.*)'}]) + [{'regex': '(?P.*)'}]) def test_load_regex_from_file_multiple(self): self.check_regex_from_file('valid/multiple.rx', - [{'regex': '(?P.*)'}, - {'regex': '(?P.*)'}]) + [{'regex': '(?P.*)'}, + {'regex': '(?P.*)'}]) def test_load_regex_from_file_none(self): self.check_regex_from_file('invalid/none.rx') @@ -114,9 +115,9 @@ class MycroftSkillTest(unittest.TestCase): def test_load_regex_full(self): self.check_regex(join(self.regex_path, 'valid'), - [{'regex': '(?P.*)'}, - {'regex': '(?P.*)'}, - {'regex': '(?P.*)'}]) + [{'regex': '(?P.*)'}, + {'regex': '(?P.*)'}, + {'regex': '(?P.*)'}]) def test_load_regex_empty(self): self.check_regex(join(dirname(__file__), @@ -164,17 +165,17 @@ class MycroftSkillTest(unittest.TestCase): def test_load_vocab_full(self): self.check_vocab(join(self.vocab_path, 'valid'), - [{'start': 'test', 'end': 'single'}, - {'start': 'water', 'end': 'singlealias'}, - {'start': 'watering', 'end': 'singlealias', + [{'start': 'test', 'end': 'Asingle'}, + {'start': 'water', 'end': 'Asinglealias'}, + {'start': 'watering', 'end': 'Asinglealias', 'alias_of': 'water'}, - {'start': 'animal', 'end': 'multiple'}, - {'start': 'animals', 'end': 'multiple'}, - {'start': 'chair', 'end': 'multiplealias'}, - {'start': 'chairs', 'end': 'multiplealias', + {'start': 'animal', 'end': 'Amultiple'}, + {'start': 'animals', 'end': 'Amultiple'}, + {'start': 'chair', 'end': 'Amultiplealias'}, + {'start': 'chairs', 'end': 'Amultiplealias', 'alias_of': 'chair'}, - {'start': 'table', 'end': 'multiplealias'}, - {'start': 'tables', 'end': 'multiplealias', + {'start': 'table', 'end': 'Amultiplealias'}, + {'start': 'tables', 'end': 'Amultiplealias', 'alias_of': 'table'}]) def test_load_vocab_empty(self): @@ -218,7 +219,7 @@ class MycroftSkillTest(unittest.TestCase): expected = [{'at_least_one': [], 'name': '0:a', 'optional': [], - 'requires': [('Keyword', 'Keyword')]}] + 'requires': [('AKeyword', 'AKeyword')]}] self.check_register_intent(expected) # Test register IntentBuilder object @@ -228,7 +229,7 @@ class MycroftSkillTest(unittest.TestCase): expected = [{'at_least_one': [], 'name': '0:a', 'optional': [], - 'requires': [('Keyword', 'Keyword')]}] + 'requires': [('AKeyword', 'AKeyword')]}] self.check_register_intent(expected) @@ -289,7 +290,7 @@ class MycroftSkillTest(unittest.TestCase): expected = [{'at_least_one': [], 'name': '0:a', 'optional': [], - 'requires': [('Keyword', 'Keyword')]}, + 'requires': [('AKeyword', 'AKeyword')]}, { 'file_name': join(dirname(__file__), 'intent_file', 'test.intent'), @@ -319,17 +320,17 @@ class MycroftSkillTest(unittest.TestCase): s.bind(self.emitter) # No context content s.set_context('TurtlePower') - expected = [{'context': 'TurtlePower', 'word': ''}] + expected = [{'context': 'ATurtlePower', 'word': ''}] check_set_context(expected) # context with content s.set_context('Technodrome', 'Shredder') - expected = [{'context': 'Technodrome', 'word': 'Shredder'}] + expected = [{'context': 'ATechnodrome', 'word': 'Shredder'}] check_set_context(expected) # UTF-8 context s.set_context(u'Smörgåsbord€15') - expected = [{'context': u'Smörgåsbord€15', 'word': ''}] + expected = [{'context': u'ASmörgåsbord€15', 'word': ''}] check_set_context(expected) self.emitter.reset() From 23302b60d91c5bbdb284934cb9354967f4f2a11e Mon Sep 17 00:00:00 2001 From: Steve Penrod Date: Thu, 15 Feb 2018 02:12:51 -0600 Subject: [PATCH 4/4] Add copyright notice and minor docstring changes --- mycroft/skills/skill_data.py | 51 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/mycroft/skills/skill_data.py b/mycroft/skills/skill_data.py index 7610553610..902c9462f2 100644 --- a/mycroft/skills/skill_data.py +++ b/mycroft/skills/skill_data.py @@ -1,3 +1,18 @@ +# Copyright 2018 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """Module containing methods needed to load skill data such as dialogs, intents and regular expressions. """ @@ -10,15 +25,14 @@ from mycroft.messagebus.message import Message def load_vocab_from_file(path, vocab_type, emitter): - """ - Load mycroft vocabulary from file. and send it on the message bus for - the intent handler. + """Load Mycroft vocabulary from file + The vocab is sent to the intent handler using the message bus - Args: - path: path to vocabulary file (*.voc) - vocab_type: keyword name - emitter: emitter to access the message bus - skill_id(str): skill id + Args: + path: path to vocabulary file (*.voc) + vocab_type: keyword name + emitter: emitter to access the message bus + skill_id(str): skill id """ if path.endswith('.voc'): with open(path, 'r') as voc_file: @@ -35,13 +49,12 @@ def load_vocab_from_file(path, vocab_type, emitter): def load_regex_from_file(path, emitter, skill_id): - """ - Load regex from file and send it on the message bus for - the intent handler. + """Load regex from file + The regex is sent to the intent handler using the message bus - Args: - path: path to vocabulary file (*.voc) - emitter: emitter to access the message bus + Args: + path: path to vocabulary file (*.voc) + emitter: emitter to access the message bus """ if path.endswith('.rx'): with open(path, 'r') as reg_file: @@ -75,7 +88,7 @@ def load_regex(basedir, emitter, skill_id): basedir (str): path of directory to load from emitter (messagebus emitter): websocket used to send the vocab to the intent service - skill_id: (int) skill identifier + skill_id (int): skill identifier """ for regex_type in listdir(basedir): if regex_type.endswith(".rx"): @@ -113,10 +126,10 @@ def munge_regex(regex, skill_id): def munge_intent_parser(intent_parser, name, skill_id): - """Rename the intent keywords to make them skill exclusive and gives the - intent parser an exclusive name. - The format of the intent parser name is :. - The format of the keywords is . + """Rename intent keywords to make them skill exclusive + This gives the intent parser an exclusive name in the + format :. The keywords are given unique + names in the format . Args: intent_parser: (IntentParser) object to update