Merge pull request #1422 from forslund/feature/munge-keywords

Make keywords per skill to fix overlapping keyword names
pull/1104/head
Åke 2018-02-15 09:57:03 +01:00 committed by GitHub
commit ed6ab224c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 217 additions and 89 deletions

View File

@ -24,8 +24,7 @@ import inspect
import abc
import re
from adapt.intent import Intent, IntentBuilder
from os import listdir
from os.path import join, abspath, dirname, splitext, basename, exists
from os.path import join, abspath, dirname, basename, exists
from threading import Event
from mycroft.api import DeviceApi
@ -36,6 +35,8 @@ from mycroft.filesystem import FileSystemAccess
from mycroft.messagebus.message import Message
from mycroft.metrics import report_metric, report_timing, Stopwatch
from mycroft.skills.settings import SkillSettings
from mycroft.skills.skill_data import (load_vocabulary, load_regex, to_letters,
munge_intent_parser)
from mycroft.util import resolve_resource_file
from mycroft.util.log import LOG
# python 2+3 compatibility
@ -57,60 +58,20 @@ def dig_for_message():
return l['message']
def load_vocab_from_file(path, vocab_type, emitter):
def unmunge_message(message, skill_id):
"""Restore message keywords by removing the Letterified skill ID.
Args:
message (Message): Intent result message
skill_id (int): skill identifier
Returns:
Message with the skill-specific prefix removed from its keywords
"""
Load mycroft vocabulary from file. and send it on the message bus for
the intent handler.
Args:
path: path to vocabulary file (*.voc)
vocab_type: keyword name
emitter: emitter to access the message bus
"""
if path.endswith('.voc'):
with open(path, 'r') as voc_file:
for line in voc_file.readlines():
parts = line.strip().split("|")
entity = parts[0]
emitter.emit(Message("register_vocab", {
'start': entity, 'end': vocab_type
}))
for alias in parts[1:]:
emitter.emit(Message("register_vocab", {
'start': alias, 'end': vocab_type, 'alias_of': entity
}))
def load_regex_from_file(path, emitter):
"""
Load regex from file and send it on the message bus for
the intent handler.
Args:
path: path to vocabulary file (*.voc)
emitter: emitter to access the message bus
"""
if path.endswith('.rx'):
with open(path, 'r') as reg_file:
for line in reg_file.readlines():
re.compile(line.strip())
emitter.emit(
Message("register_vocab", {'regex': line.strip()}))
def load_vocabulary(basedir, emitter):
for vocab_type in listdir(basedir):
if vocab_type.endswith(".voc"):
load_vocab_from_file(
join(basedir, vocab_type), splitext(vocab_type)[0], emitter)
def load_regex(basedir, emitter):
for regex_type in listdir(basedir):
if regex_type.endswith(".rx"):
load_regex_from_file(
join(basedir, regex_type), emitter)
for key in message.data:
new_key = key.replace(to_letters(skill_id), '')
message.data[new_key] = message.data.pop(key)
return message
def open_intent_envelope(message):
@ -622,14 +583,16 @@ class MycroftSkill(object):
if need_self:
# When registering from decorator self is required
if len(getargspec(handler).args) == 2:
handler(self, message)
handler(self, unmunge_message(message,
self.skill_id))
elif len(getargspec(handler).args) == 1:
handler(self)
handler(unmunge_message(message, self.skill_id))
elif len(getargspec(handler).args) == 0:
# Zero may indicate multiple decorators, trying the
# usual call signatures
try:
handler(self, message)
handler(self, unmunge_message(message,
self.skill_id))
except TypeError:
handler(self)
else:
@ -638,7 +601,7 @@ class MycroftSkill(object):
raise TypeError
else:
if len(getargspec(handler).args) == 2:
handler(message)
handler(unmunge_message(message, self.skill_id))
elif len(getargspec(handler).args) == 1:
handler()
else:
@ -718,7 +681,7 @@ class MycroftSkill(object):
# Default to the handler's function name if none given
name = intent_parser.name or handler.__name__
intent_parser.name = str(self.skill_id) + ':' + name
munge_intent_parser(intent_parser, name, self.skill_id)
self.emitter.emit(Message("register_intent", intent_parser.__dict__))
self.registered_intents.append((name, intent_parser))
self.add_event(intent_parser.name, handler, need_self)
@ -814,6 +777,7 @@ class MycroftSkill(object):
raise ValueError('context should be a string')
if not isinstance(word, basestring):
raise ValueError('word should be a string')
context = to_letters(self.skill_id) + context
self.emitter.emit(Message('add_context',
{'context': context, 'word': word}))
@ -892,12 +856,12 @@ class MycroftSkill(object):
def load_vocab_files(self, vocab_dir):
self.vocab_dir = vocab_dir
if exists(vocab_dir):
load_vocabulary(vocab_dir, self.emitter)
load_vocabulary(vocab_dir, self.emitter, self.skill_id)
else:
LOG.debug('No vocab loaded, ' + vocab_dir + ' does not exist')
def load_regex_files(self, regex_dir):
load_regex(regex_dir, self.emitter)
load_regex(regex_dir, self.emitter, self.skill_id)
def __handle_stop(self, event):
"""

View File

@ -0,0 +1,163 @@
# Copyright 2018 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Module containing methods needed to load skill
data such as dialogs, intents and regular expressions.
"""
from os import listdir
from os.path import splitext, join
import re
from mycroft.messagebus.message import Message
def load_vocab_from_file(path, vocab_type, emitter):
    """Load Mycroft vocabulary from file

    The vocab is sent to the intent handler using the message bus.
    Every non-blank line holds one entity, optionally followed by
    "|"-separated aliases of that entity.

    Args:
        path: path to vocabulary file (*.voc), other paths are ignored
        vocab_type: keyword name
        emitter: emitter to access the message bus
    """
    if path.endswith('.voc'):
        with open(path, 'r') as voc_file:
            for line in voc_file:
                line = line.strip()
                if not line:
                    # A blank line would register an empty keyword
                    continue
                parts = line.split("|")
                entity = parts[0]
                emitter.emit(Message("register_vocab", {
                    'start': entity, 'end': vocab_type
                }))
                for alias in parts[1:]:
                    emitter.emit(Message("register_vocab", {
                        'start': alias, 'end': vocab_type, 'alias_of': entity
                    }))
def load_regex_from_file(path, emitter, skill_id):
    """Load regex from file

    The regex is sent to the intent handler using the message bus.
    Every non-blank line is treated as one regular expression.

    Args:
        path: path to regex file (*.rx), other paths are ignored
        emitter: emitter to access the message bus
        skill_id: skill identifier, inserted as letters in front of the
                  names of the regex match groups

    Raises:
        re.error: if a line is not a valid regular expression
    """
    if path.endswith('.rx'):
        with open(path, 'r') as reg_file:
            for line in reg_file:
                line = line.strip()
                if not line:
                    # Nothing to register for a blank line
                    continue
                # Munge once and reuse for both validation and registration
                regex = munge_regex(line, skill_id)
                # Compile only to validate, invalid expressions raise here
                re.compile(regex)
                emitter.emit(Message("register_vocab", {'regex': regex}))
def load_vocabulary(basedir, emitter, skill_id):
    """Send the vocabulary of every *.voc file in a directory.

    The keyword name of each file is its file name without extension,
    prefixed with the skill id converted to letters.

    Args:
        basedir (str): path of directory to load from
        emitter (messagebus emitter): websocket used to send the vocab to
                                      the intent service
        skill_id: skill the data belongs to
    """
    for filename in listdir(basedir):
        if not filename.endswith(".voc"):
            continue
        keyword = to_letters(skill_id) + splitext(filename)[0]
        voc_path = join(basedir, filename)
        load_vocab_from_file(voc_path, keyword, emitter)
def load_regex(basedir, emitter, skill_id):
    """Send the regular expressions of every *.rx file in a directory.

    Args:
        basedir (str): path of directory to load from
        emitter (messagebus emitter): websocket used to send the vocab to
                                      the intent service
        skill_id (int): skill identifier
    """
    for filename in listdir(basedir):
        if not filename.endswith(".rx"):
            continue
        rx_path = join(basedir, filename)
        load_regex_from_file(rx_path, emitter, skill_id)
def to_letters(number):
    """Convert number to string of letters.

    Each decimal digit is mapped to an upper-case letter:
    0 -> A, 1 -> B, etc. A minus sign is dropped, so -12 and 12 both
    result in "BC".

    Args:
        number (int): number to be converted

    Returns:
        (str) String of letters
    """
    digits = str(number).strip('-')
    return ''.join(chr(ord('A') + int(digit)) for digit in digits)
def munge_regex(regex, skill_id):
    """Prefix every named match group with the skill id as letters.

    Args:
        regex (str): regex string
        skill_id (int): skill identifier

    Returns:
        (str) munged regex
    """
    group_start = '(?P<'
    return regex.replace(group_start, group_start + to_letters(skill_id))
def munge_intent_parser(intent_parser, name, skill_id):
    """Rename intent and keywords to make them skill exclusive

    The intent parser gets the name <skill_id>:<name> and every keyword
    in requires, optional and at_least_one is renamed to
    <Skill id as letters><Keyword name>. The parser is updated in place.

    Args:
        intent_parser: (IntentParser) object to update
        name: (str) name to give the intent, without the skill id
        skill_id: (int) skill identifier
    """
    intent_parser.name = str(skill_id) + ':' + name

    prefix = to_letters(skill_id)

    # Required and optional keywords become pairs where both elements
    # are the prefixed first element of the original entry
    intent_parser.requires = [
        (prefix + req[0], prefix + req[0])
        for req in intent_parser.requires
    ]
    intent_parser.optional = [
        (prefix + opt[0], prefix + opt[0])
        for opt in intent_parser.optional
    ]
    # Each at_least_one entry is a group of keywords, prefix every member
    intent_parser.at_least_one = [
        tuple([prefix + keyword for keyword in group])
        for group in intent_parser.at_least_one
    ]

View File

@ -24,9 +24,10 @@ from datetime import datetime
from mycroft.configuration import Configuration
from mycroft.messagebus.message import Message
from mycroft.skills.core import load_regex_from_file, load_regex, \
load_vocab_from_file, load_vocabulary, MycroftSkill, \
load_skill, create_skill_descriptor, open_intent_envelope
from mycroft.skills.skill_data import load_regex_from_file, load_regex, \
load_vocab_from_file, load_vocabulary
from mycroft.skills.core import MycroftSkill, load_skill, \
create_skill_descriptor, open_intent_envelope
class MockEmitter(object):
@ -68,17 +69,17 @@ class MycroftSkillTest(unittest.TestCase):
def check_regex_from_file(self, filename, result_list=None):
result_list = result_list or []
load_regex_from_file(join(self.regex_path, filename), self.emitter)
load_regex_from_file(join(self.regex_path, filename), self.emitter, 0)
self.check_emitter(result_list)
def check_vocab(self, path, result_list=None):
result_list = result_list or []
load_vocabulary(path, self.emitter)
load_vocabulary(path, self.emitter, 0)
self.check_emitter(result_list)
def check_regex(self, path, result_list=None):
result_list = result_list or []
load_regex(path, self.emitter)
load_regex(path, self.emitter, 0)
self.check_emitter(result_list)
def check_emitter(self, result_list):
@ -90,12 +91,12 @@ class MycroftSkillTest(unittest.TestCase):
def test_load_regex_from_file_single(self):
self.check_regex_from_file('valid/single.rx',
[{'regex': '(?P<SingleTest>.*)'}])
[{'regex': '(?P<ASingleTest>.*)'}])
def test_load_regex_from_file_multiple(self):
self.check_regex_from_file('valid/multiple.rx',
[{'regex': '(?P<MultipleTest1>.*)'},
{'regex': '(?P<MultipleTest2>.*)'}])
[{'regex': '(?P<AMultipleTest1>.*)'},
{'regex': '(?P<AMultipleTest2>.*)'}])
def test_load_regex_from_file_none(self):
self.check_regex_from_file('invalid/none.rx')
@ -115,9 +116,9 @@ class MycroftSkillTest(unittest.TestCase):
def test_load_regex_full(self):
self.check_regex(join(self.regex_path, 'valid'),
[{'regex': '(?P<MultipleTest1>.*)'},
{'regex': '(?P<MultipleTest2>.*)'},
{'regex': '(?P<SingleTest>.*)'}])
[{'regex': '(?P<AMultipleTest1>.*)'},
{'regex': '(?P<AMultipleTest2>.*)'},
{'regex': '(?P<ASingleTest>.*)'}])
def test_load_regex_empty(self):
self.check_regex(join(dirname(__file__),
@ -165,17 +166,17 @@ class MycroftSkillTest(unittest.TestCase):
def test_load_vocab_full(self):
self.check_vocab(join(self.vocab_path, 'valid'),
[{'start': 'test', 'end': 'single'},
{'start': 'water', 'end': 'singlealias'},
{'start': 'watering', 'end': 'singlealias',
[{'start': 'test', 'end': 'Asingle'},
{'start': 'water', 'end': 'Asinglealias'},
{'start': 'watering', 'end': 'Asinglealias',
'alias_of': 'water'},
{'start': 'animal', 'end': 'multiple'},
{'start': 'animals', 'end': 'multiple'},
{'start': 'chair', 'end': 'multiplealias'},
{'start': 'chairs', 'end': 'multiplealias',
{'start': 'animal', 'end': 'Amultiple'},
{'start': 'animals', 'end': 'Amultiple'},
{'start': 'chair', 'end': 'Amultiplealias'},
{'start': 'chairs', 'end': 'Amultiplealias',
'alias_of': 'chair'},
{'start': 'table', 'end': 'multiplealias'},
{'start': 'tables', 'end': 'multiplealias',
{'start': 'table', 'end': 'Amultiplealias'},
{'start': 'tables', 'end': 'Amultiplealias',
'alias_of': 'table'}])
def test_load_vocab_empty(self):
@ -219,7 +220,7 @@ class MycroftSkillTest(unittest.TestCase):
expected = [{'at_least_one': [],
'name': '0:a',
'optional': [],
'requires': [('Keyword', 'Keyword')]}]
'requires': [('AKeyword', 'AKeyword')]}]
self.check_register_intent(expected)
# Test register IntentBuilder object
@ -229,7 +230,7 @@ class MycroftSkillTest(unittest.TestCase):
expected = [{'at_least_one': [],
'name': '0:a',
'optional': [],
'requires': [('Keyword', 'Keyword')]}]
'requires': [('AKeyword', 'AKeyword')]}]
self.check_register_intent(expected)
@ -290,7 +291,7 @@ class MycroftSkillTest(unittest.TestCase):
expected = [{'at_least_one': [],
'name': '0:a',
'optional': [],
'requires': [('Keyword', 'Keyword')]},
'requires': [('AKeyword', 'AKeyword')]},
{
'file_name': join(dirname(__file__), 'intent_file',
'test.intent'),
@ -320,17 +321,17 @@ class MycroftSkillTest(unittest.TestCase):
s.bind(self.emitter)
# No context content
s.set_context('TurtlePower')
expected = [{'context': 'TurtlePower', 'word': ''}]
expected = [{'context': 'ATurtlePower', 'word': ''}]
check_set_context(expected)
# context with content
s.set_context('Technodrome', 'Shredder')
expected = [{'context': 'Technodrome', 'word': 'Shredder'}]
expected = [{'context': 'ATechnodrome', 'word': 'Shredder'}]
check_set_context(expected)
# UTF-8 context
s.set_context(u'Smörgåsbord€15')
expected = [{'context': u'Smörgåsbord€15', 'word': ''}]
expected = [{'context': u'ASmörgåsbord€15', 'word': ''}]
check_set_context(expected)
self.emitter.reset()