Merge pull request #1422 from forslund/feature/munge-keywords
Make keywords per skill to fix overlapping keyword names
commit
ed6ab224c9
|
@ -24,8 +24,7 @@ import inspect
|
|||
import abc
|
||||
import re
|
||||
from adapt.intent import Intent, IntentBuilder
|
||||
from os import listdir
|
||||
from os.path import join, abspath, dirname, splitext, basename, exists
|
||||
from os.path import join, abspath, dirname, basename, exists
|
||||
from threading import Event
|
||||
|
||||
from mycroft.api import DeviceApi
|
||||
|
@ -36,6 +35,8 @@ from mycroft.filesystem import FileSystemAccess
|
|||
from mycroft.messagebus.message import Message
|
||||
from mycroft.metrics import report_metric, report_timing, Stopwatch
|
||||
from mycroft.skills.settings import SkillSettings
|
||||
from mycroft.skills.skill_data import (load_vocabulary, load_regex, to_letters,
|
||||
munge_intent_parser)
|
||||
from mycroft.util import resolve_resource_file
|
||||
from mycroft.util.log import LOG
|
||||
# python 2+3 compatibility
|
||||
|
@ -57,60 +58,20 @@ def dig_for_message():
|
|||
return l['message']
|
||||
|
||||
|
||||
def load_vocab_from_file(path, vocab_type, emitter):
|
||||
def unmunge_message(message, skill_id):
|
||||
"""Restore message keywords by removing the Letterified skill ID.
|
||||
|
||||
Args:
|
||||
message (Message): Intent result message
|
||||
skill_id (int): skill identifier
|
||||
|
||||
Returns:
|
||||
Message without clear keywords
|
||||
"""
|
||||
Load mycroft vocabulary from file. and send it on the message bus for
|
||||
the intent handler.
|
||||
|
||||
Args:
|
||||
path: path to vocabulary file (*.voc)
|
||||
vocab_type: keyword name
|
||||
emitter: emitter to access the message bus
|
||||
"""
|
||||
if path.endswith('.voc'):
|
||||
with open(path, 'r') as voc_file:
|
||||
for line in voc_file.readlines():
|
||||
parts = line.strip().split("|")
|
||||
entity = parts[0]
|
||||
|
||||
emitter.emit(Message("register_vocab", {
|
||||
'start': entity, 'end': vocab_type
|
||||
}))
|
||||
for alias in parts[1:]:
|
||||
emitter.emit(Message("register_vocab", {
|
||||
'start': alias, 'end': vocab_type, 'alias_of': entity
|
||||
}))
|
||||
|
||||
|
||||
def load_regex_from_file(path, emitter):
|
||||
"""
|
||||
Load regex from file and send it on the message bus for
|
||||
the intent handler.
|
||||
|
||||
Args:
|
||||
path: path to vocabulary file (*.voc)
|
||||
emitter: emitter to access the message bus
|
||||
"""
|
||||
if path.endswith('.rx'):
|
||||
with open(path, 'r') as reg_file:
|
||||
for line in reg_file.readlines():
|
||||
re.compile(line.strip())
|
||||
emitter.emit(
|
||||
Message("register_vocab", {'regex': line.strip()}))
|
||||
|
||||
|
||||
def load_vocabulary(basedir, emitter):
|
||||
for vocab_type in listdir(basedir):
|
||||
if vocab_type.endswith(".voc"):
|
||||
load_vocab_from_file(
|
||||
join(basedir, vocab_type), splitext(vocab_type)[0], emitter)
|
||||
|
||||
|
||||
def load_regex(basedir, emitter):
|
||||
for regex_type in listdir(basedir):
|
||||
if regex_type.endswith(".rx"):
|
||||
load_regex_from_file(
|
||||
join(basedir, regex_type), emitter)
|
||||
for key in message.data:
|
||||
new_key = key.replace(to_letters(skill_id), '')
|
||||
message.data[new_key] = message.data.pop(key)
|
||||
return message
|
||||
|
||||
|
||||
def open_intent_envelope(message):
|
||||
|
@ -622,14 +583,16 @@ class MycroftSkill(object):
|
|||
if need_self:
|
||||
# When registring from decorator self is required
|
||||
if len(getargspec(handler).args) == 2:
|
||||
handler(self, message)
|
||||
handler(self, unmunge_message(message,
|
||||
self.skill_id))
|
||||
elif len(getargspec(handler).args) == 1:
|
||||
handler(self)
|
||||
handler(unmunge_message(message, self.skill_id))
|
||||
elif len(getargspec(handler).args) == 0:
|
||||
# Zero may indicate multiple decorators, trying the
|
||||
# usual call signatures
|
||||
try:
|
||||
handler(self, message)
|
||||
handler(self, unmunge_message(message,
|
||||
self.skill_id))
|
||||
except TypeError:
|
||||
handler(self)
|
||||
else:
|
||||
|
@ -638,7 +601,7 @@ class MycroftSkill(object):
|
|||
raise TypeError
|
||||
else:
|
||||
if len(getargspec(handler).args) == 2:
|
||||
handler(message)
|
||||
handler(unmunge_message(message, self.skill_id))
|
||||
elif len(getargspec(handler).args) == 1:
|
||||
handler()
|
||||
else:
|
||||
|
@ -718,7 +681,7 @@ class MycroftSkill(object):
|
|||
|
||||
# Default to the handler's function name if none given
|
||||
name = intent_parser.name or handler.__name__
|
||||
intent_parser.name = str(self.skill_id) + ':' + name
|
||||
munge_intent_parser(intent_parser, name, self.skill_id)
|
||||
self.emitter.emit(Message("register_intent", intent_parser.__dict__))
|
||||
self.registered_intents.append((name, intent_parser))
|
||||
self.add_event(intent_parser.name, handler, need_self)
|
||||
|
@ -814,6 +777,7 @@ class MycroftSkill(object):
|
|||
raise ValueError('context should be a string')
|
||||
if not isinstance(word, basestring):
|
||||
raise ValueError('word should be a string')
|
||||
context = to_letters(self.skill_id) + context
|
||||
self.emitter.emit(Message('add_context',
|
||||
{'context': context, 'word': word}))
|
||||
|
||||
|
@ -892,12 +856,12 @@ class MycroftSkill(object):
|
|||
def load_vocab_files(self, vocab_dir):
|
||||
self.vocab_dir = vocab_dir
|
||||
if exists(vocab_dir):
|
||||
load_vocabulary(vocab_dir, self.emitter)
|
||||
load_vocabulary(vocab_dir, self.emitter, self.skill_id)
|
||||
else:
|
||||
LOG.debug('No vocab loaded, ' + vocab_dir + ' does not exist')
|
||||
|
||||
def load_regex_files(self, regex_dir):
|
||||
load_regex(regex_dir, self.emitter)
|
||||
load_regex(regex_dir, self.emitter, self.skill_id)
|
||||
|
||||
def __handle_stop(self, event):
|
||||
"""
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
# Copyright 2018 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""Module containing methods needed to load skill
|
||||
data such as dialogs, intents and regular expressions.
|
||||
"""
|
||||
|
||||
from os import listdir
|
||||
from os.path import splitext, join
|
||||
import re
|
||||
|
||||
from mycroft.messagebus.message import Message
|
||||
|
||||
|
||||
def load_vocab_from_file(path, vocab_type, emitter):
    """Load Mycroft vocabulary from file.

    The vocab is sent to the intent handler over the message bus as
    "register_vocab" messages, one per entity/alias.

    Args:
        path (str): path to vocabulary file (*.voc); files with any other
            extension are silently ignored
        vocab_type (str): keyword name the entities are registered under
        emitter: emitter to access the message bus
    """
    if path.endswith('.voc'):
        with open(path, 'r') as voc_file:
            for line in voc_file.readlines():
                # Each line is "entity|alias1|alias2|..."; the first field
                # is the canonical entity, the rest are aliases of it.
                parts = line.strip().split("|")
                entity = parts[0]
                emitter.emit(Message("register_vocab", {
                    'start': entity, 'end': vocab_type
                }))
                for alias in parts[1:]:
                    emitter.emit(Message("register_vocab", {
                        'start': alias, 'end': vocab_type, 'alias_of': entity
                    }))
|
||||
|
||||
|
||||
def load_regex_from_file(path, emitter, skill_id):
    """Load regex from file.

    Each line of the file is munged (named match groups get the skill id
    prefix) and sent to the intent handler over the message bus as a
    "register_vocab" message.

    Args:
        path (str): path to regex file (*.rx); files with any other
            extension are silently ignored
        emitter: emitter to access the message bus
        skill_id (int): skill identifier used to prefix match-group names
    """
    if path.endswith('.rx'):
        with open(path, 'r') as reg_file:
            for line in reg_file.readlines():
                # Validate the munged pattern first; an invalid regex
                # raises here instead of being sent to the intent service.
                re.compile(munge_regex(line.strip(), skill_id))
                emitter.emit(
                    Message("register_vocab",
                            {'regex': munge_regex(line.strip(), skill_id)}))
|
||||
|
||||
|
||||
def load_vocabulary(basedir, emitter, skill_id):
    """Load vocabulary from all files in the specified directory.

    Args:
        basedir (str): path of directory to load from
        emitter (messagebus emitter): websocket used to send the vocab to
                                      the intent service
        skill_id: skill the data belongs to
    """
    # Letterified skill id keeps keyword names unique per skill.
    prefix = to_letters(skill_id)
    for entry in listdir(basedir):
        if not entry.endswith('.voc'):
            continue
        load_vocab_from_file(join(basedir, entry),
                             prefix + splitext(entry)[0], emitter)
|
||||
|
||||
|
||||
def load_regex(basedir, emitter, skill_id):
    """Load regex from all files in the specified directory.

    Args:
        basedir (str): path of directory to load from
        emitter (messagebus emitter): websocket used to send the vocab to
                                      the intent service
        skill_id (int): skill identifier
    """
    for entry in listdir(basedir):
        if not entry.endswith('.rx'):
            continue
        load_regex_from_file(join(basedir, entry), emitter, skill_id)
|
||||
|
||||
|
||||
def to_letters(number):
    """Convert number to string of letters.

    Each decimal digit maps to an uppercase letter (0 -> A, 1 -> B, ...,
    9 -> J); a leading minus sign is discarded.

    Args:
        number (int): number to be converted
    Returns:
        (str) String of letters
    """
    return ''.join(chr(ord('A') + int(digit))
                   for digit in str(number).strip('-'))
|
||||
|
||||
|
||||
def munge_regex(regex, skill_id):
    """Insert skill id as letters into match groups.

    Every named group opener '(?P<' gets the letterified skill id
    appended, so '(?P<Name>' becomes '(?P<XName>' for skill id X.

    Args:
        regex (str): regex string
        skill_id (int): skill identifier
    Returns:
        (str) munged regex
    """
    return regex.replace('(?P<', '(?P<' + to_letters(skill_id))
|
||||
|
||||
|
||||
def munge_intent_parser(intent_parser, name, skill_id):
    """Rename intent keywords to make them skill exclusive.

    This gives the intent parser an exclusive name in the
    format <skill_id>:<name>. The keywords are given unique
    names in the format <Skill id as letters><Intent name>.

    Args:
        intent_parser: (IntentParser) object to update
        name: (str) Skill name
        skill_id: (int) skill identifier
    """
    # Munge parser name
    intent_parser.name = str(skill_id) + ':' + name

    # Letterified skill id used as keyword prefix
    prefix = to_letters(skill_id)

    # Munge required keywords
    intent_parser.requires = [(prefix + kw[0], prefix + kw[0])
                              for kw in intent_parser.requires]

    # Munge optional keywords
    intent_parser.optional = [(prefix + kw[0], prefix + kw[0])
                              for kw in intent_parser.optional]

    # Munge at_least_one keyword groups
    intent_parser.at_least_one = [tuple(prefix + kw for kw in group)
                                  for group in intent_parser.at_least_one]
|
|
@ -24,9 +24,10 @@ from datetime import datetime
|
|||
|
||||
from mycroft.configuration import Configuration
|
||||
from mycroft.messagebus.message import Message
|
||||
from mycroft.skills.core import load_regex_from_file, load_regex, \
|
||||
load_vocab_from_file, load_vocabulary, MycroftSkill, \
|
||||
load_skill, create_skill_descriptor, open_intent_envelope
|
||||
from mycroft.skills.skill_data import load_regex_from_file, load_regex, \
|
||||
load_vocab_from_file, load_vocabulary
|
||||
from mycroft.skills.core import MycroftSkill, load_skill, \
|
||||
create_skill_descriptor, open_intent_envelope
|
||||
|
||||
|
||||
class MockEmitter(object):
|
||||
|
@ -68,17 +69,17 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
|
||||
def check_regex_from_file(self, filename, result_list=None):
|
||||
result_list = result_list or []
|
||||
load_regex_from_file(join(self.regex_path, filename), self.emitter)
|
||||
load_regex_from_file(join(self.regex_path, filename), self.emitter, 0)
|
||||
self.check_emitter(result_list)
|
||||
|
||||
def check_vocab(self, path, result_list=None):
|
||||
result_list = result_list or []
|
||||
load_vocabulary(path, self.emitter)
|
||||
load_vocabulary(path, self.emitter, 0)
|
||||
self.check_emitter(result_list)
|
||||
|
||||
def check_regex(self, path, result_list=None):
|
||||
result_list = result_list or []
|
||||
load_regex(path, self.emitter)
|
||||
load_regex(path, self.emitter, 0)
|
||||
self.check_emitter(result_list)
|
||||
|
||||
def check_emitter(self, result_list):
|
||||
|
@ -90,12 +91,12 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
|
||||
def test_load_regex_from_file_single(self):
|
||||
self.check_regex_from_file('valid/single.rx',
|
||||
[{'regex': '(?P<SingleTest>.*)'}])
|
||||
[{'regex': '(?P<ASingleTest>.*)'}])
|
||||
|
||||
def test_load_regex_from_file_multiple(self):
|
||||
self.check_regex_from_file('valid/multiple.rx',
|
||||
[{'regex': '(?P<MultipleTest1>.*)'},
|
||||
{'regex': '(?P<MultipleTest2>.*)'}])
|
||||
[{'regex': '(?P<AMultipleTest1>.*)'},
|
||||
{'regex': '(?P<AMultipleTest2>.*)'}])
|
||||
|
||||
def test_load_regex_from_file_none(self):
|
||||
self.check_regex_from_file('invalid/none.rx')
|
||||
|
@ -115,9 +116,9 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
|
||||
def test_load_regex_full(self):
|
||||
self.check_regex(join(self.regex_path, 'valid'),
|
||||
[{'regex': '(?P<MultipleTest1>.*)'},
|
||||
{'regex': '(?P<MultipleTest2>.*)'},
|
||||
{'regex': '(?P<SingleTest>.*)'}])
|
||||
[{'regex': '(?P<AMultipleTest1>.*)'},
|
||||
{'regex': '(?P<AMultipleTest2>.*)'},
|
||||
{'regex': '(?P<ASingleTest>.*)'}])
|
||||
|
||||
def test_load_regex_empty(self):
|
||||
self.check_regex(join(dirname(__file__),
|
||||
|
@ -165,17 +166,17 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
|
||||
def test_load_vocab_full(self):
|
||||
self.check_vocab(join(self.vocab_path, 'valid'),
|
||||
[{'start': 'test', 'end': 'single'},
|
||||
{'start': 'water', 'end': 'singlealias'},
|
||||
{'start': 'watering', 'end': 'singlealias',
|
||||
[{'start': 'test', 'end': 'Asingle'},
|
||||
{'start': 'water', 'end': 'Asinglealias'},
|
||||
{'start': 'watering', 'end': 'Asinglealias',
|
||||
'alias_of': 'water'},
|
||||
{'start': 'animal', 'end': 'multiple'},
|
||||
{'start': 'animals', 'end': 'multiple'},
|
||||
{'start': 'chair', 'end': 'multiplealias'},
|
||||
{'start': 'chairs', 'end': 'multiplealias',
|
||||
{'start': 'animal', 'end': 'Amultiple'},
|
||||
{'start': 'animals', 'end': 'Amultiple'},
|
||||
{'start': 'chair', 'end': 'Amultiplealias'},
|
||||
{'start': 'chairs', 'end': 'Amultiplealias',
|
||||
'alias_of': 'chair'},
|
||||
{'start': 'table', 'end': 'multiplealias'},
|
||||
{'start': 'tables', 'end': 'multiplealias',
|
||||
{'start': 'table', 'end': 'Amultiplealias'},
|
||||
{'start': 'tables', 'end': 'Amultiplealias',
|
||||
'alias_of': 'table'}])
|
||||
|
||||
def test_load_vocab_empty(self):
|
||||
|
@ -219,7 +220,7 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
expected = [{'at_least_one': [],
|
||||
'name': '0:a',
|
||||
'optional': [],
|
||||
'requires': [('Keyword', 'Keyword')]}]
|
||||
'requires': [('AKeyword', 'AKeyword')]}]
|
||||
self.check_register_intent(expected)
|
||||
|
||||
# Test register IntentBuilder object
|
||||
|
@ -229,7 +230,7 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
expected = [{'at_least_one': [],
|
||||
'name': '0:a',
|
||||
'optional': [],
|
||||
'requires': [('Keyword', 'Keyword')]}]
|
||||
'requires': [('AKeyword', 'AKeyword')]}]
|
||||
|
||||
self.check_register_intent(expected)
|
||||
|
||||
|
@ -290,7 +291,7 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
expected = [{'at_least_one': [],
|
||||
'name': '0:a',
|
||||
'optional': [],
|
||||
'requires': [('Keyword', 'Keyword')]},
|
||||
'requires': [('AKeyword', 'AKeyword')]},
|
||||
{
|
||||
'file_name': join(dirname(__file__), 'intent_file',
|
||||
'test.intent'),
|
||||
|
@ -320,17 +321,17 @@ class MycroftSkillTest(unittest.TestCase):
|
|||
s.bind(self.emitter)
|
||||
# No context content
|
||||
s.set_context('TurtlePower')
|
||||
expected = [{'context': 'TurtlePower', 'word': ''}]
|
||||
expected = [{'context': 'ATurtlePower', 'word': ''}]
|
||||
check_set_context(expected)
|
||||
|
||||
# context with content
|
||||
s.set_context('Technodrome', 'Shredder')
|
||||
expected = [{'context': 'Technodrome', 'word': 'Shredder'}]
|
||||
expected = [{'context': 'ATechnodrome', 'word': 'Shredder'}]
|
||||
check_set_context(expected)
|
||||
|
||||
# UTF-8 context
|
||||
s.set_context(u'Smörgåsbord€15')
|
||||
expected = [{'context': u'Smörgåsbord€15', 'word': ''}]
|
||||
expected = [{'context': u'ASmörgåsbord€15', 'word': ''}]
|
||||
check_set_context(expected)
|
||||
|
||||
self.emitter.reset()
|
||||
|
|
Loading…
Reference in New Issue