# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time

from adapt.context import ContextManagerFrame
from adapt.engine import IntentDeterminationEngine
from adapt.intent import IntentBuilder

from mycroft.configuration import Configuration
from mycroft.messagebus.message import Message
from mycroft.util.lang import set_active_lang
from mycroft.util.log import LOG
from mycroft.util.parse import normalize
from mycroft.metrics import report_timing, Stopwatch
from mycroft.skills.padatious_service import PadatiousService
from .intent_service_interface import open_intent_envelope


class AdaptIntent(IntentBuilder):
    def __init__(self, name=''):
        super().__init__(name)


def workaround_one_of_context(best_intent):
    """Handle Adapt issue with context injection combined with one_of.

    For all entries in the intent result where the value is None, try to
    populate the value using the __tags__ structure.
    """
    for key in best_intent:
        if best_intent[key] is None:
            for t in best_intent['__tags__']:
                if key in t:
                    best_intent[key] = t[key][0]['entities'][0]['key']
    return best_intent
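
# Illustrative sketch (not taken from Adapt's documentation): an Adapt parse
# result is a dict that mixes intent metadata with one entry per matched
# keyword, plus the raw '__tags__' list.  When one_of keywords combine badly
# with context injection, the keyword entry can come back as None even though
# the matching tag is present.  Field names beyond the ones accessed above are
# assumed for illustration:
#
#     best_intent = {
#         'intent_type': 'my-skill:HandlerIntent',
#         'confidence': 0.6,
#         'OneOfKeyword': None,            # lost by the one_of handling
#         '__tags__': [
#             {'OneOfKeyword': [{'entities': [{'key': 'turn on'}]}]},
#         ],
#     }
#
# workaround_one_of_context(best_intent) would restore
# best_intent['OneOfKeyword'] == 'turn on'.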


class ContextManager:
    """
    ContextManager
    Use to track context throughout the course of a conversational session.
    How to manage a session's lifecycle is not captured here.
    """

    def __init__(self, timeout):
        self.frame_stack = []
        self.timeout = timeout * 60  # minutes to seconds

    def clear_context(self):
        self.frame_stack = []

    def remove_context(self, context_id):
        # Keep only the frames whose entity data does NOT reference the
        # given context id
        self.frame_stack = [(f, t) for (f, t) in self.frame_stack
                            if context_id not in f.entities[0].get('data', [])]

    def inject_context(self, entity, metadata=None):
        """
        Args:
            entity(object): Format example...
                {'data': 'Entity tag as <str>',
                 'key': 'entity proper name as <str>',
                 'confidence': <float>
                }
            metadata(object): dict, arbitrary metadata about the entity
                being injected
        """
        metadata = metadata or {}
        try:
            if len(self.frame_stack) > 0:
                top_frame = self.frame_stack[0]
            else:
                top_frame = None
            if top_frame and top_frame[0].metadata_matches(metadata):
                top_frame[0].merge_context(entity, metadata)
            else:
                frame = ContextManagerFrame(entities=[entity],
                                            metadata=metadata.copy())
                self.frame_stack.insert(0, (frame, time.time()))
        except (IndexError, KeyError):
            pass

    def get_context(self, max_frames=None, missing_entities=None):
        """Construct a list of entities from the context.

        Args:
            max_frames(int): maximum number of frames to look back
            missing_entities(list of str): a list or set of tag names,
                as strings

        Returns:
            list: a list of entities
        """
        missing_entities = missing_entities or []

        relevant_frames = [frame[0] for frame in self.frame_stack if
                           time.time() - frame[1] < self.timeout]
        if not max_frames or max_frames > len(relevant_frames):
            max_frames = len(relevant_frames)

        missing_entities = list(missing_entities)
        context = []
        last = ''
        depth = 0
        for i in range(max_frames):
            frame_entities = [entity.copy() for entity in
                              relevant_frames[i].entities]
            for entity in frame_entities:
                entity['confidence'] = entity.get('confidence', 1.0) \
                    / (2.0 + depth)
            context += frame_entities

            # Update depth
            if entity['origin'] != last or entity['origin'] == '':
                depth += 1
            last = entity['origin']

        result = []
        if len(missing_entities) > 0:
            for entity in context:
                if entity.get('data') in missing_entities:
                    result.append(entity)
                    # NOTE: this implies that we will only ever get one
                    # of an entity kind from context, unless specified
                    # multiple times in missing_entities. Cannot get
                    # an arbitrary number of an entity kind.
                    missing_entities.remove(entity.get('data'))
        else:
            result = context

        # Only use the latest instance of each keyword
        stripped = []
        processed = []
        for f in result:
            keyword = f['data'][0][1]
            if keyword not in processed:
                stripped.append(f)
                processed.append(keyword)
        result = stripped
        return result
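
    # A minimal usage sketch (values are illustrative, mirroring the entity
    # shape built in IntentService.handle_add_context below):
    #
    #     cm = ContextManager(timeout=2)   # keep frames for 2 minutes
    #     cm.inject_context({
    #         'data': [('Paris', 'DestinationKeyword')],
    #         'match': 'Paris',
    #         'key': 'Paris',
    #         'origin': '',
    #         'confidence': 1.0,
    #     })
    #     cm.get_context()
    #     # -> [{'data': [('Paris', 'DestinationKeyword')], ...,
    #     #      'confidence': 0.5}]
    #
    # get_context() scales each entity's confidence by 1.0 / (2.0 + depth),
    # so fresher frames outrank older ones when Adapt resolves keywords.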


class IntentService:
    def __init__(self, bus):
        self.config = Configuration.get().get('context', {})
        self.engine = IntentDeterminationEngine()

        # Dictionary for translating a skill id to a name
        self.skill_names = {}
        # Context related initializations
        self.context_keywords = self.config.get('keywords', [])
        self.context_max_frames = self.config.get('max_frames', 3)
        self.context_timeout = self.config.get('timeout', 2)
        self.context_greedy = self.config.get('greedy', False)
        self.context_manager = ContextManager(self.context_timeout)
        self.bus = bus
        self.bus.on('register_vocab', self.handle_register_vocab)
        self.bus.on('register_intent', self.handle_register_intent)
        self.bus.on('recognizer_loop:utterance', self.handle_utterance)
        self.bus.on('detach_intent', self.handle_detach_intent)
        self.bus.on('detach_skill', self.handle_detach_skill)
        # Context related handlers
        self.bus.on('add_context', self.handle_add_context)
        self.bus.on('remove_context', self.handle_remove_context)
        self.bus.on('clear_context', self.handle_clear_context)
        # Converse method
        self.bus.on('skill.converse.response', self.handle_converse_response)
        self.bus.on('skill.converse.error', self.handle_converse_error)
        self.bus.on('mycroft.speech.recognition.unknown', self.reset_converse)
        self.bus.on('mycroft.skills.loaded', self.update_skill_name_dict)

        def add_active_skill_handler(message):
            self.add_active_skill(message.data['skill_id'])
        self.bus.on('active_skill_request', add_active_skill_handler)
        self.active_skills = []  # [skill_id, timestamp]
        self.converse_timeout = 5  # minutes to prune active_skills
        self.waiting_for_converse = False
        self.converse_result = False
        self.converse_skill_id = ""

    def update_skill_name_dict(self, message):
        """Messagebus handler, updates the dictionary of skill id to
        skill name conversions.
        """
        self.skill_names[message.data['id']] = message.data['name']

    def get_skill_name(self, skill_id):
        """Get skill name from skill ID.

        Args:
            skill_id: a skill id as encoded in Intent handlers.

        Returns:
            (str) Skill name or the skill id if the skill wasn't found
        """
        return self.skill_names.get(skill_id, skill_id)

    def reset_converse(self, message):
        """Let skills know there was a problem with speech recognition"""
        lang = message.data.get('lang', "en-us")
        set_active_lang(lang)
        for skill in self.active_skills:
            self.do_converse(None, skill[0], lang)

    def do_converse(self, utterances, skill_id, lang):
        self.waiting_for_converse = True
        self.converse_result = False
        self.converse_skill_id = skill_id
        self.bus.emit(Message("skill.converse.request", {
            "skill_id": skill_id, "utterances": utterances, "lang": lang}))
        start_time = time.time()
        t = 0
        # Wait (at most 5 seconds) for the skill to answer the request
        while self.waiting_for_converse and t < 5:
            t = time.time() - start_time
            time.sleep(0.1)
        self.waiting_for_converse = False
        self.converse_skill_id = ""
        return self.converse_result
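
    # Sketch of the converse round trip implied by the handlers above and
    # below (the skill-side handler name is illustrative, not part of this
    # module):
    #
    #     IntentService                          skill process
    #     -------------                          -------------
    #     emit "skill.converse.request"
    #       {"skill_id": ..., "utterances": [...], "lang": "en-us"}
    #                                            skill runs its converse()
    #     wait up to 5 s for either:
    #       "skill.converse.response" {"skill_id": ..., "result": True/False}
    #       "skill.converse.error"    {"skill_id": ..., "error": "..."}
    #
    # do_converse() returns the reported result, or False on timeout/error.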

    def handle_converse_error(self, message):
        skill_id = message.data["skill_id"]
        if message.data["error"] == "skill id does not exist":
            self.remove_active_skill(skill_id)
        if skill_id == self.converse_skill_id:
            self.converse_result = False
            self.waiting_for_converse = False

    def handle_converse_response(self, message):
        skill_id = message.data["skill_id"]
        if skill_id == self.converse_skill_id:
            self.converse_result = message.data.get("result", False)
            self.waiting_for_converse = False

    def remove_active_skill(self, skill_id):
        for skill in self.active_skills:
            if skill[0] == skill_id:
                self.active_skills.remove(skill)

    def add_active_skill(self, skill_id):
        # search the list for an existing entry that already contains it
        # and remove that reference
        self.remove_active_skill(skill_id)
        # add skill with timestamp to start of skill_list
        self.active_skills.insert(0, [skill_id, time.time()])

    def update_context(self, intent):
        """Update context with keywords from the intent.

        NOTE: This method currently won't handle one_of intent keywords
              since it's not using quite the same format as other intent
              keywords. This is under investigation in Adapt, PR pending.

        Args:
            intent: Intent to scan for keywords
        """
        for tag in intent['__tags__']:
            if 'entities' not in tag:
                continue
            context_entity = tag['entities'][0]
            if self.context_greedy:
                self.context_manager.inject_context(context_entity)
            elif context_entity['data'][0][1] in self.context_keywords:
                self.context_manager.inject_context(context_entity)

    def send_metrics(self, intent, context, stopwatch):
        """Send timing metrics to the backend.

        NOTE: This only applies to users who have opted in.
        """
        ident = context['ident'] if 'ident' in context else None
        if intent:
            # Recreate skill name from skill id
            parts = intent.get('intent_type', '').split(':')
            intent_type = self.get_skill_name(parts[0])
            if len(parts) > 1:
                intent_type = ':'.join([intent_type] + parts[1:])
            report_timing(ident, 'intent_service', stopwatch,
                          {'intent_type': intent_type})
        else:
            report_timing(ident, 'intent_service', stopwatch,
                          {'intent_type': 'intent_failure'})

    def handle_utterance(self, message):
        """Main entrypoint for handling user utterances with Mycroft skills.

        Monitor the messagebus for 'recognizer_loop:utterance', typically
        generated by a spoken interaction but potentially also from a CLI
        or other method of injecting a 'user utterance' into the system.

        Utterances then work through this sequence to be handled:
        1) Active skills attempt to handle using converse()
        2) Padatious high match intents (conf > 0.95)
        3) Adapt intent handlers
        4) Fallbacks:
           - Padatious near match intents (conf > 0.8)
           - General fallbacks
           - Padatious loose match intents (conf > 0.5)
           - Unknown intent handler

        Args:
            message (Message): The messagebus data
        """
        try:
            # Get language of the utterance
            lang = message.data.get('lang', "en-us")
            set_active_lang(lang)

            utterances = message.data.get('utterances', [])
            # normalize() changes "it's a boy" to "it is a boy", etc.
            norm_utterances = [normalize(u.lower(), remove_articles=False)
                               for u in utterances]

            # Build list with raw utterance(s) first, then optionally a
            # normalized version following.
            combined = utterances + list(set(norm_utterances) -
                                         set(utterances))
            LOG.debug("Utterances: {}".format(combined))

            stopwatch = Stopwatch()
            intent = None
            padatious_intent = None
            with stopwatch:
                # Give active skills an opportunity to handle the utterance
                converse = self._converse(combined, lang)

                if not converse:
                    # No conversation, use intent system to handle utterance
                    intent = self._adapt_intent_match(utterances,
                                                      norm_utterances, lang)
                    for utt in combined:
                        _intent = PadatiousService.instance.calc_intent(utt)
                        if _intent:
                            best = padatious_intent.conf if padatious_intent \
                                else 0.0
                            if best < _intent.conf:
                                padatious_intent = _intent
                LOG.debug("Padatious intent: {}".format(padatious_intent))
                LOG.debug("    Adapt intent: {}".format(intent))

            if converse:
                # Report that converse handled the intent and return
                LOG.debug("Handled in converse()")
                ident = message.context['ident'] if message.context else None
                report_timing(ident, 'intent_service', stopwatch,
                              {'intent_type': 'converse'})
                return
            elif (intent and intent.get('confidence', 0.0) > 0.0 and
                    not (padatious_intent and padatious_intent.conf >= 0.95)):
                # Send the message to the Adapt intent's handler unless
                # Padatious is REALLY sure it was directed at it instead.
                self.update_context(intent)
                # update active skills
                skill_id = intent['intent_type'].split(":")[0]
                self.add_active_skill(skill_id)
                # Adapt doesn't handle context injection for one_of keywords
                # correctly. Work around this issue if possible.
                try:
                    intent = workaround_one_of_context(intent)
                except LookupError:
                    LOG.error('Error during workaround_one_of_context')
                reply = message.reply(intent.get('intent_type'), intent)
            else:
                # Allow the fallback system to handle the utterance
                # NOTE: A matched padatious_intent is handled this way, too
                # TODO: Need to redefine intent_failure when STT can return
                #       multiple hypotheses -- i.e. len(utterances) > 1
                reply = message.reply('intent_failure',
                                      {'utterance': utterances[0],
                                       'norm_utt': norm_utterances[0],
                                       'lang': lang})
            self.bus.emit(reply)
            self.send_metrics(intent, message.context, stopwatch)
        except Exception as e:
            LOG.exception(e)
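
    # For reference, a 'recognizer_loop:utterance' message carries a payload
    # of roughly this shape (an illustrative example, not an exhaustive
    # schema; the ident value is made up):
    #
    #     Message('recognizer_loop:utterance',
    #             data={'utterances': ['turn on the kitchen light'],
    #                   'lang': 'en-us'},
    #             context={'ident': '1598435102.3-session'})
    #
    # 'utterances' is a list of STT hypotheses (usually one); 'ident' is only
    # used for the opt-in timing metrics above.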

    def _converse(self, utterances, lang):
        """Give active skills a chance at the utterance.

        Args:
            utterances (list): list of utterances
            lang (string): language code, e.g. "en-us"

        Returns:
            bool: True if converse handled it, False if no skill processed it
        """
        # check for conversation time-out
        self.active_skills = [skill for skill in self.active_skills
                              if time.time() - skill[1] <=
                              self.converse_timeout * 60]

        # check if any skill wants to handle the utterance
        for skill in self.active_skills:
            if self.do_converse(utterances, skill[0], lang):
                # update timestamp, or there will be a timeout where
                # the skill stops conversing whether it's being used or not
                self.add_active_skill(skill[0])
                return True
        return False

    def _adapt_intent_match(self, raw_utt, norm_utt, lang):
        """Run the Adapt engine to search for a matching intent.

        Args:
            raw_utt (list): list of utterances
            norm_utt (list): same list of utterances, normalized
            lang (string): language code, e.g. "en-us"

        Returns:
            Intent structure, or None if no match was found.
        """
        best_intent = None

        def take_best(intent, utt):
            nonlocal best_intent
            best = best_intent.get('confidence', 0.0) if best_intent else 0.0
            conf = intent.get('confidence', 0.0)
            if conf > best:
                best_intent = intent
                # TODO - Shouldn't Adapt do this?
                best_intent['utterance'] = utt

        for idx, utt in enumerate(raw_utt):
            try:
                intents = [i for i in self.engine.determine_intent(
                    utt, 100,
                    include_tags=True,
                    context_manager=self.context_manager)]
                if intents:
                    take_best(intents[0], utt)

                # Also test the normalized version, but set the utterance to
                # the raw version so the skill has access to the original STT
                norm_intents = [i for i in self.engine.determine_intent(
                    norm_utt[idx], 100,
                    include_tags=True,
                    context_manager=self.context_manager)]
                if norm_intents:
                    take_best(norm_intents[0], utt)
            except Exception as e:
                LOG.exception(e)
        return best_intent

    def handle_register_vocab(self, message):
        start_concept = message.data.get('start')
        end_concept = message.data.get('end')
        regex_str = message.data.get('regex')
        alias_of = message.data.get('alias_of')
        if regex_str:
            self.engine.register_regex_entity(regex_str)
        else:
            self.engine.register_entity(
                start_concept, end_concept, alias_of=alias_of)
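
    # Illustrative 'register_vocab' payloads (values are examples only):
    #
    #     {'start': 'weather', 'end': 'WeatherKeyword'}   # keyword entity
    #     {'regex': '(?P<Location>.*)'}                   # regex entity
    #
    # 'start' is the vocabulary word and 'end' the Adapt entity type it is
    # registered under; an optional 'alias_of' marks the word as an alias of
    # another entity type (see the register_entity() call above).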

    def handle_register_intent(self, message):
        intent = open_intent_envelope(message)
        self.engine.register_intent_parser(intent)

    def handle_detach_intent(self, message):
        intent_name = message.data.get('intent_name')
        new_parsers = [
            p for p in self.engine.intent_parsers if p.name != intent_name]
        self.engine.intent_parsers = new_parsers

    def handle_detach_skill(self, message):
        skill_id = message.data.get('skill_id')
        new_parsers = [
            p for p in self.engine.intent_parsers if
            not p.name.startswith(skill_id)]
        self.engine.intent_parsers = new_parsers

    def handle_add_context(self, message):
        """Add context.

        Args:
            message: data contains the 'context' item to add
                     optionally can include 'word' to be injected as
                     an alias for the context item.
        """
        entity = {'confidence': 1.0}
        context = message.data.get('context')
        word = message.data.get('word') or ''
        origin = message.data.get('origin') or ''
        # if not a string type try creating a string from it
        if not isinstance(word, str):
            word = str(word)
        entity['data'] = [(word, context)]
        entity['match'] = word
        entity['key'] = word
        entity['origin'] = origin
        self.context_manager.inject_context(entity)
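
    # Example 'add_context' message a skill might emit (skill and keyword
    # names are illustrative):
    #
    #     Message('add_context', {'context': 'DestinationKeyword',
    #                             'word': 'Paris',
    #                             'origin': 'travel-skill'})
    #
    # This makes 'Paris' available to Adapt as a DestinationKeyword entity
    # for subsequent utterances, until the context frame times out.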

    def handle_remove_context(self, message):
        """Remove specific context.

        Args:
            message: data contains the 'context' item to remove
        """
        context = message.data.get('context')
        if context:
            self.context_manager.remove_context(context)

    def handle_clear_context(self, message):
        """Clears all keywords from context."""
        self.context_manager.clear_context()