# Copyright 2017 Mycroft AI Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import time from adapt.context import ContextManagerFrame from adapt.engine import IntentDeterminationEngine from adapt.intent import IntentBuilder from mycroft.configuration import Configuration from mycroft.messagebus.message import Message from mycroft.util.lang import set_active_lang from mycroft.util.log import LOG from mycroft.util.parse import normalize from mycroft.metrics import report_timing, Stopwatch from mycroft.skills.padatious_service import PadatiousService from .intent_service_interface import open_intent_envelope class AdaptIntent(IntentBuilder): def __init__(self, name=''): super().__init__(name) def workaround_one_of_context(best_intent): """ Handle Adapt issue with context injection combined with one_of. For all entries in the intent result where the value is None try to populate using a value from the __tags__ structure. """ for key in best_intent: if best_intent[key] is None: for t in best_intent['__tags__']: if key in t: best_intent[key] = t[key][0]['entities'][0]['key'] return best_intent class ContextManager: """ ContextManager Use to track context throughout the course of a conversational session. How to manage a session's lifecycle is not captured here. """ def __init__(self, timeout): self.frame_stack = [] self.timeout = timeout * 60 # minutes to seconds def clear_context(self): self.frame_stack = [] def remove_context(self, context_id): self.frame_stack = [(f, t) for (f, t) in self.frame_stack if context_id in f.entities[0].get('data', [])] def inject_context(self, entity, metadata=None): """ Args: entity(object): Format example... {'data': 'Entity tag as ', 'key': 'entity proper name as ', 'confidence': ' } metadata(object): dict, arbitrary metadata about entity injected """ metadata = metadata or {} try: if len(self.frame_stack) > 0: top_frame = self.frame_stack[0] else: top_frame = None if top_frame and top_frame[0].metadata_matches(metadata): top_frame[0].merge_context(entity, metadata) else: frame = ContextManagerFrame(entities=[entity], metadata=metadata.copy()) self.frame_stack.insert(0, (frame, time.time())) except (IndexError, KeyError): pass def get_context(self, max_frames=None, missing_entities=None): """ Constructs a list of entities from the context. Args: max_frames(int): maximum number of frames to look back missing_entities(list of str): a list or set of tag names, as strings Returns: list: a list of entities """ missing_entities = missing_entities or [] relevant_frames = [frame[0] for frame in self.frame_stack if time.time() - frame[1] < self.timeout] if not max_frames or max_frames > len(relevant_frames): max_frames = len(relevant_frames) missing_entities = list(missing_entities) context = [] last = '' depth = 0 for i in range(max_frames): frame_entities = [entity.copy() for entity in relevant_frames[i].entities] for entity in frame_entities: entity['confidence'] = entity.get('confidence', 1.0) \ / (2.0 + depth) context += frame_entities # Update depth if entity['origin'] != last or entity['origin'] == '': depth += 1 last = entity['origin'] print(depth) result = [] if len(missing_entities) > 0: for entity in context: if entity.get('data') in missing_entities: result.append(entity) # NOTE: this implies that we will only ever get one # of an entity kind from context, unless specified # multiple times in missing_entities. Cannot get # an arbitrary number of an entity kind. missing_entities.remove(entity.get('data')) else: result = context # Only use the latest instance of each keyword stripped = [] processed = [] for f in result: keyword = f['data'][0][1] if keyword not in processed: stripped.append(f) processed.append(keyword) result = stripped return result class IntentService: def __init__(self, bus): self.config = Configuration.get().get('context', {}) self.engine = IntentDeterminationEngine() # Dictionary for translating a skill id to a name self.skill_names = {} # Context related intializations self.context_keywords = self.config.get('keywords', []) self.context_max_frames = self.config.get('max_frames', 3) self.context_timeout = self.config.get('timeout', 2) self.context_greedy = self.config.get('greedy', False) self.context_manager = ContextManager(self.context_timeout) self.bus = bus self.bus.on('register_vocab', self.handle_register_vocab) self.bus.on('register_intent', self.handle_register_intent) self.bus.on('recognizer_loop:utterance', self.handle_utterance) self.bus.on('detach_intent', self.handle_detach_intent) self.bus.on('detach_skill', self.handle_detach_skill) # Context related handlers self.bus.on('add_context', self.handle_add_context) self.bus.on('remove_context', self.handle_remove_context) self.bus.on('clear_context', self.handle_clear_context) # Converse method self.bus.on('skill.converse.response', self.handle_converse_response) self.bus.on('skill.converse.error', self.handle_converse_error) self.bus.on('mycroft.speech.recognition.unknown', self.reset_converse) self.bus.on('mycroft.skills.loaded', self.update_skill_name_dict) def add_active_skill_handler(message): self.add_active_skill(message.data['skill_id']) self.bus.on('active_skill_request', add_active_skill_handler) self.active_skills = [] # [skill_id , timestamp] self.converse_timeout = 5 # minutes to prune active_skills self.waiting_for_converse = False self.converse_result = False self.converse_skill_id = "" def update_skill_name_dict(self, message): """ Messagebus handler, updates dictionary of if to skill name conversions. """ self.skill_names[message.data['id']] = message.data['name'] def get_skill_name(self, skill_id): """ Get skill name from skill ID. Args: skill_id: a skill id as encoded in Intent handlers. Returns: (str) Skill name or the skill id if the skill wasn't found """ return self.skill_names.get(skill_id, skill_id) def reset_converse(self, message): """Let skills know there was a problem with speech recognition""" lang = message.data.get('lang', "en-us") set_active_lang(lang) for skill in self.active_skills: self.do_converse(None, skill[0], lang) def do_converse(self, utterances, skill_id, lang): self.waiting_for_converse = True self.converse_result = False self.converse_skill_id = skill_id self.bus.emit(Message("skill.converse.request", { "skill_id": skill_id, "utterances": utterances, "lang": lang})) start_time = time.time() t = 0 while self.waiting_for_converse and t < 5: t = time.time() - start_time time.sleep(0.1) self.waiting_for_converse = False self.converse_skill_id = "" return self.converse_result def handle_converse_error(self, message): skill_id = message.data["skill_id"] if message.data["error"] == "skill id does not exist": self.remove_active_skill(skill_id) if skill_id == self.converse_skill_id: self.converse_result = False self.waiting_for_converse = False def handle_converse_response(self, message): skill_id = message.data["skill_id"] if skill_id == self.converse_skill_id: self.converse_result = message.data.get("result", False) self.waiting_for_converse = False def remove_active_skill(self, skill_id): for skill in self.active_skills: if skill[0] == skill_id: self.active_skills.remove(skill) def add_active_skill(self, skill_id): # search the list for an existing entry that already contains it # and remove that reference self.remove_active_skill(skill_id) # add skill with timestamp to start of skill_list self.active_skills.insert(0, [skill_id, time.time()]) def update_context(self, intent): """ Updates context with keyword from the intent. NOTE: This method currently won't handle one_of intent keywords since it's not using quite the same format as other intent keywords. This is under investigation in adapt, PR pending. Args: intent: Intent to scan for keywords """ for tag in intent['__tags__']: if 'entities' not in tag: continue context_entity = tag['entities'][0] if self.context_greedy: self.context_manager.inject_context(context_entity) elif context_entity['data'][0][1] in self.context_keywords: self.context_manager.inject_context(context_entity) def send_metrics(self, intent, context, stopwatch): """ Send timing metrics to the backend. NOTE: This only applies to those with Opt In. """ ident = context['ident'] if 'ident' in context else None if intent: # Recreate skill name from skill id parts = intent.get('intent_type', '').split(':') intent_type = self.get_skill_name(parts[0]) if len(parts) > 1: intent_type = ':'.join([intent_type] + parts[1:]) report_timing(ident, 'intent_service', stopwatch, {'intent_type': intent_type}) else: report_timing(ident, 'intent_service', stopwatch, {'intent_type': 'intent_failure'}) def handle_utterance(self, message): """ Main entrypoint for handling user utterances with Mycroft skills Monitor the messagebus for 'recognizer_loop:utterance', typically generated by a spoken interaction but potentially also from a CLI or other method of injecting a 'user utterance' into the system. Utterances then work through this sequence to be handled: 1) Active skills attempt to handle using converse() 2) Padatious high match intents (conf > 0.95) 3) Adapt intent handlers 5) Fallbacks: - Padatious near match intents (conf > 0.8) - General fallbacks - Padatious loose match intents (conf > 0.5) - Unknown intent handler Args: message (Message): The messagebus data """ try: # Get language of the utterance lang = message.data.get('lang', "en-us") set_active_lang(lang) utterances = message.data.get('utterances', []) # normalize() changes "it's a boy" to "it is a boy", etc. norm_utterances = [normalize(u.lower(), remove_articles=False) for u in utterances] # Build list with raw utterance(s) first, then optionally a # normalized version following. combined = utterances + list(set(norm_utterances) - set(utterances)) LOG.debug("Utterances: {}".format(combined)) stopwatch = Stopwatch() intent = None padatious_intent = None with stopwatch: # Give active skills an opportunity to handle the utterance converse = self._converse(combined, lang) if not converse: # No conversation, use intent system to handle utterance intent = self._adapt_intent_match(utterances, norm_utterances, lang) for utt in combined: _intent = PadatiousService.instance.calc_intent(utt) if _intent: best = padatious_intent.conf if padatious_intent\ else 0.0 if best < _intent.conf: padatious_intent = _intent LOG.debug("Padatious intent: {}".format(padatious_intent)) LOG.debug(" Adapt intent: {}".format(intent)) if converse: # Report that converse handled the intent and return LOG.debug("Handled in converse()") ident = message.context['ident'] if message.context else None report_timing(ident, 'intent_service', stopwatch, {'intent_type': 'converse'}) return elif (intent and intent.get('confidence', 0.0) > 0.0 and not (padatious_intent and padatious_intent.conf >= 0.95)): # Send the message to the Adapt intent's handler unless # Padatious is REALLY sure it was directed at it instead. self.update_context(intent) # update active skills skill_id = intent['intent_type'].split(":")[0] self.add_active_skill(skill_id) # Adapt doesn't handle context injection for one_of keywords # correctly. Workaround this issue if possible. try: intent = workaround_one_of_context(intent) except LookupError: LOG.error('Error during workaround_one_of_context') reply = message.reply(intent.get('intent_type'), intent) else: # Allow fallback system to handle utterance # NOTE: A matched padatious_intent is handled this way, too # TODO: Need to redefine intent_failure when STT can return # multiple hypothesis -- i.e. len(utterances) > 1 reply = message.reply('intent_failure', {'utterance': utterances[0], 'norm_utt': norm_utterances[0], 'lang': lang}) self.bus.emit(reply) self.send_metrics(intent, message.context, stopwatch) except Exception as e: LOG.exception(e) def _converse(self, utterances, lang): """ Give active skills a chance at the utterance Args: utterances (list): list of utterances lang (string): 4 letter ISO language code Returns: bool: True if converse handled it, False if no skill processes it """ # check for conversation time-out self.active_skills = [skill for skill in self.active_skills if time.time() - skill[ 1] <= self.converse_timeout * 60] # check if any skill wants to handle utterance for skill in self.active_skills: if self.do_converse(utterances, skill[0], lang): # update timestamp, or there will be a timeout where # intent stops conversing whether its being used or not self.add_active_skill(skill[0]) return True return False def _adapt_intent_match(self, raw_utt, norm_utt, lang): """ Run the Adapt engine to search for an matching intent Args: raw_utt (list): list of utterances norm_utt (list): same list of utterances, normalized lang (string): language code, e.g "en-us" Returns: Intent structure, or None if no match was found. """ best_intent = None def take_best(intent, utt): nonlocal best_intent best = best_intent.get('confidence', 0.0) if best_intent else 0.0 conf = intent.get('confidence', 0.0) if conf > best: best_intent = intent # TODO - Shouldn't Adapt do this? best_intent['utterance'] = utt for idx, utt in enumerate(raw_utt): try: intents = [i for i in self.engine.determine_intent( utt, 100, include_tags=True, context_manager=self.context_manager)] if intents: take_best(intents[0], utt) # Also test the normalized version, but set the utternace to # the raw version so skill has access to original STT norm_intents = [i for i in self.engine.determine_intent( norm_utt[idx], 100, include_tags=True, context_manager=self.context_manager)] if norm_intents: take_best(norm_intents[0], utt) except Exception as e: LOG.exception(e) return best_intent def handle_register_vocab(self, message): start_concept = message.data.get('start') end_concept = message.data.get('end') regex_str = message.data.get('regex') alias_of = message.data.get('alias_of') if regex_str: self.engine.register_regex_entity(regex_str) else: self.engine.register_entity( start_concept, end_concept, alias_of=alias_of) def handle_register_intent(self, message): intent = open_intent_envelope(message) self.engine.register_intent_parser(intent) def handle_detach_intent(self, message): intent_name = message.data.get('intent_name') new_parsers = [ p for p in self.engine.intent_parsers if p.name != intent_name] self.engine.intent_parsers = new_parsers def handle_detach_skill(self, message): skill_id = message.data.get('skill_id') new_parsers = [ p for p in self.engine.intent_parsers if not p.name.startswith(skill_id)] self.engine.intent_parsers = new_parsers def handle_add_context(self, message): """ Add context Args: message: data contains the 'context' item to add optionally can include 'word' to be injected as an alias for the context item. """ entity = {'confidence': 1.0} context = message.data.get('context') word = message.data.get('word') or '' origin = message.data.get('origin') or '' # if not a string type try creating a string from it if not isinstance(word, str): word = str(word) entity['data'] = [(word, context)] entity['match'] = word entity['key'] = word entity['origin'] = origin self.context_manager.inject_context(entity) def handle_remove_context(self, message): """ Remove specific context Args: message: data contains the 'context' item to remove """ context = message.data.get('context') if context: self.context_manager.remove_context(context) def handle_clear_context(self, message): """ Clears all keywords from context """ self.context_manager.clear_context()