Fixes issue #539

The utterance is now placed on the bus along with its language code.  If not specified, it uses "en-us".

Added a new mycroft.util.parse module.  It contains the normalize() function.  Normalization currently does three things:
  * Expands contractions ("they're" -> "they are", etc)
  * Optionally removes articles ("a", "an", "the").  Removing is the default.
  * Textual numbers become digits, up to 20.  E.g. "What is the weather in four days" becomes "What is weather in 4 days".

NOTE:  This is potentially a breaking change!  Remove "the", "a" and "an" from your .voc files!

Skill changes:
  * I cleaned up the .voc files for the default Skills.
  * Split the date_time keyword into an extra entity.  A "QueryKeyword.voc" now exists, containing "what|tell", instead of combining that into "what is time" in TimeKeyword.voc.
  * Volume skill now accepts 1-11, e.g. "turn volume to 11"
pull/531/head
penrods 2017-02-23 04:40:46 -08:00 committed by Augusto Monteiro
parent 621faef118
commit cfa79e03a2
20 changed files with 349 additions and 48 deletions

View File

@ -130,11 +130,13 @@ class AudioConsumer(Thread):
elif connected():
self.transcribe(audio)
else:
# TODO: Localization
self.__speak("Mycroft seems not to be connected to the Internet")
def transcribe(self, audio):
text = None
try:
# Invoke the STT engine on the audio clip
text = self.stt.execute(audio).lower().strip()
LOG.debug("STT: " + text)
except sr.RequestError as e:
@ -148,8 +150,10 @@ class AudioConsumer(Thread):
LOG.error("Speech Recognition could not understand audio")
self.__speak("Sorry, I didn't catch that")
if text:
# STT succeeded, send the transcribed speech on for processing
payload = {
'utterances': [text],
'lang': self.stt.lang,
'session': SessionManager.get().session_id
}
self.emitter.emit("recognizer_loop:utterance", payload)
@ -188,6 +192,7 @@ class RecognizerLoop(EventEmitter):
self.state = RecognizerLoopState()
def create_mycroft_recognizer(self, rate, lang):
# Create a local recognizer to hear the wakeup word, e.g. 'Hey Mycroft'
wake_word = self.config.get('wake_word')
phonemes = self.config.get('phonemes')
threshold = self.config.get('threshold')
@ -195,6 +200,8 @@ class RecognizerLoop(EventEmitter):
@staticmethod
def create_wakeup_recognizer(rate, lang):
# Create a local recognizer to come out of sleep with 'wake up'
# TODO - localization
return LocalRecognizer("wake up", "W EY K . AH P", 1e-10, rate, lang)
def start_async(self):

View File

@ -43,8 +43,8 @@ class TimeSkill(MycroftSkill):
self.format = "%I:%M, %p"
def initialize(self):
intent = IntentBuilder("TimeIntent").require("TimeKeyword") \
.optionally("Location").build()
intent = IntentBuilder("TimeIntent").require("QueryKeyword") \
.require("TimeKeyword").optionally("Location").build()
self.register_intent(intent, self.handle_intent)
def get_timezone(self, locale):

View File

@ -1,4 +1,2 @@
what's the date
whats the date
what day is it
what is the date
date
day

View File

@ -0,0 +1,2 @@
what
tell

View File

@ -1,6 +1 @@
what time is it
what is the time
what's the time
whats the time
what time is
time is it
time

View File

@ -21,6 +21,7 @@ from adapt.engine import IntentDeterminationEngine
from mycroft.messagebus.message import Message
from mycroft.skills.core import open_intent_envelope, MycroftSkill
from mycroft.util.log import getLogger
from mycroft.util.parser import normalize
__author__ = 'seanfitz'
@ -40,13 +41,20 @@ class IntentSkill(MycroftSkill):
self.emitter.on('detach_intent', self.handle_detach_intent)
def handle_utterance(self, message):
# Get language of the utterance
lang = message.data.get('lang', None)
if not lang:
lang = "en-us"
utterances = message.data.get('utterances', '')
best_intent = None
for utterance in utterances:
try:
# normalize() changes "it's a boy" to "it is boy", etc.
best_intent = next(self.engine.determine_intent(
utterance, 100))
normalize(utterance, lang), 100))
# TODO - Should Adapt handle this?
best_intent['utterance'] = utterance
except StopIteration, e:

View File

@ -1,4 +1,4 @@
joke
make me laugh
brighten my day
tell me a joke
tell me joke

View File

@ -1,6 +1,4 @@
what's currently playing
what is currently playing
what are you playing
What are we listening to
what's playing
what are we listening to
what is playing

View File

@ -1,2 +1,2 @@
news
tell me the news
tell me news

View File

@ -1,2 +1 @@
who are you
who're you

View File

@ -1,2 +1,4 @@
who made you
who were you made by
who created you
who built you

View File

@ -1,5 +1,5 @@
all|all my
1|one
2|two
the next
the following
1
2
next
following

View File

@ -3,4 +3,4 @@ notify
notify me
remind me
reminder
set a reminder
set reminder

View File

@ -1,4 +1,4 @@
spell
spell the word
spell word
spelling of
spelling of the word
spelling of word

View File

@ -1,6 +1,15 @@
0|zero
1|one
2|two
0
1
2
3
4
5
6
7
8
9
10
11
quiet
normal
loud

View File

@ -1,9 +1,5 @@
tomorrow
1 day
in 1 day
one day
in one day
next day
the next day
following day
the following day

View File

@ -1,14 +1,10 @@
next hour
the next hour
in the next hour
in next hour
next hours
the next hours
in the next hours
in next hours
few hours
next few hours
the next few hours
in the next few hours
in next few hours
couple of hours
next couple of hours
the next couple of hours
in the next couple of hours
in next couple of hours

View File

@ -70,8 +70,8 @@ class GoogleSTT(TokenSTT):
super(GoogleSTT, self).__init__()
def execute(self, audio, language=None):
language = language or self.lang
return self.recognizer.recognize_google(audio, self.token, language)
self.lang = language or self.lang
return self.recognizer.recognize_google(audio, self.token, s)
class WITSTT(TokenSTT):
@ -88,9 +88,9 @@ class IBMSTT(BasicSTT):
super(IBMSTT, self).__init__()
def execute(self, audio, language=None):
language = language or self.lang
self.lang = language or self.lang
return self.recognizer.recognize_ibm(audio, self.username,
self.password, language)
self.password, self.lang)
class MycroftSTT(STT):
@ -99,8 +99,8 @@ class MycroftSTT(STT):
self.api = STTApi()
def execute(self, audio, language=None):
language = language or self.lang
return self.api.stt(audio.get_flac_data(), language, 1)[0]
self.lang = language or self.lang
return self.api.stt(audio.get_flac_data(), self.lang, 1)[0]
class STTFactory(object):

96
mycroft/util/parse.py Normal file
View File

@ -0,0 +1,96 @@
# Copyright 2017 Mycroft AI, Inc.
#
# This file is part of Mycroft Core.
#
# Mycroft Core is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mycroft Core is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
def normalize(text, lang="en-us", remove_articles=True):
    """Prepare a string for parsing.

    Dispatches to the language-specific normalizer, which makes numbers
    consistent, expands contractions, etc.

    Args:
        text: the string to normalize.
        lang: language code of ``text``; any code starting with "en"
            (compared case-insensitively) is handled as English.
        remove_articles: passed through to the English normalizer; when
            true, "a", "an" and "the" are dropped.

    Returns:
        The normalized string, or ``text`` unchanged when no normalizer
        exists for ``lang``.
    """
    language_code = str(lang).lower()
    if not language_code.startswith("en"):
        # TODO: Normalization for other languages
        return text
    return normalize_en(text, remove_articles)
# Articles dropped by normalize_en() when remove_articles is true.
_EN_ARTICLES = frozenset(["the", "a", "an"])

# Contraction -> expansion, e.g. "isn't" -> "is not".  Built once at import
# time instead of once per word.  Several expansions are ambiguous ("he'd"
# could be "he had"); the most common reading is used.
_EN_CONTRACTIONS = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "can not",
    "could've": "could have",
    "couldn't": "could not",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "gonna": "going to",
    "gotta": "got to",
    "hadn't": "had not",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'll": "he will",
    "he's": "he is",
    "how'd": "how did",
    "how'll": "how will",
    "how's": "how is",
    "I'd": "I would",
    "I'll": "I will",
    "I'm": "I am",
    "I've": "I have",
    # Lower-case spellings of the "I" contractions: transcripts coming from
    # speech recognition are typically lower-cased, in which case the
    # capitalised keys above would never match.
    "i'd": "i would",
    "i'll": "i will",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'll": "it will",
    "it's": "it is",
    "mightn't": "might not",
    "might've": "might have",
    "mustn't": "must not",
    "must've": "must have",
    "needn't": "need not",
    "oughtn't": "ought not",
    "shan't": "shall not",
    "she'd": "she would",
    "she'll": "she will",
    "she's": "she is",
    "shouldn't": "should not",
    "should've": "should have",
    "somebody's": "somebody is",
    "someone'd": "someone would",
    "someone'll": "someone will",
    "someone's": "someone is",
    "that'll": "that will",
    "that's": "that is",
    "that'd": "that would",
    "there'd": "there would",
    "there're": "there are",
    "there's": "there is",
    "they'd": "they would",
    "they'll": "they will",
    "they're": "they are",
    "they've": "they have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'll": "we will",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'd": "what did",
    "what'll": "what will",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when'd": "when did",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'd": "who would",
    "who'd've": "who would have",
    "who'll": "who will",
    "who're": "who are",
    "who's": "who is",
    "who've": "who have",
    "why'd": "why did",
    "why're": "why are",
    "why's": "why is",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "ya'll": "you all",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "y'aint": "you are not",
    "y'ain't": "you are not",
    "you're": "you are",
    "you've": "you have",
}

# Number word -> digit string, "zero" .. "twenty"; a word's position in the
# list is its numeric value.
_EN_NUMBERS = dict(
    (number_word, str(value)) for value, number_word in enumerate([
        "zero", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
        "twenty"]))


def normalize_en(text, remove_articles):
    """English string normalization.

    Works word by word on the whitespace-separated tokens of ``text``:
      * optionally drops the articles "a", "an" and "the",
      * expands common contractions, e.g. "isn't" -> "is not",
      * converts the number words "zero".."twenty" to digits.

    Matching is exact (case-sensitive, no punctuation stripping), apart
    from the lower-case "i'm"-style contractions listed in the table.

    Args:
        text: the string to normalize.
        remove_articles: when true, articles are removed from the result.

    Returns:
        The normalized words joined by single spaces; leading, trailing
        and repeated whitespace is removed.  Empty input gives "".
    """
    normalized = []
    for word in text.split():  # split() also discards extra whitespace
        if remove_articles and word in _EN_ARTICLES:
            continue
        # Expand first, then convert: no expansion is itself a number word,
        # so the order only matters for keeping the lookups independent.
        word = _EN_CONTRACTIONS.get(word, word)
        word = _EN_NUMBERS.get(word, word)
        normalized.append(word)
    return " ".join(normalized)

195
test/util/test_parse.py Normal file
View File

@ -0,0 +1,195 @@
import unittest
from mycroft.util.parse import normalize
class TestNormalize(unittest.TestCase):
    """Tests for mycroft.util.parse.normalize() with English input.

    Unless a test passes remove_articles=False, normalize() runs with its
    default remove_articles=True, so "a", "an" and "the" are absent from
    the expected strings.
    """

    def test_articles(self):
        self.assertEqual(normalize("this is a test", remove_articles=True),
                         "this is test")
        self.assertEqual(normalize("this is the test", remove_articles=True),
                         "this is test")
        # Words that merely start with an article must be left alone
        self.assertEqual(normalize("and another test", remove_articles=True),
                         "and another test")
        self.assertEqual(normalize("this is an extra test",
                                   remove_articles=False),
                         "this is an extra test")

    def test_spaces(self):
        # Leading, trailing and repeated whitespace collapses to single
        # spaces between words.
        self.assertEqual(normalize("  this   is  a    test"),
                         "this is test")
        self.assertEqual(normalize("  this   is  a    test  "),
                         "this is test")
        self.assertEqual(normalize("  this   is  one    test"),
                         "this is 1 test")

    def test_numbers(self):
        self.assertEqual(normalize("this is a one two three  test"),
                         "this is 1 2 3 test")
        self.assertEqual(normalize("  it's  a four five six  test"),
                         "it is 4 5 6 test")
        self.assertEqual(normalize("it's  a seven eight nine test"),
                         "it is 7 8 9 test")
        self.assertEqual(normalize("it's a seven eight nine test",
                                   remove_articles=False),
                         "it is a 7 8 9 test")
        self.assertEqual(normalize("that's a ten eleven twelve test"),
                         "that is 10 11 12 test")
        self.assertEqual(normalize("that's a thirteen fourteen test"),
                         "that is 13 14 test")
        self.assertEqual(normalize("that's fifteen sixteen seventeen"),
                         "that is 15 16 17")
        self.assertEqual(normalize("that's eighteen nineteen twenty"),
                         "that is 18 19 20")

    def test_contractions(self):
        self.assertEqual(normalize("ain't"), "is not")
        self.assertEqual(normalize("aren't"), "are not")
        self.assertEqual(normalize("can't"), "can not")
        self.assertEqual(normalize("could've"), "could have")
        self.assertEqual(normalize("couldn't"), "could not")
        self.assertEqual(normalize("didn't"), "did not")
        self.assertEqual(normalize("doesn't"), "does not")
        self.assertEqual(normalize("don't"), "do not")
        self.assertEqual(normalize("gonna"), "going to")
        self.assertEqual(normalize("gotta"), "got to")
        self.assertEqual(normalize("hadn't"), "had not")
        self.assertEqual(normalize("hadn't have"), "had not have")
        self.assertEqual(normalize("hasn't"), "has not")
        self.assertEqual(normalize("haven't"), "have not")
        # TODO: Ambiguous with "he had"
        self.assertEqual(normalize("he'd"), "he would")
        self.assertEqual(normalize("he'll"), "he will")
        # TODO: Ambiguous with "he has"
        self.assertEqual(normalize("he's"), "he is")
        # TODO: Ambiguous with "how would"
        self.assertEqual(normalize("how'd"), "how did")
        self.assertEqual(normalize("how'll"), "how will")
        # TODO: Ambiguous with "how has" and "how does"
        self.assertEqual(normalize("how's"), "how is")
        # TODO: Ambiguous with "I had"
        self.assertEqual(normalize("I'd"), "I would")
        self.assertEqual(normalize("I'll"), "I will")
        self.assertEqual(normalize("I'm"), "I am")
        self.assertEqual(normalize("I've"), "I have")
        self.assertEqual(normalize("I haven't"), "I have not")
        self.assertEqual(normalize("isn't"), "is not")
        self.assertEqual(normalize("it'd"), "it would")
        self.assertEqual(normalize("it'll"), "it will")
        # TODO: Ambiguous with "it has"
        self.assertEqual(normalize("it's"), "it is")
        self.assertEqual(normalize("it isn't"), "it is not")
        self.assertEqual(normalize("mightn't"), "might not")
        self.assertEqual(normalize("might've"), "might have")
        self.assertEqual(normalize("mustn't"), "must not")
        self.assertEqual(normalize("mustn't have"), "must not have")
        self.assertEqual(normalize("must've"), "must have")
        self.assertEqual(normalize("needn't"), "need not")
        self.assertEqual(normalize("oughtn't"), "ought not")
        self.assertEqual(normalize("shan't"), "shall not")
        # TODO: Ambiguous with "she had"
        self.assertEqual(normalize("she'd"), "she would")
        self.assertEqual(normalize("she hadn't"), "she had not")
        self.assertEqual(normalize("she'll"), "she will")
        self.assertEqual(normalize("she's"), "she is")
        self.assertEqual(normalize("she isn't"), "she is not")
        self.assertEqual(normalize("should've"), "should have")
        self.assertEqual(normalize("shouldn't"), "should not")
        self.assertEqual(normalize("shouldn't have"), "should not have")
        self.assertEqual(normalize("somebody's"), "somebody is")
        # TODO: Ambiguous with "someone had"
        self.assertEqual(normalize("someone'd"), "someone would")
        self.assertEqual(normalize("someone hadn't"), "someone had not")
        self.assertEqual(normalize("someone'll"), "someone will")
        # TODO: Ambiguous with "someone has"
        self.assertEqual(normalize("someone's"), "someone is")
        self.assertEqual(normalize("that'll"), "that will")
        # TODO: Ambiguous with "that has"
        self.assertEqual(normalize("that's"), "that is")
        # TODO: Ambiguous with "that had"
        self.assertEqual(normalize("that'd"), "that would")
        # TODO: Ambiguous with "there had"
        self.assertEqual(normalize("there'd"), "there would")
        self.assertEqual(normalize("there're"), "there are")
        # TODO: Ambiguous with "there has"
        self.assertEqual(normalize("there's"), "there is")
        # TODO: Ambiguous with "they had"
        self.assertEqual(normalize("they'd"), "they would")
        self.assertEqual(normalize("they'll"), "they will")
        self.assertEqual(normalize("they won't have"), "they will not have")
        self.assertEqual(normalize("they're"), "they are")
        self.assertEqual(normalize("they've"), "they have")
        self.assertEqual(normalize("they haven't"), "they have not")
        self.assertEqual(normalize("wasn't"), "was not")
        # TODO: Ambiguous with "we had"
        self.assertEqual(normalize("we'd"), "we would")
        self.assertEqual(normalize("we would've"), "we would have")
        self.assertEqual(normalize("we wouldn't"), "we would not")
        self.assertEqual(normalize("we wouldn't have"), "we would not have")
        self.assertEqual(normalize("we'll"), "we will")
        self.assertEqual(normalize("we won't have"), "we will not have")
        self.assertEqual(normalize("we're"), "we are")
        self.assertEqual(normalize("we've"), "we have")
        self.assertEqual(normalize("weren't"), "were not")
        self.assertEqual(normalize("what'd"), "what did")
        self.assertEqual(normalize("what'll"), "what will")
        self.assertEqual(normalize("what're"), "what are")
        # TODO: Ambiguous with "what has" / "what does"
        self.assertEqual(normalize("what's"), "what is")
        self.assertEqual(normalize("what've"), "what have")
        # TODO: Ambiguous with "when has"
        self.assertEqual(normalize("when's"), "when is")
        self.assertEqual(normalize("where'd"), "where did")
        # TODO: Ambiguous with "where has" / "where does"
        self.assertEqual(normalize("where's"), "where is")
        self.assertEqual(normalize("where've"), "where have")
        # TODO: Ambiguous with "who had" / "who did"
        self.assertEqual(normalize("who'd"), "who would")
        self.assertEqual(normalize("who'd've"), "who would have")
        self.assertEqual(normalize("who'll"), "who will")
        self.assertEqual(normalize("who're"), "who are")
        # TODO: Ambiguous with "who has" / "who does"
        self.assertEqual(normalize("who's"), "who is")
        self.assertEqual(normalize("who've"), "who have")
        self.assertEqual(normalize("why'd"), "why did")
        self.assertEqual(normalize("why're"), "why are")
        # TODO: Ambiguous with "why has" / "why does"
        self.assertEqual(normalize("why's"), "why is")
        self.assertEqual(normalize("won't"), "will not")
        self.assertEqual(normalize("won't've"), "will not have")
        self.assertEqual(normalize("would've"), "would have")
        self.assertEqual(normalize("wouldn't"), "would not")
        self.assertEqual(normalize("wouldn't've"), "would not have")
        self.assertEqual(normalize("ya'll"), "you all")
        self.assertEqual(normalize("y'all"), "you all")
        self.assertEqual(normalize("y'ain't"), "you are not")
        # TODO: Ambiguous with "you had"
        self.assertEqual(normalize("you'd"), "you would")
        self.assertEqual(normalize("you'd've"), "you would have")
        self.assertEqual(normalize("you'll"), "you will")
        self.assertEqual(normalize("you're"), "you are")
        self.assertEqual(normalize("you aren't"), "you are not")
        self.assertEqual(normalize("you've"), "you have")
        self.assertEqual(normalize("you haven't"), "you have not")

    def test_combinations(self):
        self.assertEqual(normalize("I couldn't have guessed there'd be two"),
                         "I could not have guessed there would be 2")
        self.assertEqual(normalize("I wouldn't have"), "I would not have")
        self.assertEqual(normalize("I hadn't been there"),
                         "I had not been there")
        self.assertEqual(normalize("I would've"), "I would have")
        self.assertEqual(normalize("it hadn't"), "it had not")
        self.assertEqual(normalize("it hadn't have"), "it had not have")
        self.assertEqual(normalize("it would've"), "it would have")
        self.assertEqual(normalize("she wouldn't have"), "she would not have")
        self.assertEqual(normalize("she would've"), "she would have")
        self.assertEqual(normalize("someone wouldn't have"),
                         "someone would not have")
        self.assertEqual(normalize("someone would've"), "someone would have")
        # "the" is an article and is removed by default
        self.assertEqual(normalize("what's the weather like"),
                         "what is weather like")
        self.assertEqual(normalize("that's what I told you"),
                         "that is what I told you")


if __name__ == "__main__":
    unittest.main()