commit
6e00b43acf
|
@ -18,6 +18,195 @@
|
|||
# You should have received a copy of the GNU General Public License
|
||||
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# ==============================================================
|
||||
|
||||
|
||||
# def extractnumber(text, lang="en-us", remove_articles=True):
|
||||
def extractnumber(text, lang="en-us"):
    """Extract a number from a phrase, dispatching on the language code.

    Args:
        text (str): the string to extract a number from
        lang (str): the code for the language text is in
    Returns:
        For language codes starting with "en" (case-insensitive), whatever
        extractnumber_en(text) returns. For any other language the original
        text is returned unchanged.
    """
    is_english = str(lang).lower().startswith("en")
    if not is_english:
        # TODO: Normalization for other languages
        return text

    return extractnumber_en(text)
|
||||
|
||||
|
||||
def is_numeric(input_str):
    """
    Report whether a string can be parsed as a number.

    Args:
        input_str (str): string to test if a number
    Returns:
        (bool): True if float() accepts the string, else False

    """
    try:
        float(input_str)
    except ValueError:
        return False
    return True
|
||||
|
||||
|
||||
def extractnumber_en(text):
    """
    Extract the first number expressed in an English phrase.

    Recognises digits ("2", "1.5"), the ordinals "first" and "second",
    the words "one" through "ten" (optionally followed by a fraction word,
    e.g. "three quarters"), fraction words ("half", "quarter", "third", ...)
    and numeric fractions ("3/4"). A value followed by "and" (directly or
    one word later) is added to the next value found, so
    "one and a half cups" and "1 cup and a half" both give 1.5.

    Args:
        text (str): the phrase to scan
    Returns:
        (int) or (float): The value of extracted number, or False when
                          no (non-zero) number is found

    """
    ordinals = {"first": 1, "second": 2}
    cardinals = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
                 "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10}

    # Articles carry no numeric meaning ("a half" -> "half")
    aWords = [word for word in text.split()
              if word not in ("the", "a", "an")]
    andPass = False     # True once a value followed by "and" has been seen
    valPreAnd = False   # the value that preceded that "and"
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            val = float(word)
        elif word in ordinals:
            val = ordinals[word]
        else:
            fraction = isFractional(word)
            if fraction:
                val = fraction
            elif word in cardinals:
                val = cardinals[word]
                # A following fraction word scales the count:
                # "three quarters" -> 3 * 0.25
                if count + 1 < len(aWords):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional(wordNext)
                if valNext:
                    val = val * valNext

        if not val:
            # look for fractions like "2/3"; a zero denominator is not a
            # usable number, so such a token is treated like any other word
            aPieces = word.split('/')
            if look_for_fractions(aPieces) and float(aPieces[1]) != 0:
                val = float(aPieces[0]) / float(aPieces[1])
            elif andPass:
                # nothing to add after the "and", keep the earlier value
                val = valPreAnd
                break
            else:
                count += 1
                continue

        if andPass:
            # second operand of "<value> and <value>"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'and':
            # "<value> and ..." : remember the value, look past the "and"
            andPass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'and':
            # "<value> <unit> and ..." e.g. "1 cup and a half"
            andPass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val and andPass:
        # The phrase ended right after the "and" (e.g. "one cup and"):
        # fall back to the value found before it instead of dropping it.
        val = valPreAnd

    if not val:
        return False

    return val
|
||||
|
||||
|
||||
def look_for_fractions(split_list):
    """
    Decide whether the pieces of a string split on '/' form a fraction.

    Args:
        split_list (list): list created by splitting on '/'
    Returns:
        (bool): True when there are exactly two pieces and both are
                numeric, otherwise False

    """
    if len(split_list) != 2:
        return False

    return is_numeric(split_list[0]) and is_numeric(split_list[1])
|
||||
|
||||
|
||||
def isFractional(input_str):
    """
    This function takes the given text and checks if it is a fraction.

    The comparison is case-insensitive and a single trailing "s" is
    ignored, so "Half", "fifths" and "Quarters" are all recognised.

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    # Normalise case once so the membership test and the index lookup
    # below always see the same string.
    word = input_str.lower()
    if word.endswith('s'):
        word = word[:-1]  # e.g. "fifths"

    # Position in the list gives the denominator: "whole" -> 1, "half" -> 2
    aFrac = ["whole", "half", "third", "fourth", "fifth", "sixth",
             "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth"]

    if word in aFrac:
        return 1.0 / (aFrac.index(word) + 1)
    if word == "quarter":
        return 1.0 / 4

    return False
|
||||
|
||||
# ==============================================================
|
||||
|
||||
|
||||
def normalize(text, lang="en-us", remove_articles=True):
|
||||
"""Prepare a string for parsing
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
import unittest
|
||||
from mycroft.util.parse import normalize
|
||||
from mycroft.util.parse import extractnumber
|
||||
|
||||
|
||||
class TestNormalize(unittest.TestCase):
|
||||
|
@ -17,6 +18,28 @@ class TestNormalize(unittest.TestCase):
|
|||
remove_articles=False),
|
||||
"this is an extra test")
|
||||
|
||||
def test_extractnumber(self):
    """Check extractnumber() on digits, number words and fractions."""
    # Digits and the ordinals "first"/"second"; note that "third" is
    # expected to be read as the fraction 1/3, not as the ordinal 3.
    self.assertEqual(extractnumber("this is the first test"), 1)
    self.assertEqual(extractnumber("this is 2 test"), 2)
    self.assertEqual(extractnumber("this is second test"), 2)
    self.assertEqual(extractnumber("this is the third test"), 1.0/3.0)
    self.assertEqual(extractnumber("this is test number 4"), 4)
    # Number words, fraction words and "n/m" fractions
    self.assertEqual(extractnumber("one third of a cup"), 1.0/3.0)
    self.assertEqual(extractnumber("three cups"), 3)
    self.assertEqual(extractnumber("1/3 cups"), 1.0/3.0)
    self.assertEqual(extractnumber("quarter cup"), 0.25)
    self.assertEqual(extractnumber("1/4 cup"), 0.25)
    self.assertEqual(extractnumber("one fourth cup"), 0.25)
    self.assertEqual(extractnumber("2/3 cups"), 2.0/3.0)
    self.assertEqual(extractnumber("3/4 cups"), 3.0/4.0)
    # Values joined by "and" are summed
    self.assertEqual(extractnumber("1 and 3/4 cups"), 1.75)
    self.assertEqual(extractnumber("1 cup and a half"), 1.5)
    self.assertEqual(extractnumber("one cup and a half"), 1.5)
    self.assertEqual(extractnumber("one and a half cups"), 1.5)
    self.assertEqual(extractnumber("one and one half cups"), 1.5)
    # A number word followed by a fraction word is multiplied
    self.assertEqual(extractnumber("three quarter cups"), 3.0/4.0)
    self.assertEqual(extractnumber("three quarters cups"), 3.0/4.0)
|
||||
|
||||
def test_spaces(self):
|
||||
self.assertEqual(normalize(" this is a test"),
|
||||
"this is test")
|
||||
|
|
Loading…
Reference in New Issue