diff --git a/mycroft/util/format.py b/mycroft/util/format.py
index bbf3dc5b3c..ff3fc5368e 100755
--- a/mycroft/util/format.py
+++ b/mycroft/util/format.py
@@ -254,7 +254,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
     return str(dt)
 
 
-def pronounce_number(number, lang="en-us", places=2):
+def pronounce_number(number, lang="en-us", places=2, short_scale=True,
+                     scientific=False):
     """
     Convert a number to it's spoken equivalent
 
@@ -262,12 +263,17 @@ def pronounce_number(number, lang="en-us", places=2):
 
     Args:
         number: the number to pronounce
+        short_scale (bool) : use short (True) or long scale (False)
+            https://en.wikipedia.org/wiki/Names_of_large_numbers
+        scientific (bool) : convert and pronounce in scientific notation
     Returns:
         (str): The pronounced number
     """
     lang_lower = str(lang).lower()
     if lang_lower.startswith("en"):
-        return pronounce_number_en(number, places=places)
+        return pronounce_number_en(number, places=places,
+                                   short_scale=short_scale,
+                                   scientific=scientific)
     elif lang_lower.startswith("it"):
         return pronounce_number_it(number, places=places)
     elif lang_lower.startswith("fr"):
diff --git a/mycroft/util/lang/format_en.py b/mycroft/util/lang/format_en.py
index c8ee8cd3f2..9698b91d3d 100644
--- a/mycroft/util/lang/format_en.py
+++ b/mycroft/util/lang/format_en.py
@@ -16,6 +16,8 @@
 #
 from mycroft.util.lang.format_common import convert_to_mixed_fraction
 
+import collections
+
 
 NUM_STRING_EN = {
     0: 'zero',
@@ -45,10 +47,7 @@ NUM_STRING_EN = {
     60: 'sixty',
     70: 'seventy',
     80: 'eighty',
-    90: 'ninety',
-    100: 'hundred',
-    1000: 'thousand',
-    1000000: 'million'
+    90: 'ninety'
 }
 
 FRACTION_STRING_EN = {
@@ -73,6 +72,112 @@ FRACTION_STRING_EN = {
     20: 'twentyith'
 }
 
+# NOTE(review): float literals from 1e309 upwards overflow to inf in Python,
+# so those keys collapse into one entry per table; keeping them distinct
+# would need integer keys such as 10 ** 309.
+LONG_SCALE_EN = collections.OrderedDict([
+    (100, 'hundred'),
+    (1000, 'thousand'),
+    (1000000, 'million'),
+    (1e12, "billion"),
+    (1e18, 'trillion'),
+    (1e24, "quadrillion"),
+    (1e30, "quintillion"),
+    (1e36, "sextillion"),
+    (1e42, "septillion"),
+    (1e48, "octillion"),
+    (1e54, "nonillion"),
+    (1e60, "decillion"),
+    (1e66, "undecillion"),
+    (1e72, "duodecillion"),
+    (1e78, "tredecillion"),
+    (1e84, "quattuordecillion"),
+    (1e90, "quinquadecillion"),
+    (1e96, "sedecillion"),
+    (1e102, "septendecillion"),
+    (1e108, "octodecillion"),
+    (1e114, "novendecillion"),
+    (1e120, "vigintillion"),
+    (1e306, "unquinquagintillion"),
+    (1e312, "duoquinquagintillion"),
+    (1e336, "sesquinquagintillion"),
+    (1e366, "unsexagintillion")
+])
+
+SHORT_SCALE_EN = collections.OrderedDict([
+    (100, 'hundred'),
+    (1000, 'thousand'),
+    (1000000, 'million'),
+    (1e9, "billion"),
+    (1e12, 'trillion'),
+    (1e15, "quadrillion"),
+    (1e18, "quintillion"),
+    (1e21, "sextillion"),
+    (1e24, "septillion"),
+    (1e27, "octillion"),
+    (1e30, "nonillion"),
+    (1e33, "decillion"),
+    (1e36, "undecillion"),
+    (1e39, "duodecillion"),
+    (1e42, "tredecillion"),
+    (1e45, "quattuordecillion"),
+    (1e48, "quinquadecillion"),
+    (1e51, "sedecillion"),
+    (1e54, "septendecillion"),
+    (1e57, "octodecillion"),
+    (1e60, "novendecillion"),
+    (1e63, "vigintillion"),
+    (1e66, "unvigintillion"),
+    (1e69, "duovigintillion"),
+    (1e72, "tresvigintillion"),
+    (1e75, "quattuorvigintillion"),
+    (1e78, "quinquavigintillion"),
+    (1e81, "sesvigintillion"),
+    (1e84, "septemvigintillion"),
+    (1e87, "octovigintillion"),
+    (1e90, "novemvigintillion"),
+    (1e93, "trigintillion"),
+    (1e96, "untrigintillion"),
+    (1e99, "duotrigintillion"),
+    (1e102, "trestrigintillion"),
+    (1e105, "quattuortrigintillion"),
+    (1e108, "quinquatrigintillion"),
+    (1e111, "sestrigintillion"),
+    (1e114, "septentrigintillion"),
+    (1e117, "octotrigintillion"),
+    (1e120, "noventrigintillion"),
+    (1e123, "quadragintillion"),
+    (1e153, "quinquagintillion"),
+    (1e183, "sexagintillion"),
+    (1e213, "septuagintillion"),
+    (1e243, "octogintillion"),
+    (1e273, "nonagintillion"),
+    (1e303, "centillion"),
+    (1e306, "uncentillion"),
+    (1e309, "duocentillion"),
+    (1e312, "trescentillion"),
+    (1e333, "decicentillion"),
+    (1e336, "undecicentillion"),
+    (1e363, "viginticentillion"),
+    (1e366, "unviginticentillion"),
+    (1e393, "trigintacentillion"),
+    (1e423, "quadragintacentillion"),
+    (1e453, "quinquagintacentillion"),
+    (1e483, "sexagintacentillion"),
+    (1e513, "septuagintacentillion"),
+    (1e543, "octogintacentillion"),
+    (1e573, "nonagintacentillion"),
+    (1e603, "ducentillion"),
+    (1e903, "trecentillion"),
+    (1e1203, "quadringentillion"),
+    (1e1503, "quingentillion"),
+    (1e1803, "sescentillion"),
+    (1e2103, "septingentillion"),
+    (1e2403, "octingentillion"),
+    (1e2703, "nongentillion"),
+    (1e3003, "millinillion")
+])
+
 
 def nice_number_en(number, speech, denominators):
     """ English helper for nice_number
@@ -119,7 +224,7 @@ def nice_number_en(number, speech, denominators):
     return return_string
 
 
-def pronounce_number_en(num, places=2):
+def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
     """
     Convert a number to it's spoken equivalent
 
@@ -128,32 +233,119 @@ def pronounce_number_en(num, places=2):
     Args:
-        num(float or int): the number to pronounce (under 100)
+        num(float or int): the number to pronounce
         places(int): maximum decimal places to speak
+        short_scale (bool) : use short (True) or long scale (False)
+            https://en.wikipedia.org/wiki/Names_of_large_numbers
+        scientific (bool): pronounce in scientific notation
     Returns:
         (str): The pronounced number
     """
-    if abs(num) >= 100:
-        # TODO: Support for numbers over 100
-        return str(num)
+    if scientific:
+        number = '%E' % num
+        n, power = number.replace("+", "").split("E")
+        power = int(power)
+        if power != 0:
+            return pronounce_number_en(float(n), places, short_scale, False) \
+                   + " times ten to the power of " + \
+                   pronounce_number_en(power, places, short_scale, False)
+    if short_scale:
+        number_names = NUM_STRING_EN.copy()
+        number_names.update(SHORT_SCALE_EN)
+    else:
+        number_names = NUM_STRING_EN.copy()
+        number_names.update(LONG_SCALE_EN)
 
+    digits = [number_names[n] for n in range(0, 20)]
+
+    tens = [number_names[n] for n in range(10, 100, 10)]
+
+    if short_scale:
+        hundreds = [SHORT_SCALE_EN[n] for n in SHORT_SCALE_EN.keys()]
+    else:
+        hundreds = [LONG_SCALE_EN[n] for n in LONG_SCALE_EN.keys()]
+
+    # deal with negatives
     result = ""
     if num < 0:
         result = "negative "
     num = abs(num)
 
-    if num > 20:
-        tens = int(num - int(num) % 10)
-        result += NUM_STRING_EN[tens]
-        if int(num - tens) != 0:
-            result += " " + NUM_STRING_EN[int(num - tens)]
+    # check for a direct match
+    if num in number_names:
+        if num > 90:
+            result += "one "
+        result += number_names[num]
     else:
-        result += NUM_STRING_EN[int(num)]
+        def _sub_thousand(n):
+            assert 0 <= n <= 999
+            if n <= 19:
+                return digits[n]
+            elif n <= 99:
+                q, r = divmod(n, 10)
+                return tens[q - 1] + (" " + _sub_thousand(r) if r else "")
+            else:
+                q, r = divmod(n, 100)
+                return digits[q] + " hundred" + (
+                    " and " + _sub_thousand(r) if r else "")
+
+        def _short_scale(n):
+            # the scale names are only contiguous below 1e126
+            if n >= 1e126:
+                return "infinity"
+            n = int(n)
+            assert 0 <= n
+            # skip empty groups (2000000 is not "two million, , ") and
+            # still say "zero" when the integer part is 0 (e.g. 0.5)
+            return ", ".join(reversed(
+                [_sub_thousand(z) + (" " + hundreds[i] if i else "")
+                 for i, z in enumerate(_split_by_thousands(n))
+                 if z])) or digits[0]
+
+        def _split_by_thousands(n):
+            assert 0 <= n
+            res = []
+            while n:
+                n, r = divmod(n, 1000)
+                res.append(r)
+            return res
+
+        def _long_scale(n):
+            # the scale names are only contiguous below 1e126
+            if n >= 1e126:
+                return "infinity"
+            n = int(n)
+            assert 0 <= n
+            groups = _split_by_thousands(n)
+            res = []
+            for i, z in enumerate(groups):
+                if not z:
+                    continue
+                number = pronounce_number_en(z, places, True)
+                # hundreds[k + 1] names the k-th power of one million
+                scale = " " + hundreds[i // 2 + 1] + ","
+                if i == 1:
+                    number += " thousand,"
+                elif i % 2:
+                    # upper half of a million-group: it only carries the
+                    # scale word when the lower half is empty
+                    number += " thousand"
+                    if not groups[i - 1]:
+                        number += scale
+                elif i:
+                    number += scale
+                res.append(number)
+            return " ".join(reversed(res)).rstrip(",") or digits[0]
+
+        if short_scale:
+            result += _short_scale(num)
+        else:
+            result += _long_scale(num)
 
     # Deal with fractional part
     if not num == int(num) and places > 0:
         result += " point"
         place = 10
         while int(num * place) % 10 > 0 and places > 0:
-            result += " " + NUM_STRING_EN[int(num * place) % 10]
+            result += " " + number_names[int(num * place) % 10]
             place *= 10
             places -= 1
     return result
diff --git a/mycroft/util/lang/parse_en.py b/mycroft/util/lang/parse_en.py
index 8d80200606..78b3378493 100644
--- a/mycroft/util/lang/parse_en.py
+++ b/mycroft/util/lang/parse_en.py
@@ -19,106 +19,8 @@
 from datetime import datetime
 from dateutil.relativedelta import relativedelta
 from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
-from mycroft.util.lang.format_en import NUM_STRING_EN
-
-LONG_SCALE_EN = {
-    10e12: "billion",
-    10e18: 'trillion',
-    10e24: "quadrillion",
-    10e30: "quintillion",
-    10e36: "sextillion",
-    10e42: "septillion",
-    10e48: "octillion",
-    10e54: "nonillion",
-    10e60: "decillion",
-    10e66: "undecillion",
-    10e72: "duodecillion",
-    10e78: "tredecillion",
-    10e84: "quattuordecillion",
-    10e90: "quinquadecillion",
-    10e96: "sedecillion",
-    10e102: "septendecillion",
-    10e108: "octodecillion",
-    10e114: "novendecillion",
-    10e120: "vigintillion",
-    10e306: "unquinquagintillion",
-    10e312: "duoquinquagintillion",
-    10e336: "sesquinquagintillion",
-    10e366: "unsexagintillion",
-    10e100: "googol"
-}
-
-SHORT_SCALE_EN = {
-    10e9: "billion",
-    10e10: 'trillion',
-    10e15: "quadrillion",
-    10e18: "quintillion",
-    10e21: "sextillion",
-    10e24: "septillion",
-    10e27: "octillion",
-    10e30: "nonillion",
-    10e33: "decillion",
-    10e36: "undecillion",
-    10e39: "duodecillion",
-    10e42: "tredecillion",
-    10e45: "quattuordecillion",
-    10e48: "quinquadecillion",
-    10e51: "sedecillion",
-    10e54: "septendecillion",
-    10e57: "octodecillion",
-    10e60: "novendecillion",
-    10e63: "vigintillion",
-    10e66: "unvigintillion",
-    10e69: "uuovigintillion",
-    10e72: "tresvigintillion",
-    10e75: "quattuorvigintillion",
-    10e78: "quinquavigintillion",
-    10e81: "qesvigintillion",
-    10e84: "septemvigintillion",
-    10e87: "octovigintillion",
-    10e90: "novemvigintillion",
-    10e93: "trigintillion",
-    10e96: "untrigintillion",
-    10e99: "duotrigintillion",
-    10e102: "trestrigintillion",
-    10e105: "quattuortrigintillion",
-    10e108: "quinquatrigintillion",
-    10e111: "sestrigintillion",
-    10e114: "septentrigintillion",
-    10e117: "octotrigintillion",
-    10e120: "noventrigintillion",
-    10e123: "quadragintillion",
-    10e153: "quinquagintillion",
-    10e183: "sexagintillion",
-    10e213: "septuagintillion",
-    10e243: "octogintillion",
-    10e273: "nonagintillion",
-    10e303: "centillion",
-    10e306: "uncentillion",
-    10e309: "duocentillion",
-    10e312: "trescentillion",
-    10e333: "decicentillion",
-    10e336: "undecicentillion",
-    10e363: "viginticentillion",
-    10e366: "unviginticentillion",
-    10e393: "trigintacentillion",
-    10e423: "quadragintacentillion",
-    10e453: "quinquagintacentillion",
-    10e483: "sexagintacentillion",
-    10e513: "septuagintacentillion",
-    10e543: "ctogintacentillion",
-    10e573: "nonagintacentillion",
-    10e603: "ducentillion",
-    10e903: "trecentillion",
-    10e1203: "quadringentillion",
-    10e1503: "quingentillion",
-    10e1803: "sescentillion",
-    10e2103: "septingentillion",
-    10e2403: "octingentillion",
-    10e2703: "nongentillion",
-    10e3003: "millinillion",
-    10e100: "googol"
-}
+from mycroft.util.lang.format_en import NUM_STRING_EN, LONG_SCALE_EN, \
+    SHORT_SCALE_EN
 
 SHORT_ORDINAL_STRING_EN = {
     1: 'first',
@@ -227,8 +129,11 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
     string_num_en = {
         "half": 0.5,
         "halves": 0.5,
+        "hundred": 100,
         "hundreds": 100,
+        "thousand": 1000,
         "thousands": 1000,
+        "million": 1000000,
         'millions': 1000000}
 
     for num in NUM_STRING_EN:
diff --git a/test/unittests/util/test_format.py b/test/unittests/util/test_format.py
index 3266748faf..d9436b96f3 100755
--- a/test/unittests/util/test_format.py
+++ b/test/unittests/util/test_format.py
@@ -143,6 +143,57 @@ class TestPronounceNumber(unittest.TestCase):
         self.assertEqual(pronounce_number(-21.234, places=5),
                          "negative twenty one point two three four")
 
+    def test_convert_hundreds(self):
+        self.assertEqual(pronounce_number(100), "one hundred")
+        self.assertEqual(pronounce_number(666), "six hundred and sixty six")
+        self.assertEqual(pronounce_number(1456), "one thousand, four hundred "
+                                                 "and fifty six")
+        self.assertEqual(pronounce_number(103254654), "one hundred and three "
+                                                      "million, two hundred "
+                                                      "and fifty four "
+                                                      "thousand, six hundred "
+                                                      "and fifty four")
+        self.assertEqual(pronounce_number(1512457), "one million, five hundred"
+                                                    " and twelve thousand, "
+                                                    "four hundred and fifty "
+                                                    "seven")
+        self.assertEqual(pronounce_number(209996), "two hundred and nine "
+                                                   "thousand, nine hundred "
+                                                   "and ninety six")
+        self.assertEqual(pronounce_number(95505896639631893),
+                         "ninety five quadrillion, five hundred and five "
+                         "trillion, eight hundred and ninety six billion, six "
+                         "hundred and thirty nine million, six hundred and "
+                         "thirty one thousand, eight hundred and ninety three")
+        self.assertEqual(pronounce_number(95505896639631893,
+                                          short_scale=False),
+                         "ninety five thousand five hundred and five billion, "
+                         "eight hundred and ninety six thousand six hundred "
+                         "and thirty nine million, six hundred and thirty one "
+                         "thousand, eight hundred and ninety three")
+        # empty groups must not leave stray separators or lose the scale
+        self.assertEqual(pronounce_number(2000000), "two million")
+        self.assertEqual(pronounce_number(2000000, short_scale=False),
+                         "two million")
+        self.assertEqual(pronounce_number(5000000000, short_scale=False),
+                         "five thousand million")
+        self.assertEqual(pronounce_number(0.5), "zero point five")
+
+    def test_convert_scientific_notation(self):
+        self.assertEqual(pronounce_number(0, scientific=True), "zero")
+        self.assertEqual(pronounce_number(33, scientific=True),
+                         "three point three times ten to the power of one")
+        self.assertEqual(pronounce_number(299792458, scientific=True),
+                         "two point nine nine times ten to the power of eight")
+        self.assertEqual(pronounce_number(299792458, places=6,
+                                          scientific=True),
+                         "two point nine nine seven nine two five times "
+                         "ten to the power of eight")
+        self.assertEqual(pronounce_number(1.672e-27, places=3,
+                                          scientific=True),
+                         "one point six seven two times ten to the power of "
+                         "negative twenty seven")
+
 
 # def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
 #               use_ampm=False):
diff --git a/test/unittests/util/test_parse.py b/test/unittests/util/test_parse.py
index c5f0610a9d..f3f64759b4 100644
--- a/test/unittests/util/test_parse.py
+++ b/test/unittests/util/test_parse.py
@@ -91,17 +91,17 @@ class TestNormalize(unittest.TestCase):
         self.assertEqual(extractnumber("two million"), 2000000)
         self.assertEqual(extractnumber("two million five hundred thousand "
                                        "tons of spinning metal"), 2500000)
-        self.assertEqual(extractnumber("six trillion"), 600000000000.0)
+        self.assertEqual(extractnumber("six trillion"), 6000000000000.0)
         self.assertEqual(extractnumber("six trillion", short_scale=False),
-                         6e+19)
+                         6e+18)
         self.assertEqual(extractnumber("one point five"), 1.5)
         self.assertEqual(extractnumber("three dot fourteen"), 3.14)
         self.assertEqual(extractnumber("zero point two"), 0.2)
         self.assertEqual(extractnumber("billions of years older"),
-                         10000000000.0)
+                         1000000000.0)
         self.assertEqual(extractnumber("billions of years older",
                                        short_scale=False),
-                         10000000000000.0)
+                         1000000000000.0)
         self.assertEqual(extractnumber("one hundred thousand"), 100000)
         self.assertEqual(extractnumber("minus 2"), -2)
         self.assertEqual(extractnumber("negative seventy"), -70)