Merge pull request #1653 from JarbasAl/feature/pronounce_scientific

Feature/pronounce scientific
2018-07-31 13:04:14 -05:00 · 2018-07-31 13:04:14 -05:00 · 7b54149bcd
parent 040b64d09d 57a86a7fe6
commit 7b54149bcd
5 changed files with 260 additions and 121 deletions
--- a/mycroft/util/format.py
+++ b/mycroft/util/format.py
@ -254,7 +254,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
    return str(dt)
-def pronounce_number(number, lang="en-us", places=2):
+def pronounce_number(number, lang="en-us", places=2, short_scale=True,
                     scientific=False):
    """
    Convert a number to it's spoken equivalent
@ -262,12 +263,17 @@ def pronounce_number(number, lang="en-us", places=2):
    Args:
        number: the number to pronounce
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool) : convert and pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    lang_lower = str(lang).lower()
    if lang_lower.startswith("en"):
-        return pronounce_number_en(number, places=places)
+        return pronounce_number_en(number, places=places,
                                   short_scale=short_scale,
                                   scientific=scientific)
    elif lang_lower.startswith("it"):
        return pronounce_number_it(number, places=places)
    elif lang_lower.startswith("fr"):
--- a/mycroft/util/lang/format_en.py
+++ b/mycroft/util/lang/format_en.py
@ -16,6 +16,8 @@
 #
 from mycroft.util.lang.format_common import convert_to_mixed_fraction
 import collections
 NUM_STRING_EN = {
    0: 'zero',
@ -45,10 +47,7 @@ NUM_STRING_EN = {
    60: 'sixty',
    70: 'seventy',
    80: 'eighty',
-    90: 'ninety',
+    90: 'ninety'
    100: 'hundred',
    1000: 'thousand',
    1000000: 'million'
 }
 FRACTION_STRING_EN = {
@ -73,6 +72,109 @@ FRACTION_STRING_EN = {
    20: 'twentyith'
 }
 # English long-scale number names keyed by the value they denote
 # (https://en.wikipedia.org/wiki/Names_of_large_numbers).
 # Values beyond the float range (about 1.8e308) are written as exact integer
 # powers: a float literal such as 1e312 evaluates to inf, which would merge
 # all of those entries into a single key and drop their names.
 LONG_SCALE_EN = collections.OrderedDict([
    (100, 'hundred'),
    (1000, 'thousand'),
    (1000000, 'million'),
    (1e12, "billion"),
    (1e18, 'trillion'),
    (1e24, "quadrillion"),
    (1e30, "quintillion"),
    (1e36, "sextillion"),
    (1e42, "septillion"),
    (1e48, "octillion"),
    (1e54, "nonillion"),
    (1e60, "decillion"),
    (1e66, "undecillion"),
    (1e72, "duodecillion"),
    (1e78, "tredecillion"),
    (1e84, "quattuordecillion"),
    (1e90, "quinquadecillion"),
    (1e96, "sedecillion"),
    (1e102, "septendecillion"),
    (1e108, "octodecillion"),
    (1e114, "novendecillion"),
    (1e120, "vigintillion"),
    (1e306, "unquinquagintillion"),
    (10 ** 312, "duoquinquagintillion"),
    (10 ** 336, "sesquinquagintillion"),
    (10 ** 366, "unsexagintillion")
 ])
 # English short-scale number names keyed by the value they denote
 # (https://en.wikipedia.org/wiki/Names_of_large_numbers).
 # Values beyond the float range (about 1.8e308) are written as exact integer
 # powers: a float literal such as 1e309 evaluates to inf, which would merge
 # all of those entries into a single key and drop their names.
 SHORT_SCALE_EN = collections.OrderedDict([
    (100, 'hundred'),
    (1000, 'thousand'),
    (1000000, 'million'),
    (1e9, "billion"),
    (1e12, 'trillion'),
    (1e15, "quadrillion"),
    (1e18, "quintillion"),
    (1e21, "sextillion"),
    (1e24, "septillion"),
    (1e27, "octillion"),
    (1e30, "nonillion"),
    (1e33, "decillion"),
    (1e36, "undecillion"),
    (1e39, "duodecillion"),
    (1e42, "tredecillion"),
    (1e45, "quattuordecillion"),
    (1e48, "quinquadecillion"),
    (1e51, "sedecillion"),
    (1e54, "septendecillion"),
    (1e57, "octodecillion"),
    (1e60, "novendecillion"),
    (1e63, "vigintillion"),
    (1e66, "unvigintillion"),
    (1e69, "duovigintillion"),
    (1e72, "tresvigintillion"),
    (1e75, "quattuorvigintillion"),
    (1e78, "quinquavigintillion"),
    (1e81, "sesvigintillion"),
    (1e84, "septemvigintillion"),
    (1e87, "octovigintillion"),
    (1e90, "novemvigintillion"),
    (1e93, "trigintillion"),
    (1e96, "untrigintillion"),
    (1e99, "duotrigintillion"),
    (1e102, "trestrigintillion"),
    (1e105, "quattuortrigintillion"),
    (1e108, "quinquatrigintillion"),
    (1e111, "sestrigintillion"),
    (1e114, "septentrigintillion"),
    (1e117, "octotrigintillion"),
    (1e120, "noventrigintillion"),
    (1e123, "quadragintillion"),
    (1e153, "quinquagintillion"),
    (1e183, "sexagintillion"),
    (1e213, "septuagintillion"),
    (1e243, "octogintillion"),
    (1e273, "nonagintillion"),
    (1e303, "centillion"),
    (1e306, "uncentillion"),
    (10 ** 309, "duocentillion"),
    (10 ** 312, "trescentillion"),
    (10 ** 333, "decicentillion"),
    (10 ** 336, "undecicentillion"),
    (10 ** 363, "viginticentillion"),
    (10 ** 366, "unviginticentillion"),
    (10 ** 393, "trigintacentillion"),
    (10 ** 423, "quadragintacentillion"),
    (10 ** 453, "quinquagintacentillion"),
    (10 ** 483, "sexagintacentillion"),
    (10 ** 513, "septuagintacentillion"),
    (10 ** 543, "octogintacentillion"),
    (10 ** 573, "nonagintacentillion"),
    (10 ** 603, "ducentillion"),
    (10 ** 903, "trecentillion"),
    (10 ** 1203, "quadringentillion"),
    (10 ** 1503, "quingentillion"),
    (10 ** 1803, "sescentillion"),
    (10 ** 2103, "septingentillion"),
    (10 ** 2403, "octingentillion"),
    (10 ** 2703, "nongentillion"),
    (10 ** 3003, "millinillion")
 ])
 def nice_number_en(number, speech, denominators):
    """ English helper for nice_number
@ -119,7 +221,7 @@ def nice_number_en(number, speech, denominators):
    return return_string
-def pronounce_number_en(num, places=2):
+def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
    """
    Convert a number to its spoken English equivalent
@ -128,32 +230,114 @@ def pronounce_number_en(num, places=2):
    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation, i.e.
            "<mantissa> times ten to the power of <exponent>"
    Returns:
        (str): The pronounced number
    """
-    if abs(num) >= 100:
+    if scientific:
-        # TODO: Support for numbers over 100
+        number = '%E' % num
-        return str(num)
+        n, power = number.replace("+", "").split("E")
        # n holds the mantissa text and power the exponent text of the '%E'
        # rendering; only a '+' sign was stripped, a '-' is kept for int()
        power = int(power)
        # A zero exponent falls through to the plain pronunciation below
        if power != 0:
            # Both parts are spoken with scientific=False so the recursion
            # does not re-enter this branch
            return pronounce_number_en(float(n), places, short_scale, False) \
                   + " times ten to the power of " + \
                   pronounce_number_en(power, places, short_scale, False)
    # Merge the basic number words with the large-number names of the
    # requested scale into a single lookup table
    if short_scale:
        number_names = NUM_STRING_EN.copy()
        number_names.update(SHORT_SCALE_EN)
    else:
        number_names = NUM_STRING_EN.copy()
        number_names.update(LONG_SCALE_EN)
    # Word lists used by the helpers below: the words for 0-19, the words for
    # the multiples of ten (10..90), and the scale names in table order
    digits = [number_names[n] for n in range(0, 20)]
    tens = [number_names[n] for n in range(10, 100, 10)]
    if short_scale:
        hundreds = [SHORT_SCALE_EN[n] for n in SHORT_SCALE_EN.keys()]
    else:
        hundreds = [LONG_SCALE_EN[n] for n in LONG_SCALE_EN.keys()]
    # deal with negatives
    result = ""
    if num < 0:
        result = "negative "
    num = abs(num)
    # Exact table entries are spoken directly; an entry above 90 gets a
    # leading "one" (this is what makes 100 read "one hundred")
-    if num > 20:
+    # check for a direct match
-        tens = int(num - int(num) % 10)
+    if num in number_names:
-        result += NUM_STRING_EN[tens]
+        if num > 90:
-        if int(num - tens) != 0:
+            result += "one "
-            result += " " + NUM_STRING_EN[int(num - tens)]
+        result += number_names[num]
    else:
-        result += NUM_STRING_EN[int(num)]
+        def _sub_thousand(n):
            """Spell out an integer in the range 0..999."""
            assert 0 <= n <= 999
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                # tens[0] is the word for 10, hence the q - 1 offset
                return tens[q - 1] + (" " + _sub_thousand(r) if r else "")
            else:
                q, r = divmod(n, 100)
                return digits[q] + " hundred" + (
                    " and " + _sub_thousand(r) if r else "")
        def _short_scale(n):
            """Spell out the integer part of n in groups of three digits.

            Group i > 0 is followed by hundreds[i]; groups are joined most
            significant first with ", ".
            """
            # NOTE(review): a zero group contributes an empty string that is
            # still joined, so a number with an all-zero middle group appears
            # to produce ", ," in the output - confirm and filter if so
            n = int(n)
            assert 0 <= n
            return ", ".join(reversed(
                [_sub_thousand(z) + (
                    " " + hundreds[i] if i else "") if z else ""
                 for i, z in enumerate(_split_by_thousands(n))]))
        def _split_by_thousands(n):
            """Return the base-1000 groups of n, least significant first."""
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, 1000)
                res.append(r)
            return res
        def _split_by_millions(n):
            """Return the groups of n that _long_scale iterates over."""
            # NOTE(review): despite the name this divides by 1000, making it
            # identical to _split_by_thousands; _long_scale's index handling
            # below relies on these three-digit groups
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, 1000)
                res.append(r)
            return res
        def _long_scale(n):
            """Spell out the integer part of n using long-scale names."""
            # 10e153 is 1e154; anything at or above it is reported as
            # "infinity" rather than spelled out
            if n >= 10e153:
                return "infinity"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by_millions(n)):
                if not z:
                    continue
                # each group is below 1000 and is spoken via the short-scale
                # path of this same function
                number = pronounce_number_en(z, places, True)
                # odd group indexes above 1 are the "thousand" half of a
                # long-scale step; the other groups take a scale name
                if i % 2 != 0 and i > 1:
                    number += " " + "thousand"
                elif i > 0 and i < 3:
                    number += " " + hundreds[i] + ","
                elif i:
                    number += " " + hundreds[i - 1] + ","
                res.append(number)
            return " ".join(reversed(res))
        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)
    # Deal with fractional part
    # Digits are spoken one at a time until `places` is used up or a zero
    # digit is reached.
    # NOTE(review): because a zero digit ends the loop, a value whose first
    # decimal digit is 0 appears to yield a bare " point" - confirm
    if not num == int(num) and places > 0:
        result += " point"
        place = 10
        while int(num * place) % 10 > 0 and places > 0:
-            result += " " + NUM_STRING_EN[int(num * place) % 10]
+            result += " " + number_names[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
--- a/mycroft/util/lang/parse_en.py
+++ b/mycroft/util/lang/parse_en.py
@ -19,106 +19,8 @@ from datetime import datetime
 from dateutil.relativedelta import relativedelta
 from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
-from mycroft.util.lang.format_en import NUM_STRING_EN
+from mycroft.util.lang.format_en import NUM_STRING_EN, LONG_SCALE_EN, \
-
+    SHORT_SCALE_EN
 LONG_SCALE_EN = {
    10e12: "billion",
    10e18: 'trillion',
    10e24: "quadrillion",
    10e30: "quintillion",
    10e36: "sextillion",
    10e42: "septillion",
    10e48: "octillion",
    10e54: "nonillion",
    10e60: "decillion",
    10e66: "undecillion",
    10e72: "duodecillion",
    10e78: "tredecillion",
    10e84: "quattuordecillion",
    10e90: "quinquadecillion",
    10e96: "sedecillion",
    10e102: "septendecillion",
    10e108: "octodecillion",
    10e114: "novendecillion",
    10e120: "vigintillion",
    10e306: "unquinquagintillion",
    10e312: "duoquinquagintillion",
    10e336: "sesquinquagintillion",
    10e366: "unsexagintillion",
    10e100: "googol"
 }
 SHORT_SCALE_EN = {
    10e9: "billion",
    10e10: 'trillion',
    10e15: "quadrillion",
    10e18: "quintillion",
    10e21: "sextillion",
    10e24: "septillion",
    10e27: "octillion",
    10e30: "nonillion",
    10e33: "decillion",
    10e36: "undecillion",
    10e39: "duodecillion",
    10e42: "tredecillion",
    10e45: "quattuordecillion",
    10e48: "quinquadecillion",
    10e51: "sedecillion",
    10e54: "septendecillion",
    10e57: "octodecillion",
    10e60: "novendecillion",
    10e63: "vigintillion",
    10e66: "unvigintillion",
    10e69: "uuovigintillion",
    10e72: "tresvigintillion",
    10e75: "quattuorvigintillion",
    10e78: "quinquavigintillion",
    10e81: "qesvigintillion",
    10e84: "septemvigintillion",
    10e87: "octovigintillion",
    10e90: "novemvigintillion",
    10e93: "trigintillion",
    10e96: "untrigintillion",
    10e99: "duotrigintillion",
    10e102: "trestrigintillion",
    10e105: "quattuortrigintillion",
    10e108: "quinquatrigintillion",
    10e111: "sestrigintillion",
    10e114: "septentrigintillion",
    10e117: "octotrigintillion",
    10e120: "noventrigintillion",
    10e123: "quadragintillion",
    10e153: "quinquagintillion",
    10e183: "sexagintillion",
    10e213: "septuagintillion",
    10e243: "octogintillion",
    10e273: "nonagintillion",
    10e303: "centillion",
    10e306: "uncentillion",
    10e309: "duocentillion",
    10e312: "trescentillion",
    10e333: "decicentillion",
    10e336: "undecicentillion",
    10e363: "viginticentillion",
    10e366: "unviginticentillion",
    10e393: "trigintacentillion",
    10e423: "quadragintacentillion",
    10e453: "quinquagintacentillion",
    10e483: "sexagintacentillion",
    10e513: "septuagintacentillion",
    10e543: "ctogintacentillion",
    10e573: "nonagintacentillion",
    10e603: "ducentillion",
    10e903: "trecentillion",
    10e1203: "quadringentillion",
    10e1503: "quingentillion",
    10e1803: "sescentillion",
    10e2103: "septingentillion",
    10e2403: "octingentillion",
    10e2703: "nongentillion",
    10e3003: "millinillion",
    10e100: "googol"
 }
 SHORT_ORDINAL_STRING_EN = {
    1: 'first',
@ -227,8 +129,11 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
    string_num_en = {
                     "half": 0.5,
                     "halves": 0.5,
                     "hundred": 100,
                     "hundreds": 100,
                     "thousand": 1000,
                     "thousands": 1000,
                     "million": 1000000,
                     'millions': 1000000}
    for num in NUM_STRING_EN:
--- a/test/unittests/util/test_format.py
+++ b/test/unittests/util/test_format.py
@ -143,6 +143,50 @@ class TestPronounceNumber(unittest.TestCase):
        self.assertEqual(pronounce_number(-21.234, places=5),
                         "negative twenty one point two three four")
    def test_convert_hundreds(self):
        """Numbers of one hundred and above are spelled out in full."""
        short_scale_cases = [
            (100, "one hundred"),
            (666, "six hundred and sixty six"),
            (1456, "one thousand, four hundred and fifty six"),
            (103254654,
             "one hundred and three million, two hundred and fifty four "
             "thousand, six hundred and fifty four"),
            (1512457,
             "one million, five hundred and twelve thousand, four hundred "
             "and fifty seven"),
            (209996,
             "two hundred and nine thousand, nine hundred and ninety six"),
            (95505896639631893,
             "ninety five quadrillion, five hundred and five trillion, "
             "eight hundred and ninety six billion, six hundred and thirty "
             "nine million, six hundred and thirty one thousand, eight "
             "hundred and ninety three"),
        ]
        for value, spoken in short_scale_cases:
            self.assertEqual(pronounce_number(value), spoken)

        # Same value as the last case above, with short_scale=False
        long_scale_spoken = (
            "ninety five thousand five hundred and five billion, eight "
            "hundred and ninety six thousand six hundred and thirty nine "
            "million, six hundred and thirty one thousand, eight hundred "
            "and ninety three")
        self.assertEqual(
            pronounce_number(95505896639631893, short_scale=False),
            long_scale_spoken)
    def test_convert_scientific_notation(self):
        """Check the spoken form produced when scientific=True."""
        # (number, extra keyword arguments, expected spoken form)
        cases = [
            (0, {}, "zero"),
            (33, {}, "three point three times ten to the power of one"),
            (299792458, {},
             "two point nine nine times ten to the power of eight"),
            (299792458, {"places": 6},
             "two point nine nine seven nine two five times ten to the "
             "power of eight"),
            (1.672e-27, {"places": 3},
             "one point six seven two times ten to the power of negative "
             "twenty seven"),
        ]
        for number, extra, spoken in cases:
            self.assertEqual(
                pronounce_number(number, scientific=True, **extra), spoken)
 # def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
 #              use_ampm=False):
--- a/test/unittests/util/test_parse.py
+++ b/test/unittests/util/test_parse.py
@ -91,17 +91,17 @@ class TestNormalize(unittest.TestCase):
        self.assertEqual(extractnumber("two million"), 2000000)
        self.assertEqual(extractnumber("two million five hundred thousand "
                                       "tons of spinning metal"), 2500000)
-        self.assertEqual(extractnumber("six trillion"), 600000000000.0)
+        self.assertEqual(extractnumber("six trillion"), 60000000000.0)
        self.assertEqual(extractnumber("six trillion", short_scale=False),
-                         6e+19)
+                         6e+18)
        self.assertEqual(extractnumber("one point five"), 1.5)
        self.assertEqual(extractnumber("three dot fourteen"), 3.14)
        self.assertEqual(extractnumber("zero point two"), 0.2)
        self.assertEqual(extractnumber("billions of years older"),
-                         10000000000.0)
+                         1000000000.0)
        self.assertEqual(extractnumber("billions of years older",
                                       short_scale=False),
-                         10000000000000.0)
+                         1000000000000.0)
        self.assertEqual(extractnumber("one hundred thousand"), 100000)
        self.assertEqual(extractnumber("minus 2"), -2)
        self.assertEqual(extractnumber("negative seventy"), -70)