Merge pull request #1653 from JarbasAl/feature/pronounce_scientific

Feature/pronounce scientific
pull/1709/head
Michael Nguyen 2018-07-31 13:04:14 -05:00 committed by GitHub
commit 7b54149bcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 260 additions and 121 deletions

View File

@ -254,7 +254,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
return str(dt) return str(dt)
def pronounce_number(number, lang="en-us", places=2): def pronounce_number(number, lang="en-us", places=2, short_scale=True,
scientific=False):
""" """
Convert a number to it's spoken equivalent Convert a number to it's spoken equivalent
@ -262,12 +263,17 @@ def pronounce_number(number, lang="en-us", places=2):
Args: Args:
number: the number to pronounce number: the number to pronounce
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool) : convert and pronounce in scientific notation
Returns: Returns:
(str): The pronounced number (str): The pronounced number
""" """
lang_lower = str(lang).lower() lang_lower = str(lang).lower()
if lang_lower.startswith("en"): if lang_lower.startswith("en"):
return pronounce_number_en(number, places=places) return pronounce_number_en(number, places=places,
short_scale=short_scale,
scientific=scientific)
elif lang_lower.startswith("it"): elif lang_lower.startswith("it"):
return pronounce_number_it(number, places=places) return pronounce_number_it(number, places=places)
elif lang_lower.startswith("fr"): elif lang_lower.startswith("fr"):

View File

@ -16,6 +16,8 @@
# #
from mycroft.util.lang.format_common import convert_to_mixed_fraction from mycroft.util.lang.format_common import convert_to_mixed_fraction
import collections
NUM_STRING_EN = { NUM_STRING_EN = {
0: 'zero', 0: 'zero',
@ -45,10 +47,7 @@ NUM_STRING_EN = {
60: 'sixty', 60: 'sixty',
70: 'seventy', 70: 'seventy',
80: 'eighty', 80: 'eighty',
90: 'ninety', 90: 'ninety'
100: 'hundred',
1000: 'thousand',
1000000: 'million'
} }
FRACTION_STRING_EN = { FRACTION_STRING_EN = {
@ -73,6 +72,109 @@ FRACTION_STRING_EN = {
20: 'twentyith' 20: 'twentyith'
} }
# Long-scale English names for large numbers, ordered by magnitude.
# Keys are the values the names stand for; consumers may rely on the order.
#
# Python floats overflow to ``inf`` just above 1.7e308, so a literal such as
# 1e312 would silently become ``inf`` and every such key would collapse into
# a single dictionary entry.  Magnitudes beyond the float range are therefore
# written as exact integers.
LONG_SCALE_EN = collections.OrderedDict([
    (100, 'hundred'),
    (1000, 'thousand'),
    (1000000, 'million'),
    (1e12, "billion"),
    (1e18, 'trillion'),
    (1e24, "quadrillion"),
    (1e30, "quintillion"),
    (1e36, "sextillion"),
    (1e42, "septillion"),
    (1e48, "octillion"),
    (1e54, "nonillion"),
    (1e60, "decillion"),
    (1e66, "undecillion"),
    (1e72, "duodecillion"),
    (1e78, "tredecillion"),
    (1e84, "quattuordecillion"),
    (1e90, "quinquadecillion"),
    (1e96, "sedecillion"),
    (1e102, "septendecillion"),
    (1e108, "octodecillion"),
    (1e114, "novendecillion"),
    (1e120, "vigintillion"),
    (1e306, "unquinquagintillion"),
    # beyond the float range from here on: exact integers
    (10 ** 312, "duoquinquagintillion"),
    (10 ** 336, "sesquinquagintillion"),
    (10 ** 366, "unsexagintillion")
])
# Short-scale English names for large numbers, ordered by magnitude.
# Keys are the values the names stand for; consumers may rely on the order
# (from 'thousand' to 'quadragintillion' every power of 1000 is listed).
#
# Python floats overflow to ``inf`` just above 1.7e308, so a literal such as
# 1e309 would silently become ``inf`` and every such key would collapse into
# a single dictionary entry.  Magnitudes beyond the float range are therefore
# written as exact integers.
SHORT_SCALE_EN = collections.OrderedDict([
    (100, 'hundred'),
    (1000, 'thousand'),
    (1000000, 'million'),
    (1e9, "billion"),
    (1e12, 'trillion'),
    (1e15, "quadrillion"),
    (1e18, "quintillion"),
    (1e21, "sextillion"),
    (1e24, "septillion"),
    (1e27, "octillion"),
    (1e30, "nonillion"),
    (1e33, "decillion"),
    (1e36, "undecillion"),
    (1e39, "duodecillion"),
    (1e42, "tredecillion"),
    (1e45, "quattuordecillion"),
    (1e48, "quinquadecillion"),
    (1e51, "sedecillion"),
    (1e54, "septendecillion"),
    (1e57, "octodecillion"),
    (1e60, "novendecillion"),
    (1e63, "vigintillion"),
    (1e66, "unvigintillion"),
    (1e69, "duovigintillion"),
    (1e72, "tresvigintillion"),
    (1e75, "quattuorvigintillion"),
    (1e78, "quinquavigintillion"),
    (1e81, "sesvigintillion"),
    (1e84, "septemvigintillion"),
    (1e87, "octovigintillion"),
    (1e90, "novemvigintillion"),
    (1e93, "trigintillion"),
    (1e96, "untrigintillion"),
    (1e99, "duotrigintillion"),
    (1e102, "trestrigintillion"),
    (1e105, "quattuortrigintillion"),
    (1e108, "quinquatrigintillion"),
    (1e111, "sestrigintillion"),
    (1e114, "septentrigintillion"),
    (1e117, "octotrigintillion"),
    (1e120, "noventrigintillion"),
    (1e123, "quadragintillion"),
    (1e153, "quinquagintillion"),
    (1e183, "sexagintillion"),
    (1e213, "septuagintillion"),
    (1e243, "octogintillion"),
    (1e273, "nonagintillion"),
    (1e303, "centillion"),
    (1e306, "uncentillion"),
    # beyond the float range from here on: exact integers
    (10 ** 309, "duocentillion"),
    (10 ** 312, "trescentillion"),
    (10 ** 333, "decicentillion"),
    (10 ** 336, "undecicentillion"),
    (10 ** 363, "viginticentillion"),
    (10 ** 366, "unviginticentillion"),
    (10 ** 393, "trigintacentillion"),
    (10 ** 423, "quadragintacentillion"),
    (10 ** 453, "quinquagintacentillion"),
    (10 ** 483, "sexagintacentillion"),
    (10 ** 513, "septuagintacentillion"),
    (10 ** 543, "octogintacentillion"),
    (10 ** 573, "nonagintacentillion"),
    (10 ** 603, "ducentillion"),
    (10 ** 903, "trecentillion"),
    (10 ** 1203, "quadringentillion"),
    (10 ** 1503, "quingentillion"),
    (10 ** 1803, "sescentillion"),
    (10 ** 2103, "septingentillion"),
    (10 ** 2403, "octingentillion"),
    (10 ** 2703, "nongentillion"),
    (10 ** 3003, "millinillion")
])
def nice_number_en(number, speech, denominators): def nice_number_en(number, speech, denominators):
""" English helper for nice_number """ English helper for nice_number
@ -119,7 +221,7 @@ def nice_number_en(number, speech, denominators):
return return_string return return_string
def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
    """
    Convert a number to its spoken English equivalent.

    Whole numbers are spoken in groups ("one thousand, four hundred and
    fifty six"); a fractional part is spoken digit by digit after "point".

    Args:
        num (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool): use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    from decimal import Decimal

    if scientific:
        # '%E' renders e.g. 299792458 as '2.997925E+08'; drop the '+' so the
        # exponent parses as a plain (possibly negative) integer.
        mantissa, exponent = ('%E' % num).replace("+", "").split("E")
        exponent = int(exponent)
        if exponent != 0:
            return (pronounce_number_en(float(mantissa), places,
                                        short_scale, False) +
                    " times ten to the power of " +
                    pronounce_number_en(exponent, places, short_scale, False))
        # an exponent of zero reads better as an ordinary number

    scale = SHORT_SCALE_EN if short_scale else LONG_SCALE_EN
    number_names = NUM_STRING_EN.copy()
    number_names.update(scale)
    digits = [number_names[n] for n in range(0, 20)]
    tens = [number_names[n] for n in range(10, 100, 10)]
    # NOTE(review): scale words are looked up by position below, which
    # assumes the table lists consecutive powers without gaps; that holds
    # only for the lower part of the tables - confirm before relying on the
    # output for extremely large values.
    scale_names = list(scale.values())

    def _split_groups(n, size):
        """Split n into base-`size` groups, least significant first."""
        groups = []
        while n:
            n, group = divmod(n, size)
            groups.append(group)
        return groups

    def _sub_thousand(n):
        """Pronounce an integer between 0 and 999."""
        if n <= 19:
            return digits[n]
        if n <= 99:
            q, r = divmod(n, 10)
            return tens[q - 1] + (" " + digits[r] if r else "")
        q, r = divmod(n, 100)
        return digits[q] + " hundred" + (
            " and " + _sub_thousand(r) if r else "")

    def _short_scale(n):
        """Pronounce a positive integer, one scale word per 1000."""
        parts = []
        for i, group in enumerate(_split_groups(n, 1000)):
            if not group:
                # an empty group is silent; skipping it also avoids
                # dangling ", " separators
                continue
            words = _sub_thousand(group)
            if i:
                words += " " + scale_names[i]
            parts.append(words)
        return ", ".join(reversed(parts))

    def _long_scale(n):
        """Pronounce a positive integer, one scale word per million."""
        parts = []
        for i, group in enumerate(_split_groups(n, 1000000)):
            if not group:
                continue
            if i == 0:
                # below one million both scales read the same
                parts.append(_short_scale(group))
                continue
            thousands, rest = divmod(group, 1000)
            words = []
            if thousands:
                words.append(_sub_thousand(thousands) + " thousand")
            if rest:
                words.append(_sub_thousand(rest))
            # scale_names is [hundred, thousand, million, billion, ...], so
            # the i-th group of millions is named by entry i + 1
            parts.append(" ".join(words) + " " + scale_names[i + 1])
        return ", ".join(reversed(parts))

    # deal with negatives
    result = ""
    if num < 0:
        result = "negative "
        num = abs(num)

    if num in number_names:
        # direct match; bare scale words need a leading "one"
        if num > 90:
            result += "one "
        result += number_names[num]
    elif not short_scale and num >= 10e153:
        result += "infinity"
    else:
        whole = int(num)
        if whole == 0:
            # keep the leading "zero" for values such as 0.5
            result += digits[0]
        elif short_scale:
            result += _short_scale(whole)
        else:
            result += _long_scale(whole)

    # Deal with fractional part
    if places > 0 and not num == int(num):
        # Read the digits from the shortest decimal representation instead
        # of repeated float multiplication, so zeros inside the fraction
        # (1.05) are spoken and no float noise leaks into the output.
        fraction = format(Decimal(repr(float(num))), "f").partition(".")[2]
        fraction = fraction[:places].rstrip("0")
        if fraction:
            result += " point " + " ".join(
                digits[int(char)] for char in fraction)
    return result

View File

@ -19,106 +19,8 @@ from datetime import datetime
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
from mycroft.util.lang.format_en import NUM_STRING_EN from mycroft.util.lang.format_en import NUM_STRING_EN, LONG_SCALE_EN, \
SHORT_SCALE_EN
LONG_SCALE_EN = {
10e12: "billion",
10e18: 'trillion',
10e24: "quadrillion",
10e30: "quintillion",
10e36: "sextillion",
10e42: "septillion",
10e48: "octillion",
10e54: "nonillion",
10e60: "decillion",
10e66: "undecillion",
10e72: "duodecillion",
10e78: "tredecillion",
10e84: "quattuordecillion",
10e90: "quinquadecillion",
10e96: "sedecillion",
10e102: "septendecillion",
10e108: "octodecillion",
10e114: "novendecillion",
10e120: "vigintillion",
10e306: "unquinquagintillion",
10e312: "duoquinquagintillion",
10e336: "sesquinquagintillion",
10e366: "unsexagintillion",
10e100: "googol"
}
SHORT_SCALE_EN = {
10e9: "billion",
10e10: 'trillion',
10e15: "quadrillion",
10e18: "quintillion",
10e21: "sextillion",
10e24: "septillion",
10e27: "octillion",
10e30: "nonillion",
10e33: "decillion",
10e36: "undecillion",
10e39: "duodecillion",
10e42: "tredecillion",
10e45: "quattuordecillion",
10e48: "quinquadecillion",
10e51: "sedecillion",
10e54: "septendecillion",
10e57: "octodecillion",
10e60: "novendecillion",
10e63: "vigintillion",
10e66: "unvigintillion",
10e69: "uuovigintillion",
10e72: "tresvigintillion",
10e75: "quattuorvigintillion",
10e78: "quinquavigintillion",
10e81: "qesvigintillion",
10e84: "septemvigintillion",
10e87: "octovigintillion",
10e90: "novemvigintillion",
10e93: "trigintillion",
10e96: "untrigintillion",
10e99: "duotrigintillion",
10e102: "trestrigintillion",
10e105: "quattuortrigintillion",
10e108: "quinquatrigintillion",
10e111: "sestrigintillion",
10e114: "septentrigintillion",
10e117: "octotrigintillion",
10e120: "noventrigintillion",
10e123: "quadragintillion",
10e153: "quinquagintillion",
10e183: "sexagintillion",
10e213: "septuagintillion",
10e243: "octogintillion",
10e273: "nonagintillion",
10e303: "centillion",
10e306: "uncentillion",
10e309: "duocentillion",
10e312: "trescentillion",
10e333: "decicentillion",
10e336: "undecicentillion",
10e363: "viginticentillion",
10e366: "unviginticentillion",
10e393: "trigintacentillion",
10e423: "quadragintacentillion",
10e453: "quinquagintacentillion",
10e483: "sexagintacentillion",
10e513: "septuagintacentillion",
10e543: "ctogintacentillion",
10e573: "nonagintacentillion",
10e603: "ducentillion",
10e903: "trecentillion",
10e1203: "quadringentillion",
10e1503: "quingentillion",
10e1803: "sescentillion",
10e2103: "septingentillion",
10e2403: "octingentillion",
10e2703: "nongentillion",
10e3003: "millinillion",
10e100: "googol"
}
SHORT_ORDINAL_STRING_EN = { SHORT_ORDINAL_STRING_EN = {
1: 'first', 1: 'first',
@ -227,8 +129,11 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
string_num_en = { string_num_en = {
"half": 0.5, "half": 0.5,
"halves": 0.5, "halves": 0.5,
"hundred": 100,
"hundreds": 100, "hundreds": 100,
"thousand": 1000,
"thousands": 1000, "thousands": 1000,
"million": 1000000,
'millions': 1000000} 'millions': 1000000}
for num in NUM_STRING_EN: for num in NUM_STRING_EN:

View File

@ -143,6 +143,50 @@ class TestPronounceNumber(unittest.TestCase):
self.assertEqual(pronounce_number(-21.234, places=5), self.assertEqual(pronounce_number(-21.234, places=5),
"negative twenty one point two three four") "negative twenty one point two three four")
def test_convert_hundreds(self):
    """Whole numbers of one hundred and above, in short and long scale."""
    big = 95505896639631893
    # (number, expected short-scale pronunciation)
    short_scale_cases = [
        (100, "one hundred"),
        (666, "six hundred and sixty six"),
        (1456, "one thousand, four hundred and fifty six"),
        (103254654,
         "one hundred and three million, two hundred and fifty four "
         "thousand, six hundred and fifty four"),
        (1512457,
         "one million, five hundred and twelve thousand, four hundred "
         "and fifty seven"),
        (209996,
         "two hundred and nine thousand, nine hundred and ninety six"),
        (big,
         "ninety five quadrillion, five hundred and five trillion, "
         "eight hundred and ninety six billion, six hundred and thirty "
         "nine million, six hundred and thirty one thousand, eight "
         "hundred and ninety three"),
    ]
    for number, spoken in short_scale_cases:
        self.assertEqual(pronounce_number(number), spoken)

    # the same value read on the long scale
    long_scale_spoken = (
        "ninety five thousand five hundred and five billion, eight "
        "hundred and ninety six thousand six hundred and thirty nine "
        "million, six hundred and thirty one thousand, eight hundred "
        "and ninety three")
    self.assertEqual(pronounce_number(big, short_scale=False),
                     long_scale_spoken)
def test_convert_scientific_notation(self):
    """Numbers spoken as mantissa times a power of ten."""
    # (number, extra keyword arguments, expected pronunciation)
    cases = [
        (0, {}, "zero"),
        (33, {}, "three point three times ten to the power of one"),
        (299792458, {},
         "two point nine nine times ten to the power of eight"),
        (299792458, {"places": 6},
         "two point nine nine seven nine two five times ten to the "
         "power of eight"),
        (1.672e-27, {"places": 3},
         "one point six seven two times ten to the power of negative "
         "twenty seven"),
    ]
    for number, options, spoken in cases:
        self.assertEqual(
            pronounce_number(number, scientific=True, **options), spoken)
# def nice_time(dt, lang="en-us", speech=True, use_24hour=False, # def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
# use_ampm=False): # use_ampm=False):

View File

@ -91,17 +91,17 @@ class TestNormalize(unittest.TestCase):
self.assertEqual(extractnumber("two million"), 2000000) self.assertEqual(extractnumber("two million"), 2000000)
self.assertEqual(extractnumber("two million five hundred thousand " self.assertEqual(extractnumber("two million five hundred thousand "
"tons of spinning metal"), 2500000) "tons of spinning metal"), 2500000)
self.assertEqual(extractnumber("six trillion"), 600000000000.0) self.assertEqual(extractnumber("six trillion"), 60000000000.0)
self.assertEqual(extractnumber("six trillion", short_scale=False), self.assertEqual(extractnumber("six trillion", short_scale=False),
6e+19) 6e+18)
self.assertEqual(extractnumber("one point five"), 1.5) self.assertEqual(extractnumber("one point five"), 1.5)
self.assertEqual(extractnumber("three dot fourteen"), 3.14) self.assertEqual(extractnumber("three dot fourteen"), 3.14)
self.assertEqual(extractnumber("zero point two"), 0.2) self.assertEqual(extractnumber("zero point two"), 0.2)
self.assertEqual(extractnumber("billions of years older"), self.assertEqual(extractnumber("billions of years older"),
10000000000.0) 1000000000.0)
self.assertEqual(extractnumber("billions of years older", self.assertEqual(extractnumber("billions of years older",
short_scale=False), short_scale=False),
10000000000000.0) 1000000000000.0)
self.assertEqual(extractnumber("one hundred thousand"), 100000) self.assertEqual(extractnumber("one hundred thousand"), 100000)
self.assertEqual(extractnumber("minus 2"), -2) self.assertEqual(extractnumber("minus 2"), -2)
self.assertEqual(extractnumber("negative seventy"), -70) self.assertEqual(extractnumber("negative seventy"), -70)