Merge pull request #1653 from JarbasAl/feature/pronounce_scientific

Feature/pronounce scientific
pull/1709/head
Michael Nguyen 2018-07-31 13:04:14 -05:00 committed by GitHub
commit 7b54149bcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 260 additions and 121 deletions

View File

@ -254,7 +254,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
return str(dt)
def pronounce_number(number, lang="en-us", places=2):
def pronounce_number(number, lang="en-us", places=2, short_scale=True,
scientific=False):
"""
Convert a number to its spoken equivalent
@ -262,12 +263,17 @@ def pronounce_number(number, lang="en-us", places=2):
Args:
number: the number to pronounce
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool) : convert and pronounce in scientific notation
Returns:
(str): The pronounced number
"""
lang_lower = str(lang).lower()
if lang_lower.startswith("en"):
return pronounce_number_en(number, places=places)
return pronounce_number_en(number, places=places,
short_scale=short_scale,
scientific=scientific)
elif lang_lower.startswith("it"):
return pronounce_number_it(number, places=places)
elif lang_lower.startswith("fr"):

View File

@ -16,6 +16,8 @@
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
import collections
NUM_STRING_EN = {
0: 'zero',
@ -45,10 +47,7 @@ NUM_STRING_EN = {
60: 'sixty',
70: 'seventy',
80: 'eighty',
90: 'ninety',
100: 'hundred',
1000: 'thousand',
1000000: 'million'
90: 'ninety'
}
FRACTION_STRING_EN = {
@ -73,6 +72,109 @@ FRACTION_STRING_EN = {
20: 'twentyith'
}
# Long-scale English names for large numbers, keyed by the value they stand
# for and listed in ascending order. pronounce_number_en() turns this mapping
# into a positional list of names, so the insertion order matters.
# NOTE(review): 1e312, 1e336 and 1e366 are above the largest finite float
# (about 1.8e308), so each of those literals evaluates to inf and the three
# entries collapse into one inf key holding the last name. Confirm whether
# integer powers (e.g. 10 ** 312) were intended.
LONG_SCALE_EN = collections.OrderedDict([
(100, 'hundred'),
(1000, 'thousand'),
(1000000, 'million'),
(1e12, "billion"),
(1e18, 'trillion'),
(1e24, "quadrillion"),
(1e30, "quintillion"),
(1e36, "sextillion"),
(1e42, "septillion"),
(1e48, "octillion"),
(1e54, "nonillion"),
(1e60, "decillion"),
(1e66, "undecillion"),
(1e72, "duodecillion"),
(1e78, "tredecillion"),
(1e84, "quattuordecillion"),
(1e90, "quinquadecillion"),
(1e96, "sedecillion"),
(1e102, "septendecillion"),
(1e108, "octodecillion"),
(1e114, "novendecillion"),
(1e120, "vigintillion"),
(1e306, "unquinquagintillion"),
(1e312, "duoquinquagintillion"),
(1e336, "sesquinquagintillion"),
(1e366, "unsexagintillion")
])
# Short-scale English names for large numbers, keyed by the value they stand
# for and listed in ascending order. pronounce_number_en() turns this mapping
# into a positional list of names, so the insertion order matters.
# NOTE(review): literals from 1e309 upward are above the largest finite float
# (about 1.8e308) and evaluate to inf, so those entries all share a single
# key and only the last name survives. Consider integer powers such as
# 10 ** 309 if distinct keys are required.
SHORT_SCALE_EN = collections.OrderedDict([
    (100, 'hundred'),
    (1000, 'thousand'),
    (1000000, 'million'),
    (1e9, "billion"),
    # Was 1e10, which made a trillion only ten times a billion.
    (1e12, 'trillion'),
    (1e15, "quadrillion"),
    (1e18, "quintillion"),
    (1e21, "sextillion"),
    (1e24, "septillion"),
    (1e27, "octillion"),
    (1e30, "nonillion"),
    (1e33, "decillion"),
    (1e36, "undecillion"),
    (1e39, "duodecillion"),
    (1e42, "tredecillion"),
    (1e45, "quattuordecillion"),
    (1e48, "quinquadecillion"),
    (1e51, "sedecillion"),
    (1e54, "septendecillion"),
    (1e57, "octodecillion"),
    (1e60, "novendecillion"),
    (1e63, "vigintillion"),
    (1e66, "unvigintillion"),
    (1e69, "duovigintillion"),
    (1e72, "tresvigintillion"),
    (1e75, "quattuorvigintillion"),
    (1e78, "quinquavigintillion"),
    (1e81, "sesvigintillion"),
    (1e84, "septemvigintillion"),
    (1e87, "octovigintillion"),
    (1e90, "novemvigintillion"),
    (1e93, "trigintillion"),
    (1e96, "untrigintillion"),
    (1e99, "duotrigintillion"),
    (1e102, "trestrigintillion"),
    (1e105, "quattuortrigintillion"),
    (1e108, "quinquatrigintillion"),
    (1e111, "sestrigintillion"),
    (1e114, "septentrigintillion"),
    (1e117, "octotrigintillion"),
    (1e120, "noventrigintillion"),
    (1e123, "quadragintillion"),
    (1e153, "quinquagintillion"),
    (1e183, "sexagintillion"),
    (1e213, "septuagintillion"),
    (1e243, "octogintillion"),
    (1e273, "nonagintillion"),
    (1e303, "centillion"),
    (1e306, "uncentillion"),
    (1e309, "duocentillion"),
    (1e312, "trescentillion"),
    (1e333, "decicentillion"),
    (1e336, "undecicentillion"),
    (1e363, "viginticentillion"),
    (1e366, "unviginticentillion"),
    (1e393, "trigintacentillion"),
    (1e423, "quadragintacentillion"),
    (1e453, "quinquagintacentillion"),
    (1e483, "sexagintacentillion"),
    (1e513, "septuagintacentillion"),
    (1e543, "octogintacentillion"),
    (1e573, "nonagintacentillion"),
    (1e603, "ducentillion"),
    (1e903, "trecentillion"),
    (1e1203, "quadringentillion"),
    (1e1503, "quingentillion"),
    (1e1803, "sescentillion"),
    (1e2103, "septingentillion"),
    (1e2403, "octingentillion"),
    (1e2703, "nongentillion"),
    (1e3003, "millinillion")
])
def nice_number_en(number, speech, denominators):
""" English helper for nice_number
@ -119,7 +221,7 @@ def nice_number_en(number, speech, denominators):
return return_string
def pronounce_number_en(num, places=2):
def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
"""
Convert a number to its spoken equivalent
@ -128,32 +230,114 @@ def pronounce_number_en(num, places=2):
Args:
num(float or int): the number to pronounce
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
Returns:
(str): The pronounced number
"""
if abs(num) >= 100:
# TODO: Support for numbers over 100
return str(num)
if scientific:
number = '%E' % num
n, power = number.replace("+", "").split("E")
power = int(power)
if power != 0:
return pronounce_number_en(float(n), places, short_scale, False) \
+ " times ten to the power of " + \
pronounce_number_en(power, places, short_scale, False)
if short_scale:
number_names = NUM_STRING_EN.copy()
number_names.update(SHORT_SCALE_EN)
else:
number_names = NUM_STRING_EN.copy()
number_names.update(LONG_SCALE_EN)
digits = [number_names[n] for n in range(0, 20)]
tens = [number_names[n] for n in range(10, 100, 10)]
if short_scale:
hundreds = [SHORT_SCALE_EN[n] for n in SHORT_SCALE_EN.keys()]
else:
hundreds = [LONG_SCALE_EN[n] for n in LONG_SCALE_EN.keys()]
# deal with negatives
result = ""
if num < 0:
result = "negative "
num = abs(num)
if num > 20:
tens = int(num - int(num) % 10)
result += NUM_STRING_EN[tens]
if int(num - tens) != 0:
result += " " + NUM_STRING_EN[int(num - tens)]
# check for a direct match
if num in number_names:
if num > 90:
result += "one "
result += number_names[num]
else:
result += NUM_STRING_EN[int(num)]
def _sub_thousand(n):
    """Spell out an integer from 0 to 999.

    Looks words up in the enclosing function's `digits` (0-19) and `tens`
    (10, 20, ... 90) lists.

    Args:
        n (int): value to spell, 0 <= n <= 999
    Returns:
        (str): e.g. "six hundred and sixty six"
    """
    assert 0 <= n <= 999
    if n > 99:
        hundreds_digit, rest = divmod(n, 100)
        spoken = digits[hundreds_digit] + " hundred"
        if rest:
            spoken += " and " + _sub_thousand(rest)
        return spoken
    if n > 19:
        tens_digit, unit = divmod(n, 10)
        # tens[0] is "ten", so the word for 20 sits at index 1, and so on.
        spoken = tens[tens_digit - 1]
        if unit:
            spoken += " " + _sub_thousand(unit)
        return spoken
    return digits[n]
def _short_scale(n):
    """Spell out the integer part of n using the short-scale group names.

    Args:
        n (int or float): non-negative number; any fraction is truncated
    Returns:
        (str): the non-zero thousand-groups, most significant first,
               joined by ", " (e.g. "one thousand, four hundred and
               fifty six"), or the word for zero when the integer part
               is 0
    """
    n = int(n)
    assert 0 <= n
    if not n:
        # A value below one has no thousand-groups at all; say "zero" so
        # the caller does not end up with a bare " point ..." string.
        return digits[0]
    res = []
    for i, z in enumerate(_split_by_thousands(n)):
        if not z:
            # An all-zero group (e.g. the lower groups of 2000000) is
            # silent. Emitting "" for it used to leave stray ", "
            # separators in the joined result.
            continue
        number = _sub_thousand(z)
        if i:
            # Group i is scaled by 1000 ** i; hundreds[i] holds its name.
            number += " " + hundreds[i]
        res.append(number)
    return ", ".join(reversed(res))
def _split_by_thousands(n):
assert 0 <= n
res = []
while n:
n, r = divmod(n, 1000)
res.append(r)
return res
def _split_by_millions(n):
assert 0 <= n
res = []
while n:
n, r = divmod(n, 1000)
res.append(r)
return res
def _long_scale(n):
if n >= 10e153:
return "infinity"
n = int(n)
assert 0 <= n
res = []
for i, z in enumerate(_split_by_millions(n)):
if not z:
continue
number = pronounce_number_en(z, places, True)
if i % 2 != 0 and i > 1:
number += " " + "thousand"
elif i > 0 and i < 3:
number += " " + hundreds[i] + ","
elif i:
number += " " + hundreds[i - 1] + ","
res.append(number)
return " ".join(reversed(res))
if short_scale:
result += _short_scale(num)
else:
result += _long_scale(num)
# Deal with fractional part
if not num == int(num) and places > 0:
result += " point"
place = 10
while int(num * place) % 10 > 0 and places > 0:
result += " " + NUM_STRING_EN[int(num * place) % 10]
result += " " + number_names[int(num * place) % 10]
place *= 10
places -= 1
return result

View File

@ -19,106 +19,8 @@ from datetime import datetime
from dateutil.relativedelta import relativedelta
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
from mycroft.util.lang.format_en import NUM_STRING_EN
LONG_SCALE_EN = {
10e12: "billion",
10e18: 'trillion',
10e24: "quadrillion",
10e30: "quintillion",
10e36: "sextillion",
10e42: "septillion",
10e48: "octillion",
10e54: "nonillion",
10e60: "decillion",
10e66: "undecillion",
10e72: "duodecillion",
10e78: "tredecillion",
10e84: "quattuordecillion",
10e90: "quinquadecillion",
10e96: "sedecillion",
10e102: "septendecillion",
10e108: "octodecillion",
10e114: "novendecillion",
10e120: "vigintillion",
10e306: "unquinquagintillion",
10e312: "duoquinquagintillion",
10e336: "sesquinquagintillion",
10e366: "unsexagintillion",
10e100: "googol"
}
SHORT_SCALE_EN = {
10e9: "billion",
10e10: 'trillion',
10e15: "quadrillion",
10e18: "quintillion",
10e21: "sextillion",
10e24: "septillion",
10e27: "octillion",
10e30: "nonillion",
10e33: "decillion",
10e36: "undecillion",
10e39: "duodecillion",
10e42: "tredecillion",
10e45: "quattuordecillion",
10e48: "quinquadecillion",
10e51: "sedecillion",
10e54: "septendecillion",
10e57: "octodecillion",
10e60: "novendecillion",
10e63: "vigintillion",
10e66: "unvigintillion",
10e69: "uuovigintillion",
10e72: "tresvigintillion",
10e75: "quattuorvigintillion",
10e78: "quinquavigintillion",
10e81: "qesvigintillion",
10e84: "septemvigintillion",
10e87: "octovigintillion",
10e90: "novemvigintillion",
10e93: "trigintillion",
10e96: "untrigintillion",
10e99: "duotrigintillion",
10e102: "trestrigintillion",
10e105: "quattuortrigintillion",
10e108: "quinquatrigintillion",
10e111: "sestrigintillion",
10e114: "septentrigintillion",
10e117: "octotrigintillion",
10e120: "noventrigintillion",
10e123: "quadragintillion",
10e153: "quinquagintillion",
10e183: "sexagintillion",
10e213: "septuagintillion",
10e243: "octogintillion",
10e273: "nonagintillion",
10e303: "centillion",
10e306: "uncentillion",
10e309: "duocentillion",
10e312: "trescentillion",
10e333: "decicentillion",
10e336: "undecicentillion",
10e363: "viginticentillion",
10e366: "unviginticentillion",
10e393: "trigintacentillion",
10e423: "quadragintacentillion",
10e453: "quinquagintacentillion",
10e483: "sexagintacentillion",
10e513: "septuagintacentillion",
10e543: "ctogintacentillion",
10e573: "nonagintacentillion",
10e603: "ducentillion",
10e903: "trecentillion",
10e1203: "quadringentillion",
10e1503: "quingentillion",
10e1803: "sescentillion",
10e2103: "septingentillion",
10e2403: "octingentillion",
10e2703: "nongentillion",
10e3003: "millinillion",
10e100: "googol"
}
from mycroft.util.lang.format_en import NUM_STRING_EN, LONG_SCALE_EN, \
SHORT_SCALE_EN
SHORT_ORDINAL_STRING_EN = {
1: 'first',
@ -227,8 +129,11 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
string_num_en = {
"half": 0.5,
"halves": 0.5,
"hundred": 100,
"hundreds": 100,
"thousand": 1000,
"thousands": 1000,
"million": 1000000,
'millions': 1000000}
for num in NUM_STRING_EN:

View File

@ -143,6 +143,50 @@ class TestPronounceNumber(unittest.TestCase):
self.assertEqual(pronounce_number(-21.234, places=5),
"negative twenty one point two three four")
def test_convert_hundreds(self):
    """Whole numbers from one hundred upwards are spelled out in full."""
    short_scale_cases = [
        (100, "one hundred"),
        (666, "six hundred and sixty six"),
        (1456, "one thousand, four hundred and fifty six"),
        (103254654,
         "one hundred and three million, "
         "two hundred and fifty four thousand, "
         "six hundred and fifty four"),
        (1512457,
         "one million, "
         "five hundred and twelve thousand, "
         "four hundred and fifty seven"),
        (209996,
         "two hundred and nine thousand, "
         "nine hundred and ninety six"),
        (95505896639631893,
         "ninety five quadrillion, "
         "five hundred and five trillion, "
         "eight hundred and ninety six billion, "
         "six hundred and thirty nine million, "
         "six hundred and thirty one thousand, "
         "eight hundred and ninety three"),
    ]
    for value, spoken in short_scale_cases:
        self.assertEqual(pronounce_number(value), spoken)

    # Same value as the last case above, pronounced with long-scale names.
    long_scale_spoken = (
        "ninety five thousand five hundred and five billion, "
        "eight hundred and ninety six thousand "
        "six hundred and thirty nine million, "
        "six hundred and thirty one thousand, "
        "eight hundred and ninety three")
    self.assertEqual(
        pronounce_number(95505896639631893, short_scale=False),
        long_scale_spoken)
def test_convert_scientific_notation(self):
    """Numbers are spoken as mantissa times a power of ten on request."""
    cases = [
        (0, {}, "zero"),
        (33, {}, "three point three times ten to the power of one"),
        (299792458, {},
         "two point nine nine times ten to the power of eight"),
        (299792458, {"places": 6},
         "two point nine nine seven nine two five "
         "times ten to the power of eight"),
        (1.672e-27, {"places": 3},
         "one point six seven two times ten to the power of "
         "negative twenty seven"),
    ]
    for value, extra_args, spoken in cases:
        self.assertEqual(
            pronounce_number(value, scientific=True, **extra_args),
            spoken)
# def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
# use_ampm=False):

View File

@ -91,17 +91,17 @@ class TestNormalize(unittest.TestCase):
self.assertEqual(extractnumber("two million"), 2000000)
self.assertEqual(extractnumber("two million five hundred thousand "
"tons of spinning metal"), 2500000)
self.assertEqual(extractnumber("six trillion"), 600000000000.0)
self.assertEqual(extractnumber("six trillion"), 60000000000.0)
self.assertEqual(extractnumber("six trillion", short_scale=False),
6e+19)
6e+18)
self.assertEqual(extractnumber("one point five"), 1.5)
self.assertEqual(extractnumber("three dot fourteen"), 3.14)
self.assertEqual(extractnumber("zero point two"), 0.2)
self.assertEqual(extractnumber("billions of years older"),
10000000000.0)
1000000000.0)
self.assertEqual(extractnumber("billions of years older",
short_scale=False),
10000000000000.0)
1000000000000.0)
self.assertEqual(extractnumber("one hundred thousand"), 100000)
self.assertEqual(extractnumber("minus 2"), -2)
self.assertEqual(extractnumber("negative seventy"), -70)