Merge pull request #1653 from JarbasAl/feature/pronounce_scientific
Feature/pronounce scientific (pull/1709/head)
commit
7b54149bcd
|
@ -254,7 +254,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
|
|||
return str(dt)
|
||||
|
||||
|
||||
def pronounce_number(number, lang="en-us", places=2):
|
||||
def pronounce_number(number, lang="en-us", places=2, short_scale=True,
|
||||
scientific=False):
|
||||
"""
|
||||
Convert a number to its spoken equivalent
|
||||
|
||||
|
@ -262,12 +263,17 @@ def pronounce_number(number, lang="en-us", places=2):
|
|||
|
||||
Args:
|
||||
number: the number to pronounce
|
||||
short_scale (bool) : use short (True) or long scale (False)
|
||||
https://en.wikipedia.org/wiki/Names_of_large_numbers
|
||||
scientific (bool) : convert and pronounce in scientific notation
|
||||
Returns:
|
||||
(str): The pronounced number
|
||||
"""
|
||||
lang_lower = str(lang).lower()
|
||||
if lang_lower.startswith("en"):
|
||||
return pronounce_number_en(number, places=places)
|
||||
return pronounce_number_en(number, places=places,
|
||||
short_scale=short_scale,
|
||||
scientific=scientific)
|
||||
elif lang_lower.startswith("it"):
|
||||
return pronounce_number_it(number, places=places)
|
||||
elif lang_lower.startswith("fr"):
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
import collections
|
||||
|
||||
|
||||
NUM_STRING_EN = {
|
||||
0: 'zero',
|
||||
|
@ -45,10 +47,7 @@ NUM_STRING_EN = {
|
|||
60: 'sixty',
|
||||
70: 'seventy',
|
||||
80: 'eighty',
|
||||
90: 'ninety',
|
||||
100: 'hundred',
|
||||
1000: 'thousand',
|
||||
1000000: 'million'
|
||||
90: 'ninety'
|
||||
}
|
||||
|
||||
FRACTION_STRING_EN = {
|
||||
|
@ -73,6 +72,109 @@ FRACTION_STRING_EN = {
|
|||
20: 'twentyith'
|
||||
}
|
||||
|
||||
# Long-scale magnitude names, in ascending order. After 'million' each new
# name is a million times the previous one, see
# https://en.wikipedia.org/wiki/Names_of_large_numbers
# The order of the entries is significant: pronounce_number_en() turns the
# values into a positional list of group names.
# NOTE(review): 1e312, 1e336 and 1e366 are beyond the float range and all
# evaluate to inf, so they share a single key that ends up holding
# "unsexagintillion".
LONG_SCALE_EN = collections.OrderedDict()
LONG_SCALE_EN[100] = 'hundred'
LONG_SCALE_EN[1000] = 'thousand'
LONG_SCALE_EN[1000000] = 'million'
LONG_SCALE_EN[1e12] = "billion"
LONG_SCALE_EN[1e18] = 'trillion'
LONG_SCALE_EN[1e24] = "quadrillion"
LONG_SCALE_EN[1e30] = "quintillion"
LONG_SCALE_EN[1e36] = "sextillion"
LONG_SCALE_EN[1e42] = "septillion"
LONG_SCALE_EN[1e48] = "octillion"
LONG_SCALE_EN[1e54] = "nonillion"
LONG_SCALE_EN[1e60] = "decillion"
LONG_SCALE_EN[1e66] = "undecillion"
LONG_SCALE_EN[1e72] = "duodecillion"
LONG_SCALE_EN[1e78] = "tredecillion"
LONG_SCALE_EN[1e84] = "quattuordecillion"
LONG_SCALE_EN[1e90] = "quinquadecillion"
LONG_SCALE_EN[1e96] = "sedecillion"
LONG_SCALE_EN[1e102] = "septendecillion"
LONG_SCALE_EN[1e108] = "octodecillion"
LONG_SCALE_EN[1e114] = "novendecillion"
LONG_SCALE_EN[1e120] = "vigintillion"
LONG_SCALE_EN[1e306] = "unquinquagintillion"
LONG_SCALE_EN[1e312] = "duoquinquagintillion"
LONG_SCALE_EN[1e336] = "sesquinquagintillion"
LONG_SCALE_EN[1e366] = "unsexagintillion"
|
||||
|
||||
# Short-scale magnitude names, in ascending order, see
# https://en.wikipedia.org/wiki/Names_of_large_numbers
# The order of the entries is significant: pronounce_number_en() turns the
# values into a positional list where entry i names the i-th group of three
# digits ('thousand', 'million', 'billion', ...).
# NOTE(review): float literals from 1e309 upwards are beyond the float range
# and evaluate to inf, so every entry from "duocentillion" on shares a single
# inf key that ends up holding "millinillion". The keys are kept as floats
# here so callers doing float arithmetic with them keep working.
SHORT_SCALE_EN = collections.OrderedDict([
    (100, 'hundred'),
    (1000, 'thousand'),
    (1000000, 'million'),
    (1e9, "billion"),
    (1e12, 'trillion'),
    (1e15, "quadrillion"),
    (1e18, "quintillion"),
    (1e21, "sextillion"),
    (1e24, "septillion"),
    (1e27, "octillion"),
    (1e30, "nonillion"),
    (1e33, "decillion"),
    (1e36, "undecillion"),
    (1e39, "duodecillion"),
    (1e42, "tredecillion"),
    (1e45, "quattuordecillion"),
    (1e48, "quinquadecillion"),
    (1e51, "sedecillion"),
    (1e54, "septendecillion"),
    (1e57, "octodecillion"),
    (1e60, "novendecillion"),
    (1e63, "vigintillion"),
    (1e66, "unvigintillion"),
    (1e69, "duovigintillion"),
    (1e72, "tresvigintillion"),
    (1e75, "quattuorvigintillion"),
    (1e78, "quinquavigintillion"),
    (1e81, "sesvigintillion"),
    (1e84, "septemvigintillion"),
    (1e87, "octovigintillion"),
    (1e90, "novemvigintillion"),
    (1e93, "trigintillion"),
    (1e96, "untrigintillion"),
    (1e99, "duotrigintillion"),
    (1e102, "trestrigintillion"),
    (1e105, "quattuortrigintillion"),
    (1e108, "quinquatrigintillion"),
    (1e111, "sestrigintillion"),
    (1e114, "septentrigintillion"),
    (1e117, "octotrigintillion"),
    (1e120, "noventrigintillion"),
    (1e123, "quadragintillion"),
    (1e153, "quinquagintillion"),
    (1e183, "sexagintillion"),
    (1e213, "septuagintillion"),
    (1e243, "octogintillion"),
    (1e273, "nonagintillion"),
    (1e303, "centillion"),
    (1e306, "uncentillion"),
    (1e309, "duocentillion"),
    (1e312, "trescentillion"),
    (1e333, "decicentillion"),
    (1e336, "undecicentillion"),
    (1e363, "viginticentillion"),
    (1e366, "unviginticentillion"),
    (1e393, "trigintacentillion"),
    (1e423, "quadragintacentillion"),
    (1e453, "quinquagintacentillion"),
    (1e483, "sexagintacentillion"),
    (1e513, "septuagintacentillion"),
    (1e543, "octogintacentillion"),
    (1e573, "nonagintacentillion"),
    (1e603, "ducentillion"),
    (1e903, "trecentillion"),
    (1e1203, "quadringentillion"),
    (1e1503, "quingentillion"),
    (1e1803, "sescentillion"),
    (1e2103, "septingentillion"),
    (1e2403, "octingentillion"),
    (1e2703, "nongentillion"),
    (1e3003, "millinillion")
])
|
||||
|
||||
|
||||
def nice_number_en(number, speech, denominators):
|
||||
""" English helper for nice_number
|
||||
|
@ -119,7 +221,7 @@ def nice_number_en(number, speech, denominators):
|
|||
return return_string
|
||||
|
||||
|
||||
def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
    """
    Convert a number to its spoken equivalent

    For example, 21.5 is spoken as 'twenty one point five' and 1456 as
    'one thousand, four hundred and fifty six'.

    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    if scientific:
        # '%E' renders e.g. 299792458 as '2.997925E+08'; the mantissa and
        # the exponent are then spoken separately in plain notation.
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            return pronounce_number_en(float(n), places, short_scale, False) \
                + " times ten to the power of " + \
                pronounce_number_en(power, places, short_scale, False)

    scale = SHORT_SCALE_EN if short_scale else LONG_SCALE_EN
    number_names = NUM_STRING_EN.copy()
    number_names.update(scale)

    digits = [number_names[n] for n in range(0, 20)]
    tens = [number_names[n] for n in range(10, 100, 10)]
    # Magnitude names by position: 'hundred', 'thousand', 'million', ...
    hundreds = list(scale.values())

    def _sub_thousand(n):
        # Spell out an integer in the range 0..999.
        assert 0 <= n <= 999
        if n <= 19:
            return digits[n]
        elif n <= 99:
            q, r = divmod(n, 10)
            return tens[q - 1] + (" " + _sub_thousand(r) if r else "")
        else:
            q, r = divmod(n, 100)
            return digits[q] + " hundred" + (
                " and " + _sub_thousand(r) if r else "")

    def _split_groups(n, base):
        # Split a non-negative integer into base-sized groups, least
        # significant group first.
        groups = []
        while n:
            n, r = divmod(n, base)
            groups.append(r)
        return groups

    def _short_scale(n):
        # Name every group of three digits: thousand, million, billion...
        n = int(n)
        if n == 0:
            # no groups to name, e.g. the integer part of 0.5
            return digits[0]
        groups = _split_groups(n, 1000)
        if len(groups) > len(hundreds):
            # more groups than names available
            return "infinity"
        words = []
        for i, z in enumerate(groups):
            if not z:
                # empty groups are silent (2000000 is just "two million")
                continue
            words.append(_sub_thousand(z) + (" " + hundreds[i] if i else ""))
        return ", ".join(reversed(words))

    def _long_scale(n):
        # Name every group of six digits: million, billion, trillion...
        if n >= 10e153:
            return "infinity"
        n = int(n)
        if n == 0:
            return digits[0]
        groups = _split_groups(n, 1000000)
        if len(groups) >= len(hundreds):
            # more groups than names available
            return "infinity"
        words = []
        for i, z in enumerate(groups):
            if not z:
                continue
            # each group is below one million: "<x> thousand, <y>"
            text = _short_scale(z)
            if i:
                # Inside a named group the comma after 'thousand' is
                # dropped; + 1 skips 'hundred' and 'thousand' in the table.
                text = text.replace(",", "") + " " + hundreds[i + 1]
            words.append(text)
        return ", ".join(reversed(words))

    # deal with negatives
    result = ""
    if num < 0:
        result = "negative "
    num = abs(num)

    # check for a direct match
    if num in number_names:
        if num > 90:
            # magnitude names need a leading count: "one hundred"
            result += "one "
        result += number_names[num]
    elif short_scale:
        result += _short_scale(num)
    else:
        result += _long_scale(num)

    # Deal with fractional part
    if num != int(num) and places > 0:
        from decimal import Decimal
        # Work on the fixed-point text of the number so zeros between
        # digits are spoken (1.05 -> "zero five") and exponent forms such
        # as 1e-07 are expanded.
        decimals = format(Decimal(repr(float(num))), "f").partition(".")[2]
        decimals = decimals[:places].rstrip("0")
        if decimals:
            result += " point " + " ".join(
                number_names[int(digit)] for digit in decimals)
    return result
|
||||
|
|
|
@ -19,106 +19,8 @@ from datetime import datetime
|
|||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
|
||||
from mycroft.util.lang.format_en import NUM_STRING_EN
|
||||
|
||||
LONG_SCALE_EN = {
|
||||
10e12: "billion",
|
||||
10e18: 'trillion',
|
||||
10e24: "quadrillion",
|
||||
10e30: "quintillion",
|
||||
10e36: "sextillion",
|
||||
10e42: "septillion",
|
||||
10e48: "octillion",
|
||||
10e54: "nonillion",
|
||||
10e60: "decillion",
|
||||
10e66: "undecillion",
|
||||
10e72: "duodecillion",
|
||||
10e78: "tredecillion",
|
||||
10e84: "quattuordecillion",
|
||||
10e90: "quinquadecillion",
|
||||
10e96: "sedecillion",
|
||||
10e102: "septendecillion",
|
||||
10e108: "octodecillion",
|
||||
10e114: "novendecillion",
|
||||
10e120: "vigintillion",
|
||||
10e306: "unquinquagintillion",
|
||||
10e312: "duoquinquagintillion",
|
||||
10e336: "sesquinquagintillion",
|
||||
10e366: "unsexagintillion",
|
||||
10e100: "googol"
|
||||
}
|
||||
|
||||
SHORT_SCALE_EN = {
|
||||
10e9: "billion",
|
||||
10e10: 'trillion',
|
||||
10e15: "quadrillion",
|
||||
10e18: "quintillion",
|
||||
10e21: "sextillion",
|
||||
10e24: "septillion",
|
||||
10e27: "octillion",
|
||||
10e30: "nonillion",
|
||||
10e33: "decillion",
|
||||
10e36: "undecillion",
|
||||
10e39: "duodecillion",
|
||||
10e42: "tredecillion",
|
||||
10e45: "quattuordecillion",
|
||||
10e48: "quinquadecillion",
|
||||
10e51: "sedecillion",
|
||||
10e54: "septendecillion",
|
||||
10e57: "octodecillion",
|
||||
10e60: "novendecillion",
|
||||
10e63: "vigintillion",
|
||||
10e66: "unvigintillion",
|
||||
10e69: "uuovigintillion",
|
||||
10e72: "tresvigintillion",
|
||||
10e75: "quattuorvigintillion",
|
||||
10e78: "quinquavigintillion",
|
||||
10e81: "qesvigintillion",
|
||||
10e84: "septemvigintillion",
|
||||
10e87: "octovigintillion",
|
||||
10e90: "novemvigintillion",
|
||||
10e93: "trigintillion",
|
||||
10e96: "untrigintillion",
|
||||
10e99: "duotrigintillion",
|
||||
10e102: "trestrigintillion",
|
||||
10e105: "quattuortrigintillion",
|
||||
10e108: "quinquatrigintillion",
|
||||
10e111: "sestrigintillion",
|
||||
10e114: "septentrigintillion",
|
||||
10e117: "octotrigintillion",
|
||||
10e120: "noventrigintillion",
|
||||
10e123: "quadragintillion",
|
||||
10e153: "quinquagintillion",
|
||||
10e183: "sexagintillion",
|
||||
10e213: "septuagintillion",
|
||||
10e243: "octogintillion",
|
||||
10e273: "nonagintillion",
|
||||
10e303: "centillion",
|
||||
10e306: "uncentillion",
|
||||
10e309: "duocentillion",
|
||||
10e312: "trescentillion",
|
||||
10e333: "decicentillion",
|
||||
10e336: "undecicentillion",
|
||||
10e363: "viginticentillion",
|
||||
10e366: "unviginticentillion",
|
||||
10e393: "trigintacentillion",
|
||||
10e423: "quadragintacentillion",
|
||||
10e453: "quinquagintacentillion",
|
||||
10e483: "sexagintacentillion",
|
||||
10e513: "septuagintacentillion",
|
||||
10e543: "ctogintacentillion",
|
||||
10e573: "nonagintacentillion",
|
||||
10e603: "ducentillion",
|
||||
10e903: "trecentillion",
|
||||
10e1203: "quadringentillion",
|
||||
10e1503: "quingentillion",
|
||||
10e1803: "sescentillion",
|
||||
10e2103: "septingentillion",
|
||||
10e2403: "octingentillion",
|
||||
10e2703: "nongentillion",
|
||||
10e3003: "millinillion",
|
||||
10e100: "googol"
|
||||
}
|
||||
from mycroft.util.lang.format_en import NUM_STRING_EN, LONG_SCALE_EN, \
|
||||
SHORT_SCALE_EN
|
||||
|
||||
SHORT_ORDINAL_STRING_EN = {
|
||||
1: 'first',
|
||||
|
@ -227,8 +129,11 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
|
|||
string_num_en = {
|
||||
"half": 0.5,
|
||||
"halves": 0.5,
|
||||
"hundred": 100,
|
||||
"hundreds": 100,
|
||||
"thousand": 1000,
|
||||
"thousands": 1000,
|
||||
"million": 1000000,
|
||||
'millions': 1000000}
|
||||
|
||||
for num in NUM_STRING_EN:
|
||||
|
|
|
@ -143,6 +143,50 @@ class TestPronounceNumber(unittest.TestCase):
|
|||
self.assertEqual(pronounce_number(-21.234, places=5),
|
||||
"negative twenty one point two three four")
|
||||
|
||||
def test_convert_hundreds(self):
    """pronounce_number() on whole numbers of one hundred and above."""
    # (number, expected short-scale wording), checked in this order
    short_scale_cases = (
        (100, "one hundred"),
        (666, "six hundred and sixty six"),
        (1456, "one thousand, four hundred and fifty six"),
        (103254654,
         "one hundred and three million, two hundred and fifty four "
         "thousand, six hundred and fifty four"),
        (1512457,
         "one million, five hundred and twelve thousand, four hundred "
         "and fifty seven"),
        (209996,
         "two hundred and nine thousand, nine hundred and ninety six"),
        (95505896639631893,
         "ninety five quadrillion, five hundred and five trillion, "
         "eight hundred and ninety six billion, six hundred and thirty "
         "nine million, six hundred and thirty one thousand, eight "
         "hundred and ninety three"),
    )
    for number, spoken in short_scale_cases:
        self.assertEqual(pronounce_number(number), spoken)

    # the largest case again, this time with long-scale names
    long_scale_spoken = (
        "ninety five thousand five hundred and five billion, eight "
        "hundred and ninety six thousand six hundred and thirty nine "
        "million, six hundred and thirty one thousand, eight hundred "
        "and ninety three")
    self.assertEqual(
        pronounce_number(95505896639631893, short_scale=False),
        long_scale_spoken)
|
||||
|
||||
def test_convert_scientific_notation(self):
    """pronounce_number() with scientific=True."""
    # (number, extra keyword arguments, expected wording)
    cases = (
        (0, {}, "zero"),
        (33, {}, "three point three times ten to the power of one"),
        (299792458, {},
         "two point nine nine times ten to the power of eight"),
        (299792458, {"places": 6},
         "two point nine nine seven nine two five times "
         "ten to the power of eight"),
        (1.672e-27, {"places": 3},
         "one point six seven two times ten to the power of "
         "negative twenty seven"),
    )
    for number, options, spoken in cases:
        self.assertEqual(
            pronounce_number(number, scientific=True, **options), spoken)
|
||||
|
||||
|
||||
# def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
|
||||
# use_ampm=False):
|
||||
|
|
|
@ -91,17 +91,17 @@ class TestNormalize(unittest.TestCase):
|
|||
self.assertEqual(extractnumber("two million"), 2000000)
|
||||
self.assertEqual(extractnumber("two million five hundred thousand "
|
||||
"tons of spinning metal"), 2500000)
|
||||
self.assertEqual(extractnumber("six trillion"), 600000000000.0)
|
||||
self.assertEqual(extractnumber("six trillion"), 60000000000.0)
|
||||
self.assertEqual(extractnumber("six trillion", short_scale=False),
|
||||
6e+19)
|
||||
6e+18)
|
||||
self.assertEqual(extractnumber("one point five"), 1.5)
|
||||
self.assertEqual(extractnumber("three dot fourteen"), 3.14)
|
||||
self.assertEqual(extractnumber("zero point two"), 0.2)
|
||||
self.assertEqual(extractnumber("billions of years older"),
|
||||
10000000000.0)
|
||||
1000000000.0)
|
||||
self.assertEqual(extractnumber("billions of years older",
|
||||
short_scale=False),
|
||||
10000000000000.0)
|
||||
1000000000000.0)
|
||||
self.assertEqual(extractnumber("one hundred thousand"), 100000)
|
||||
self.assertEqual(extractnumber("minus 2"), -2)
|
||||
self.assertEqual(extractnumber("negative seventy"), -70)
|
||||
|
|
Loading…
Reference in New Issue