463 lines
14 KiB
Python
463 lines
14 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2017 Mycroft AI Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
|
import collections
|
|
from mycroft.util.log import LOG
|
|
|
|
|
|
NUM_STRING_EN = {
|
|
0: 'zero',
|
|
1: 'one',
|
|
2: 'two',
|
|
3: 'three',
|
|
4: 'four',
|
|
5: 'five',
|
|
6: 'six',
|
|
7: 'seven',
|
|
8: 'eight',
|
|
9: 'nine',
|
|
10: 'ten',
|
|
11: 'eleven',
|
|
12: 'twelve',
|
|
13: 'thirteen',
|
|
14: 'fourteen',
|
|
15: 'fifteen',
|
|
16: 'sixteen',
|
|
17: 'seventeen',
|
|
18: 'eighteen',
|
|
19: 'nineteen',
|
|
20: 'twenty',
|
|
30: 'thirty',
|
|
40: 'forty',
|
|
50: 'fifty',
|
|
60: 'sixty',
|
|
70: 'seventy',
|
|
80: 'eighty',
|
|
90: 'ninety'
|
|
}
|
|
|
|
FRACTION_STRING_EN = {
|
|
2: 'half',
|
|
3: 'third',
|
|
4: 'forth',
|
|
5: 'fifth',
|
|
6: 'sixth',
|
|
7: 'seventh',
|
|
8: 'eigth',
|
|
9: 'ninth',
|
|
10: 'tenth',
|
|
11: 'eleventh',
|
|
12: 'twelveth',
|
|
13: 'thirteenth',
|
|
14: 'fourteenth',
|
|
15: 'fifteenth',
|
|
16: 'sixteenth',
|
|
17: 'seventeenth',
|
|
18: 'eighteenth',
|
|
19: 'nineteenth',
|
|
20: 'twentyith'
|
|
}
|
|
|
|
LONG_SCALE_EN = collections.OrderedDict([
|
|
(100, 'hundred'),
|
|
(1000, 'thousand'),
|
|
(1000000, 'million'),
|
|
(1e12, "billion"),
|
|
(1e18, 'trillion'),
|
|
(1e24, "quadrillion"),
|
|
(1e30, "quintillion"),
|
|
(1e36, "sextillion"),
|
|
(1e42, "septillion"),
|
|
(1e48, "octillion"),
|
|
(1e54, "nonillion"),
|
|
(1e60, "decillion"),
|
|
(1e66, "undecillion"),
|
|
(1e72, "duodecillion"),
|
|
(1e78, "tredecillion"),
|
|
(1e84, "quattuordecillion"),
|
|
(1e90, "quinquadecillion"),
|
|
(1e96, "sedecillion"),
|
|
(1e102, "septendecillion"),
|
|
(1e108, "octodecillion"),
|
|
(1e114, "novendecillion"),
|
|
(1e120, "vigintillion"),
|
|
(1e306, "unquinquagintillion"),
|
|
(1e312, "duoquinquagintillion"),
|
|
(1e336, "sesquinquagintillion"),
|
|
(1e366, "unsexagintillion")
|
|
])
|
|
|
|
SHORT_SCALE_EN = collections.OrderedDict([
|
|
(100, 'hundred'),
|
|
(1000, 'thousand'),
|
|
(1000000, 'million'),
|
|
(1e9, "billion"),
|
|
(1e10, 'trillion'),
|
|
(1e15, "quadrillion"),
|
|
(1e18, "quintillion"),
|
|
(1e21, "sextillion"),
|
|
(1e24, "septillion"),
|
|
(1e27, "octillion"),
|
|
(1e30, "nonillion"),
|
|
(1e33, "decillion"),
|
|
(1e36, "undecillion"),
|
|
(1e39, "duodecillion"),
|
|
(1e42, "tredecillion"),
|
|
(1e45, "quattuordecillion"),
|
|
(1e48, "quinquadecillion"),
|
|
(1e51, "sedecillion"),
|
|
(1e54, "septendecillion"),
|
|
(1e57, "octodecillion"),
|
|
(1e60, "novendecillion"),
|
|
(1e63, "vigintillion"),
|
|
(1e66, "unvigintillion"),
|
|
(1e69, "uuovigintillion"),
|
|
(1e72, "tresvigintillion"),
|
|
(1e75, "quattuorvigintillion"),
|
|
(1e78, "quinquavigintillion"),
|
|
(1e81, "qesvigintillion"),
|
|
(1e84, "septemvigintillion"),
|
|
(1e87, "octovigintillion"),
|
|
(1e90, "novemvigintillion"),
|
|
(1e93, "trigintillion"),
|
|
(1e96, "untrigintillion"),
|
|
(1e99, "duotrigintillion"),
|
|
(1e102, "trestrigintillion"),
|
|
(1e105, "quattuortrigintillion"),
|
|
(1e108, "quinquatrigintillion"),
|
|
(1e111, "sestrigintillion"),
|
|
(1e114, "septentrigintillion"),
|
|
(1e117, "octotrigintillion"),
|
|
(1e120, "noventrigintillion"),
|
|
(1e123, "quadragintillion"),
|
|
(1e153, "quinquagintillion"),
|
|
(1e183, "sexagintillion"),
|
|
(1e213, "septuagintillion"),
|
|
(1e243, "octogintillion"),
|
|
(1e273, "nonagintillion"),
|
|
(1e303, "centillion"),
|
|
(1e306, "uncentillion"),
|
|
(1e309, "duocentillion"),
|
|
(1e312, "trescentillion"),
|
|
(1e333, "decicentillion"),
|
|
(1e336, "undecicentillion"),
|
|
(1e363, "viginticentillion"),
|
|
(1e366, "unviginticentillion"),
|
|
(1e393, "trigintacentillion"),
|
|
(1e423, "quadragintacentillion"),
|
|
(1e453, "quinquagintacentillion"),
|
|
(1e483, "sexagintacentillion"),
|
|
(1e513, "septuagintacentillion"),
|
|
(1e543, "ctogintacentillion"),
|
|
(1e573, "nonagintacentillion"),
|
|
(1e603, "ducentillion"),
|
|
(1e903, "trecentillion"),
|
|
(1e1203, "quadringentillion"),
|
|
(1e1503, "quingentillion"),
|
|
(1e1803, "sescentillion"),
|
|
(1e2103, "septingentillion"),
|
|
(1e2403, "octingentillion"),
|
|
(1e2703, "nongentillion"),
|
|
(1e3003, "millinillion")
|
|
])
|
|
|
|
|
|
def nice_number_en(number, speech, denominators):
|
|
""" English helper for nice_number
|
|
|
|
This function formats a float to human understandable functions. Like
|
|
4.5 becomes "4 and a half" for speech and "4 1/2" for text
|
|
|
|
Args:
|
|
number (int or float): the float to format
|
|
speech (bool): format for speech (True) or display (False)
|
|
denominators (iter of ints): denominators to use, default [1 .. 20]
|
|
Returns:
|
|
(str): The formatted string.
|
|
"""
|
|
|
|
result = convert_to_mixed_fraction(number, denominators)
|
|
if not result:
|
|
# Give up, just represent as a 3 decimal number
|
|
return str(round(number, 3))
|
|
|
|
whole, num, den = result
|
|
|
|
if not speech:
|
|
if num == 0:
|
|
# TODO: Number grouping? E.g. "1,000,000"
|
|
return str(whole)
|
|
else:
|
|
return '{} {}/{}'.format(whole, num, den)
|
|
|
|
if num == 0:
|
|
return str(whole)
|
|
den_str = FRACTION_STRING_EN[den]
|
|
if whole == 0:
|
|
if num == 1:
|
|
return_string = 'a {}'.format(den_str)
|
|
else:
|
|
return_string = '{} {}'.format(num, den_str)
|
|
elif num == 1:
|
|
return_string = '{} and a {}'.format(whole, den_str)
|
|
else:
|
|
return_string = '{} and {} {}'.format(whole, num, den_str)
|
|
if num > 1:
|
|
return_string += 's'
|
|
return return_string
|
|
|
|
|
|
def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
|
|
"""
|
|
Convert a number to it's spoken equivalent
|
|
|
|
For example, '5.2' would return 'five point two'
|
|
|
|
Args:
|
|
num(float or int): the number to pronounce (under 100)
|
|
places(int): maximum decimal places to speak
|
|
short_scale (bool) : use short (True) or long scale (False)
|
|
https://en.wikipedia.org/wiki/Names_of_large_numbers
|
|
scientific (bool): pronounce in scientific notation
|
|
Returns:
|
|
(str): The pronounced number
|
|
"""
|
|
if scientific:
|
|
number = '%E' % num
|
|
n, power = number.replace("+", "").split("E")
|
|
power = int(power)
|
|
if power != 0:
|
|
return pronounce_number_en(float(n), places, short_scale, False) \
|
|
+ " times ten to the power of " + \
|
|
pronounce_number_en(power, places, short_scale, False)
|
|
if short_scale:
|
|
number_names = NUM_STRING_EN.copy()
|
|
number_names.update(SHORT_SCALE_EN)
|
|
else:
|
|
number_names = NUM_STRING_EN.copy()
|
|
number_names.update(LONG_SCALE_EN)
|
|
|
|
digits = [number_names[n] for n in range(0, 20)]
|
|
|
|
tens = [number_names[n] for n in range(10, 100, 10)]
|
|
|
|
if short_scale:
|
|
hundreds = [SHORT_SCALE_EN[n] for n in SHORT_SCALE_EN.keys()]
|
|
else:
|
|
hundreds = [LONG_SCALE_EN[n] for n in LONG_SCALE_EN.keys()]
|
|
|
|
# deal with negatives
|
|
result = ""
|
|
if num < 0:
|
|
result = "negative "
|
|
num = abs(num)
|
|
|
|
try:
|
|
# deal with 4 digits
|
|
# usually if it's a 4 digit num it should be said like a date
|
|
# i.e. 1972 => nineteen seventy two
|
|
if len(str(num)) == 4:
|
|
_num = str(num)
|
|
# deal with 1000, 2000, 2001, 2100, 3123, etc
|
|
# is skipped as the rest of the
|
|
# functin deals with this already
|
|
if _num[1:4] == '000' or _num[1:3] == '00' or int(_num[0:2]) >= 20:
|
|
pass
|
|
# deal with 1900, 1300, etc
|
|
# i.e. 1900 => nineteen hundred
|
|
elif _num[2:4] == '00':
|
|
first = number_names[int(_num[0:2])]
|
|
last = number_names[100]
|
|
return first + " " + last
|
|
# deal with 1960, 1961, etc
|
|
# i.e. 1960 => nineteen sixty
|
|
# 1961 => nineteen sixty one
|
|
else:
|
|
first = number_names[int(_num[0:2])]
|
|
if _num[3:4] == '0':
|
|
last = number_names[int(_num[2:4])]
|
|
else:
|
|
second = number_names[int(_num[2:3])*10]
|
|
last = second + " " + number_names[int(_num[3:4])]
|
|
return first + " " + last
|
|
# exception used to catch any unforseen edge cases
|
|
# will default back to normal subroutine
|
|
except Exception as e:
|
|
LOG.error('Exception in pronounce_number_en: {}' + repr(e))
|
|
|
|
# check for a direct match
|
|
if num in number_names:
|
|
if num > 90:
|
|
result += "one "
|
|
result += number_names[num]
|
|
else:
|
|
def _sub_thousand(n):
|
|
assert 0 <= n <= 999
|
|
if n <= 19:
|
|
return digits[n]
|
|
elif n <= 99:
|
|
q, r = divmod(n, 10)
|
|
return tens[q - 1] + (" " + _sub_thousand(r) if r else "")
|
|
else:
|
|
q, r = divmod(n, 100)
|
|
return digits[q] + " hundred" + (
|
|
" and " + _sub_thousand(r) if r else "")
|
|
|
|
def _short_scale(n):
|
|
if n > max(SHORT_SCALE_EN.keys()):
|
|
return "infinity"
|
|
n = int(n)
|
|
assert 0 <= n
|
|
# TODO Some more or less obvious error to be fixed here
|
|
res = []
|
|
for i, z in enumerate(_split_by_thousands(n)):
|
|
if not z:
|
|
continue
|
|
number = _sub_thousand(z)
|
|
if i:
|
|
number += " "
|
|
number += hundreds[i]
|
|
res.append(number)
|
|
|
|
return " ".join(reversed(res))
|
|
|
|
def _split_by_thousands(n):
|
|
assert 0 <= n
|
|
res = []
|
|
while n:
|
|
n, r = divmod(n, 1000)
|
|
res.append(r)
|
|
return res
|
|
|
|
def _long_scale(n):
|
|
if n > max(LONG_SCALE_EN.keys()):
|
|
return "infinity"
|
|
n = int(n)
|
|
assert 0 <= n
|
|
res = []
|
|
for i, z in enumerate(_split_by_thousands(n)):
|
|
if not z:
|
|
continue
|
|
number = _sub_thousand(z)
|
|
if i % 2 != 0 and i > 1:
|
|
number += " " + "thousand"
|
|
elif 0 < i < 3:
|
|
number += " " + hundreds[i] + ","
|
|
elif i:
|
|
number += " " + hundreds[i - 1] + ","
|
|
res.append(number)
|
|
return " ".join(reversed(res))
|
|
|
|
if short_scale:
|
|
result += _short_scale(num)
|
|
else:
|
|
result += _long_scale(num)
|
|
|
|
# Deal with fractional part
|
|
if not num == int(num) and places > 0:
|
|
result += " point"
|
|
place = 10
|
|
while int(num * place) % 10 > 0 and places > 0:
|
|
result += " " + number_names[int(num * place) % 10]
|
|
place *= 10
|
|
places -= 1
|
|
return result
|
|
|
|
|
|
def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
|
|
"""
|
|
Format a time to a comfortable human format
|
|
|
|
For example, generate 'five thirty' for speech or '5:30' for
|
|
text display.
|
|
|
|
Args:
|
|
dt (datetime): date to format (assumes already in local timezone)
|
|
speech (bool): format for speech (default/True) or display (False)=Fal
|
|
use_24hour (bool): output in 24-hour/military or 12-hour format
|
|
use_ampm (bool): include the am/pm for 12-hour format
|
|
Returns:
|
|
(str): The formatted time string
|
|
"""
|
|
if use_24hour:
|
|
# e.g. "03:01" or "14:22"
|
|
string = dt.strftime("%H:%M")
|
|
else:
|
|
if use_ampm:
|
|
# e.g. "3:01 AM" or "2:22 PM"
|
|
string = dt.strftime("%I:%M %p")
|
|
else:
|
|
# e.g. "3:01" or "2:22"
|
|
string = dt.strftime("%I:%M")
|
|
if string[0] == '0':
|
|
string = string[1:] # strip leading zeros
|
|
|
|
if not speech:
|
|
return string
|
|
|
|
# Generate a speakable version of the time
|
|
if use_24hour:
|
|
speak = ""
|
|
|
|
# Either "0 8 hundred" or "13 hundred"
|
|
if string[0] == '0':
|
|
speak += pronounce_number_en(int(string[0])) + " "
|
|
speak += pronounce_number_en(int(string[1]))
|
|
else:
|
|
speak = pronounce_number_en(int(string[0:2]))
|
|
|
|
speak += " "
|
|
if string[3:5] == '00':
|
|
speak += "hundred"
|
|
else:
|
|
if string[3] == '0':
|
|
speak += pronounce_number_en(0) + " "
|
|
speak += pronounce_number_en(int(string[4]))
|
|
else:
|
|
speak += pronounce_number_en(int(string[3:5]))
|
|
return speak
|
|
else:
|
|
if dt.hour == 0 and dt.minute == 0:
|
|
return "midnight"
|
|
if dt.hour == 12 and dt.minute == 0:
|
|
return "noon"
|
|
# TODO: "half past 3", "a quarter of 4" and other idiomatic times
|
|
|
|
if dt.hour == 0:
|
|
speak = pronounce_number_en(12)
|
|
elif dt.hour < 13:
|
|
speak = pronounce_number_en(dt.hour)
|
|
else:
|
|
speak = pronounce_number_en(dt.hour - 12)
|
|
|
|
if dt.minute == 0:
|
|
if not use_ampm:
|
|
return speak + " o'clock"
|
|
else:
|
|
if dt.minute < 10:
|
|
speak += " oh"
|
|
speak += " " + pronounce_number_en(dt.minute)
|
|
|
|
if use_ampm:
|
|
if dt.hour > 11:
|
|
speak += " p.m."
|
|
else:
|
|
speak += " a.m."
|
|
|
|
return speak
|