Remove content from lang specific files
The files are kept for backwards compatibility, but they now contain only imports of the lingua-franca versions of the variables and functions.
pull/2438/head
parent
1b88db4fa1
commit
32666e9d68
|
@ -18,5 +18,4 @@
|
|||
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
|
||||
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||
from lingua_franca.lang.format_common import *
|
||||
|
|
|
@ -14,285 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
from mycroft.util.log import LOG
|
||||
from mycroft.util.lang.common_data_en import _NUM_STRING_EN, \
|
||||
_FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN
|
||||
"""File kept for backwards compatibility
|
||||
|
||||
|
||||
def nice_number_en(number, speech, denominators=range(1, 21)):
    """English helper for nice_number.

    Formats a number as a mixed fraction that is easy for a human to
    read or hear, e.g. 4.5 becomes "4 and a half" for speech and
    "4 1/2" for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal representation
        return str(round(number, 3))

    whole, num, den = mixed

    # A whole number reads the same for speech and for display
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    fraction_name = _FRACTION_STRING_EN[den]
    # A single part is spoken with the article "a" ("a half", "a third")
    counted = 'a' if num == 1 else str(num)
    spoken = '{} {}'.format(counted, fraction_name)
    if whole != 0:
        spoken = '{} and {}'.format(whole, spoken)
    if num > 1:
        # more than one part: "thirds", "quarters", ...
        spoken += 's'
    return spoken
|
||||
|
||||
|
||||
def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
    """Convert a number to its spoken English equivalent.

    For example, 5.2 is returned as 'five point two'.

    Args:
        num (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool): use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    if scientific:
        number = '%E' % num
        n, power = number.split("E")
        power = int(power)
        if power != 0:
            # The exponent is pronounced by a separate, non-scientific
            # call, so its sign is spoken here as "negative".
            return '{} times ten to the power of {}{}'.format(
                pronounce_number_en(float(n), places, short_scale, True),
                'negative ' if power < 0 else '',
                pronounce_number_en(abs(power), places, short_scale, False))

    number_names = _NUM_STRING_EN
    big_number_names = _SHORT_SCALE_EN if short_scale else _LONG_SCALE_EN

    # deal with negatives: remember the sign word, then work on abs(num)
    result = ""
    if num < 0:
        result = "negative " if scientific else "minus "
    num = abs(num)

    try:
        # A 4 digit integer is usually said like a year,
        # i.e. 1972 => nineteen seventy two
        if 10000 > num >= 1000 and isinstance(num, int):
            if num % 1000 < 10 or num > 2000:
                # 1000, 2000, 2001, 2100, 3123, etc. are handled by the
                # generic code further down
                pass
            elif not num % 100:
                # 1900, 1300, etc., i.e. 1900 => nineteen hundred
                first = number_names[num // 100]
                last = big_number_names[100]
                # keep the sign word collected above
                return result + first + " " + last
            else:
                # 1960 => nineteen sixty, 1961 => nineteen sixty one
                first = number_names[num // 100]
                rest = num % 100
                if 10 <= rest < 20:
                    # teens have their own name: 1912 => nineteen twelve
                    last = number_names[rest]
                else:
                    last = number_names[rest - rest % 10]
                    if rest % 10:
                        last += " " + number_names[rest % 10]
                return result + first + " " + last
    except Exception as e:
        # Unforeseen edge cases are logged and the generic code below
        # is used instead.
        LOG.error('Exception in pronounce_number_en: {}'.format(repr(e)))

    # check for a direct match
    if num in number_names:
        result += number_names[num]
    elif num in big_number_names:
        result += "one " + big_number_names[num]
    else:
        hundreds = list(big_number_names.values())

        def _sub_thousand(n):
            """Pronounce an integer in the range 0..999."""
            assert 0 <= n <= 999
            if n <= 19:
                return number_names[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return number_names[q * 10] + (
                    " " + _sub_thousand(r) if r else "")
            else:
                q, r = divmod(n, 100)
                return number_names[q] + " hundred" + (
                    " and " + _sub_thousand(r) if r else "")

        def _short_scale(n):
            """Pronounce the integer part of n in groups of 1000."""
            if n >= max(_SHORT_SCALE_EN):
                return "infinity"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z)
                if i:
                    # every group above the lowest gets its scale word
                    number += " "
                    number += hundreds[i]
                res.append(number)

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            """Split n into base-`split` groups, least significant first."""
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            """Pronounce the integer part of n in groups of 1000000."""
            if n >= max(_LONG_SCALE_EN):
                return "infinity"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_en(z, places, True, scientific)
                if i:
                    # strip off the comma after the thousand
                    number = number.replace(',', '')
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number += " " + hundreds[i + 1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # Deal with fractional part
    if not num == int(num) and places > 0:
        result += " point"
        place = 10
        while int(num * place) % 10 > 0 and places > 0:
            result += " " + number_names[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
|
||||
|
||||
|
||||
def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time in a comfortable human format (English).

    For example, generate 'five thirty' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hh, mm = string[0:2], string[3:5]

        # Either "zero eight" or "thirteen"
        if hh[0] == '0':
            spoken_hour = (pronounce_number_en(int(hh[0])) + " " +
                           pronounce_number_en(int(hh[1])))
        else:
            spoken_hour = pronounce_number_en(int(hh))

        if mm == '00':
            spoken_minute = "hundred"
        elif mm[0] == '0':
            spoken_minute = (pronounce_number_en(0) + " " +
                             pronounce_number_en(int(mm[1])))
        else:
            spoken_minute = pronounce_number_en(int(mm))

        return spoken_hour + " " + spoken_minute

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12

    if dt.minute == 0 and dt.hour == 0:
        return "midnight"
    if dt.minute == 0 and dt.hour == 12:
        return "noon"

    if dt.minute == 15:
        speak = "quarter past " + pronounce_number_en(hour)
    elif dt.minute == 30:
        speak = "half past " + pronounce_number_en(hour)
    elif dt.minute == 45:
        next_hour = (dt.hour + 1) % 12 or 12
        speak = "quarter to " + pronounce_number_en(next_hour)
    elif dt.minute == 0:
        speak = pronounce_number_en(hour)
        if not use_ampm:
            return speak + " o'clock"
    else:
        speak = pronounce_number_en(hour)
        if dt.minute < 10:
            speak += " oh"
        speak += " " + pronounce_number_en(dt.minute)

    if use_ampm:
        speak += " p.m." if dt.hour > 11 else " a.m."

    return speak
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.format_en import *
|
||||
|
|
|
@ -13,307 +13,8 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
"""File kept for backwards compatibility
|
||||
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
Format functions for castillian (es-es)
|
||||
|
||||
"""
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
|
||||
# Spanish names of the numbers that cannot be composed from smaller ones:
# 0-20 and the round tens up to 90.
NUM_STRING_ES = {
    0: 'cero',
    1: 'uno',
    2: 'dos',
    3: 'tres',
    4: 'cuatro',
    5: 'cinco',
    6: 'seis',
    7: 'siete',
    8: 'ocho',
    9: 'nueve',
    10: 'diez',
    11: 'once',
    12: 'doce',
    13: 'trece',
    14: 'catorce',
    15: 'quince',
    16: 'dieciséis',
    17: 'diecisiete',
    18: 'dieciocho',
    19: 'diecinueve',
    20: 'veinte',
    30: 'treinta',
    40: 'cuarenta',
    50: 'cincuenta',
    60: 'sesenta',
    70: 'setenta',
    80: 'ochenta',
    90: 'noventa'
}

# Spanish (singular) names of the fractions with denominators 2-20.
FRACTION_STRING_ES = {
    2: 'medio',
    3: 'tercio',
    4: 'cuarto',
    5: 'quinto',
    6: 'sexto',
    7: 'séptimo',
    8: 'octavo',
    9: 'noveno',
    10: 'décimo',
    11: 'onceavo',
    12: 'doceavo',
    13: 'treceavo',
    14: 'catorceavo',
    15: 'quinceavo',
    16: 'dieciseisavo',
    17: 'diecisieteavo',
    18: 'dieciochoavo',
    19: 'diecinueveavo',
    20: 'veinteavo'
}
|
||||
|
||||
|
||||
def nice_number_es(number, speech, denominators=range(1, 21)):
    """Spanish helper for nice_number.

    Formats a number as a mixed fraction that is easy for a human to
    read or hear, e.g. 4.5 becomes "4 y medio" for speech and "4 1/2"
    for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    strNumber = ""
    whole = 0
    num = 0
    den = 0

    result = convert_to_mixed_fraction(number, denominators)

    if not result:
        # Give up, just represent as a 3 decimal number
        whole = round(number, 3)
    else:
        whole, num, den = result

    if not speech:
        if num == 0:
            # Group thousands with a space and use a decimal comma
            strNumber = '{:,}'.format(whole)
            strNumber = strNumber.replace(",", " ")
            strNumber = strNumber.replace(".", ",")
            return strNumber
        else:
            return '{} {}/{}'.format(whole, num, den)
    else:
        if num == 0:
            # if the number is not a fraction, only the decimal
            # separator needs converting
            strNumber = str(whole)
            strNumber = strNumber.replace(".", ",")
            return strNumber
        den_str = FRACTION_STRING_ES[den]
        # if it is not an integer
        if whole == 0:
            # if there is no whole number
            if num == 1:
                # if numerator is 1, return "un medio", for example
                strNumber = 'un {}'.format(den_str)
            else:
                # else return "4 tercios", for example
                strNumber = '{} {}'.format(num, den_str)
        elif num == 1:
            # if there is a whole number and numerator is 1
            if den == 2:
                # if denominator is 2, return "1 y medio", for example
                strNumber = '{} y {}'.format(whole, den_str)
            else:
                # else return "1 y 1 tercio", for example
                strNumber = '{} y 1 {}'.format(whole, den_str)
        else:
            # else return "2 y 3 cuartos", for example
            strNumber = '{} y {} {}'.format(whole, num, den_str)
        if num > 1:
            # Every Spanish fraction name takes a plural "s", including
            # "tercio" ("2 tercios"); unlike the French "tiers" it is
            # not invariant.
            strNumber += 's'

    return strNumber
|
||||
|
||||
|
||||
def pronounce_number_es(num, places=2):
    """Convert a number to its spoken Spanish equivalent.

    For example, 5.2 is returned as 'cinco coma dos'.

    Args:
        num (float or int): the number to pronounce (under 100)
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(num) >= 100:
        # TODO: Support numbers of 100 and above
        return str(num)

    result = ""
    if num < 0:
        result = "menos "
    num = abs(num)

    # Choose the wording from the integer part only, so that values such
    # as 29.5 are handled like 29 instead of falling between the ranges.
    whole = int(num)
    tens = whole - whole % 10
    ones = whole - tens

    # 21 to 29 are written as a single word
    if 20 <= whole <= 29:
        result += NUM_STRING_ES[tens]
        if ones > 0:
            # Drop the final "e" of "veinte" to build 21 - 29, taking
            # care of the exceptions 22, 23 and 26, which carry an accent.
            result = result[:-1]
            if ones == 2:
                result += "idós"
            elif ones == 3:
                result += "itrés"
            elif ones == 6:
                result += "iséis"
            else:
                result += "i" + NUM_STRING_ES[ones]
    elif whole >= 30:  # from 30 upwards: "treinta y uno"
        result += NUM_STRING_ES[tens]
        if ones > 0:
            result += " y " + NUM_STRING_ES[ones]
    else:
        result += NUM_STRING_ES[whole]

    # Deal with the decimal part. Spanish commonly uses the comma instead
    # of the dot; whichever is written, it is pronounced "coma".
    if not num == int(num) and places > 0:
        result += " coma"
        place = 10
        while int(num * place) % 10 > 0 and places > 0:
            result += " " + NUM_STRING_ES[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
|
||||
|
||||
|
||||
def nice_time_es(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time in a comfortable human format (Spanish).

    For example, generate 'cinco treinta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # When speaking in 24h format no extra qualifier such as
        # "la noche", "la tarde" or "la mañana" is needed.
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if dt.hour == 1:
            hour_part = "la una"
        else:
            hour_part = "las " + pronounce_number_es(dt.hour)

        # 14:04 is spoken as "las catorce cero cuatro"
        minute_part = pronounce_number_es(dt.minute)
        if dt.minute < 10:
            minute_part = "cero " + minute_part
        return hour_part + " " + minute_part

    # Prepare for "tres menos cuarto": these minutes are spoken as the
    # (negative) distance to the next hour.
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour in (0, 12):
        speak = "las doce"
    elif hour in (1, 13):
        speak = "la una"
    elif hour < 13:
        speak = "las " + pronounce_number_es(hour)
    else:
        speak = "las " + pronounce_number_es(hour - 12)

    # the special minutes first
    if minute == 15:
        speak += " y cuarto"
    elif minute == 30:
        speak += " y media"
    elif minute == -15:
        speak += " menos cuarto"
    elif minute > 0:
        # "seis y nueve", "siete y veinticinco"
        speak += " y " + pronounce_number_es(minute)
    elif minute < 0:
        # "siete menos veinte": the "menos" comes from the negative
        # number, so no "y" is added
        speak += " " + pronounce_number_es(minute)
    elif not use_ampm:
        # 3:00 without a part of the day is spoken as "en punto"
        speak += " en punto"

    if use_ampm:
        # "de la noche" runs from nightfall to midnight, so starting it
        # at 21h is somewhat subjective; in Spain 20h is still
        # "de la tarde". 12h is "de la mañana" or midday, so
        # "de la tarde" is used from 13h onwards.
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if 0 <= hour < 6:
            speak += " de la madrugada"
        elif 6 <= hour < 13:
            speak += " de la mañana"
        elif 13 <= hour < 21:
            speak += " de la tarde"
        else:
            speak += " de la noche"
    return speak
|
||||
from lingua_franca.lang.format_es import *
|
||||
|
|
|
@ -13,290 +13,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
""" Format functions for french (fr)
|
||||
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
|
||||
# French names of the numbers that cannot be composed from smaller ones:
# 0-16 by position, followed by the round tens 20-90.
NUM_STRING_FR = dict(enumerate((
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept',
    'huit', 'neuf', 'dix', 'onze', 'douze', 'treize', 'quatorze',
    'quinze', 'seize',
)))
NUM_STRING_FR.update(zip(range(20, 100, 10), (
    'vingt', 'trente', 'quarante', 'cinquante', 'soixante',
    'soixante-dix', 'quatre-vingt', 'quatre-vingt-dix',
)))

# French names of the fractions with denominators 2-20.
FRACTION_STRING_FR = dict(enumerate((
    'demi', 'tiers', 'quart', 'cinquième', 'sixième', 'septième',
    'huitième', 'neuvième', 'dixième', 'onzième', 'douzième',
    'treizième', 'quatorzième', 'quinzième', 'seizième',
    'dix-septième', 'dix-huitième', 'dix-neuvième', 'vingtième',
), start=2))
|
||||
|
||||
|
||||
def nice_number_fr(number, speech, denominators=range(1, 21)):
    """French helper for nice_number.

    Formats a number as a mixed fraction that is easy for a human to
    read or hear, e.g. 4.5 becomes "4 et demi" for speech and "4 1/2"
    for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if mixed:
        whole, num, den = mixed
    else:
        # Give up, just represent as a 3 decimal number
        whole, num, den = round(number, 3), 0, 0

    if num == 0:
        # Not a fraction: only the separators need converting. The
        # display form also groups thousands with a space.
        if speech:
            plain = str(whole)
        else:
            plain = '{:,}'.format(whole).replace(",", " ")
        return plain.replace(".", ",")

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    fraction_name = FRACTION_STRING_FR[den]
    if whole == 0:
        # no whole part: "un demi" or "4 tiers", for example
        counted = 'un' if num == 1 else str(num)
        spoken = '{} {}'.format(counted, fraction_name)
    elif num == 1 and den == 2:
        # "1 et demi", for example
        spoken = '{} et {}'.format(whole, fraction_name)
    else:
        # "1 et 1 tiers" or "2 et 3 quart", for example
        spoken = '{} et {} {}'.format(whole, num, fraction_name)

    if num > 1 and den != 3:
        # plural "s" for every fraction name except "tiers"
        spoken += 's'
    return spoken
|
||||
|
||||
|
||||
def pronounce_number_fr(num, places=2):
    """Convert a number to its spoken French equivalent.

    For example, 5.2 is returned as 'cinq virgule deux'.

    Args:
        num (float or int): the number to pronounce (under 100)
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(num) >= 100:
        # TODO: Support for numbers over 100
        return str(num)

    result = ""
    if num < 0:
        result = "moins "
    num = abs(num)

    # Choose the wording from the integer part only, so that values such
    # as 16.5, 71.5 or 80.5 are worded like 16, 71 and 80.
    whole = int(num)
    if whole > 16:
        tens = whole - whole % 10
        ones = whole - tens
        if ones != 0:
            if 10 < tens <= 60 and ones == 1:
                # 21, 31, ... 61 => "vingt-et-un", ...
                result += NUM_STRING_FR[tens] + "-et-" + NUM_STRING_FR[ones]
            elif whole == 71:
                result += "soixante-et-onze"
            elif tens in (70, 90):
                # 7x and 9x are built on 60 and 80 plus a number in
                # 11..19: "soixante-douze", "quatre-vingt-dix-sept"
                result += NUM_STRING_FR[tens - 10] + "-"
                if ones < 7:
                    result += NUM_STRING_FR[10 + ones]
                else:
                    result += NUM_STRING_FR[10] + "-" + NUM_STRING_FR[ones]
            else:
                result += NUM_STRING_FR[tens] + "-" + NUM_STRING_FR[ones]
        else:
            if whole == 80:
                # a bare 80 takes a final "s"
                result += "quatre-vingts"
            else:
                result += NUM_STRING_FR[tens]
    else:
        result += NUM_STRING_FR[whole]

    # Deal with decimal part
    if not num == int(num) and places > 0:
        result += " virgule"
        place = 10
        while int(num * place) % 10 > 0 and places > 0:
            result += " " + NUM_STRING_FR[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
|
||||
|
||||
|
||||
def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time in a comfortable human format (French).

    For example, generate 'cinq heures trente' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if use_24hour:

        # "13 heures trente"
        if dt.hour == 0:
            speak += "minuit"
        elif dt.hour == 12:
            speak += "midi"
        elif dt.hour == 1:
            speak += "une heure"
        else:
            speak += pronounce_number_fr(dt.hour) + " heures"

        if dt.minute != 0:
            speak += " " + pronounce_number_fr(dt.minute)

    else:
        # Prepare for "trois heures moins le quart": these minutes are
        # spoken as the (negative) distance to the next hour.
        if dt.minute in (35, 40, 45, 50, 55):
            minute = dt.minute - 60
            # Wrap around midnight so that 23:45 refers to "minuit"
            # instead of an out-of-range hour 24.
            hour = (dt.hour + 1) % 24
        else:
            minute = dt.minute
            hour = dt.hour

        if hour == 0:
            speak += "minuit"
        elif hour == 12:
            speak += "midi"
        elif hour == 1 or hour == 13:
            speak += "une heure"
        elif hour < 13:
            speak = pronounce_number_fr(hour) + " heures"
        else:
            speak = pronounce_number_fr(hour - 12) + " heures"

        if minute != 0:
            if minute == 15:
                speak += " et quart"
            elif minute == 30:
                speak += " et demi"
            elif minute == -15:
                speak += " moins le quart"
            else:
                # a negative minute is pronounced with a leading "moins"
                speak += " " + pronounce_number_fr(minute)

        if use_ampm:
            # "minuit" and "midi" get no part-of-day suffix
            if hour > 17:
                speak += " du soir"
            elif hour > 12:
                speak += " de l'après-midi"
            elif hour > 0 and hour < 12:
                speak += " du matin"

    return speak
|
||||
from lingua_franca.lang.format_fr import *
|
||||
|
|
|
@ -14,351 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
from math import floor
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
# Hungarian month names, January first.
months = [
    'január', 'február', 'március', 'április', 'május', 'június',
    'július', 'augusztus', 'szeptember', 'október', 'november', 'december',
]

# Hungarian number names: 0-20 by position, then the round tens and 100.
NUM_STRING_HU = dict(enumerate((
    'nulla', 'egy', 'kettő', 'három', 'négy', 'öt', 'hat', 'hét',
    'nyolc', 'kilenc', 'tíz', 'tizenegy', 'tizenkettő', 'tizenhárom',
    'tizennégy', 'tizenöt', 'tizenhat', 'tizenhét', 'tizennyolc',
    'tizenkilenc', 'húsz',
)))
NUM_STRING_HU.update(zip(range(30, 101, 10), (
    'harminc', 'negyven', 'ötven', 'hatvan', 'hetven', 'nyolcvan',
    'kilencven', 'száz',
)))

# Hungarian uses "long scale"
# https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but NUM_POWERS_OF_TEN can be extended to include additional number words
NUM_POWERS_OF_TEN = [
    '',
    'ezer',
    'millió',
    'milliárd',
    'billió',
    'billiárd',
    'trillió',
    'trilliárd',
]

# Hungarian names of the fractions with denominators 2-20.
FRACTION_STRING_HU = dict(enumerate((
    'fél', 'harmad', 'negyed', 'ötöd', 'hatod', 'heted', 'nyolcad',
    'kilenced', 'tized', 'tizenegyed', 'tizenketted', 'tizenharmad',
    'tizennegyed', 'tizenötöd', 'tizenhatod', 'tizenheted',
    'tizennyolcad', 'tizenkilenced', 'huszad',
), start=2))

# Numbers below 2 thousand are written in one word in Hungarian.
# Numbers above 2 thousand are separated by hyphens.
# In some circumstances it may be better to separate individual words:
#   EXTRA_SPACE = " "  separates numbers below 2 thousand
#                      (orthographically incorrect)
#   EXTRA_SPACE = ""   correct spelling, this is the standard
EXTRA_SPACE = ""
|
||||
|
||||
|
||||
def _get_vocal_type(word):
|
||||
# checks the vocal attributes of a word
|
||||
vowels_high = len([char for char in word if char in 'eéiíöőüű'])
|
||||
vowels_low = len([char for char in word if char in 'aáoóuú'])
|
||||
if vowels_high != 0 and vowels_low != 0:
|
||||
return 2 # 2: type is mixed
|
||||
return 0 if vowels_high == 0 else 1 # 0: type is low, 1: is high
|
||||
|
||||
|
||||
def nice_number_hu(number, speech, denominators=range(1, 21)):
    """Hungarian helper for nice_number.

    Formats a number as a mixed fraction that is easy for a human to
    read or hear, e.g. 4.5 becomes "4 és fél" for speech and "4 1/2"
    for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal representation,
        # written with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    # A whole number reads the same for speech and for display
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    fraction_name = FRACTION_STRING_HU[den]
    is_half = den == 2

    if whole == 0:
        if num != 1:
            return '{} {}'.format(num, fraction_name)
        # a single part: "fél", but "egy harmad"
        prefix = '' if is_half else 'egy '
        return '{}{}'.format(prefix, fraction_name)

    if num == 1:
        # "4 és fél", but "4 egész egy harmad"
        joiner = 'és' if is_half else 'egész egy'
        return '{} {} {}'.format(whole, joiner, fraction_name)

    return '{} egész {} {}'.format(whole, num, fraction_name)
|
||||
|
||||
|
||||
def pronounce_number_hu(num, places=2):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'öt egész két tized'

    Args:
        num(float or int): the number to pronounce; values whose magnitude
                           is 10**24 or more are returned as str(num)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """

    def pronounce_triplet_hu(num):
        # Spell a value below 1000 as one run of number words.
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            hundredConst = EXTRA_SPACE + 'száz' + EXTRA_SPACE
            if hundreds == 1:
                # one hundred is a bare "száz", without "egy"
                result += hundredConst
            elif hundreds == 2:
                result += 'két' + hundredConst
            else:
                result += NUM_STRING_HU[hundreds] + hundredConst
            num -= hundreds * 100
        if 0 < num <= 20:
            result += NUM_STRING_HU[num]
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if tens != 20:
                result += NUM_STRING_HU[tens] + EXTRA_SPACE
            else:
                # twenty takes the compound form before a unit
                result += "huszon" + EXTRA_SPACE
            if ones > 0:
                result += NUM_STRING_HU[ones] + EXTRA_SPACE
        return result

    def pronounce_whole_number_hu(num, scale_level=0):
        # Spell an integer recursively, three digits per call; scale_level
        # is the index into NUM_POWERS_OF_TEN for the current group.
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "egy"
            elif scale_level == 1:
                # one thousand: the scale word alone
                result += EXTRA_SPACE + NUM_POWERS_OF_TEN[1] + EXTRA_SPACE
            else:
                result += "egy" + NUM_POWERS_OF_TEN[scale_level]
        elif last_triplet > 1:
            result += pronounce_triplet_hu(last_triplet)
            if scale_level != 0:
                # in front of a scale word "two" takes its short form
                result = result.replace(NUM_STRING_HU[2], 'két')
            if scale_level == 1:
                result += NUM_POWERS_OF_TEN[1] + EXTRA_SPACE
            if scale_level >= 2:
                result += NUM_POWERS_OF_TEN[scale_level]
            if scale_level > 0:
                # separates this group from the lower groups that follow
                result += '-'

        # Integer floor division keeps every digit; dividing as a float
        # loses precision for values above 2**53.
        return pronounce_whole_number_hu(num // 1000,
                                         scale_level + 1) + result

    result = ""
    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    elif num == 0:
        return str(NUM_STRING_HU[0])
    elif num < 0:
        return "mínusz " + pronounce_number_hu(abs(num), places)
    elif num == int(num):
        # drop the separator left behind when the lowest groups are empty
        return pronounce_whole_number_hu(num).strip('-')

    whole_number_part = floor(num)
    fractional_part = num - whole_number_part
    if whole_number_part == 0:
        result += NUM_STRING_HU[0]
    # strip the dangling separator here too (e.g. for 2000.5)
    result += pronounce_whole_number_hu(whole_number_part).strip('-')
    if places > 0:
        result += " egész "
        fraction = pronounce_whole_number_hu(
            round(fractional_part * 10 ** places)).strip('-')
        result += fraction.replace(NUM_STRING_HU[2], 'két')
        fraction_suffixes = [
            'tized', 'század', 'ezred', 'tízezred', 'százezred']
        # only the first five decimal places have a named suffix
        if places <= len(fraction_suffixes):
            result += ' ' + fraction_suffixes[places - 1]
    return result
|
||||
|
||||
|
||||
def pronounce_ordinal_hu(num):
    """Convert a non-negative whole number to its Hungarian ordinal word.

    Args:
        num (int or float): the number to convert
    Returns:
        (str): the ordinal word; if num is negative or not a whole number
               it is returned unchanged instead.
    """
    ordinals = ["nulladik", "első", "második", "harmadik", "negyedik",
                "ötödik", "hatodik", "hetedik", "nyolcadik", "kilencedik",
                "tizedik"]
    big_ordinals = ["", "ezredik", "milliomodik"]

    # only for whole positive numbers including zero
    if num < 0 or num != int(num):
        return num

    # Whole-valued floats (e.g. 3.0) pass the check above; convert them so
    # they can index the tables and the digit arithmetic stays exact.
    num = int(num)
    if num < 11:
        return ordinals[num]

    # concatenate parts and inflect them accordingly
    root = pronounce_number_hu(num)
    vtype = _get_vocal_type(root)
    last_digit = num % 10
    if root == "húsz":
        root = "husz"
    if num % 1000000 == 0:
        return root.replace(NUM_POWERS_OF_TEN[2], big_ordinals[2])
    if num % 1000 == 0:
        return root.replace(NUM_POWERS_OF_TEN[1], big_ordinals[1])
    if last_digit == 1:
        return root + "edik"
    elif root[-1] == 'ő':
        return root[:-1] + 'edik'
    elif last_digit != 0:
        # swap the trailing unit word for its ordinal form
        return ordinals[last_digit].join(
            root.rsplit(NUM_STRING_HU[last_digit], 1))
    # round tens and hundreds: the suffix depends on the vowel type
    return root + "edik" if vtype == 1 else root + "adik"
|
||||
|
||||
|
||||
def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    Produces a digit string such as '5:30' for text display, or a spoken
    Hungarian phrase for speech.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Pick the hour number that is actually spoken
    if use_24hour:
        spoken_hour = dt.hour
    elif dt.minute == 0 and dt.hour in (0, 12):
        return "éjfél" if dt.hour == 0 else "dél"
    elif dt.hour == 0:
        spoken_hour = 12
    elif dt.hour < 13:
        spoken_hour = dt.hour
    else:
        spoken_hour = dt.hour - 12
    # TODO: "half past 3", "a quarter of 4" and other idiomatic times

    speak = pronounce_number_hu(spoken_hour)
    speak = speak.replace(NUM_STRING_HU[2], 'két') + " óra"
    if dt.minute != 0:  # zero minutes are not pronounced
        speak += " " + pronounce_number_hu(dt.minute)

    # ampm is ignored when use_24hour is true
    if use_24hour or not use_ampm:
        return speak

    if dt.hour > 11:
        if dt.hour < 18:
            part_of_day = "délután "  # 12:01 - 17:59
        elif dt.hour < 22:
            part_of_day = "este "  # 18:00 - 21:59 este/evening
        else:
            part_of_day = "éjjel "  # 22:00 - 23:59 éjjel/at night
    elif dt.hour < 3:
        part_of_day = "éjjel "  # 00:01 - 02:59 éjjel/at night
    else:
        part_of_day = "reggel "  # 03:00 - 11:59 reggel/in the morning
    return part_of_day + speak
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.format_hu import *
|
||||
|
|
|
@ -14,485 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
import collections
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
# Italian words for 0-20 and for the round tens up to 90.
NUM_STRING_IT = {
    0: 'zero',
    1: 'uno',
    2: 'due',
    3: 'tre',
    4: 'quattro',
    5: 'cinque',
    6: 'sei',
    7: 'sette',
    8: 'otto',
    9: 'nove',
    10: 'dieci',
    11: 'undici',
    12: 'dodici',
    13: 'tredici',
    14: 'quattordici',
    15: 'quindici',
    16: 'sedici',
    17: 'diciassette',
    18: 'diciotto',
    19: 'diciannove',
    20: 'venti',
    30: 'trenta',
    40: 'quaranta',
    50: 'cinquanta',
    60: 'sessanta',
    70: 'settanta',
    80: 'ottanta',
    90: 'novanta'
}

# Denominator stems; nice_number_it appends the final 'o' / 'i'.
FRACTION_STRING_IT = {
    2: 'mezz',
    3: 'terz',
    4: 'quart',
    5: 'quint',
    6: 'sest',
    7: 'settim',
    8: 'ottav',
    9: 'non',
    10: 'decim',
    11: 'undicesim',
    12: 'dodicesim',
    13: 'tredicesim',
    14: 'quattordicesim',
    15: 'quindicesim',
    16: 'sedicesim',
    17: 'diciassettesim',
    18: 'diciottesim',
    19: 'diciannovesim',
    20: 'ventesim'
}

# source: http://tulengua.es/numeros-texto/default.aspx
# NOTE: keys above the float range (about 1.8e308) are written as integer
# powers of ten. A float literal such as 1e312 evaluates to inf, which made
# all of those entries collapse into one single key.
LONG_SCALE_IT = collections.OrderedDict([
    (100, 'cento'),
    (1000, 'mila'),
    (1000000, 'milioni'),
    (1e9, "miliardi"),
    (1e12, "bilioni"),
    (1e18, 'trilioni'),
    (1e24, "quadrilioni"),
    (1e30, "quintilioni"),
    (1e36, "sestilioni"),
    (1e42, "settilioni"),
    (1e48, "ottillioni"),
    (1e54, "nonillioni"),
    (1e60, "decemillioni"),
    (1e66, "undicilione"),
    (1e72, "dodicilione"),
    (1e78, "tredicilione"),
    (1e84, "quattordicilione"),
    (1e90, "quindicilione"),
    (1e96, "sedicilione"),
    (1e102, "diciasettilione"),
    (1e108, "diciottilione"),
    (1e114, "dicianovilione"),
    (1e120, "vintilione"),
    (1e306, "unquinquagintilione"),
    (10 ** 312, "duoquinquagintilione"),
    (10 ** 336, "sesquinquagintilione"),
    (10 ** 366, "unsexagintilione")
])

# Same convention as above: integer keys from 10 ** 309 upward.
SHORT_SCALE_IT = collections.OrderedDict([
    (100, 'cento'),
    (1000, 'mila'),
    (1000000, 'milioni'),
    (1e9, "miliardi"),
    (1e12, 'bilioni'),
    (1e15, "biliardi"),
    (1e18, "trilioni"),
    (1e21, "triliardi"),
    (1e24, "quadrilioni"),
    (1e27, "quadriliardi"),
    (1e30, "quintilioni"),
    (1e33, "quintiliardi"),
    (1e36, "sestilioni"),
    (1e39, "sestiliardi"),
    (1e42, "settilioni"),
    (1e45, "settiliardi"),
    (1e48, "ottilioni"),
    (1e51, "ottiliardi"),
    (1e54, "nonilioni"),
    (1e57, "noniliardi"),
    (1e60, "decilioni"),
    (1e63, "deciliardi"),
    (1e66, "undicilioni"),
    (1e69, "undiciliardi"),
    (1e72, "dodicilioni"),
    (1e75, "dodiciliardi"),
    (1e78, "tredicilioni"),
    (1e81, "trediciliardi"),
    (1e84, "quattordicilioni"),
    (1e87, "quattordiciliardi"),
    (1e90, "quindicilioni"),
    (1e93, "quindiciliardi"),
    (1e96, "sedicilioni"),
    (1e99, "sediciliardi"),
    (1e102, "diciassettilioni"),
    (1e105, "diciassettiliardi"),
    (1e108, "diciottilioni"),
    (1e111, "diciottiliardi"),
    (1e114, "dicianovilioni"),
    (1e117, "dicianoviliardi"),
    (1e120, "vintilioni"),
    (1e123, "vintiliardi"),
    (1e153, "quinquagintillion"),
    (1e183, "sexagintillion"),
    (1e213, "septuagintillion"),
    (1e243, "ottogintilioni"),
    (1e273, "nonigintillioni"),
    (1e303, "centilioni"),
    (1e306, "uncentilioni"),
    (10 ** 309, "duocentilioni"),
    (10 ** 312, "trecentilioni"),
    (10 ** 333, "decicentilioni"),
    (10 ** 336, "undicicentilioni"),
    (10 ** 363, "viginticentilioni"),
    (10 ** 366, "unviginticentilioni"),
    (10 ** 393, "trigintacentilioni"),
    (10 ** 423, "quadragintacentillion"),
    (10 ** 453, "quinquagintacentillion"),
    (10 ** 483, "sexagintacentillion"),
    (10 ** 513, "septuagintacentillion"),
    (10 ** 543, "octogintacentillion"),
    (10 ** 573, "nonagintacentillion"),
    (10 ** 603, "ducentillion"),
    (10 ** 903, "trecentillion"),
    (10 ** 1203, "quadringentillion"),
    (10 ** 1503, "quingentillion"),
    (10 ** 1803, "sescentillion"),
    (10 ** 2103, "septingentillion"),
    (10 ** 2403, "octingentillion"),
    (10 ** 2703, "nongentillion"),
    (10 ** 3003, "millinillion")
])
|
||||
|
||||
|
||||
def nice_number_it(number, speech, denominators=range(1, 21)):
    """ Italian helper for nice_number

    Formats a number as a mixed fraction that is easy to read or to speak.
    For instance 4.5 becomes "4 e un mezzo" for speech and "4 1/2" for text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number with 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed

    # A whole number reads the same in both output modes
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    # denominator stem, its ending is added at the very end
    den_str = FRACTION_STRING_IT[den]
    # a numerator of one is spoken "un", e.g. "un decimo"
    numerator = 'un' if num == 1 else num
    if whole == 0:
        # plain fraction, e.g. "3 mezzi"
        spoken = '{} {}'.format(numerator, den_str)
    else:
        # whole part plus fraction, e.g. "20 e 3 decimi"
        spoken = '{} e {} {}'.format(whole, numerator, den_str)

    # plural ending for a numerator above one, singular otherwise
    return spoken + ('i' if num > 1 else 'o')
|
||||
|
||||
|
||||
def pronounce_number_it(num, places=2, short_scale=False, scientific=False):
    """
    Convert a number to its spoken equivalent
    (adapted to Italian from the English version)

    For example, '5.2' would return 'cinque virgola due'

    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    # handle infinity
    if num == float("inf"):
        return "infinito"
    elif num == float("-inf"):
        return "meno infinito"

    if scientific:
        # Split the '%E' rendering into mantissa and exponent; a zero
        # exponent falls through to the regular pronunciation below.
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            return '{}{} per dieci elevato alla {}{}'.format(
                'meno ' if float(n) < 0 else '',
                pronounce_number_it(abs(float(n)), places, short_scale, False),
                'meno ' if power < 0 else '',
                pronounce_number_it(abs(power), places, short_scale, False))

    # Lookup table: the basic number words plus the selected scale words
    if short_scale:
        number_names = NUM_STRING_IT.copy()
        number_names.update(SHORT_SCALE_IT)
    else:
        number_names = NUM_STRING_IT.copy()
        number_names.update(LONG_SCALE_IT)

    digits = [number_names[n] for n in range(0, 20)]

    tens = [number_names[n] for n in range(10, 100, 10)]

    # Scale words in table order; the helpers below index this list by the
    # position of each digit group.
    if short_scale:
        hundreds = [SHORT_SCALE_IT[n] for n in SHORT_SCALE_IT.keys()]
    else:
        hundreds = [LONG_SCALE_IT[n] for n in LONG_SCALE_IT.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "meno "
    num = abs(num)

    # check for a direct match
    if num in number_names:
        if num > 90:
            result += ""  # start of string
        result += number_names[num]
    else:
        def _sub_thousand(n):
            # Spell a value in 0..999.
            assert 0 <= n <= 999
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                _deci = tens[q-1]
                _unit = r
                _partial = _deci
                if _unit > 0:
                    if _unit == 1 or _unit == 8:
                        # the tens word loses its last letter before
                        # "uno" / "otto": ventuno, ventotto
                        _partial = _partial[:-1]
                    _partial += number_names[_unit]
                return _partial
            else:
                q, r = divmod(n, 100)
                if q == 1:
                    _partial = "cento"
                else:
                    _partial = digits[q] + "cento"
                _partial += (
                    " " + _sub_thousand(r) if r else "")  # separate hundreds
                return _partial

        def _short_scale(n):
            # Spell the integer part of n in groups of three digits.
            if n >= max(SHORT_SCALE_IT.keys()):
                return "numero davvero enorme"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z)
                if i:
                    number += ""  # separate the orders of magnitude
                    number += hundreds[i]
                res.append(number)

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # Break n into base-`split` groups, least significant first.
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # Spell the integer part of n in groups of six digits; each
            # group is itself pronounced with the short scale.
            if n >= max(LONG_SCALE_IT.keys()):
                return "numero davvero enorme"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_it(z, places, True, scientific)
                # strip off the comma after the thousand
                if i:
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')
                    number += " " + hundreds[i+1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # normalize single, 'reasonable' scale words at the start of the string
    if result == 'mila':
        result = 'mille'
    if result == 'milioni':
        result = 'un milione'
    if result == 'miliardi':
        result = 'un miliardo'
    if result[0:7] == 'unomila':
        result = result.replace('unomila', 'mille', 1)
    if result[0:10] == 'unomilioni':
        result = result.replace('unomilioni', 'un milione', 1)
    # if result[0:11] == 'unomiliardi':
    #    result = result.replace('unomiliardi', 'un miliardo', 1)

    # Deal with fractional part
    if not num == int(num) and places > 0:
        result += " virgola"
        place = 10
        # NOTE: the loop ends at the first decimal digit that is zero, so
        # digits after an inner zero are not spoken.
        while int(num * place) % 10 > 0 and places > 0:
            result += " " + number_names[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
|
||||
|
||||
|
||||
def nice_time_it(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format
    (adapted to Italian from the English version)

    For example, generate 'cinque e trenta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hour_digits = string[0:2]
        minute_digits = string[3:5]

        # Either "zero 8 zerozero" or "13 zerozero"
        if hour_digits == '00':
            hour_words = "zerozero"
        elif hour_digits[0] == '0':
            if int(hour_digits[1]) == 1:
                # one o'clock is just "una", without the leading zero
                hour_words = "una"
            else:
                hour_words = (pronounce_number_it(int(hour_digits[0])) +
                              " " + pronounce_number_it(int(hour_digits[1])))
        else:
            hour_words = pronounce_number_it(int(hour_digits))

        if minute_digits == '00':
            minute_words = "zerozero"
        elif minute_digits[0] == '0':
            minute_words = (pronounce_number_it(0) + " " +
                            pronounce_number_it(int(minute_digits[1])))
        else:
            minute_words = pronounce_number_it(int(minute_digits))

        # in italian "13 e 25"
        return hour_words + " e " + minute_words

    if dt.minute == 0 and dt.hour in (0, 12):
        return "mezzanotte" if dt.hour == 0 else "mezzogiorno"
    # TODO: "10 e un quarto", "4 e tre quarti" and other idiomatic times

    if dt.hour == 0:
        speak = "mezzanotte"
    elif dt.hour in (1, 13):
        speak = "una"
    elif dt.hour > 13:
        speak = pronounce_number_it(dt.hour-12)
    else:
        speak = pronounce_number_it(dt.hour)

    if dt.minute == 0:
        if not use_ampm:
            speak += " in punto"
    elif dt.minute == 15:
        speak += " e un quarto"
    elif dt.minute == 45:
        speak += " e tre quarti"
    elif dt.minute < 10:
        speak += " e zero " + pronounce_number_it(dt.minute)
    else:
        speak += " e " + pronounce_number_it(dt.minute)

    if use_ampm:
        # hours before 4 get no suffix at all
        if dt.hour < 4:
            pass
        elif dt.hour > 20:
            speak += " della notte"
        elif dt.hour > 17:
            speak += " della sera"
        elif dt.hour > 12:
            speak += " del pomeriggio"
        else:
            speak += " della mattina"

    return speak
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.format_it import *
|
||||
|
|
|
@ -14,382 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
from math import floor
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
# Lowercase Dutch month names, used to spot dates in a sentence.
months = ['januari', 'februari', 'maart', 'april', 'mei', 'juni',
          'juli', 'augustus', 'september', 'oktober', 'november',
          'december']

# Dutch words for 0-20, the round tens and one hundred.
NUM_STRING_NL = {
    0: 'nul',
    1: 'één',
    2: 'twee',
    3: 'drie',
    4: 'vier',
    5: 'vijf',
    6: 'zes',
    7: 'zeven',
    8: 'acht',
    9: 'negen',
    10: 'tien',
    11: 'elf',
    12: 'twaalf',
    13: 'dertien',
    14: 'veertien',
    15: 'vijftien',
    16: 'zestien',
    17: 'zeventien',
    18: 'achttien',
    19: 'negentien',
    20: 'twintig',
    30: 'dertig',
    40: 'veertig',
    50: 'vijftig',
    60: 'zestig',
    70: 'zeventig',
    80: 'tachtig',
    90: 'negentig',
    100: 'honderd'
}

# Dutch uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but NUM_POWERS_OF_TEN can be extended to include additional number words


NUM_POWERS_OF_TEN = [
    '', 'duizend', 'miljoen', 'miljard', 'biljoen', 'biljard', 'triljoen',
    'triljard'
]

# Spoken denominators for the fractions 1/2 .. 1/20.
FRACTION_STRING_NL = {
    2: 'half',
    3: 'derde',
    4: 'vierde',
    5: 'vijfde',
    6: 'zesde',
    7: 'zevende',
    8: 'achtste',
    9: 'negende',
    10: 'tiende',
    11: 'elfde',
    12: 'twaalfde',
    13: 'dertiende',
    14: 'veertiende',
    15: 'vijftiende',
    16: 'zestiende',
    17: 'zeventiende',
    18: 'achttiende',
    19: 'negentiende',
    20: 'twintigste'
}

# Numbers below 1 million are written in one word in Dutch, yielding very
# long words
# In some circumstances it may be better to separate individual words
# Set EXTRA_SPACE=" " for separating numbers below 1 million (
# orthographically incorrect)
# Set EXTRA_SPACE="" for correct spelling, this is standard

# EXTRA_SPACE = " "
EXTRA_SPACE = ""
|
||||
|
||||
|
||||
def nice_number_nl(number, speech, denominators=range(1, 21)):
    """ Dutch helper for nice_number

    Formats a number as a mixed fraction that is easy to read or to speak.
    For instance 4.5 becomes "4 en één half" for speech and "4 1/2" for
    text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to 3 decimals with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    # A whole number reads the same in both output modes
    if num == 0:
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = FRACTION_STRING_NL[den]
    # a numerator of one is spoken as the word "één"
    numerator = 'één' if num == 1 else num
    if whole == 0:
        return '{} {}'.format(numerator, den_str)
    return '{} en {} {}'.format(whole, numerator, den_str)
|
||||
|
||||
|
||||
def pronounce_number_nl(num, places=2):
    """
    Convert a number to its spoken equivalent

    For example, pronounce_number_nl(5.2, places=1) would return
    'vijf komma twee'

    Args:
        num(float or int): the number to pronounce; values whose magnitude
                           is 10**24 or more are returned as str(num)
        places(int): number of decimal places to speak; exactly this many
                     digits are spoken, trailing zeros included
    Returns:
        (str): The pronounced number
    """

    def pronounce_triplet_nl(num):
        # Spell a value below 1000 as one run of number words.
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            result += NUM_STRING_NL[
                          hundreds] + EXTRA_SPACE + 'honderd' + EXTRA_SPACE
            num -= hundreds * 100
        if 0 < num <= 20:
            result += NUM_STRING_NL[num]
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if ones > 0:
                # the unit comes first and is joined with "en"
                result += NUM_STRING_NL[ones] + EXTRA_SPACE
                result += 'en' + EXTRA_SPACE
            result += NUM_STRING_NL[tens] + EXTRA_SPACE
        return result

    def pronounce_fractional_nl(num, places):
        # Speak a fixed number of decimal digits, trailing zeros included.
        result = ""
        place = 10
        while places > 0:
            result += " " + NUM_STRING_NL[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_nl(num, scale_level=0):
        # Spell an integer recursively, three digits per call; scale_level
        # is the index into NUM_POWERS_OF_TEN for the current group.
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "één"
            elif scale_level == 1:
                result += 'één' + EXTRA_SPACE + 'duizend' + EXTRA_SPACE
            else:
                result += "één " + NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_nl(last_triplet)
            if scale_level == 1:
                result += 'duizend' + EXTRA_SPACE
            if scale_level >= 2:
                # scale words from a million upward are separate words
                result += " " + NUM_POWERS_OF_TEN[scale_level] + ' '

        # Integer floor division keeps every digit; dividing as a float
        # loses precision for values above 2**53.
        return pronounce_whole_number_nl(num // 1000,
                                         scale_level + 1) + result

    result = ""
    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    elif num == 0:
        return str(NUM_STRING_NL[0])
    elif num < 0:
        return "min " + pronounce_number_nl(abs(num), places)
    elif num == int(num):
        return pronounce_whole_number_nl(num)

    whole_number_part = floor(num)
    fractional_part = num - whole_number_part
    if whole_number_part == 0:
        # say "nul komma ..." rather than starting with " komma"
        result += NUM_STRING_NL[0]
    result += pronounce_whole_number_nl(whole_number_part)
    if places > 0:
        result += " komma"
        result += pronounce_fractional_nl(fractional_part, places)
    return result
|
||||
|
||||
|
||||
def pronounce_ordinal_nl(num):
    """Convert a non-negative whole number to its Dutch ordinal word.

    Args:
        num (int or float): the number to convert
    Returns:
        (str): the ordinal word; if num is negative or not a whole number
               it is returned unchanged instead.
    """
    ordinals = ["nulste", "eerste", "tweede", "derde", "vierde", "vijfde",
                "zesde", "zevende", "achtste"]

    # only for whole positive numbers including zero
    if num < 0 or num != int(num):
        return num

    # Whole-valued floats (e.g. 3.0) pass the check above; convert them so
    # they can be used as a list index.
    num = int(num)
    if num < len(ordinals):
        # 0..8 come straight from the table; its entries for 4..8 equal
        # the cardinal word plus "de" / "ste"
        return ordinals[num]
    if num < 20:
        return pronounce_number_nl(num) + "de"
    return pronounce_number_nl(num) + "ste"
|
||||
|
||||
|
||||
def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'half zes' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        spoken = pronounce_number_nl(dt.hour) + " uur"
        if dt.minute != 0:
            # zero minutes are not pronounced, 13:00 is "13 uur"
            spoken += " " + pronounce_number_nl(dt.minute)
        return spoken  # ampm is ignored when use_24hour is true

    if dt.hour == 0 and dt.minute == 0:
        return "Middernacht"

    # Times past the half hour are counted towards the next hour
    this_hour = fix_hour(dt.hour % 12)
    next_hour = fix_hour(dt.hour % 12 + 1)

    if dt.minute == 0:
        spoken = pronounce_number_nl(this_hour) + " uur"
    elif dt.minute == 30:
        spoken = "half " + pronounce_number_nl(next_hour)
    elif dt.minute == 15:
        spoken = "kwart over " + pronounce_number_nl(this_hour)
    elif dt.minute == 45:
        spoken = "kwart voor " + pronounce_number_nl(next_hour)
    elif dt.minute > 30:
        spoken = (pronounce_number_nl(60 - dt.minute) + " voor " +
                  pronounce_number_nl(next_hour))
    else:
        spoken = (pronounce_number_nl(dt.minute) + " over " +
                  pronounce_number_nl(this_hour))

    if use_ampm:
        spoken += nice_part_of_day_nl(dt)

    return spoken
|
||||
|
||||
|
||||
def fix_hour(hour):
    """Map an hour onto the 12-hour clock face, giving 12 instead of 0."""
    return hour % 12 or 12
|
||||
|
||||
|
||||
def nice_part_of_day_nl(dt):
    """Return the Dutch part-of-day phrase for dt.hour, with a leading space.

    Args:
        dt (datetime): the moment to describe; only dt.hour is read
    Returns:
        (str): " 's nachts" before 6, " 's ochtends" before 12,
               " 's middags" before 18 and " 's avonds" before 24
    Raises:
        Exception: if dt.hour is 24 or more
    """
    # (exclusive upper hour, phrase), checked in ascending order
    day_parts = (
        (6, " 's nachts"),
        (12, " 's ochtends"),
        (18, " 's middags"),
        (24, " 's avonds"),
    )
    for upper_hour, phrase in day_parts:
        if dt.hour < upper_hour:
            return phrase
    raise Exception('dt.hour is bigger than 24')
|
||||
|
||||
|
||||
def nice_response_nl(text):
    """Post-process a Dutch response text before it is spoken.

    Two rewrites are applied:
      * a standalone "^" followed by a numeric word becomes "tot de macht"
        (to the power of)
      * when the text contains a month name, nice_ordinal_nl() is applied
        so day numbers in front of months are spelled out

    Args:
        text (str): the response text
    Returns:
        (str): the rewritten text; returned unchanged when neither rewrite
               applies
    """
    words = text.split()
    has_month = False
    power_replaced = False

    for idx, word in enumerate(words):
        if word.lower() in months:
            has_month = True

        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "tot de macht"
                power_replaced = True

    # Apply the power replacement first and the month handling last.
    # Previously a "^" found after a month re-joined the stale word list
    # and silently threw away the result of nice_ordinal_nl().
    if power_replaced:
        text = " ".join(words)
    if has_month:
        text = nice_ordinal_nl(text)
    return text
|
||||
|
||||
|
||||
def nice_ordinal_nl(text):
    """Spell out day numbers that directly precede a month name.

    A purely numeric word followed by a month is replaced by its ordinal
    (pronounce_ordinal_nl) when the preceding word is 'de', otherwise by
    its cardinal (pronounce_number_nl).

    Args:
        text (str): text to normalize
    Returns:
        (str): the text with its words re-joined by single spaces
    """
    words = text.split()
    for idx, word in enumerate(words):
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        # Test the whole word: the old check looked at word[:-1] but then
        # converted int(word), which skipped single digit days ("5 mei")
        # and raised ValueError for tokens such as "12." or "12e".
        if word.isdecimal() and wordNext.lower() in months:
            if wordPrev == 'de':
                word = pronounce_ordinal_nl(int(word))
            else:
                word = pronounce_number_nl(int(word))
            words[idx] = word
    return " ".join(words)
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.format_nl import *
|
||||
|
|
|
@ -14,209 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
from mycroft.util.lang.common_data_pt import _FRACTION_STRING_PT, \
|
||||
_NUM_STRING_PT
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
|
||||
def nice_number_pt(number, speech, denominators=range(1, 21)):
    """Portuguese helper for nice_number.

    Formats a number as a mixed fraction, e.g. 4.5 becomes "4 e meio" for
    speech and "4 1/2" for text.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction, fall back to a number with 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed

    # Without a fractional part speech and display output are the same
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_PT[den]
    if whole == 0:
        # fraction only: "um <den>" for a single part, else "<num> <den>"
        lead = 'um' if num == 1 else num
        spoken = '{} {}'.format(lead, den_str)
    elif num == 1:
        spoken = '{} e {}'.format(whole, den_str)
    else:
        spoken = '{} e {} {}'.format(whole, num, den_str)

    # plural form when more than one part is counted
    if num > 1:
        spoken += 's'
    return spoken
|
||||
|
||||
|
||||
def pronounce_number_pt(num, places=2):
    """Convert a number to its spoken Portuguese equivalent.

    For example, 5.2 would return 'cinco vírgula dois'.

    Args:
        num (float or int): the number to pronounce (absolute value under
                            100, larger values are returned as str(num))
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(num) >= 100:
        # TODO: Support n > 100
        return str(num)

    result = "menos " if num < 0 else ""
    num = abs(num)

    if num >= 20:
        tens = int(num - int(num) % 10)
        ones = int(num - tens)
        result += _NUM_STRING_PT[tens]
        if ones > 0:
            result += " e " + _NUM_STRING_PT[ones]
    else:
        result += _NUM_STRING_PT[int(num)]

    # In Portuguese the decimal separator is pronounced "vírgula",
    # whether it is written as a comma or as a dot.
    if num != int(num) and places > 0:
        # Collect up to `places` digits (truncating, not rounding) and only
        # drop *trailing* zeros. The previous loop stopped at the first
        # zero digit, so 1.05 came out as "um vírgula" with nothing after.
        digits = [int(num * 10 ** pos) % 10 for pos in range(1, places + 1)]
        while digits and digits[-1] == 0:
            digits.pop()
        if digits:
            result += " vírgula"
            for digit in digits:
                result += " " + _NUM_STRING_PT[digit]
    return result
|
||||
|
||||
|
||||
def nice_time_pt(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time to a comfortable human format in Portuguese.

    For example, generate 'cinco e meia' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if use_24hour:
        # simply speak the numbers, the hour one in its feminine form
        if dt.hour == 1:
            speak += "uma"
        else:
            speak += pronounce_number_pt(dt.hour)

        if dt.minute > 0:
            speak += " e " + pronounce_number_pt(dt.minute)
        return speak

    # 12-hour speech: some minute counts late in the hour are spoken as the
    # *next* hour minus the remaining minutes (e.g. 45 -> next hour "menos
    # um quarto"), expressed here as a negative minute value.
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        # Wrap around midnight: without the modulo 23:45 produced hour 24,
        # which was spoken as "doze ..." instead of "meia noite ...".
        hour = (dt.hour + 1) % 24
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak += "meia noite"
    elif hour == 12:
        speak += "meio dia"
    # 1 and 2 are pronounced in female form when talking about hours
    elif hour == 1 or hour == 13:
        speak += "uma"
    elif hour == 2 or hour == 14:
        speak += "duas"
    elif hour < 13:
        speak += pronounce_number_pt(hour)
    else:
        speak += pronounce_number_pt(hour - 12)

    if minute != 0:
        if minute == 15:
            speak += " e um quarto"
        elif minute == 30:
            speak += " e meia"
        elif minute == -15:
            speak += " menos um quarto"
        elif minute > 0:
            speak += " e " + pronounce_number_pt(minute)
        else:
            # negative minutes: pronounce_number_pt supplies the "menos"
            speak += " " + pronounce_number_pt(minute)

    # exact time, e.g. 3:00
    if minute == 0 and not use_ampm:
        speak += " em ponto"

    if use_ampm:
        # no suffix is added for midnight (0) and noon (12)
        if 0 < hour < 6:
            speak += " da madrugada"
        elif 6 <= hour < 12:
            speak += " da manhã"
        elif 13 <= hour < 21:
            speak += " da tarde"
        elif hour != 0 and hour != 12:
            speak += " da noite"
    return speak
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.format_pt import *
|
||||
|
|
|
@ -14,411 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
from math import floor
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
# Swedish month names, lower case, January first
months = [
    'januari', 'februari', 'mars', 'april', 'maj', 'juni',
    'juli', 'augusti', 'september', 'oktober', 'november', 'december',
]

# Spoken form of 0-20, of the round tens up to 90, and of 100
NUM_STRING_SV = {
    0: 'noll', 1: 'en', 2: 'två', 3: 'tre', 4: 'fyra',
    5: 'fem', 6: 'sex', 7: 'sju', 8: 'åtta', 9: 'nio',
    10: 'tio', 11: 'elva', 12: 'tolv', 13: 'tretton', 14: 'fjorton',
    15: 'femton', 16: 'sexton', 17: 'sjutton', 18: 'arton', 19: 'nitton',
    20: 'tjugo', 30: 'trettio', 40: 'fyrtio', 50: 'femtio', 60: 'sextio',
    70: 'sjuttio', 80: 'åttio', 90: 'nittio',
    100: 'hundra',
}

# Scale words, looked up by list position
NUM_POWERS_OF_TEN = [
    'hundra', 'tusen', 'miljon', 'miljard',
    'biljon', 'biljard', 'triljon', 'triljard',
]

# Singular fraction names keyed by denominator (2 .. 20)
FRACTION_STRING_SV = {
    2: 'halv', 3: 'tredjedel', 4: 'fjärdedel', 5: 'femtedel',
    6: 'sjättedel', 7: 'sjundedel', 8: 'åttondel', 9: 'niondel',
    10: 'tiondel', 11: 'elftedel', 12: 'tolftedel', 13: 'trettondel',
    14: 'fjortondel', 15: 'femtondel', 16: 'sextondel', 17: 'sjuttondel',
    18: 'artondel', 19: 'nittondel', 20: 'tjugondel',
}

# Separator appended after scale words when numbers are spelled out
EXTRA_SPACE = " "
|
||||
|
||||
|
||||
def nice_number_sv(number, speech, denominators=range(1, 21)):
    """Swedish helper for nice_number.

    Formats a number as a mixed fraction, e.g. 4.5 becomes
    "4 och en halv" for speech and "4 1/2" for text.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction, fall back to a number with 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed

    # Without a fractional part speech and display output are the same
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = FRACTION_STRING_SV[den]
    if whole == 0:
        lead = 'en' if num == 1 else num
        spoken = '{} {}'.format(lead, den_str)
    elif num == 1:
        spoken = '{} och en {}'.format(whole, den_str)
    else:
        spoken = '{} och {} {}'.format(whole, num, den_str)

    # plural ending when more than one part is counted
    if num > 1:
        spoken += 'ar'
    return spoken
|
||||
|
||||
|
||||
def pronounce_number_sv(num, places=2):
    """Convert a number to its spoken Swedish equivalent.

    For example, 5.2 with places=1 would return 'fem komma två'.

    Args:
        num (float or int): the number to pronounce; values with an
                            absolute value of 10**24 or more are returned
                            as str(num)
        places (int): number of decimal places to speak (always spoken in
                      full, including trailing zeros)
    Returns:
        (str): The pronounced number
    """

    def pronounce_triplet_sv(num):
        # Spell out a value below 1000 (one group of three digits).
        result = ""
        num = floor(num)

        if num > 99:
            hundreds = num // 100
            if hundreds == 1:
                result += 'ett' + 'hundra'
            else:
                result += NUM_STRING_SV[hundreds] + 'hundra'
            num -= hundreds * 100

        if num == 1:
            result += 'ett'
        elif 1 < num <= 20:
            result += NUM_STRING_SV[num]
        elif num > 20:
            ones = num % 10
            tens = num - ones
            result += NUM_STRING_SV[tens]
            if ones > 0:
                result += NUM_STRING_SV[ones]

        return result

    def pronounce_fractional_sv(num, places):
        # Speak a fixed number of places, even with trailing zeros.
        result = ""
        place = 10
        while places > 0:
            result += " " + NUM_STRING_SV[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_sv(num, scale_level=0):
        # Recursively spell out the integer part, three digits at a time;
        # scale_level counts the groups of three consumed so far.
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += 'en'
            elif scale_level == 1:
                result += 'ettusen' + EXTRA_SPACE
            else:
                result += 'en ' + NUM_POWERS_OF_TEN[scale_level] + EXTRA_SPACE
        elif last_triplet > 1:
            result += pronounce_triplet_sv(last_triplet)
            if scale_level == 1:
                result += 'tusen' + EXTRA_SPACE
            if scale_level >= 2:
                # plural of the scale word, e.g. "miljon" + "er"
                result += NUM_POWERS_OF_TEN[scale_level] + 'er' + EXTRA_SPACE

        # Integer division keeps large values exact; the former
        # floor(num / 1000) went through a float and corrupted the
        # digits of numbers beyond 2**53.
        return pronounce_whole_number_sv(num // 1000, scale_level + 1) + result

    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    if num == 0:
        return str(NUM_STRING_SV[0])
    if num < 0:
        return "minus " + pronounce_number_sv(abs(num), places)
    if num == int(num):
        return pronounce_whole_number_sv(num)

    whole_number_part = floor(num)
    fractional_part = num - whole_number_part
    if whole_number_part == 0:
        # values between 0 and 1 used to start with a bare " komma"
        result = NUM_STRING_SV[0]
    else:
        result = pronounce_whole_number_sv(whole_number_part)
    if places > 0:
        result += " komma"
        result += pronounce_fractional_sv(fractional_part, places)
    return result
|
||||
|
||||
|
||||
def pronounce_ordinal_sv(num):
    """Return the Swedish ordinal word for a non-negative whole number.

    This produces the base form only; it is not adapted for genus, casus
    or numerus.

    Args:
        num (int or float): the number to convert
    Returns:
        (str): the ordinal word; negative or non-integral input is
               returned unchanged (as passed in)
    """
    # 0-19 are looked up directly. Previously the table ended at 10 and was
    # only indexed with the last digit, so 10 came out as "de" and 11-19
    # as "tio" + ordinal of the last digit.
    ordinals = ["noll", "första", "andra", "tredje", "fjärde", "femte",
                "sjätte", "sjunde", "åttonde", "nionde", "tionde",
                "elfte", "tolfte", "trettonde", "fjortonde", "femtonde",
                "sextonde", "sjuttonde", "artonde", "nittonde"]

    if num < 0 or num != int(num):
        return num

    num = int(num)
    if num < len(ordinals):
        return ordinals[num]

    ones = num % 10
    tens = num - ones
    result = pronounce_number_sv(tens).rstrip()
    if ones > 0:
        result += ordinals[ones]
    elif tens % 100:
        # round tens take "nde" (tjugo -> tjugonde)
        result += 'nde'
    else:
        # hundreds and larger scale words take "de" (hundra -> hundrade)
        result += 'de'
    return result
|
||||
|
||||
|
||||
def nice_time_sv(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time to a comfortable human format in Swedish.

    For example, generate 'halv sex' for speech or '05:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    elif use_ampm:
        # 12-hour clock followed by the locale's AM/PM marker
        string = dt.strftime("%I:%M %p")
    else:
        # 12-hour clock without marker
        string = dt.strftime("%I:%M")

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        if dt.hour == 1:
            speak = "ett"  # 01:00 is "ett" not "en"
        else:
            speak = pronounce_number_sv(dt.hour)
        if dt.minute != 0:
            if dt.minute < 10:
                speak += ' noll'

            if dt.minute == 1:
                speak += ' ett'
            else:
                speak += " " + pronounce_number_sv(dt.minute)

        return speak  # ampm is ignored when use_24hour is true

    speak = ""
    hour = dt.hour

    if 0 < dt.minute < 30:
        # minutes past the current hour
        if dt.minute != 15:
            speak += pronounce_number_sv(dt.minute)
        else:
            speak += 'kvart'

        if dt.minute == 1:
            speak += ' minut över '
        elif dt.minute not in (5, 10, 15):
            speak += ' minuter över '
        else:
            speak += ' över '
    elif dt.minute > 30:
        # minutes remaining to the next hour
        if dt.minute != 45:
            speak += pronounce_number_sv((60 - dt.minute))
        else:
            speak += 'kvart'

        # One minute to the hour means minute 59. The old test for
        # minute == 1 could never match inside this branch, so 59 was
        # spoken with the plural "minuter".
        if dt.minute == 59:
            speak += ' minut i '
        elif dt.minute not in (45, 50, 55):
            speak += ' minuter i '
        else:
            speak += ' i '

        hour = (hour + 1) % 12
    elif dt.minute == 30:
        speak += 'halv '
        hour = (hour + 1) % 12

    if hour == 0 and dt.minute == 0:
        return "midnatt"
    if hour == 12 and dt.minute == 0:
        return "middag"

    if hour == 0:
        speak += pronounce_number_sv(12)
    elif hour <= 13:
        if hour == 1 or hour == 13:  # 01:00 and 13:00 is "ett"
            speak += 'ett'
        else:
            speak += pronounce_number_sv(hour)
    else:
        speak += pronounce_number_sv(hour - 12)

    if use_ampm:
        # the part of day is chosen from the real hour, not the spoken one
        if dt.hour > 11:
            if dt.hour < 18:
                # 12:00 - 17:59
                speak += " på eftermiddagen"
            elif dt.hour < 22:
                # 18:00 - 21:59
                speak += " på kvällen"
            else:
                # 22:00 - 23:59
                speak += " på natten"
        elif dt.hour < 3:
            # 00:00 - 02:59
            speak += " på natten"
        else:
            # 03:00 - 11:59
            speak += " på morgonen"

    return speak
|
||||
|
||||
|
||||
def nice_response_sv(text):
    """Post-process a Swedish response text before it is spoken.

    Two rewrites are applied:
      * a standalone "^" followed by a numeric word becomes "upphöjt till"
        (to the power of)
      * when the text contains a month name, nice_ordinal_sv() is applied
        to the text

    Args:
        text (str): the response text
    Returns:
        (str): the rewritten text; returned unchanged when neither rewrite
               applies
    """
    words = text.split()
    has_month = False
    power_replaced = False

    for idx, word in enumerate(words):
        if word.lower() in months:
            has_month = True

        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "upphöjt till"
                power_replaced = True

    # Apply the power replacement first and the month handling last.
    # Previously a "^" found after a month re-joined the stale word list
    # and silently threw away the result of nice_ordinal_sv().
    if power_replaced:
        text = " ".join(words)
    if has_month:
        text = nice_ordinal_sv(text)
    return text
|
||||
|
||||
|
||||
def nice_ordinal_sv(text):
    """Spell out "<digits>." tokens that directly precede a month name.

    The token is replaced by pronounce_ordinal_sv() of its number plus a
    suffix that depends on the preceding word.

    Args:
        text (str): text to normalize
    Returns:
        (str): the text with its words re-joined by single spaces
    """
    # NOTE(review): the "n"/"r" suffixes look like declension rules taken
    # over from another language - confirm they are wanted for Swedish.
    n_suffix_after = ("om", "den", "från", "till", "(från", "(om")

    words = text.split()
    for idx, word in enumerate(words):
        if not word.endswith("."):
            continue
        digits = word[:-1]
        if not digits.isdecimal():
            continue
        following = words[idx + 1] if idx + 1 < len(words) else ""
        if following.lower() not in months:
            continue

        previous = words[idx - 1].lower() if idx > 0 else ""
        spoken = pronounce_ordinal_sv(int(digits))
        if previous in n_suffix_after:
            spoken += "n"
        else:
            spoken += "r"
        words[idx] = spoken

    return " ".join(words)
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.format_sv import *
|
||||
|
|
|
@ -14,89 +14,8 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
def is_numeric(input_str):
    """Test whether a string can be converted to a number.

    Args:
        input_str (str): string to test
    Returns:
        (bool): True if float() accepts the value, else False
    """
    try:
        float(input_str)
    except (TypeError, ValueError):
        # ValueError: text that is not a number
        # TypeError: not a string or number at all (e.g. None)
        return False
    return True
|
||||
|
||||
|
||||
def look_for_fractions(split_list):
    """Decide whether the pieces of a string split on '/' form a fraction.

    Args:
        split_list (list): list created by splitting on '/'
    Returns:
        (bool): True if there are exactly two pieces and both are
                numeric, otherwise False
    """
    if len(split_list) != 2:
        return False
    numerator, denominator = split_list
    return is_numeric(numerator) and is_numeric(denominator)
|
||||
|
||||
|
||||
def extract_numbers_generic(text, pronounce_handler, extract_handler,
                            short_scale=True, ordinals=False):
    """Extract a list of numbers from a string.

    Language agnostic, per language parsers need to be provided.

    Args:
        text (str): the string to extract numbers from
        pronounce_handler (function): function that pronounces a number
        extract_handler (function): function that extracts the last number
                                    present in a string; called as
                                    extract_handler(text, short_scale,
                                    ordinals) and expected to return a
                                    falsy value when nothing is found
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. The default is short scale.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of
            1/3
    Returns:
        list: the extracted numbers, in the order they occur in the text
    """

    def replace_last(source, target, replacement):
        # handle duplicate occurrences, replace the last one only
        return replacement.join(source.rsplit(target, 1))

    numbers = []
    to_parse = text
    extract = extract_handler(to_parse, short_scale, ordinals)
    while extract:
        numbers.append(extract)
        prev = to_parse
        num_txt = pronounce_handler(extract)
        num_str = str(extract)
        if num_str.endswith(".0"):
            num_str = num_str[:-2]

        # The last number was found: swap its spoken form for digits and
        # blank the digits out, so the next pass finds the number before
        # it (handles cases like "test one two 3").
        to_parse = replace_last(to_parse, num_txt, num_str)
        to_parse = replace_last(to_parse, num_str, " ")
        if to_parse == prev:
            # avoid infinite loops, occasionally the pronounced number is
            # different from the extracted text,
            # ie pronounce(0.5) != half and extract(half) == 0.5
            # TODO fix this
            break
        extract = extract_handler(to_parse, short_scale, ordinals)

    numbers.reverse()
    return numbers
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.parse_common import *
|
||||
|
|
|
@ -13,920 +13,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from datetime import datetime
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions, \
|
||||
extract_numbers_generic
|
||||
from mycroft.util.lang.format_da import pronounce_number_da
|
||||
|
||||
da_numbers = {
|
||||
'nul': 0,
|
||||
'en': 1,
|
||||
'et': 1,
|
||||
'to': 2,
|
||||
'tre': 3,
|
||||
'fire': 4,
|
||||
'fem': 5,
|
||||
'seks': 6,
|
||||
'syv': 7,
|
||||
'otte': 8,
|
||||
'ni': 9,
|
||||
'ti': 10,
|
||||
'elve': 11,
|
||||
'tolv': 12,
|
||||
'tretten': 13,
|
||||
'fjorten': 14,
|
||||
'femten': 15,
|
||||
'seksten': 16,
|
||||
'sytten': 17,
|
||||
'atten': 18,
|
||||
'nitten': 19,
|
||||
'tyve': 20,
|
||||
'enogtyve': 21,
|
||||
'toogtyve': 22,
|
||||
'treogtyve': 23,
|
||||
'fireogtyve': 24,
|
||||
'femogtyve': 25,
|
||||
'seksogtyve': 26,
|
||||
'syvogtyve': 27,
|
||||
'otteogtyve': 28,
|
||||
'niogtyve': 29,
|
||||
'tredive': 30,
|
||||
'enogtredive': 31,
|
||||
'fyrrre': 40,
|
||||
'halvtres': 50,
|
||||
'tres': 60,
|
||||
'halvfjers': 70,
|
||||
'firs': 80,
|
||||
'halvfems': 90,
|
||||
'hunderede': 100,
|
||||
'tohundrede': 200,
|
||||
'trehundrede': 300,
|
||||
'firehundrede': 400,
|
||||
'femhundrede': 500,
|
||||
'sekshundrede': 600,
|
||||
'syvhundrede': 700,
|
||||
'ottehundrede': 800,
|
||||
'nihundrede': 900,
|
||||
'tusinde': 1000,
|
||||
'million': 1000000
|
||||
}
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
|
||||
def extractnumber_da(text):
    """Extract the first number found in a Danish text.

    Handles digits (including decimals), number words from da_numbers,
    fractions such as "2/3", words recognised by isFractional_da() and
    isOrdinal_da(), and two numbers joined by "og" (which are added).
    The words "den" and "det" are ignored.

    Args:
        text (str): the string to extract a number from
    Returns:
        (int) or (float): The value of the extracted number, or False if
                          no number was found
    """
    aWords = text.split()
    aWords = [word for word in aWords if
              word not in ["den", "det"]]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            # Accept every numeric token. The former isdigit() guard
            # rejected decimals such as "3.5", which were then skipped.
            val = float(word)
        elif isFractional_da(word):
            val = isFractional_da(word)
        elif isOrdinal_da(word):
            val = isOrdinal_da(word)
        else:
            if word in da_numbers:
                val = da_numbers[word]
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_da(wordNext)

                if valNext:
                    # number word followed by a fraction word: multiply
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # nothing usable after "og", keep the value before it
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "og"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
            # "<number> og ...": remember the value and parse what follows
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
            # same, with one word between the number and "og"
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
|
||||
|
||||
|
||||
def extract_datetime_da(string, currentDate, default_time):
    """Extract a date and/or time from a Danish utterance.

    The utterance is scanned twice: first for date expressions (today,
    tomorrow, weekdays, "næste uge", month names ...), then for time
    expressions (clock times, "om 3 time", "middag" ...).  Every word that
    was understood is blanked out so the caller gets the remainder back.

    Args:
        string (str): the utterance to parse
        currentDate (datetime): the anchor ("now") that relative
            expressions are resolved against
        default_time: object with ``hour`` and ``minute`` attributes, used
            when no time of day was recognised; may be None
    Returns:
        list: ``[datetime, str]`` - the resolved datetime and the utterance
            with the consumed words removed.  None if ``string`` is empty,
            ``currentDate`` is falsy or nothing date/time related was found.
    Raises:
        ValueError: if a captured date cannot be parsed (e.g. a month name
            without a day number).
    """
    def clean_string(s):
        """
            cleans the input string of unneeded punctuation
            and capitalization among other things.

            Spelled-out ordinals are replaced by their digits so the
            parser below only has to deal with numbers.
        """
        # the repeated ' om ' passes are kept on purpose: str.replace does
        # not match overlapping occurrences such as "x om om y"
        s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
            .replace(' den ', ' ').replace(' det ', ' ') \
            .replace(' om ', ' ').replace(' om ', ' ') \
            .replace(' på ', ' ').replace(' om ', ' ')
        wordList = s.split()

        for idx, word in enumerate(wordList):
            ordinal = isOrdinal_da(word)
            if ordinal is not False:
                wordList[idx] = str(ordinal)

        return wordList

    def date_found():
        return found or \
            (
                datestr != "" or timeStr != "" or
                yearOffset != 0 or monthOffset != 0 or
                dayOffset is True or hrOffset != 0 or
                hrAbs or minOffset != 0 or
                minAbs or secOffset != 0
            )

    if string == "" or not currentDate:
        return None

    found = False
    daySpecified = False
    dayOffset = False
    monthOffset = 0
    yearOffset = 0
    dateNow = currentDate
    today = dateNow.strftime("%w")
    currentYear = dateNow.strftime("%Y")
    fromFlag = False
    datestr = ""
    hasYear = False
    timeQualifier = ""

    timeQualifiersList = ['tidlig',
                          'morgen',
                          'morgenen',
                          'formidag',
                          'formiddagen',
                          'eftermiddag',
                          'eftermiddagen',
                          'aften',
                          'aftenen',
                          'nat',
                          'natten']
    markers = ['i', 'om', 'på', 'klokken', 'ved']
    days = ['mandag', 'tirsdag', 'onsdag',
            'torsdag', 'fredag', 'lørdag', 'søndag']
    months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
              'juli', 'august', 'september', 'oktober', 'november',
              'december']
    monthsShort = ['jan', 'feb', 'mar', 'apr', 'maj', 'juni', 'juli', 'aug',
                   'sep', 'okt', 'nov', 'dec']
    # spellings accepted by earlier versions of this parser
    monthAliases = {'desember': 'december', 'des': 'dec'}
    # English names, index-aligned with ``months``; needed for strptime
    en_months = ['january', 'february', 'march', 'april', 'may', 'june',
                 'july', 'august', 'september', 'october', 'november',
                 'december']
    nextWords = ('næste',)
    # "forige" is the spelling used historically, "forrige" the correct one
    lastWords = ('forige', 'forrige')

    validFollowups = days + months + monthsShort + list(monthAliases)
    validFollowups.append("i dag")
    validFollowups.append("morgen")
    validFollowups.append("næste")
    validFollowups.append("forige")
    validFollowups.append("forrige")
    validFollowups.append("nu")

    words = clean_string(string)

    # ---- pass 1: dates ----
    for idx, word in enumerate(words):
        if word == "":
            continue
        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        monthWord = monthAliases.get(word, word)

        start = idx
        used = 0
        # "morgen" means tomorrow unless it follows "om" or a weekday.
        # This must be tested before the qualifier list, which also
        # contains "morgen" and would otherwise always win.
        if word == "morgen" and not fromFlag and wordPrev != "om" and \
                wordPrev not in days:
            dayOffset = 1
            used += 1
        # save timequalifier for later
        elif word in timeQualifiersList:
            timeQualifier = word
        # parse today, day after tomorrow
        elif word == "dag" and not fromFlag:
            dayOffset = 0
            used += 1
        elif word == "overmorgen" and not fromFlag:
            dayOffset = 2
            used += 1
        # parse 5 days, 10 weeks, last week, next week
        # (wordPrev[:1] rather than [0]: wordPrev is "" for the first word)
        elif word == "dag" or word == "dage":
            if wordPrev[:1].isdigit():
                dayOffset += int(wordPrev)
                start -= 1
                used = 2
        elif word == "uge" or word == "uger" and not fromFlag:
            if wordPrev[:1].isdigit():
                dayOffset += int(wordPrev) * 7
                start -= 1
                used = 2
            elif wordPrev in nextWords:
                dayOffset = 7
                start -= 1
                used = 2
            elif wordPrev in lastWords:
                dayOffset = -7
                start -= 1
                used = 2
        # parse 10 months, next month, last month
        elif word == "måned" and not fromFlag:
            if wordPrev[:1].isdigit():
                monthOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev in nextWords:
                monthOffset = 1
                start -= 1
                used = 2
            elif wordPrev in lastWords:
                monthOffset = -1
                start -= 1
                used = 2
        # parse 5 years, next year, last year
        elif word == "år" and not fromFlag:
            if wordPrev[:1].isdigit():
                yearOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev in nextWords:
                yearOffset = 1
                start -= 1
                used = 2
            elif wordPrev in lastWords:
                yearOffset = -1
                start -= 1
                used = 2
        # parse Monday, Tuesday, etc., and next Monday,
        # last Tuesday, etc.
        elif word in days and not fromFlag:
            d = days.index(word)
            dayOffset = (d + 1) - int(today)
            used = 1
            if dayOffset < 0:
                dayOffset += 7
            if wordNext == "morgen":
                # morgen means morning if preceded by
                # the day of the week
                words[idx + 1] = "tidlig"
            if wordPrev in nextWords:
                dayOffset += 7
                used += 1
                start -= 1
            elif wordPrev in lastWords:
                dayOffset -= 7
                used += 1
                start -= 1
        # parse 15 of July, June 20th, Feb 18, 19 of February
        elif monthWord in months or monthWord in monthsShort and \
                not fromFlag:
            try:
                m = months.index(monthWord)
            except ValueError:
                m = monthsShort.index(monthWord)
            used += 1
            datestr = months[m]
            if wordPrev and (wordPrev[0].isdigit() or
                             (wordPrev == "of" and
                              wordPrevPrev[:1].isdigit())):
                if wordPrev == "of" and wordPrevPrev[:1].isdigit():
                    datestr += " " + words[idx - 2]
                    used += 1
                    start -= 1
                else:
                    datestr += " " + wordPrev
                start -= 1
                used += 1
                if wordNext and wordNext[0].isdigit():
                    datestr += " " + wordNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False

            elif wordNext and wordNext[0].isdigit():
                datestr += " " + wordNext
                used += 1
                if wordNextNext and wordNextNext[0].isdigit():
                    datestr += " " + wordNextNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False
        # parse 5 days from tomorrow, 10 weeks from next thursday,
        # 2 months from July

        if (
                word == "fra" or word == "til" or word == "om") and wordNext \
                in validFollowups:
            used = 2
            fromFlag = True
            # validFollowups holds "morgen" (not "morgenen"), so that is
            # the only form that can reach this test
            if wordNext == "morgen" and \
                    wordPrev != "om" and \
                    wordPrev not in days:
                # morgen means tomorrow if not "om morgen" and not
                # [day of the week] morgen:
                dayOffset += 1
            elif wordNext in days:
                d = days.index(wordNext)
                tmpOffset = (d + 1) - int(today)
                used = 2
                if tmpOffset < 0:
                    tmpOffset += 7
                dayOffset += tmpOffset
            elif wordNextNext and wordNextNext in days:
                d = days.index(wordNextNext)
                tmpOffset = (d + 1) - int(today)
                used = 3
                if wordNext in nextWords:
                    tmpOffset += 7
                    used += 1
                    start -= 1
                elif wordNext in lastWords:
                    tmpOffset -= 7
                    used += 1
                    start -= 1
                dayOffset += tmpOffset
        if used > 0:
            if start - 1 > 0 and words[start - 1].startswith("denne"):
                start -= 1
                used += 1

            for i in range(0, used):
                words[i + start] = ""

            if start - 1 >= 0 and words[start - 1] in markers:
                words[start - 1] = ""
            found = True
            daySpecified = True

    # ---- pass 2: time ----
    # timeStr is never filled in by this parser; it is kept only because
    # date_found() reads it.
    timeStr = ""
    hrOffset = 0
    minOffset = 0
    secOffset = 0
    hrAbs = None    # None: nothing spoken, -1: relative time only
    minAbs = None

    for idx, word in enumerate(words):
        if word == "":
            continue

        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
        wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""

        # parse noon, midnight, morning, afternoon, evening
        used = 0
        if word[:6] == "middag":
            hrAbs = 12
            used += 1
        elif word[:11] == "midnat":
            hrAbs = 0
            used += 1
        elif word == "morgenen" or (
                wordPrev == "om" and word == "morgenen") or word == "tidlig":
            if not hrAbs:
                hrAbs = 8
            used += 1
        elif word[:11] == "eftermiddag":
            if not hrAbs:
                hrAbs = 15
            used += 1
        elif word[:5] == "aften":
            if not hrAbs:
                hrAbs = 19
            used += 1
        # parse half an hour, quarter hour
        elif word == "time" and \
                (wordPrev in markers or wordPrevPrev in markers):
            if wordPrev[:4] == "halv":
                minOffset = 30
            elif wordPrev == "kvarter":
                minOffset = 15
            elif wordPrev == "trekvarter":
                minOffset = 45
            else:
                hrOffset = 1
            if wordPrevPrev in markers:
                words[idx - 2] = ""
            words[idx - 1] = ""
            used += 1
            hrAbs = -1
            minAbs = -1
        # parse 5:00 am, 12:00 p.m., etc
        elif word[0].isdigit():
            isTime = True
            strHH = ""
            strMM = ""
            remainder = ""
            if ':' in word:
                # parse colons
                # "3:00 in the morning"
                stage = 0
                length = len(word)
                for i in range(length):
                    if stage == 0:
                        if word[i].isdigit():
                            strHH += word[i]
                        elif word[i] == ":":
                            stage = 1
                        else:
                            stage = 2
                            i -= 1
                    elif stage == 1:
                        if word[i].isdigit():
                            strMM += word[i]
                        else:
                            stage = 2
                            i -= 1
                    elif stage == 2:
                        remainder = word[i:].replace(".", "")
                        break
                if remainder == "":
                    nextWord = wordNext.replace(".", "")
                    if nextWord == "am" or nextWord == "pm":
                        remainder = nextWord
                        used += 1
                    elif nextWord == "aften":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "om" and wordNextNext == "morgenen":
                        remainder = "am"
                        used += 2
                    elif wordNext == "om" and wordNextNext == "eftermiddagen":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "om" and wordNextNext == "aftenen":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "morgen":
                        remainder = "am"
                        used += 1
                    elif wordNext == "eftermiddag":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "aften":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "i" and wordNextNext == "morgen":
                        remainder = "am"
                        used = 2
                    elif wordNext == "i" and wordNextNext == "eftermiddag":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "i" and wordNextNext == "aften":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "natten":
                        # strHH is still a digit string at this point
                        if int(strHH) > 4:
                            remainder = "pm"
                        else:
                            remainder = "am"
                        used += 1
                    else:
                        if timeQualifier != "":
                            # shift to 24h; 12:xx in the afternoon stays 12
                            if int(strHH) < 12 and \
                                    (timeQualifier == "aftenen" or
                                     timeQualifier == "eftermiddagen"):
                                strHH = int(strHH) + 12
            else:
                # try to parse # s without colons
                # 5 hours, 10 minutes etc.
                length = len(word)
                strNum = ""
                remainder = ""
                for i in range(length):
                    if word[i].isdigit():
                        strNum += word[i]
                    else:
                        remainder += word[i]

                if remainder == "":
                    remainder = wordNext.replace(".", "").lstrip().rstrip()

                if (
                        remainder == "pm" or
                        wordNext == "pm" or
                        remainder == "p.m." or
                        wordNext == "p.m."):
                    strHH = strNum
                    remainder = "pm"
                    used = 1
                elif (
                        remainder == "am" or
                        wordNext == "am" or
                        remainder == "a.m." or
                        wordNext == "a.m."):
                    strHH = strNum
                    remainder = "am"
                    used = 1
                else:
                    if wordNext == "time" and int(word) < 100:
                        # "in 3 hours"
                        hrOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif wordNext == "minut":
                        # "in 10 minutes"
                        minOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif wordNext == "sekund":
                        # in 5 seconds
                        secOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1

                    elif wordNext == "time":
                        strHH = word
                        used += 1
                        isTime = True
                        if wordNextNext == timeQualifier:
                            strMM = ""
                            if wordNextNext[:11] == "eftermiddag":
                                used += 1
                                remainder = "pm"
                            elif wordNextNext == "om" and wordNextNextNext == \
                                    "eftermiddagen":
                                used += 2
                                remainder = "pm"
                            elif wordNextNext[:5] == "aften":
                                used += 1
                                remainder = "pm"
                            elif wordNextNext == "om" and wordNextNextNext == \
                                    "aftenen":
                                used += 2
                                remainder = "pm"
                            elif wordNextNext[:6] == "morgen":
                                used += 1
                                remainder = "am"
                            elif wordNextNext == "om" and wordNextNextNext == \
                                    "morgenen":
                                used += 2
                                remainder = "am"
                            elif wordNextNext == "natten":
                                used += 1
                                if 8 <= int(word) <= 12:
                                    remainder = "pm"
                                else:
                                    remainder = "am"

                        elif is_numeric(wordNextNext):
                            strMM = wordNextNext
                            used += 1
                            # same qualifier ladder as the branch above,
                            # shifted by one word
                            if wordNextNextNext == timeQualifier:
                                if wordNextNextNext[:11] == "eftermiddag":
                                    used += 1
                                    remainder = "pm"
                                elif wordNextNextNext == "om" and \
                                        wordNextNextNextNext == \
                                        "eftermiddagen":
                                    used += 2
                                    remainder = "pm"
                                elif wordNextNextNext[:5] == "aften":
                                    used += 1
                                    remainder = "pm"
                                elif wordNextNextNext == "om" and \
                                        wordNextNextNextNext == "aftenen":
                                    used += 2
                                    remainder = "pm"
                                elif wordNextNextNext[:6] == "morgen":
                                    used += 1
                                    remainder = "am"
                                elif wordNextNextNext == "om" and \
                                        wordNextNextNextNext == "morgenen":
                                    used += 2
                                    remainder = "am"
                                elif wordNextNextNext == "natten":
                                    used += 1
                                    if 8 <= int(word) <= 12:
                                        remainder = "pm"
                                    else:
                                        remainder = "am"

                    elif wordNext == timeQualifier:
                        strHH = word
                        strMM = 0
                        isTime = True
                        if wordNext.startswith("eftermiddag"):
                            used += 1
                            remainder = "pm"
                        elif wordNext == "om" and \
                                wordNextNext == "eftermiddagen":
                            used += 2
                            remainder = "pm"
                        elif wordNext[:7] == "aftenen":
                            used += 1
                            remainder = "pm"
                        elif wordNext == "om" and wordNextNext == "aftenen":
                            used += 2
                            remainder = "pm"
                        elif wordNext.startswith("morgen"):
                            used += 1
                            remainder = "am"
                        elif wordNext == "om" and wordNextNext == "morgenen":
                            used += 2
                            remainder = "am"
                        elif wordNext == "natten":
                            used += 1
                            if 8 <= int(word) <= 12:
                                remainder = "pm"
                            else:
                                remainder = "am"

            strHH = int(strHH) if strHH else 0
            strMM = int(strMM) if strMM else 0
            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
            if strHH > 24 or strMM > 59:
                isTime = False
                used = 0
            if isTime:
                hrAbs = strHH * 1
                minAbs = strMM * 1
                used += 1
        if used > 0:
            # removed parsed words from the sentence
            for i in range(used):
                words[idx + i] = ""

            if wordPrev == "tidlig":
                hrOffset = -1
                words[idx - 1] = ""
                idx -= 1
            elif wordPrev == "sen":
                hrOffset = 1
                words[idx - 1] = ""
                idx -= 1
            if idx > 0 and wordPrev in markers:
                words[idx - 1] = ""
            if idx > 1 and wordPrevPrev in markers:
                words[idx - 2] = ""

            idx += used - 1
            found = True

    # check that we found a date
    # (date_found must be *called*; the bare function object is always true)
    if not date_found():
        return None

    if dayOffset is False:
        dayOffset = 0

    # perform date manipulation

    extractedDate = dateNow
    extractedDate = extractedDate.replace(microsecond=0,
                                          second=0,
                                          minute=0,
                                          hour=0)
    if datestr != "":
        # datestr is "<Danish month> [<day> [<year>]]".  Translate only the
        # month token: substring replacement would corrupt names that
        # contain an abbreviation ("september" -> "septtember").
        dateParts = datestr.split()
        dateParts[0] = en_months[months.index(dateParts[0])]
        datestr = " ".join(dateParts)

        # NOTE(review): %B matches the month names of the active locale;
        # English names are assumed here - confirm for non-English locales.
        try:
            temp = datetime.strptime(datestr, "%B %d")
        except ValueError:
            # try again, allowing the captured year
            temp = datetime.strptime(datestr, "%B %d %Y")

        if not hasYear:
            # copy tzinfo so aware and naive datetimes are never compared
            temp = temp.replace(year=extractedDate.year,
                                tzinfo=extractedDate.tzinfo)
            if extractedDate < temp:
                extractedDate = extractedDate.replace(
                    year=int(currentYear),
                    month=temp.month,
                    day=temp.day)
            else:
                # date already passed this year: roll over to next year
                extractedDate = extractedDate.replace(
                    year=int(currentYear) + 1,
                    month=temp.month,
                    day=temp.day)
        else:
            extractedDate = extractedDate.replace(
                year=temp.year,
                month=temp.month,
                day=temp.day)

    if yearOffset != 0:
        extractedDate = extractedDate + relativedelta(years=yearOffset)
    if monthOffset != 0:
        extractedDate = extractedDate + relativedelta(months=monthOffset)
    if dayOffset != 0:
        extractedDate = extractedDate + relativedelta(days=dayOffset)

    if hrAbs is None and minAbs is None and default_time:
        hrAbs = default_time.hour
        minAbs = default_time.minute

    if hrAbs != -1 and minAbs != -1:

        extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
                                                      minutes=minAbs or 0)
        if (hrAbs or minAbs) and datestr == "":
            # a bare time that has already passed today means tomorrow
            if not daySpecified and dateNow > extractedDate:
                extractedDate = extractedDate + relativedelta(days=1)
    if hrOffset != 0:
        extractedDate = extractedDate + relativedelta(hours=hrOffset)
    if minOffset != 0:
        extractedDate = extractedDate + relativedelta(minutes=minOffset)
    if secOffset != 0:
        extractedDate = extractedDate + relativedelta(seconds=secOffset)

    # drop an "og" left dangling between consumed words; a missing
    # neighbour at either end of the utterance counts as consumed
    for idx, word in enumerate(words):
        if word != "og":
            continue
        before = words[idx - 1] if idx > 0 else ""
        after = words[idx + 1] if idx + 1 < len(words) else ""
        if before == "" and after == "":
            words[idx] = ""

    resultStr = " ".join(words)
    resultStr = ' '.join(resultStr.split())

    return [extractedDate, resultStr]
|
||||
|
||||
|
||||
def isFractional_da(input_str):
    """
    This function takes the given text and checks if it is a fraction.

    Recognised are "halv..." (one half), "tredjedel" (one third) and words
    of the form <number>del whose stem is either a key of da_numbers or an
    ordinal understood by isOrdinal_da (e.g. "femtedel").

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    lowered = input_str.lower()

    # The cardinals 50, 70 and 90 (halvtreds, halvfjerds, halvfems and
    # their variants) also start with "halv" but are not fractions.
    if lowered.startswith("halv") and \
            not lowered.startswith(("halvtres", "halvtreds",
                                    "halvfjer", "halvfems")):
        return 0.5

    # "trediedel" is the spelling accepted historically, "tredjedel" the
    # current one
    if lowered in ("trediedel", "tredjedel"):
        return 1.0 / 3

    if lowered.endswith('del'):
        stem = lowered[:-3]  # e.g. "femtedel" -> "femte"
        if stem in da_numbers:
            denominator = da_numbers[stem]
        else:
            # Danish fractions are built on the ordinal, not the cardinal
            denominator = isOrdinal_da(stem)
        if denominator:  # skips False as well as a zero denominator
            return 1.0 / denominator

    return False
|
||||
|
||||
|
||||
def isOrdinal_da(input_str):
    """
    This function takes the given text and checks if it is an ordinal number.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (int): False if not an ordinal, otherwise the number
        corresponding to the ordinal

    Irregular ordinals (1-6, 11, 12) are matched by prefix from a fixed
    table; all others are resolved by stripping an ordinal suffix and
    looking the remaining stem up in da_numbers, so only ordinals whose
    cardinal is a key of da_numbers are recognised.

    """
    irregular = (
        ("første", 1),
        ("anden", 2),
        ("tredie", 3),
        ("tredje", 3),
        ("fjerde", 4),
        ("femte", 5),
        ("sjette", 6),
        ("ellevte", 11),
        ("elvte", 11),
        ("elfte", 11),    # legacy spelling, kept for compatibility
        ("tolvte", 12),
        ("tolvfte", 12),  # legacy spelling, kept for compatibility
    )

    lowerstr = input_str.lower()

    # 13th-19th are cardinal + "de" (femten -> femtende).  This has to run
    # before the prefix table: "femtende" starts with "femte".
    if lowerstr.endswith("tende") and lowerstr[:-2] in da_numbers:
        return da_numbers[lowerstr[:-2]]

    for prefix, value in irregular:
        if lowerstr.startswith(prefix):
            return value

    # Regular ordinals.  Each suffix is tried against the original word,
    # longest first, so "syvende" -> "syv" and "ottende" -> "otte" both
    # resolve and one failed strip cannot corrupt the next attempt.
    for suffix in ("ende", "nde", "te"):
        if lowerstr.endswith(suffix):
            stem = lowerstr[:-len(suffix)]
            if stem in da_numbers:
                return da_numbers[stem]

    return False
|
||||
|
||||
|
||||
def normalize_da(text, remove_articles):
    """ Danish string normalization

    Collapses runs of whitespace, optionally drops the definite articles
    "den"/"det" and replaces every word that is a key of da_numbers with
    its digits.

    Args:
        text (str): the text to normalize
        remove_articles (bool): whether "den" and "det" are removed
    Returns:
        (str): the normalized words joined by single spaces
    """
    articles = ("den", "det")
    kept = []
    for token in text.split():  # split() also swallows extra spaces
        if remove_articles and token in articles:
            continue
        # number words become digits, e.g. "to" -> "2"
        kept.append(str(da_numbers[token]) if token in da_numbers else token)
    return " ".join(kept)
|
||||
|
||||
|
||||
def extract_numbers_da(text, short_scale=True, ordinals=False):
    """
        Find every number in a Danish string.

        Thin wrapper that hands the Danish pronounce/extract helpers to
        extract_numbers_generic.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: whatever extract_numbers_generic returns for these helpers
            (documented by the original author as a list of floats)
    """
    options = {"short_scale": short_scale, "ordinals": ordinals}
    return extract_numbers_generic(text, pronounce_number_da,
                                   extractnumber_da, **options)
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.parse_da import *
|
||||
|
|
|
@ -14,938 +14,8 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from datetime import datetime
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions, \
|
||||
extract_numbers_generic
|
||||
from mycroft.util.lang.format_de import pronounce_number_de
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
# Lookup table from spelled-out German number words to their integer value.
# extractnumber_de consults it for any word that is neither numeric, a
# fraction word nor an ordinal.  Only the words listed here are known:
# compounds are enumerated explicitly (21-31), everything else is limited
# to the tens, the hundreds, "tausend" and "million".
de_numbers = {
    'null': 0,
    # "one", including its inflected article forms
    'ein': 1,
    'eins': 1,
    'eine': 1,
    'einer': 1,
    'einem': 1,
    'einen': 1,
    'eines': 1,
    'zwei': 2,
    'drei': 3,
    'vier': 4,
    'fünf': 5,
    'sechs': 6,
    'sieben': 7,
    'acht': 8,
    'neun': 9,
    'zehn': 10,
    'elf': 11,
    'zwölf': 12,
    'dreizehn': 13,
    'vierzehn': 14,
    'fünfzehn': 15,
    'sechzehn': 16,
    'siebzehn': 17,
    'achtzehn': 18,
    'neunzehn': 19,
    'zwanzig': 20,
    # compounds up to 31
    'einundzwanzig': 21,
    'zweiundzwanzig': 22,
    'dreiundzwanzig': 23,
    'vierundzwanzig': 24,
    'fünfundzwanzig': 25,
    'sechsundzwanzig': 26,
    'siebenundzwanzig': 27,
    'achtundzwanzig': 28,
    'neunundzwanzig': 29,
    'dreißig': 30,
    'einunddreißig': 31,
    # remaining tens
    'vierzig': 40,
    'fünfzig': 50,
    'sechzig': 60,
    'siebzig': 70,
    'achtzig': 80,
    'neunzig': 90,
    # hundreds
    'hundert': 100,
    'zweihundert': 200,
    'dreihundert': 300,
    'vierhundert': 400,
    'fünfhundert': 500,
    'sechshundert': 600,
    'siebenhundert': 700,
    'achthundert': 800,
    'neunhundert': 900,
    'tausend': 1000,
    'million': 1000000
}
|
||||
|
||||
|
||||
def extractnumber_de(text):
    """
    Extract the first number expression found in a German string.

    The words are scanned left to right.  A word counts as a number if it
    is numeric, a fraction word (isFractional_de), an ordinal
    (isOrdinal_de), a key of de_numbers or an "a/b" fraction.  A number
    word directly followed by a fraction word is multiplied with it, and
    two values joined by "und" are added together.

    Args:
        text (str): the string to extract a number from
    Returns:
        (int) or (float): The value of extracted number, or False if no
        number was found


    undefined articles cannot be suppressed in German:
    'ein Pferd' means 'one horse' and 'a horse'

    NOTE(review): a value of zero ('null', "0") is falsy and is therefore
    handled like "no number" by the checks below - confirm this is intended.

    """
    aWords = text.split()
    # definite articles carry no numeric meaning and are dropped up front
    aWords = [word for word in aWords if
              word not in ["der", "die", "das", "des", "den", "dem"]]
    and_pass = False    # True once "<value> und" has been seen
    valPreAnd = False   # the value that preceded "und"
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            # if word.isdigit():            # doesn't work with decimals
            val = float(word)
        elif isFractional_de(word):
            val = isFractional_de(word)
        elif isOrdinal_de(word):
            val = isOrdinal_de(word)
        else:
            if word in de_numbers:
                val = de_numbers[word]
                # look ahead: a following fraction word scales the number
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_de(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # nothing followed the "und": keep the value before it
                val = valPreAnd
                break
            else:
                # not a number: move on to the next word
                count += 1
                continue

        # blank out the word that was just consumed
        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "und"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'und':
            # "<value> und ...": remember the value, continue after "und"
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'und':
            # same, with one word between the value and "und"
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        # a complete number expression has been read
        break

    if not val:
        return False

    return val
|
||||
|
||||
|
||||
def extract_datetime_de(string, currentDate, default_time):
    """Extract a date and/or time from a German utterance.

    The utterance is scanned twice: a first pass consumes date expressions
    (heute, morgen, weekdays, month names, "in 3 tagen", ...), a second
    pass consumes time expressions ("8 uhr abends", "15:30",
    "in 3 stunden", ...). Every consumed word is blanked so the remainder
    of the sentence can be returned to the caller.

    Args:
        string (str): the utterance to parse
        currentDate (datetime): reference "now" used for relative dates
        default_time: object with ``hour`` and ``minute`` attributes used
            when the utterance names a day but no time; may be None
    Returns:
        [datetime, str]: the extracted datetime and the leftover text, or
        None if the string is empty, no reference date was given, or no
        date/time expression was found.
    """

    def clean_string(s):
        """
            cleans the input string of unneeded punctuation
            and capitalization among other things.

            'am' is a preposition, so cannot currently be used
            for 12 hour date format
        """
        s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
            .replace(' der ', ' ').replace(' den ', ' ') \
            .replace(' an ', ' ').replace(' am ', ' ') \
            .replace(' auf ', ' ').replace(' um ', ' ')
        wordList = s.split()

        # replace ordinal words ("fünfte") by their digits ("5")
        for idx, word in enumerate(wordList):
            ordinal = isOrdinal_de(word)
            if ordinal is not False:
                wordList[idx] = str(ordinal)

        return wordList

    def date_found():
        return found or \
            (
                datestr != "" or timeStr != "" or
                yearOffset != 0 or monthOffset != 0 or
                dayOffset is True or hrOffset != 0 or
                hrAbs or minOffset != 0 or
                minAbs or secOffset != 0
            )

    if string == "" or not currentDate:
        return None

    found = False
    daySpecified = False
    dayOffset = False
    monthOffset = 0
    yearOffset = 0
    dateNow = currentDate
    today = dateNow.strftime("%w")
    currentYear = dateNow.strftime("%Y")
    fromFlag = False
    datestr = ""
    hasYear = False
    timeQualifier = ""

    timeQualifiersList = ['früh', 'morgens', 'vormittag', 'vormittags',
                          'nachmittag', 'nachmittags', 'abend', 'abends',
                          'nachts']
    markers = ['in', 'am', 'gegen', 'bis', 'für']
    days = ['montag', 'dienstag', 'mittwoch',
            'donnerstag', 'freitag', 'samstag', 'sonntag']
    # German spellings; they are mapped to English names before strptime
    months = ['januar', 'februar', 'märz', 'april', 'mai', 'juni',
              'juli', 'august', 'september', 'oktober', 'november',
              'dezember']
    monthsShort = ['jan', 'feb', 'mär', 'apr', 'mai', 'juni', 'juli', 'aug',
                   'sept', 'okt', 'nov', 'dez']

    validFollowups = days + months + monthsShort
    validFollowups.extend([
        "heute", "morgen",
        "nächste", "nächster", "nächstes", "nächsten", "nächstem",
        "letzte", "letzter", "letztes", "letzten", "letztem",
        "jetzt"])

    words = clean_string(string)

    # ---- first pass: dates -------------------------------------------
    for idx, word in enumerate(words):
        if word == "":
            continue
        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""

        # this isn't in clean string because I don't want to save back to
        # words: strip the inflection only from the local copy
        if word != 'morgen' and word != 'übermorgen':
            if word[-2:] == "en":
                word = word[:-2]  # remove en
        if word != 'heute':
            if word[-1:] == "e":
                word = word[:-1]  # remove plural for most nouns

        start = idx
        used = 0
        # save timequalifier for later
        if word in timeQualifiersList:
            timeQualifier = word
        # parse today, tomorrow, day after tomorrow
        elif word == "heute" and not fromFlag:
            dayOffset = 0
            used += 1
        elif word == "morgen" and not fromFlag and wordPrev != "am" and \
                wordPrev not in days:
            # morgen means tomorrow if not "am Morgen" and not
            # [day of the week] morgen
            dayOffset = 1
            used += 1
        elif word == "übermorgen" and not fromFlag:
            dayOffset = 2
            used += 1
        # parse 5 days, 10 weeks, last week, next week
        # (wordPrev[:1] instead of wordPrev[0]: wordPrev is "" when the
        # unit is the first word of the utterance)
        elif word == "tag" or word == "tage":
            if wordPrev[:1].isdigit():
                dayOffset += int(wordPrev)
                start -= 1
                used = 2
        elif word == "woch" and not fromFlag:
            if wordPrev[:1].isdigit():
                dayOffset += int(wordPrev) * 7
                start -= 1
                used = 2
            elif wordPrev[:6] == "nächst":
                dayOffset = 7
                start -= 1
                used = 2
            elif wordPrev[:5] == "letzt":
                dayOffset = -7
                start -= 1
                used = 2
        # parse 10 months, next month, last month
        elif word == "monat" and not fromFlag:
            if wordPrev[:1].isdigit():
                monthOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev[:6] == "nächst":
                monthOffset = 1
                start -= 1
                used = 2
            elif wordPrev[:5] == "letzt":
                monthOffset = -1
                start -= 1
                used = 2
        # parse 5 years, next year, last year
        elif word == "jahr" and not fromFlag:
            if wordPrev[:1].isdigit():
                yearOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev[:6] == "nächst":
                yearOffset = 1
                start -= 1
                used = 2
            elif wordPrev[:5] == "letzt":
                # was a second, unreachable "nächst" test
                yearOffset = -1
                start -= 1
                used = 2
        # parse Monday, Tuesday, etc., and next Monday,
        # last Tuesday, etc.
        elif word in days and not fromFlag:
            d = days.index(word)
            dayOffset = (d + 1) - int(today)
            used = 1
            if dayOffset < 0:
                dayOffset += 7
            if wordNext == "morgen":
                # morgen means morning if preceded by the day of the week
                words[idx + 1] = "früh"
            if wordPrev[:6] == "nächst":
                dayOffset += 7
                used += 1
                start -= 1
            elif wordPrev[:5] == "letzt":
                dayOffset -= 7
                used += 1
                start -= 1
        # parse 15 of July, June 20th, Feb 18, 19 of February
        elif word in months or word in monthsShort and not fromFlag:
            try:
                m = months.index(word)
            except ValueError:
                m = monthsShort.index(word)
            used += 1
            datestr = months[m]
            if wordPrev and (wordPrev[0].isdigit() or
                             (wordPrev == "of" and
                              wordPrevPrev[:1].isdigit())):
                if wordPrev == "of" and wordPrevPrev[:1].isdigit():
                    datestr += " " + words[idx - 2]
                    used += 1
                    start -= 1
                else:
                    datestr += " " + wordPrev
                start -= 1
                used += 1
                if wordNext and wordNext[0].isdigit():
                    datestr += " " + wordNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False

            elif wordNext and wordNext[0].isdigit():
                datestr += " " + wordNext
                used += 1
                if wordNextNext and wordNextNext[0].isdigit():
                    datestr += " " + wordNextNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False

        # parse 5 days from tomorrow, 10 weeks from next thursday,
        # 2 months from July
        if (word == "von" or word == "nach" or word == "ab") and \
                wordNext in validFollowups:
            used = 2
            fromFlag = True
            if wordNext == "morgen" and wordPrev != "am" and \
                    wordPrev not in days:
                # morgen means tomorrow if not "am Morgen" and not
                # [day of the week] morgen
                dayOffset += 1
            elif wordNext in days:
                d = days.index(wordNext)
                tmpOffset = (d + 1) - int(today)
                used = 2
                if tmpOffset < 0:
                    tmpOffset += 7
                dayOffset += tmpOffset
            elif wordNextNext and wordNextNext in days:
                d = days.index(wordNextNext)
                tmpOffset = (d + 1) - int(today)
                used = 3
                if wordNext[:6] == "nächst":
                    tmpOffset += 7
                    used += 1
                    start -= 1
                elif wordNext[:5] == "letzt":
                    tmpOffset -= 7
                    used += 1
                    start -= 1
                dayOffset += tmpOffset
        if used > 0:
            if start - 1 > 0 and words[start - 1].startswith("diese"):
                start -= 1
                used += 1

            # blank the consumed words
            for i in range(0, used):
                words[i + start] = ""

            if start - 1 >= 0 and words[start - 1] in markers:
                words[start - 1] = ""
            found = True
            daySpecified = True

    # ---- second pass: times ------------------------------------------
    timeStr = ""  # never filled in; only read by date_found()
    hrOffset = 0
    minOffset = 0
    secOffset = 0
    hrAbs = None
    minAbs = None

    for idx, word in enumerate(words):
        if word == "":
            continue

        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
        wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""

        # parse noon, midnight, morning, afternoon, evening
        used = 0
        if word[:6] == "mittag":
            hrAbs = 12
            used += 1
        elif word[:11] == "mitternacht":
            hrAbs = 0
            used += 1
        elif word == "morgens" or (
                wordPrev == "am" and word == "morgen") or word == "früh":
            if not hrAbs:
                hrAbs = 8
            used += 1
        elif word[:10] == "nachmittag":
            if not hrAbs:
                hrAbs = 15
            used += 1
        elif word[:5] == "abend":
            if not hrAbs:
                hrAbs = 19
            used += 1
        # parse half an hour, quarter hour
        elif word == "stunde" and \
                (wordPrev in markers or wordPrevPrev in markers):
            if wordPrev[:4] == "halb":
                minOffset = 30
            elif wordPrev == "viertel":
                minOffset = 15
            elif wordPrev == "dreiviertel":
                minOffset = 45
            else:
                hrOffset = 1
            if wordPrevPrev in markers:
                words[idx - 2] = ""
            words[idx - 1] = ""
            used += 1
            # -1 marks "relative offset only, no absolute clock time"
            hrAbs = -1
            minAbs = -1
        # parse 5:00 am, 12:00 p.m., etc
        elif word[0].isdigit():
            isTime = True
            strHH = ""
            strMM = ""
            remainder = ""
            if ':' in word:
                # parse colons
                # "3:00 in the morning"
                stage = 0
                length = len(word)
                for i in range(length):
                    if stage == 0:
                        if word[i].isdigit():
                            strHH += word[i]
                        elif word[i] == ":":
                            stage = 1
                        else:
                            # first non-time character: rest is suffix
                            remainder = word[i:].replace(".", "")
                            break
                    elif stage == 1:
                        if word[i].isdigit():
                            strMM += word[i]
                        else:
                            remainder = word[i:].replace(".", "")
                            break
                if remainder == "":
                    nextWord = wordNext.replace(".", "")
                    if nextWord == "am" or nextWord == "pm":
                        remainder = nextWord
                        used += 1
                    elif nextWord == "abends":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "am" and wordNextNext == "morgen":
                        remainder = "am"
                        used += 2
                    elif wordNext == "am" and wordNextNext == "nachmittag":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "am" and wordNextNext == "abend":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "morgens":
                        remainder = "am"
                        used += 1
                    elif wordNext == "nachmittags":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "abends":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "heute" and wordNextNext == "morgen":
                        remainder = "am"
                        used = 2
                    elif wordNext == "heute" and \
                            wordNextNext == "nachmittag":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "heute" and wordNextNext == "abend":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "nachts":
                        # strHH is still a digit string at this point
                        if int(strHH) > 4:
                            remainder = "pm"
                        else:
                            remainder = "am"
                        used += 1
                    else:
                        if timeQualifier != "":
                            if int(strHH) <= 12 and \
                                    (timeQualifier == "abends" or
                                     timeQualifier == "nachmittags"):
                                # let the shared am/pm conversion below
                                # shift the hour into the afternoon
                                remainder = "pm"
            else:
                # try to parse # s without colons
                # 5 hours, 10 minutes etc.
                length = len(word)
                strNum = ""
                remainder = ""
                for i in range(length):
                    if word[i].isdigit():
                        strNum += word[i]
                    else:
                        remainder += word[i]

                if remainder == "":
                    remainder = wordNext.replace(".", "").lstrip().rstrip()

                if (
                        remainder == "pm" or
                        wordNext == "pm" or
                        remainder == "p.m." or
                        wordNext == "p.m."):
                    strHH = strNum
                    remainder = "pm"
                    used = 1
                elif (
                        remainder == "am" or
                        wordNext == "am" or
                        remainder == "a.m." or
                        wordNext == "a.m."):
                    strHH = strNum
                    remainder = "am"
                    used = 1
                else:
                    # words are not stemmed in this pass, so match the
                    # unit by prefix ("stunde", "stunden", ...)
                    if wordNext[:5] == "stund" and int(word) < 100:
                        # "in 3 hours"
                        hrOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif wordNext[:5] == "minut":
                        # "in 10 minutes"
                        minOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif wordNext[:6] == "sekund":
                        # in 5 seconds
                        secOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1

                    elif wordNext == "uhr":
                        strHH = word
                        used += 1
                        isTime = True
                        if wordNextNext == timeQualifier:
                            strMM = ""
                            if wordNextNext[:10] == "nachmittag":
                                used += 1
                                remainder = "pm"
                            elif wordNextNext == "am" and \
                                    wordNextNextNext == "nachmittag":
                                used += 2
                                remainder = "pm"
                            elif wordNextNext[:5] == "abend":
                                used += 1
                                remainder = "pm"
                            elif wordNextNext == "am" and \
                                    wordNextNextNext == "abend":
                                used += 2
                                remainder = "pm"
                            elif wordNextNext[:7] == "morgens":
                                used += 1
                                remainder = "am"
                            elif wordNextNext == "am" and \
                                    wordNextNextNext == "morgen":
                                used += 2
                                remainder = "am"
                            elif wordNextNext == "nachts":
                                used += 1
                                if 8 <= int(word) <= 12:
                                    remainder = "pm"
                                else:
                                    remainder = "am"

                        elif is_numeric(wordNextNext):
                            strMM = wordNextNext
                            used += 1
                            if wordNextNextNext == timeQualifier:
                                if wordNextNextNext[:10] == "nachmittag":
                                    used += 1
                                    remainder = "pm"
                                elif wordNextNextNext == "am" and \
                                        wordNextNextNextNext == "nachmittag":
                                    used += 2
                                    remainder = "pm"
                                elif wordNextNextNext[:5] == "abend":
                                    used += 1
                                    remainder = "pm"
                                elif wordNextNextNext == "am" and \
                                        wordNextNextNextNext == "abend":
                                    used += 2
                                    remainder = "pm"
                                elif wordNextNextNext[:7] == "morgens":
                                    used += 1
                                    remainder = "am"
                                elif wordNextNextNext == "am" and \
                                        wordNextNextNextNext == "morgen":
                                    used += 2
                                    remainder = "am"
                                elif wordNextNextNext == "nachts":
                                    used += 1
                                    if 8 <= int(word) <= 12:
                                        remainder = "pm"
                                    else:
                                        remainder = "am"

                    elif wordNext == timeQualifier:
                        strHH = word
                        strMM = 00
                        isTime = True
                        if wordNext[:10] == "nachmittag":
                            used += 1
                            remainder = "pm"
                        elif wordNext == "am" and \
                                wordNextNext == "nachmittag":
                            used += 2
                            remainder = "pm"
                        elif wordNext[:5] == "abend":
                            used += 1
                            remainder = "pm"
                        elif wordNext == "am" and wordNextNext == "abend":
                            used += 2
                            remainder = "pm"
                        elif wordNext[:7] == "morgens":
                            used += 1
                            remainder = "am"
                        elif wordNext == "am" and wordNextNext == "morgen":
                            used += 2
                            remainder = "am"
                        elif wordNext == "nachts":
                            used += 1
                            if 8 <= int(word) <= 12:
                                remainder = "pm"
                            else:
                                remainder = "am"

            # convert the collected pieces to a 24 hour clock time
            strHH = int(strHH) if strHH else 0
            strMM = int(strMM) if strMM else 0
            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
            if strHH > 24 or strMM > 59:
                isTime = False
                used = 0
            if isTime:
                hrAbs = strHH * 1
                minAbs = strMM * 1
                used += 1
        if used > 0:
            # removed parsed words from the sentence
            for i in range(used):
                words[idx + i] = ""

            # NOTE(review): clean_string lower-cases every word, so this
            # comparison with capitalised "Uhr" looks like it can never
            # match -- confirm the intent before changing it.
            if wordPrev == "Uhr":
                words[words.index(wordPrev)] = ""

            if wordPrev == "früh":
                hrOffset = -1
                words[idx - 1] = ""
                idx -= 1
            elif wordPrev == "spät":
                hrOffset = 1
                words[idx - 1] = ""
                idx -= 1
            if idx > 0 and wordPrev in markers:
                words[idx - 1] = ""
            if idx > 1 and wordPrevPrev in markers:
                words[idx - 2] = ""

            idx += used - 1
            found = True

    # check that we found a date (date_found must be *called*; the bare
    # function object is always truthy)
    if not date_found():
        return None

    if dayOffset is False:
        dayOffset = 0

    # perform date manipulation

    extractedDate = dateNow
    extractedDate = extractedDate.replace(microsecond=0,
                                          second=0,
                                          minute=0,
                                          hour=0)
    if datestr != "":
        # strptime's %B expects English month names here
        en_months = ['january', 'february', 'march', 'april', 'may', 'june',
                     'july', 'august', 'september', 'october', 'november',
                     'december']
        en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
                          'aug',
                          'sept', 'oct', 'nov', 'dec']
        for idx, en_month in enumerate(en_months):
            datestr = datestr.replace(months[idx], en_month)
        for idx, en_month in enumerate(en_monthsShort):
            datestr = datestr.replace(monthsShort[idx], en_month)

        if not hasYear:
            temp = datetime.strptime(datestr, "%B %d")
            # carry the tzinfo over so the comparison below also works
            # for timezone-aware reference dates
            temp = temp.replace(year=extractedDate.year,
                                tzinfo=extractedDate.tzinfo)
            if extractedDate < temp:
                extractedDate = extractedDate.replace(
                    year=int(currentYear),
                    month=temp.month,
                    day=temp.day)
            else:
                # date already passed this year: use next year
                extractedDate = extractedDate.replace(
                    year=int(currentYear) + 1,
                    month=temp.month,
                    day=temp.day)
        else:
            # datestr is "<month> <day> <year>" when a year was captured
            temp = datetime.strptime(datestr, "%B %d %Y")
            extractedDate = extractedDate.replace(
                year=temp.year,
                month=temp.month,
                day=temp.day)

    if yearOffset != 0:
        extractedDate = extractedDate + relativedelta(years=yearOffset)
    if monthOffset != 0:
        extractedDate = extractedDate + relativedelta(months=monthOffset)
    if dayOffset != 0:
        extractedDate = extractedDate + relativedelta(days=dayOffset)

    if hrAbs is None and minAbs is None and default_time:
        hrAbs = default_time.hour
        minAbs = default_time.minute

    if hrAbs != -1 and minAbs != -1:

        extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
                                                      minutes=minAbs or 0)
        if (hrAbs or minAbs) and datestr == "":
            # a bare clock time that already passed means tomorrow
            if not daySpecified and dateNow > extractedDate:
                extractedDate = extractedDate + relativedelta(days=1)
    if hrOffset != 0:
        extractedDate = extractedDate + relativedelta(hours=hrOffset)
    if minOffset != 0:
        extractedDate = extractedDate + relativedelta(minutes=minOffset)
    if secOffset != 0:
        extractedDate = extractedDate + relativedelta(seconds=secOffset)

    # drop an "und" that is left stranded between two consumed words
    for idx, word in enumerate(words):
        if word == "und" and 0 < idx < len(words) - 1 \
                and words[idx - 1] == "" and words[idx + 1] == "":
            words[idx] = ""

    resultStr = " ".join(words)
    resultStr = ' '.join(resultStr.split())

    return [extractedDate, resultStr]
|
||||
|
||||
|
||||
def isFractional_de(input_str):
    """
    This function takes the given text and checks if it is a fraction.

    Matching is case-insensitive. Words starting with "halb" are 0.5;
    "drittel", "siebtel" and "achtel" are handled explicitly because
    their stems are irregular; every other word ending in "-tel" or
    "-stel" is resolved by looking its stem up in de_numbers.

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    lowerstr = input_str.lower()

    if lowerstr.startswith("halb"):
        return 0.5

    # stems that cannot be recovered by stripping the suffix
    irregular = {"drittel": 1.0 / 3, "siebtel": 1.0 / 7, "achtel": 1.0 / 8}
    if lowerstr in irregular:
        return irregular[lowerstr]

    if lowerstr.endswith("tel"):
        # try "-stel" (e.g. "hundertstel") before "-tel" (e.g. "fünftel",
        # but also "sechstel", whose stem keeps the "s")
        for suffix in ("stel", "tel"):
            if not lowerstr.endswith(suffix):
                continue
            stem = lowerstr[:-len(suffix)]
            # skip a zero value rather than dividing by it
            if stem in de_numbers and de_numbers[stem]:
                return 1.0 / de_numbers[stem]

    return False
|
||||
|
||||
|
||||
def isOrdinal_de(input_str):
    """
    This function takes the given text and checks if it is an ordinal number.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (int): False if not an ordinal, otherwise the number
        corresponding to the ordinal

    ordinals for 1, 3, 7 and 8 are irregular

    only works for ordinals corresponding to the numbers in de_numbers

    """

    lowerstr = input_str.lower()
    endings = ("", "r", "s", "n", "m")  # erste, erster, erstes, ...

    # Irregular ordinals: accept only the inflected forms themselves, so
    # that unrelated words such as "erstelle", "drittel" or "achtel" are
    # not mistaken for ordinals.
    irregular = {"erste": 1, "dritte": 3, "siebte": 7, "achte": 8}
    for stem, value in irregular.items():
        if lowerstr in [stem + ending for ending in endings]:
            return value

    # from 20 the suffix is -ste*, below 20 it is -te*. Each suffix is
    # tested against the unmodified word, so a failed "-ste" lookup
    # ("sechste" -> "sech") does not prevent the "-te" lookup
    # ("sechste" -> "sechs").
    for base in ("ste", "te"):
        for ending in endings:
            suffix = base + ending
            if lowerstr.endswith(suffix):
                stem = lowerstr[:-len(suffix)]
                if stem in de_numbers:
                    return de_numbers[stem]

    return False
|
||||
|
||||
|
||||
def normalize_de(text, remove_articles):
    """ German string normalization

    Splits the text on whitespace, optionally drops definite articles,
    expands the contractions "net"/"nett" to "nicht" and replaces number
    words found in de_numbers by their digits.

    Args:
        text (str): the string to normalize
        remove_articles (bool): drop der/die/das/des/den/dem when True
    Returns:
        (str): the normalized words joined by single spaces
    """
    articles = ["der", "die", "das", "des", "den", "dem"]
    # Expand common contractions, e.g. "isn't" -> "is not"
    expansions = {"net": "nicht", "nett": "nicht"}

    kept = []
    for token in text.split():  # split() also collapses extra spaces
        if remove_articles and token in articles:
            continue

        token = expansions.get(token, token)

        # Convert numbers into digits, e.g. "two" -> "2"
        if token in de_numbers:
            token = str(de_numbers[token])

        kept.append(token)

    return " ".join(kept)
|
||||
|
||||
|
||||
def extract_numbers_de(text, short_scale=True, ordinals=False):
    """
        Takes in a string and extracts a list of numbers.

    Delegates to extract_numbers_generic with the German pronounce and
    extract helpers.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: list of extracted numbers as floats
    """
    options = {"short_scale": short_scale, "ordinals": ordinals}
    return extract_numbers_generic(text, pronounce_number_de,
                                   extractnumber_de, **options)
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.parse_de import *
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -13,765 +13,8 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from datetime import datetime
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
|
||||
"""File kept for backwards compatibility.
|
||||
|
||||
|
||||
def extractnumber_sv(text):
    """
    Extract the first number from a Swedish text.

    Recognises numeric tokens, the ordinals "första".."sjätte", whatever
    is_fractional_sv accepts, the cardinals "en"/"ett".."tio" (optionally
    followed by a fraction word, which multiplies the value), "a/b"
    fractions, and two such values joined by "och", which are added.

    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number, or False when no
        number was found
    """
    ordinal_values = {"första": 1, "andra": 2, "tredje": 3,
                      "fjärde": 4, "femte": 5, "sjätte": 6}
    cardinal_values = {"en": 1, "ett": 1, "två": 2, "tre": 3, "fyra": 4,
                       "fem": 5, "sex": 6, "sju": 7, "åtta": 8, "nio": 9,
                       "tio": 10}

    tokens = text.split()
    seen_and = False
    before_and = False
    result = False
    pos = 0
    while pos < len(tokens):
        token = tokens[pos]
        if is_numeric(token):
            result = float(token)
        elif token in ordinal_values:
            result = ordinal_values[token]
        else:
            fraction = is_fractional_sv(token)
            if fraction:
                result = fraction
            else:
                if token in cardinal_values:
                    result = cardinal_values[token]
                if result:
                    # "två tredjedelar": scale by a following fraction
                    has_next = pos < (len(tokens) - 1)
                    following = tokens[pos + 1] if has_next else ""
                    scale = is_fractional_sv(following)
                    if scale:
                        result = result * scale
                        tokens[pos + 1] = ""

        if not result:
            # look for fractions like "2/3"
            pieces = token.split('/')
            if look_for_fractions(pieces):
                result = float(pieces[0]) / float(pieces[1])
            elif seen_and:
                # added to value, quit here
                result = before_and
                break
            else:
                pos += 1
                continue

        tokens[pos] = ""

        if seen_and:
            tokens[pos - 1] = ''  # remove "och"
            result += before_and
        else:
            # an "och" one or two words ahead starts the second operand
            skip = 0
            if pos + 1 < len(tokens) and tokens[pos + 1] == 'och':
                skip = 2
            elif pos + 2 < len(tokens) and tokens[pos + 2] == 'och':
                skip = 3
            if skip:
                seen_and = True
                before_and = result
                result = False
                pos += skip
                continue

        break

    return result if result else False
|
||||
|
||||
|
||||
def extract_datetime_sv(string, currentDate, default_time):
    """Extract a date and time from a Swedish utterance.

    The utterance is scanned twice: the first pass consumes date words
    (weekdays, months, relative offsets such as "nästa vecka"), the second
    pass consumes time-of-day words and clock times. Every consumed word is
    blanked so that it is absent from the returned remainder.

    Args:
        string (str): the utterance to parse
        currentDate (datetime): reference point for relative expressions;
            a falsy value makes the function return None
        default_time: object exposing ``hour`` and ``minute``, applied when
            the utterance names no time of day; may be falsy
    Returns:
        None if the input is empty or no date/time information was found,
        otherwise ``[extractedDate, resultStr]`` where ``resultStr`` is the
        utterance with the consumed words removed.
    """
    def clean_string(s):
        """
            cleans the input string of unneeded punctuation and capitalization
            among other things.
        """
        s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
            .replace(' den ', ' ').replace(' en ', ' ')
        wordList = s.split()
        for idx, word in enumerate(wordList):
            word = word.replace("'s", "")

            ordinals = ["rd", "st", "nd", "th"]
            # a token that was only "'s" is empty by now, so guard the index
            if word and word[0].isdigit():
                for ordinal in ordinals:
                    if ordinal in word:
                        word = word.replace(ordinal, "")
            wordList[idx] = word

        return wordList

    def date_found():
        """Tell whether either pass recognised anything date/time related."""
        return found or \
            (
                datestr != "" or timeStr != "" or
                yearOffset != 0 or monthOffset != 0 or
                dayOffset is True or hrOffset != 0 or
                hrAbs or minOffset != 0 or
                minAbs or secOffset != 0
            )

    if string == "" or not currentDate:
        return None

    found = False
    daySpecified = False
    # False means "no day offset seen yet"; becomes an int once one is parsed
    dayOffset = False
    monthOffset = 0
    yearOffset = 0
    dateNow = currentDate
    today = dateNow.strftime("%w")
    currentYear = dateNow.strftime("%Y")
    fromFlag = False
    datestr = ""
    hasYear = False
    timeQualifier = ""

    timeQualifiersList = ['morgon', 'förmiddag', 'eftermiddag', 'kväll']
    markers = ['på', 'i', 'den här', 'kring', 'efter']
    days = ['måndag', 'tisdag', 'onsdag', 'torsdag',
            'fredag', 'lördag', 'söndag']
    months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
              'juli', 'augusti', 'september', 'oktober', 'november',
              'december']
    monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
                   'sept', 'oct', 'nov', 'dec']

    # words that may follow "från"/"efter" in "5 dagar från imorgon" etc.;
    # constant for the whole scan, so it is built once
    validFollowups = days + months + monthsShort
    validFollowups.append("idag")
    validFollowups.append("imorgon")
    validFollowups.append("nästa")
    validFollowups.append("förra")
    validFollowups.append("nu")

    words = clean_string(string)

    for idx, word in enumerate(words):
        if word == "":
            continue
        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""

        # this isn't in clean string because I don't want to save back to words
        word = word.rstrip('s')
        start = idx
        used = 0
        # save timequalifier for later
        if word in timeQualifiersList:
            timeQualifier = word
        # parse today, tomorrow, day after tomorrow
        elif word == "idag" and not fromFlag:
            dayOffset = 0
            used += 1
        elif word == "imorgon" and not fromFlag:
            dayOffset = 1
            used += 1
        # parenthesised so that fromFlag applies to both spellings
        elif (word == "morgondagen" or word == "morgondagens") and \
                not fromFlag:
            dayOffset = 1
            used += 1
        elif word == "övermorgon" and not fromFlag:
            dayOffset = 2
            used += 1
        # parse 5 days, 10 weeks, last week, next week
        elif word == "dag" or word == "dagar":
            # wordPrev is "" for the first word or after a consumed word
            if wordPrev and wordPrev[0].isdigit():
                dayOffset += int(wordPrev)
                start -= 1
                used = 2
        elif (word == "vecka" or word == "veckor") and not fromFlag:
            if wordPrev and wordPrev[0].isdigit():
                dayOffset += int(wordPrev) * 7
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                dayOffset = 7
                start -= 1
                used = 2
            elif wordPrev == "förra":
                dayOffset = -7
                start -= 1
                used = 2
        # parse 10 months, next month, last month
        elif word == "månad" and not fromFlag:
            if wordPrev and wordPrev[0].isdigit():
                monthOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                monthOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "förra":
                monthOffset = -1
                start -= 1
                used = 2
        # parse 5 years, next year, last year
        elif word == "år" and not fromFlag:
            if wordPrev and wordPrev[0].isdigit():
                yearOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                yearOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "förra":
                yearOffset = -1
                start -= 1
                used = 2
        # parse Monday, Tuesday, etc., and next Monday,
        # last Tuesday, etc.
        elif word in days and not fromFlag:
            d = days.index(word)
            dayOffset = (d + 1) - int(today)
            used = 1
            if dayOffset < 0:
                dayOffset += 7
            if wordPrev == "nästa":
                dayOffset += 7
                used += 1
                start -= 1
            elif wordPrev == "förra":
                dayOffset -= 7
                used += 1
                start -= 1
        # parse 15 of July, June 20th, Feb 18, 19 of February
        elif (word in months or word in monthsShort) and not fromFlag:
            try:
                m = months.index(word)
            except ValueError:
                m = monthsShort.index(word)
            used += 1
            datestr = months[m]
            if wordPrev and (wordPrev[0].isdigit() or
                             (wordPrev == "of" and wordPrevPrev and
                              wordPrevPrev[0].isdigit())):
                if wordPrev == "of" and wordPrevPrev and \
                        wordPrevPrev[0].isdigit():
                    datestr += " " + words[idx - 2]
                    used += 1
                    start -= 1
                else:
                    datestr += " " + wordPrev
                start -= 1
                used += 1
                if wordNext and wordNext[0].isdigit():
                    datestr += " " + wordNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False

            elif wordNext and wordNext[0].isdigit():
                datestr += " " + wordNext
                used += 1
                if wordNextNext and wordNextNext[0].isdigit():
                    datestr += " " + wordNextNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False
        # parse 5 days from tomorrow, 10 weeks from next thursday,
        # 2 months from July
        if (word == "från" or word == "efter") and wordNext in validFollowups:
            used = 2
            fromFlag = True
            if wordNext == "imorgon":
                dayOffset += 1
            elif wordNext in days:
                d = days.index(wordNext)
                tmpOffset = (d + 1) - int(today)
                used = 2
                if tmpOffset < 0:
                    tmpOffset += 7
                dayOffset += tmpOffset
            elif wordNextNext and wordNextNext in days:
                d = days.index(wordNextNext)
                tmpOffset = (d + 1) - int(today)
                used = 3
                if wordNext == "nästa":
                    tmpOffset += 7
                    used += 1
                    start -= 1
                elif wordNext == "förra":
                    tmpOffset -= 7
                    used += 1
                    start -= 1
                dayOffset += tmpOffset
        if used > 0:
            if start - 1 > 0 and words[start - 1] == "denna":
                start -= 1
                used += 1

            # blank the consumed words so later passes skip them
            for i in range(0, used):
                words[i + start] = ""

            if start - 1 >= 0 and words[start - 1] in markers:
                words[start - 1] = ""
            found = True
            daySpecified = True

    # parse time
    # timeStr is never given a non-empty value anywhere in this function
    timeStr = ""
    hrOffset = 0
    minOffset = 0
    secOffset = 0
    # None: no absolute time seen; -1: a relative offset was seen instead
    hrAbs = None
    minAbs = None

    for idx, word in enumerate(words):
        if word == "":
            continue

        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        # parse noon, midnight, morning, afternoon, evening
        used = 0
        if word == "middag":
            hrAbs = 12
            used += 1
        elif word == "midnatt":
            hrAbs = 0
            used += 1
        elif word == "morgon":
            if not hrAbs:
                hrAbs = 8
            used += 1
        elif word == "förmiddag":
            if not hrAbs:
                hrAbs = 10
            used += 1
        elif word == "eftermiddag":
            if not hrAbs:
                hrAbs = 15
            used += 1
        elif word == "kväll":
            if not hrAbs:
                hrAbs = 19
            used += 1
        # parse half an hour, quarter hour
        # NOTE(review): this branch fires for any word that follows a
        # marker, not only the duration words below - confirm intended.
        elif wordPrev in markers or wordPrevPrev in markers:
            if word == "halvtimme" or word == "halvtimma":
                minOffset = 30
            elif word == "kvart":
                minOffset = 15
            elif word == "timme" or word == "timma":
                hrOffset = 1
            words[idx - 1] = ""
            used += 1
            hrAbs = -1
            minAbs = -1
        # parse 5:00 am, 12:00 p.m., etc
        elif word[0].isdigit():
            isTime = True
            strHH = ""
            strMM = ""
            remainder = ""
            if ':' in word:
                # parse colons
                # "3:00 in the morning"
                stage = 0
                length = len(word)
                for i in range(length):
                    if stage == 0:
                        if word[i].isdigit():
                            strHH += word[i]
                        elif word[i] == ":":
                            stage = 1
                        else:
                            stage = 2
                            i -= 1
                    elif stage == 1:
                        if word[i].isdigit():
                            strMM += word[i]
                        else:
                            stage = 2
                            i -= 1
                    elif stage == 2:
                        remainder = word[i:].replace(".", "")
                        break
                if remainder == "":
                    nextWord = wordNext.replace(".", "")
                    if nextWord == "am" or nextWord == "pm":
                        remainder = nextWord
                        used += 1
                    elif nextWord == "tonight":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "in" and wordNextNext == "the" and \
                            words[idx + 3] == "morning":
                        remainder = "am"
                        used += 3
                    elif wordNext == "in" and wordNextNext == "the" and \
                            words[idx + 3] == "afternoon":
                        remainder = "pm"
                        used += 3
                    elif wordNext == "in" and wordNextNext == "the" and \
                            words[idx + 3] == "evening":
                        remainder = "pm"
                        used += 3
                    elif wordNext == "in" and wordNextNext == "morning":
                        remainder = "am"
                        used += 2
                    elif wordNext == "in" and wordNextNext == "afternoon":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "in" and wordNextNext == "evening":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "this" and wordNextNext == "morning":
                        remainder = "am"
                        used = 2
                    elif wordNext == "this" and wordNextNext == "afternoon":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "this" and wordNextNext == "evening":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "at" and wordNextNext == "night":
                        # strHH is still a digit string here; compare as int
                        if int(strHH) > 5:
                            remainder = "pm"
                        else:
                            remainder = "am"
                        used += 2
                    else:
                        if timeQualifier != "":
                            # strHH is a digit string; do the arithmetic on
                            # its integer value
                            if int(strHH) <= 12 and \
                                    (timeQualifier == "evening" or
                                     timeQualifier == "afternoon"):
                                strHH = int(strHH) + 12
            else:
                # try to parse # s without colons
                # 5 hours, 10 minutes etc.
                length = len(word)
                strNum = ""
                remainder = ""
                for i in range(length):
                    if word[i].isdigit():
                        strNum += word[i]
                    else:
                        remainder += word[i]

                if remainder == "":
                    remainder = wordNext.replace(".", "").lstrip().rstrip()

                if (
                        remainder == "pm" or
                        wordNext == "pm" or
                        remainder == "p.m." or
                        wordNext == "p.m."):
                    strHH = strNum
                    remainder = "pm"
                    used = 1
                elif (
                        remainder == "am" or
                        wordNext == "am" or
                        remainder == "a.m." or
                        wordNext == "a.m."):
                    strHH = strNum
                    remainder = "am"
                    used = 1
                else:
                    if wordNext == "pm" or wordNext == "p.m.":
                        strHH = strNum
                        remainder = "pm"
                        used = 1
                    elif wordNext == "am" or wordNext == "a.m.":
                        strHH = strNum
                        remainder = "am"
                        used = 1
                    elif not word.isdigit():
                        # every branch below calls int(word); a token with
                        # non-digit characters cannot be a plain number
                        isTime = False
                    elif (
                            int(word) > 100 and
                            (
                                wordPrev == "o" or
                                wordPrev == "oh"
                            )):
                        # 0800 hours (pronounced oh-eight-hundred)
                        # integer split keeps the minutes of "HHMM"
                        strHH, strMM = divmod(int(word), 100)
                        if wordNext == "hours":
                            used += 1
                    elif (
                            wordNext == "hours" and
                            word[0] != '0' and
                            (
                                int(word) < 100 or
                                int(word) > 2400
                            )):
                        # "in 3 hours"
                        hrOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1

                    elif wordNext == "minutes":
                        # "in 10 minutes"
                        minOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif wordNext == "seconds":
                        # in 5 seconds
                        secOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif int(word) > 100:
                        strHH, strMM = divmod(int(word), 100)
                        if wordNext == "hours":
                            used += 1
                    # wordNext is "" when the number is the last word
                    elif wordNext and wordNext[0].isdigit():
                        strHH = word
                        strMM = wordNext
                        used += 1
                        if wordNextNext == "hours":
                            used += 1
                    elif (
                            wordNext == "" or wordNext == "o'clock" or
                            (
                                wordNext == "in" and
                                (
                                    wordNextNext == "the" or
                                    wordNextNext == timeQualifier
                                )
                            )):
                        strHH = word
                        strMM = 0
                        if wordNext == "o'clock":
                            used += 1
                        if wordNext == "in" or wordNextNext == "in":
                            used += (1 if wordNext == "in" else 2)
                            if (wordNextNext and
                                    wordNextNext in timeQualifier or
                                    (words[words.index(wordNextNext) + 1] and
                                     words[words.index(wordNextNext) + 1] in
                                     timeQualifier)):
                                if (wordNextNext == "afternoon" or
                                        (len(words) >
                                         words.index(wordNextNext) + 1 and
                                         words[words.index(
                                             wordNextNext) + 1] ==
                                         "afternoon")):
                                    remainder = "pm"
                                if (wordNextNext == "evening" or
                                        (len(words) >
                                         (words.index(wordNextNext) + 1) and
                                         words[words.index(
                                             wordNextNext) + 1] == "evening")):
                                    remainder = "pm"
                                if (wordNextNext == "morning" or
                                        (len(words) >
                                         words.index(wordNextNext) + 1 and
                                         words[words.index(
                                             wordNextNext) + 1] == "morning")):
                                    remainder = "am"
                    else:
                        isTime = False

            strHH = int(strHH) if strHH else 0
            strMM = int(strMM) if strMM else 0
            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
            if strHH > 24 or strMM > 59:
                isTime = False
                used = 0
            if isTime:
                hrAbs = strHH * 1
                minAbs = strMM * 1
                used += 1
        if used > 0:
            # removed parsed words from the sentence
            for i in range(used):
                words[idx + i] = ""

            if wordPrev == "o" or wordPrev == "oh":
                words[words.index(wordPrev)] = ""

            if wordPrev == "early":
                hrOffset = -1
                words[idx - 1] = ""
                idx -= 1
            elif wordPrev == "late":
                hrOffset = 1
                words[idx - 1] = ""
                idx -= 1
            if idx > 0 and wordPrev in markers:
                words[idx - 1] = ""
            if idx > 1 and wordPrevPrev in markers:
                words[idx - 2] = ""

            idx += used - 1
            found = True

    # check that we found a date
    # (date_found must be called; the bare function object is always truthy)
    if not date_found():
        return None

    if dayOffset is False:
        dayOffset = 0

    # perform date manipulation

    extractedDate = dateNow
    extractedDate = extractedDate.replace(microsecond=0,
                                          second=0,
                                          minute=0,
                                          hour=0)
    if datestr != "":
        # NOTE(review): "%B" is matched against the active locale's month
        # names while datestr holds a Swedish name, and the format has no
        # field for a trailing year - confirm this parses as deployed.
        temp = datetime.strptime(datestr, "%B %d")
        if not hasYear:
            temp = temp.replace(year=extractedDate.year)
            if extractedDate < temp:
                extractedDate = extractedDate.replace(year=int(currentYear),
                                                      month=int(
                                                          temp.strftime(
                                                              "%m")),
                                                      day=int(temp.strftime(
                                                          "%d")))
            else:
                # the date has already passed this year: roll to next year
                extractedDate = extractedDate.replace(
                    year=int(currentYear) + 1,
                    month=int(temp.strftime("%m")),
                    day=int(temp.strftime("%d")))
        else:
            extractedDate = extractedDate.replace(
                year=int(temp.strftime("%Y")),
                month=int(temp.strftime("%m")),
                day=int(temp.strftime("%d")))

    if timeStr != "":
        temp = datetime(timeStr)
        extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
                                              minute=temp.strftime("%M"),
                                              second=temp.strftime("%S"))

    if yearOffset != 0:
        extractedDate = extractedDate + relativedelta(years=yearOffset)
    if monthOffset != 0:
        extractedDate = extractedDate + relativedelta(months=monthOffset)
    if dayOffset != 0:
        extractedDate = extractedDate + relativedelta(days=dayOffset)

    if hrAbs is None and minAbs is None and default_time:
        hrAbs = default_time.hour
        minAbs = default_time.minute
    if hrAbs != -1 and minAbs != -1:
        extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
                                                      minutes=minAbs or 0)
        if (hrAbs or minAbs) and datestr == "":
            # a bare time that already passed today means tomorrow
            if not daySpecified and dateNow > extractedDate:
                extractedDate = extractedDate + relativedelta(days=1)
    if hrOffset != 0:
        extractedDate = extractedDate + relativedelta(hours=hrOffset)
    if minOffset != 0:
        extractedDate = extractedDate + relativedelta(minutes=minOffset)
    if secOffset != 0:
        extractedDate = extractedDate + relativedelta(seconds=secOffset)
    for idx, word in enumerate(words):
        # the bounds check keeps a trailing "and" from indexing past the end
        if words[idx] == "and" and idx + 1 < len(words) and \
                words[idx - 1] == "" and words[idx + 1] == "":
            words[idx] = ""

    resultStr = " ".join(words)
    resultStr = ' '.join(resultStr.split())
    return [extractedDate, resultStr]
|
||||
|
||||
|
||||
def is_fractional_sv(input_str):
    """
    This function takes the given text and checks if it is a fraction.

    The comparison is case-insensitive. Before the lookup the endings
    "ars", "ar", "a" and "s" are stripped, in that order, each check being
    applied to the result of the previous one.

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    # Lower-case once so the membership test and the index lookup below see
    # the same string (a mixed-case word used to pass the former and then
    # raise ValueError in the latter).
    input_str = input_str.lower()

    if input_str.endswith('ars'):
        input_str = input_str[:-3]  # e.g. "femtedelars"
    if input_str.endswith('ar'):
        input_str = input_str[:-2]  # e.g. "femtedelar"
    if input_str.endswith('a'):
        input_str = input_str[:-1]  # e.g. "halva"
    if input_str.endswith('s'):
        input_str = input_str[:-1]  # e.g. "kvarts"

    # position i holds the word for 1 / (i + 1)
    aFrac = ["hel", "halv", "tredjedel", "fjärdedel", "femtedel", "sjättedel",
             "sjundedel", "åttondel", "niondel", "tiondel", "elftedel",
             "tolftedel"]
    if input_str in aFrac:
        return 1.0 / (aFrac.index(input_str) + 1)
    if input_str == "kvart":
        return 1.0 / 4
    if input_str == "trekvart":
        return 3.0 / 4

    return False
|
||||
|
||||
|
||||
def normalize_sv(text, remove_articles):
    """ Swedish string normalization

    Collapses runs of whitespace and replaces the number words "noll"
    through "tjugo" with their digits; "en" is treated as "ett".

    Args:
        text (str): the text to normalize
        remove_articles (bool): currently ignored by this implementation
    Returns:
        (str): the normalized text
    """
    # position in the list is the numeric value of the word
    textNumbers = ["noll", "ett", "två", "tre", "fyra", "fem", "sex",
                   "sju", "åtta", "nio", "tio", "elva", "tolv",
                   "tretton", "fjorton", "femton", "sexton",
                   "sjutton", "arton", "nitton", "tjugo"]

    normalized = []
    for word in text.split():  # split() also drops extra spaces
        # Convert numbers into digits, e.g. "två" -> "2"
        if word == 'en':
            word = 'ett'
        if word in textNumbers:
            word = str(textNumbers.index(word))
        normalized.append(word)

    return " ".join(normalized)
|
||||
TODO: Remove in 20.02
|
||||
"""
|
||||
from lingua_franca.lang.parse_sv import *
|
||||
|
|
Loading…
Reference in New Issue