Remove content from lang specific files

The files are kept for backwards compatibility, but they now contain only
imports of the lingua-franca versions of the variables
pull/2438/head
Åke Forslund 2019-12-13 13:24:15 +01:00
parent 1b88db4fa1
commit 32666e9d68
19 changed files with 78 additions and 13034 deletions

View File

@ -18,5 +18,4 @@
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.format_common import *

View File

@ -14,285 +14,8 @@
# limitations under the License.
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
from mycroft.util.log import LOG
from mycroft.util.lang.common_data_en import _NUM_STRING_EN, \
_FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN
"""File kept for backwards compatibility
def nice_number_en(number, speech, denominators=range(1, 21)):
    """ English helper for nice_number

    Turns a float into a mixed fraction a person can read or hear, e.g.
    4.5 becomes "4 and a half" for speech and "4 1/2" for text.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to try, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No fraction fits: fall back to a 3 decimal representation
        return str(round(number, 3))

    whole, num, den = mixed

    if num == 0:
        # Whole numbers look the same for speech and display
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_EN[den]
    lead = '' if whole == 0 else '{} and '.format(whole)
    count = 'a' if num == 1 else str(num)
    spoken = '{}{} {}'.format(lead, count, den_str)
    if num > 1:
        # plural denominator, e.g. "3 quarters"
        spoken += 's'
    return spoken
def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'five point two'

    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    if scientific:
        number = '%E' % num
        n, power = number.split("E")
        power = int(power)
        if power != 0:
            # The mantissa is pronounced recursively (its own exponent is
            # zero, so the recursion ends there); the sign of the exponent
            # is spelled out here and its magnitude pronounced normally.
            return '{} times ten to the power of {}{}'.format(
                pronounce_number_en(float(n), places, short_scale, True),
                'negative ' if power < 0 else '',
                pronounce_number_en(abs(power), places, short_scale, False))

    number_names = _NUM_STRING_EN
    big_number_names = _SHORT_SCALE_EN if short_scale else _LONG_SCALE_EN

    # deal with negatives
    result = ""
    if num < 0:
        result = "negative " if scientific else "minus "
    num = abs(num)

    try:
        # deal with 4 digits
        # usually if it's a 4 digit num it should be said like a date
        # i.e. 1972 => nineteen seventy two
        if 10000 > num >= 1000 and isinstance(num, int):
            # 1000, 2000, 2001, 2100, 3123, etc. are skipped here as the
            # rest of the function deals with them already
            if num % 1000 < 10 or num > 2000:
                pass
            # deal with 1900, 1300, etc
            # i.e. 1900 => nineteen hundred
            elif not num % 100:
                # integer division keeps the lookup key an int
                first = number_names[num // 100]
                last = big_number_names[100]
                # keep the sign prefix collected above
                return result + first + " " + last
            # deal with 1960, 1961, etc
            # i.e. 1960 => nineteen sixty
            #      1961 => nineteen sixty one
            else:
                first = number_names[num // 100]
                last = number_names[num % 100 - num % 10]
                if num % 10:
                    last += " " + number_names[num % 10]
                # keep the sign prefix collected above
                return result + first + " " + last
    # exception used to catch any unforeseen edge cases
    # will default back to normal subroutine
    except Exception as e:
        LOG.error('Exception in pronounce_number_en: {}'.format(repr(e)))

    # check for a direct match
    if num in number_names:
        result += number_names[num]
    elif num in big_number_names:
        result += "one " + big_number_names[num]
    else:
        hundreds = list(big_number_names.values())

        def _sub_thousand(n):
            # Pronounce 0..999
            assert 0 <= n <= 999
            if n <= 19:
                return number_names[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return number_names[q * 10] + (
                    " " + _sub_thousand(r) if r else "")
            else:
                q, r = divmod(n, 100)
                return number_names[q] + " hundred" + (
                    " and " + _sub_thousand(r) if r else "")

        def _short_scale(n):
            # Pronounce the integer part in groups of one thousand
            if n >= max(_SHORT_SCALE_EN):
                return "infinity"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z)
                if i:
                    number += " "
                    number += hundreds[i]
                res.append(number)
            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # Split n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # Pronounce the integer part in groups of one million
            if n >= max(_LONG_SCALE_EN):
                return "infinity"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_en(z, places, True, scientific)
                # strip off the comma after the thousand
                if i:
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')
                    number += " " + hundreds[i+1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # Deal with fractional part
    if not num == int(num) and places > 0:
        result += " point"
        place = 10
        # digits are spoken one at a time until a zero digit or the
        # places limit is reached
        while int(num * place) % 10 > 0 and places > 0:
            result += " " + number_names[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'five thirty' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" or "2:22 PM" with am/pm, "3:01" or "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        words = []

        # Either "zero eight hundred" or "thirteen hundred"
        if string[0] == '0':
            words.append(pronounce_number_en(int(string[0])))
            words.append(pronounce_number_en(int(string[1])))
        else:
            words.append(pronounce_number_en(int(string[0:2])))

        if string[3:5] == '00':
            words.append("hundred")
        elif string[3] == '0':
            words.append(pronounce_number_en(0))
            words.append(pronounce_number_en(int(string[4])))
        else:
            words.append(pronounce_number_en(int(string[3:5])))
        return " ".join(words)

    if dt.minute == 0 and dt.hour == 0:
        return "midnight"
    if dt.minute == 0 and dt.hour == 12:
        return "noon"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 15:
        speak = "quarter past " + pronounce_number_en(hour)
    elif dt.minute == 30:
        speak = "half past " + pronounce_number_en(hour)
    elif dt.minute == 45:
        next_hour = (dt.hour + 1) % 12 or 12
        speak = "quarter to " + pronounce_number_en(next_hour)
    else:
        speak = pronounce_number_en(hour)
        if dt.minute == 0:
            if not use_ampm:
                return speak + " o'clock"
        else:
            if dt.minute < 10:
                speak += " oh"
            speak += " " + pronounce_number_en(dt.minute)

    if use_ampm:
        speak += " p.m." if dt.hour > 11 else " a.m."

    return speak
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_en import *

View File

@ -13,307 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""File kept for backwards compatibility
TODO: Remove in 20.02
"""
Format functions for castillian (es-es)
"""
from mycroft.util.lang.format_common import convert_to_mixed_fraction
# Spanish words for the numbers that are pronounced as a single word:
# 0-20 and the tens up to 90.
NUM_STRING_ES = {
    0: 'cero',
    1: 'uno',
    2: 'dos',
    3: 'tres',
    4: 'cuatro',
    5: 'cinco',
    6: 'seis',
    7: 'siete',
    8: 'ocho',
    9: 'nueve',
    10: 'diez',
    11: 'once',
    12: 'doce',
    13: 'trece',
    14: 'catorce',
    15: 'quince',
    16: 'dieciséis',
    17: 'diecisiete',
    18: 'dieciocho',
    19: 'diecinueve',
    20: 'veinte',
    30: 'treinta',
    40: 'cuarenta',
    50: 'cincuenta',
    60: 'sesenta',
    70: 'setenta',
    80: 'ochenta',
    90: 'noventa'
}
# Spanish singular names of fraction denominators 2..20, keyed by the
# denominator; nice_number_es looks the denominator up here.
FRACTION_STRING_ES = {
2: 'medio',
3: 'tercio',
4: 'cuarto',
5: 'quinto',
6: 'sexto',
7: 'séptimo',
8: 'octavo',
9: 'noveno',
10: 'décimo',
11: 'onceavo',
12: 'doceavo',
13: 'treceavo',
14: 'catorceavo',
15: 'quinceavo',
16: 'dieciseisavo',
17: 'diecisieteavo',
18: 'dieciochoavo',
19: 'diecinueveavo',
20: 'veinteavo'
}
def nice_number_es(number, speech, denominators=range(1, 21)):
    """ Spanish helper for nice_number

    This function formats a float to a human understandable form. Like
    4.5 becomes "4 y medio" for speech and "4 1/2" for text

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    result = convert_to_mixed_fraction(number, denominators)
    if result:
        whole, num, den = result
    else:
        # Give up, just represent as a 3 decimal number
        whole, num, den = round(number, 3), 0, 0

    if num == 0:
        # Not a fraction: only the separators need localising
        if speech:
            return str(whole).replace(".", ",")
        # space as thousands separator, comma as decimal separator
        grouped = '{:,}'.format(whole)
        return grouped.replace(",", " ").replace(".", ",")

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = FRACTION_STRING_ES[den]
    if whole == 0:
        if num == 1:
            # "un medio", for example
            strNumber = 'un {}'.format(den_str)
        else:
            # "2 tercios", for example
            strNumber = '{} {}'.format(num, den_str)
    elif num == 1:
        if den == 2:
            # "1 y medio", for example
            strNumber = '{} y {}'.format(whole, den_str)
        else:
            # "1 y 1 tercio", for example
            strNumber = '{} y 1 {}'.format(whole, den_str)
    else:
        # "2 y 3 cuartos", for example
        strNumber = '{} y {} {}'.format(whole, num, den_str)

    if num > 1:
        # Every Spanish denominator here takes a regular plural "s",
        # including "tercio" -> "tercios" (unlike the invariant French
        # "tiers" this rule was originally modelled on).
        strNumber += 's'
    return strNumber
def pronounce_number_es(num, places=2):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'cinco coma dos'

    Args:
        num(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(num) >= 100:
        # TODO: support numbers of 100 and above
        return str(num)

    result = ""
    if num < 0:
        result = "menos "
    num = abs(num)

    # Classify on the integer part so that values such as 29.5 are handled
    # as "veintinueve coma cinco" instead of falling through to a direct
    # table lookup of 29, which does not exist.
    whole = int(num)
    ones = whole % 10
    tens = whole - ones

    # 21 to 29 have a special, single-word pronunciation
    if 20 <= whole <= 29:
        result += NUM_STRING_ES[tens]
        if ones > 0:
            # drop the final "e" of "veinte" to build 21 - 29, taking
            # care of 22, 23 and 26, which carry an accent
            result = result[:-1]
            if ones == 2:
                result += "idós"
            elif ones == 3:
                result += "itrés"
            elif ones == 6:
                result += "iséis"
            else:
                result += "i" + NUM_STRING_ES[ones]
    elif whole >= 30:  # 30 and above
        result += NUM_STRING_ES[tens]
        if ones > 0:
            result += " y " + NUM_STRING_ES[ones]
    else:
        result += NUM_STRING_ES[whole]

    # Deal with the decimal part. In Spanish the comma is commonly used
    # instead of the dot; whichever is written, it is pronounced "coma".
    if not num == int(num) and places > 0:
        result += " coma"
        place = 10
        while int(num*place) % 10 > 0 and places > 0:
            result += " " + NUM_STRING_ES[int(num*place) % 10]
            place *= 10
            places -= 1
    return result
def nice_time_es(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'cinco treinta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" or "2:22 PM" with am/pm, "3:01" or "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # In 24h format no extra qualifier such as "la noche", "la tarde"
        # or "la mañana" is given
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if dt.hour == 1:
            speak = "la una"
        else:
            speak = "las " + pronounce_number_es(dt.hour)

        # 14:04 is "las catorce cero cuatro"
        if dt.minute < 10:
            speak += " cero " + pronounce_number_es(dt.minute)
        else:
            speak += " " + pronounce_number_es(dt.minute)
        return speak

    # Prepare for the "tres menos cuarto" form: these minutes are counted
    # backwards from the following hour
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour in (0, 12):
        speak = "las doce"
    elif hour in (1, 13):
        speak = "la una"
    elif hour < 13:
        speak = "las " + pronounce_number_es(hour)
    else:
        speak = "las " + pronounce_number_es(hour-12)

    if minute != 0:
        # the special quarter/half forms
        special = {15: " y cuarto", 30: " y media", -15: " menos cuarto"}
        if minute in special:
            speak += special[minute]
        elif minute > 0:
            # seis y nueve, siete y veinticinco
            speak += " y " + pronounce_number_es(minute)
        else:
            # siete menos veinte: no "y" here
            speak += " " + pronounce_number_es(minute)
    elif not use_ampm:
        # 3:00 without a part-of-day qualifier
        speak += " en punto"

    if use_ampm:
        # "de la noche" runs from nightfall to midnight, so starting it at
        # 21h is somewhat subjective; in Spain 20h is still "de la tarde".
        # 12h counts as morning/midday, so "de la tarde" starts at 13h.
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if 0 <= hour < 6:
            speak += " de la madrugada"
        elif 6 <= hour < 13:
            speak += " de la mañana"
        elif 13 <= hour < 21:
            speak += " de la tarde"
        else:
            speak += " de la noche"
    return speak
from lingua_franca.lang.format_es import *

View File

@ -13,290 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
""" Format functions for french (fr)
"""File kept for backwards compatibility.
TODO: Remove in 20.02
"""
from mycroft.util.lang.format_common import convert_to_mixed_fraction
# French words for 0-16 and the tens up to 90. 17-19 are intentionally
# absent: pronounce_number_fr composes them as "dix-" + unit.
NUM_STRING_FR = {
0: 'zéro',
1: 'un',
2: 'deux',
3: 'trois',
4: 'quatre',
5: 'cinq',
6: 'six',
7: 'sept',
8: 'huit',
9: 'neuf',
10: 'dix',
11: 'onze',
12: 'douze',
13: 'treize',
14: 'quatorze',
15: 'quinze',
16: 'seize',
20: 'vingt',
30: 'trente',
40: 'quarante',
50: 'cinquante',
60: 'soixante',
70: 'soixante-dix',
80: 'quatre-vingt',
90: 'quatre-vingt-dix'
}
# French singular names of fraction denominators 2..20, keyed by the
# denominator; nice_number_fr looks the denominator up here.
FRACTION_STRING_FR = {
2: 'demi',
3: 'tiers',
4: 'quart',
5: 'cinquième',
6: 'sixième',
7: 'septième',
8: 'huitième',
9: 'neuvième',
10: 'dixième',
11: 'onzième',
12: 'douzième',
13: 'treizième',
14: 'quatorzième',
15: 'quinzième',
16: 'seizième',
17: 'dix-septième',
18: 'dix-huitième',
19: 'dix-neuvième',
20: 'vingtième'
}
def nice_number_fr(number, speech, denominators=range(1, 21)):
    """ French helper for nice_number

    Turns a float into a mixed fraction a person can read or hear, e.g.
    4.5 becomes "4 et demi" for speech and "4 1/2" for text.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to try, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if mixed:
        whole, num, den = mixed
    else:
        # No fraction fits: represent as a 3 decimal number
        whole, num, den = round(number, 3), 0, 0

    if num == 0:
        # Not a fraction: only the separators need localising
        if speech:
            return str(whole).replace(".", ",")
        # space as thousands separator, comma as decimal separator
        grouped = '{:,}'.format(whole)
        return grouped.replace(",", " ").replace(".", ",")

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = FRACTION_STRING_FR[den]
    if whole == 0:
        # no whole part: "un demi" or "4 tiers"
        if num == 1:
            spoken = 'un {}'.format(den_str)
        else:
            spoken = '{} {}'.format(num, den_str)
    elif num != 1:
        # whole part and several parts: "2 et 3 quarts"
        spoken = '{} et {} {}'.format(whole, num, den_str)
    elif den == 2:
        # "1 et demi"
        spoken = '{} et {}'.format(whole, den_str)
    else:
        # "1 et 1 tiers"
        spoken = '{} et 1 {}'.format(whole, den_str)

    if num > 1 and den != 3:
        # plural "s", except for the invariant "tiers"
        spoken += 's'
    return spoken
def pronounce_number_fr(num, places=2):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'cinq virgule deux'

    Args:
        num(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(num) >= 100:
        # TODO: Support for numbers over 100
        return str(num)

    result = ""
    if num < 0:
        result = "moins "
    num = abs(num)

    # All word-selection decisions are made on the integer part, so that
    # 16.5 is "seize virgule cinq" (not "dix-six ..."), 71.5 keeps its
    # "-et-" and 80.5 keeps the plural "quatre-vingts".
    whole = int(num)

    if whole > 16:
        ones = whole % 10
        tens = whole - ones
        if ones != 0:
            if tens > 10 and tens <= 60 and ones == 1:
                # vingt-et-un, trente-et-un, ...
                result += NUM_STRING_FR[tens] + "-et-" + NUM_STRING_FR[ones]
            elif whole == 71:
                result += "soixante-et-onze"
            elif tens == 70:
                # seventies are built on sixty: soixante-douze, ...
                result += NUM_STRING_FR[60] + "-"
                if ones < 7:
                    result += NUM_STRING_FR[10 + ones]
                else:
                    result += NUM_STRING_FR[10] + "-" + NUM_STRING_FR[ones]
            elif tens == 90:
                # nineties are built on eighty: quatre-vingt-douze, ...
                result += NUM_STRING_FR[80] + "-"
                if ones < 7:
                    result += NUM_STRING_FR[10 + ones]
                else:
                    result += NUM_STRING_FR[10] + "-" + NUM_STRING_FR[ones]
            else:
                # also covers 17-19: "dix-" + unit
                result += NUM_STRING_FR[tens] + "-" + NUM_STRING_FR[ones]
        else:
            if whole == 80:
                # a bare eighty takes a plural "s"
                result += "quatre-vingts"
            else:
                result += NUM_STRING_FR[tens]
    else:
        result += NUM_STRING_FR[whole]

    # Deal with decimal part
    if not num == int(num) and places > 0:
        result += " virgule"
        place = 10
        while int(num*place) % 10 > 0 and places > 0:
            result += " " + NUM_STRING_FR[int(num*place) % 10]
            place *= 10
            places -= 1
    return result
def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'cinq heures trente' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" or "2:22 PM" with am/pm, "3:01" or "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # "treize heures trente"
        if dt.hour == 0:
            speak = "minuit"
        elif dt.hour == 12:
            speak = "midi"
        elif dt.hour == 1:
            speak = "une heure"
        else:
            speak = pronounce_number_fr(dt.hour) + " heures"

        if dt.minute != 0:
            speak += " " + pronounce_number_fr(dt.minute)
        return speak

    # Prepare for "trois heures moins le quart": these minutes are
    # counted backwards from the following hour
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak = "minuit"
    elif hour == 12:
        speak = "midi"
    elif hour in (1, 13):
        speak = "une heure"
    elif hour < 13:
        speak = pronounce_number_fr(hour) + " heures"
    else:
        speak = pronounce_number_fr(hour-12) + " heures"

    if minute != 0:
        special = {15: " et quart", 30: " et demi", -15: " moins le quart"}
        if minute in special:
            speak += special[minute]
        else:
            speak += " " + pronounce_number_fr(minute)

    if use_ampm:
        if hour > 17:
            speak += " du soir"
        elif hour > 12:
            speak += " de l'après-midi"
        elif hour > 0 and hour < 12:
            speak += " du matin"
    return speak
from lingua_franca.lang.format_fr import *

View File

@ -14,351 +14,8 @@
# limitations under the License.
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
from math import floor
"""File kept for backwards compatibility.
# Hungarian month names, January first (index 0).
months = ['január', 'február', 'március', 'április', 'május', 'június',
'július', 'augusztus', 'szeptember', 'október', 'november',
'december']
# Hungarian words for 0-20, the tens up to 90, and 100.
NUM_STRING_HU = {
0: 'nulla',
1: 'egy',
2: 'kettő',
3: 'három',
4: 'négy',
5: 'öt',
6: 'hat',
7: 'hét',
8: 'nyolc',
9: 'kilenc',
10: 'tíz',
11: 'tizenegy',
12: 'tizenkettő',
13: 'tizenhárom',
14: 'tizennégy',
15: 'tizenöt',
16: 'tizenhat',
17: 'tizenhét',
18: 'tizennyolc',
19: 'tizenkilenc',
20: 'húsz',
30: 'harminc',
40: 'negyven',
50: 'ötven',
60: 'hatvan',
70: 'hetven',
80: 'nyolcvan',
90: 'kilencven',
100: 'száz'
}
# Hungarian uses the "long scale"
# https://en.wikipedia.org/wiki/Long_and_short_scales
# Names of successive powers of one thousand, indexed by the power
# (index 0 is the empty name for the units group).
# Currently, numbers are limited to 1000000000000000000000000 (10**24),
# but NUM_POWERS_OF_TEN can be extended to include additional number words
NUM_POWERS_OF_TEN = [
'', 'ezer', 'millió', 'milliárd', 'billió', 'billiárd', 'trillió',
'trilliárd'
]
# Hungarian names of fraction denominators 2..20, keyed by the
# denominator; nice_number_hu looks the denominator up here.
FRACTION_STRING_HU = {
2: 'fél',
3: 'harmad',
4: 'negyed',
5: 'ötöd',
6: 'hatod',
7: 'heted',
8: 'nyolcad',
9: 'kilenced',
10: 'tized',
11: 'tizenegyed',
12: 'tizenketted',
13: 'tizenharmad',
14: 'tizennegyed',
15: 'tizenötöd',
16: 'tizenhatod',
17: 'tizenheted',
18: 'tizennyolcad',
19: 'tizenkilenced',
20: 'huszad'
}
# Numbers below 2 thousand are written as one word in Hungarian.
# Numbers above 2 thousand are separated by hyphens.
# In some circumstances it may be better to separate individual words:
# set EXTRA_SPACE=" " to separate the words of numbers below 2 thousand
# (orthographically incorrect);
# set EXTRA_SPACE="" for correct spelling, which is the standard.
# EXTRA_SPACE = " "
EXTRA_SPACE = ""
def _get_vocal_type(word):
# checks the vocal attributes of a word
vowels_high = len([char for char in word if char in 'eéiíöőüű'])
vowels_low = len([char for char in word if char in 'aáoóuú'])
if vowels_high != 0 and vowels_low != 0:
return 2 # 2: type is mixed
return 0 if vowels_high == 0 else 1 # 0: type is low, 1: is high
def nice_number_hu(number, speech, denominators=range(1, 21)):
    """ Hungarian helper for nice_number

    Turns a float into a mixed fraction a person can read or hear, e.g.
    4.5 becomes "4 és fél" for speech and "4 1/2" for text.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to try, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No fraction fits: 3 decimals with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    if num == 0:
        # Whole numbers look the same for speech and display
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = FRACTION_STRING_HU[den]
    if whole == 0 and num == 1:
        # a lone half is just "fél"; other unit fractions get "egy "
        article = '' if den == 2 else 'egy '
        return '{}{}'.format(article, den_str)
    if whole == 0:
        return '{} {}'.format(num, den_str)
    if num == 1:
        # "4 és fél" for halves, "4 egész egy harmad" otherwise
        joiner = 'és' if den == 2 else 'egész egy'
        return '{} {} {}'.format(whole, joiner, den_str)
    return '{} egész {} {}'.format(whole, num, den_str)
def pronounce_number_hu(num, places=2):
"""
Convert a number to its spoken equivalent
For example, '5.2' with places=1 would return 'öt egész két tized'
Args:
num(float or int): the number to pronounce (set limit below)
places(int): number of decimal places the fractional part is rounded
to; also selects the fraction suffix (tized, század, ...)
Returns:
(str): The pronounced number
"""
# NOTE(review): the indentation of this block was lost in this rendering;
# the comments below describe individual statements only and do not
# assert how the statements are nested.
# Helper: words for a value below one thousand (hundreds, tens, ones).
def pronounce_triplet_hu(num):
result = ""
num = floor(num)
if num > 99:
hundreds = floor(num / 100)
if hundreds > 0:
hundredConst = EXTRA_SPACE + 'száz' + EXTRA_SPACE
# one hundred is a bare "száz"; two hundred uses "két" not "kettő"
if hundreds == 1:
result += hundredConst
elif hundreds == 2:
result += 'két' + hundredConst
else:
result += NUM_STRING_HU[hundreds] + hundredConst
num -= hundreds * 100
if num == 0:
result += '' # do nothing
elif num <= 20:
result += NUM_STRING_HU[num] # + EXTRA_SPACE
elif num > 20:
ones = num % 10
tens = num - ones
if tens > 0:
# twenty-something uses the combining form "huszon"
if tens != 20:
result += NUM_STRING_HU[tens] + EXTRA_SPACE
else:
result += "huszon" + EXTRA_SPACE
if ones > 0:
result += NUM_STRING_HU[ones] + EXTRA_SPACE
return result
# Helper: recursively pronounces a whole number one group of three
# digits at a time; scale_level indexes NUM_POWERS_OF_TEN.
def pronounce_whole_number_hu(num, scale_level=0):
if num == 0:
return ''
num = floor(num)
result = ''
last_triplet = num % 1000
if last_triplet == 1:
if scale_level == 0:
if result != '':
result += '' + "egy"
else:
result += "egy"
elif scale_level == 1:
result += EXTRA_SPACE + NUM_POWERS_OF_TEN[1] + EXTRA_SPACE
else:
result += "egy" + NUM_POWERS_OF_TEN[scale_level]
elif last_triplet > 1:
result += pronounce_triplet_hu(last_triplet)
# in front of a scale word "kettő" becomes "két"
if scale_level != 0:
result = result.replace(NUM_STRING_HU[2], 'két')
if scale_level == 1:
result += NUM_POWERS_OF_TEN[1] + EXTRA_SPACE
if scale_level >= 2:
result += NUM_POWERS_OF_TEN[scale_level]
# hyphen separating this group from the lower-order groups
if scale_level > 0:
result += '-'
# move on to the next higher group; its words are prepended
num = floor(num / 1000)
scale_level += 1
return pronounce_whole_number_hu(num,
scale_level) + result
result = ""
if abs(num) >= 1000000000000000000000000: # cannot do more than this
return str(num)
elif num == 0:
return str(NUM_STRING_HU[0])
elif num < 0:
return "mínusz " + pronounce_number_hu(abs(num), places)
else:
if num == int(num):
# strip('-') removes a hyphen left at either end of the result
return pronounce_whole_number_hu(num).strip('-')
else:
whole_number_part = floor(num)
fractional_part = num - whole_number_part
# the whole-number helper returns '' for 0, so "nulla" is added here
if whole_number_part == 0:
result += NUM_STRING_HU[0]
result += pronounce_whole_number_hu(whole_number_part)
if places > 0:
result += " egész "
# the fraction is spoken as a whole number of 10**-places units
fraction = pronounce_whole_number_hu(
round(fractional_part * 10 ** places))
result += fraction.replace(NUM_STRING_HU[2], 'két')
fraction_suffixes = [
'tized', 'század', 'ezred', 'tízezred', 'százezred']
# a suffix is only available for up to five decimal places
if places <= len(fraction_suffixes):
result += ' ' + fraction_suffixes[places - 1]
return result
def pronounce_ordinal_hu(num):
    """Return the Hungarian ordinal word for a whole, non-negative number.

    Args:
        num (int or float): the number to convert
    Returns:
        (str): the ordinal, or `num` itself unchanged when it is negative
               or not a whole number
    """
    ordinals = ["nulladik", "első", "második", "harmadik", "negyedik",
                "ötödik", "hatodik", "hetedik", "nyolcadik", "kilencedik",
                "tizedik"]
    big_ordinals = ["", "ezredik", "milliomodik"]

    # only for whole positive numbers including zero
    if num < 0 or num != int(num):
        return num
    if num < 11:
        return ordinals[num]

    # concatenate parts and inflect them accordingly
    root = pronounce_number_hu(num)
    vtype = _get_vocal_type(root)
    last_digit = num - floor(num/10) * 10
    if root == "húsz":
        root = "husz"

    # round millions / thousands: swap the scale word for its ordinal
    if num % 1000000 == 0:
        return root.replace(NUM_POWERS_OF_TEN[2], big_ordinals[2])
    if num % 1000 == 0:
        return root.replace(NUM_POWERS_OF_TEN[1], big_ordinals[1])

    if last_digit == 1:
        return root + "edik"
    if root[-1] == 'ő':
        return root[:-1] + 'edik'
    if last_digit != 0:
        # replace the last occurrence of the final digit's word with its
        # ordinal form
        pieces = root.rsplit(NUM_STRING_HU[last_digit], 1)
        return ordinals[last_digit].join(pieces)

    # suffix follows the vocal type of the root
    if vtype == 1:
        return root + "edik"
    return root + "adik"
def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    Generates a spoken Hungarian time for speech or a string such as
    '5:30' for text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the part of day for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" or "2:22 PM" with am/pm, "3:01" or "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hour_words = pronounce_number_hu(dt.hour)
    else:
        if dt.minute == 0 and dt.hour == 0:
            return "éjfél"
        if dt.minute == 0 and dt.hour == 12:
            return "dél"
        # TODO: "half past 3", "a quarter of 4" and other idiomatic times
        if dt.hour == 0:
            hour_words = pronounce_number_hu(12)
        elif dt.hour < 13:
            hour_words = pronounce_number_hu(dt.hour)
        else:
            hour_words = pronounce_number_hu(dt.hour - 12)

    # "kettő" becomes "két" in front of "óra"
    speak = hour_words.replace(NUM_STRING_HU[2], 'két') + " óra"
    if dt.minute != 0:  # zero minutes are not pronounced
        speak += " " + pronounce_number_hu(dt.minute)

    if use_24hour or not use_ampm:
        # ampm is ignored when use_24hour is true
        return speak

    if dt.hour > 11:
        if dt.hour < 18:
            prefix = "délután "  # 12:01 - 17:59
        elif dt.hour < 22:
            prefix = "este "  # 18:00 - 21:59 este/evening
        else:
            prefix = "éjjel "  # 22:00 - 23:59 éjjel/at night
    elif dt.hour < 3:
        prefix = "éjjel "  # 00:01 - 02:59 éjjel/at night
    else:
        prefix = "reggel "  # 03:00 - 11:59 reggel/in the morning
    return prefix + speak
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_hu import *

View File

@ -14,485 +14,8 @@
# limitations under the License.
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
import collections
"""File kept for backwards compatibility.
# Italian words for 0-20 and the tens up to 90.
NUM_STRING_IT = {
0: 'zero',
1: 'uno',
2: 'due',
3: 'tre',
4: 'quattro',
5: 'cinque',
6: 'sei',
7: 'sette',
8: 'otto',
9: 'nove',
10: 'dieci',
11: 'undici',
12: 'dodici',
13: 'tredici',
14: 'quattordici',
15: 'quindici',
16: 'sedici',
17: 'diciassette',
18: 'diciotto',
19: 'diciannove',
20: 'venti',
30: 'trenta',
40: 'quaranta',
50: 'cinquanta',
60: 'sessanta',
70: 'settanta',
80: 'ottanta',
90: 'novanta'
}
# Italian stems of fraction denominators 2..20, keyed by the denominator.
# The final vowel is missing on purpose: nice_number_it appends 'o'
# (singular) or 'i' (plural).
FRACTION_STRING_IT = {
2: 'mezz',
3: 'terz',
4: 'quart',
5: 'quint',
6: 'sest',
7: 'settim',
8: 'ottav',
9: 'non',
10: 'decim',
11: 'undicesim',
12: 'dodicesim',
13: 'tredicesim',
14: 'quattordicesim',
15: 'quindicesim',
16: 'sedicesim',
17: 'diciassettesim',
18: 'diciottesim',
19: 'diciannovesim',
20: 'ventesim'
}
# source: http://tulengua.es/numeros-texto/default.aspx
# Italian long-scale names keyed by magnitude, in ascending order.
# NOTE(review): the keys from 1e9 upwards are float literals. Literals
# above the largest finite float (about 1.8e308), i.e. 1e312, 1e336 and
# 1e366, evaluate to inf, so they collapse into a single inf key holding
# the last of their values.
LONG_SCALE_IT = collections.OrderedDict([
(100, 'cento'),
(1000, 'mila'),
(1000000, 'milioni'),
(1e9, "miliardi"),
(1e12, "bilioni"),
(1e18, 'trilioni'),
(1e24, "quadrilioni"),
(1e30, "quintilioni"),
(1e36, "sestilioni"),
(1e42, "settilioni"),
(1e48, "ottillioni"),
(1e54, "nonillioni"),
(1e60, "decemillioni"),
(1e66, "undicilione"),
(1e72, "dodicilione"),
(1e78, "tredicilione"),
(1e84, "quattordicilione"),
(1e90, "quindicilione"),
(1e96, "sedicilione"),
(1e102, "diciasettilione"),
(1e108, "diciottilione"),
(1e114, "dicianovilione"),
(1e120, "vintilione"),
(1e306, "unquinquagintilione"),
(1e312, "duoquinquagintilione"),
(1e336, "sesquinquagintilione"),
(1e366, "unsexagintilione")
])
# Italian short-scale names keyed by magnitude, in ascending order.
# NOTE(review): the keys from 1e9 upwards are float literals. Every
# literal above the largest finite float (about 1.8e308), i.e. 1e309 and
# everything after it, evaluates to inf, so all of those entries collapse
# into a single inf key holding the last value ("millinillion").
SHORT_SCALE_IT = collections.OrderedDict([
(100, 'cento'),
(1000, 'mila'),
(1000000, 'milioni'),
(1e9, "miliardi"),
(1e12, 'bilioni'),
(1e15, "biliardi"),
(1e18, "trilioni"),
(1e21, "triliardi"),
(1e24, "quadrilioni"),
(1e27, "quadriliardi"),
(1e30, "quintilioni"),
(1e33, "quintiliardi"),
(1e36, "sestilioni"),
(1e39, "sestiliardi"),
(1e42, "settilioni"),
(1e45, "settiliardi"),
(1e48, "ottilioni"),
(1e51, "ottiliardi"),
(1e54, "nonilioni"),
(1e57, "noniliardi"),
(1e60, "decilioni"),
(1e63, "deciliardi"),
(1e66, "undicilioni"),
(1e69, "undiciliardi"),
(1e72, "dodicilioni"),
(1e75, "dodiciliardi"),
(1e78, "tredicilioni"),
(1e81, "trediciliardi"),
(1e84, "quattordicilioni"),
(1e87, "quattordiciliardi"),
(1e90, "quindicilioni"),
(1e93, "quindiciliardi"),
(1e96, "sedicilioni"),
(1e99, "sediciliardi"),
(1e102, "diciassettilioni"),
(1e105, "diciassettiliardi"),
(1e108, "diciottilioni"),
(1e111, "diciottiliardi"),
(1e114, "dicianovilioni"),
(1e117, "dicianoviliardi"),
(1e120, "vintilioni"),
(1e123, "vintiliardi"),
(1e153, "quinquagintillion"),
(1e183, "sexagintillion"),
(1e213, "septuagintillion"),
(1e243, "ottogintilioni"),
(1e273, "nonigintillioni"),
(1e303, "centilioni"),
(1e306, "uncentilioni"),
(1e309, "duocentilioni"),
(1e312, "trecentilioni"),
(1e333, "decicentilioni"),
(1e336, "undicicentilioni"),
(1e363, "viginticentilioni"),
(1e366, "unviginticentilioni"),
(1e393, "trigintacentilioni"),
(1e423, "quadragintacentillion"),
(1e453, "quinquagintacentillion"),
(1e483, "sexagintacentillion"),
(1e513, "septuagintacentillion"),
(1e543, "ctogintacentillion"),
(1e573, "nonagintacentillion"),
(1e603, "ducentillion"),
(1e903, "trecentillion"),
(1e1203, "quadringentillion"),
(1e1503, "quingentillion"),
(1e1803, "sescentillion"),
(1e2103, "septingentillion"),
(1e2403, "octingentillion"),
(1e2703, "nongentillion"),
(1e3003, "millinillion")
])
def nice_number_it(number, speech, denominators=range(1, 21)):
    """ Italian helper for nice_number

    Formats a number as a mixed fraction: "4 1/2" style for display, or a
    spoken Italian phrase built from FRACTION_STRING_IT for speech.  The
    table entries get an 'i' (plural) or 'o' (singular) ending appended
    here, so they are presumably word stems.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal representation
        return str(round(number, 3))

    integer_part, numerator, denominator = mixed

    # A vanishing fraction reads the same for speech and for display
    if numerator == 0:
        return str(integer_part)
    if not speech:
        return '{} {}/{}'.format(integer_part, numerator, denominator)

    stem = FRACTION_STRING_IT[denominator]
    # "un <fraction>" for a single part, otherwise the count as digits
    quantity = 'un' if numerator == 1 else '{}'.format(numerator)
    spoken = '{} {}'.format(quantity, stem)
    if integer_part != 0:
        # whole part and fraction are joined with "e"
        spoken = '{} e {}'.format(integer_part, spoken)

    # plural / singular ending of the denominator word
    ending = 'i' if numerator > 1 else 'o'
    return spoken + ending
def pronounce_number_it(num, places=2, short_scale=False, scientific=False):
    """
    Convert a number to its spoken Italian equivalent.

    Adapted to Italian from the English version.
    For example, '5.2' would return 'cinque virgola due'

    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    # infinity handling
    if num == float("inf"):
        return "infinito"
    elif num == float("-inf"):
        return "meno infinito"

    if scientific:
        # '%E' yields e.g. '1.500000E+03'; split into mantissa and exponent
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        # a zero exponent falls through to the regular pronunciation below
        if power != 0:
            return '{}{} per dieci elevato alla {}{}'.format(
                'meno ' if float(n) < 0 else '',
                pronounce_number_it(abs(float(n)), places, short_scale, False),
                'meno ' if power < 0 else '',
                pronounce_number_it(abs(power), places, short_scale, False))

    # lookup table: basic number words plus the selected scale's words
    if short_scale:
        number_names = NUM_STRING_IT.copy()
        number_names.update(SHORT_SCALE_IT)
    else:
        number_names = NUM_STRING_IT.copy()
        number_names.update(LONG_SCALE_IT)

    # words for 0..19 and for 10, 20, ..., 90
    digits = [number_names[n] for n in range(0, 20)]

    tens = [number_names[n] for n in range(10, 100, 10)]

    # scale words in table order; indexed by group position further down
    if short_scale:
        hundreds = [SHORT_SCALE_IT[n] for n in SHORT_SCALE_IT.keys()]
    else:
        hundreds = [LONG_SCALE_IT[n] for n in LONG_SCALE_IT.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "meno "
    num = abs(num)

    # check for a direct match
    if num in number_names:
        if num > 90:
            result += ""  # start of string (no-op placeholder)
        result += number_names[num]
    else:
        def _sub_thousand(n):
            # Spell out an integer in the range 0..999
            assert 0 <= n <= 999
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                _deci = tens[q-1]
                _unit = r
                _partial = _deci
                if _unit > 0:
                    if _unit == 1 or _unit == 8:
                        # drop the final vowel of the tens word:
                        # ventuno, ventotto
                        _partial = _partial[:-1]
                    _partial += number_names[_unit]
                return _partial
            else:
                q, r = divmod(n, 100)
                if q == 1:
                    _partial = "cento"
                else:
                    _partial = digits[q] + "cento"
                _partial += (
                    " " + _sub_thousand(r) if r else "")  # separate hundreds
                return _partial

        def _short_scale(n):
            # Spell out n in groups of three digits, each followed by its
            # short-scale word; groups are joined with ", "
            if n >= max(SHORT_SCALE_IT.keys()):
                return "numero davvero enorme"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z)
                if i:
                    number += ""  # separate orders of magnitude (no-op)
                    number += hundreds[i]
                res.append(number)

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # Split n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # Spell out n in groups of six digits; each group is pronounced
            # through the short-scale path and followed by its scale word
            if n >= max(LONG_SCALE_IT.keys()):
                return "numero davvero enorme"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_it(z, places, True, scientific)
                # strip off the comma after the thousand
                if i:
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')
                    number += " " + hundreds[i+1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # normalise single, 'reasonable' units: a bare scale word, or a leading
    # "uno" + scale word, is replaced by its singular form
    if result == 'mila':
        result = 'mille'
    if result == 'milioni':
        result = 'un milione'
    if result == 'miliardi':
        result = 'un miliardo'
    if result[0:7] == 'unomila':
        result = result.replace('unomila', 'mille', 1)
    if result[0:10] == 'unomilioni':
        result = result.replace('unomilioni', 'un milione', 1)
    # if result[0:11] == 'unomiliardi':
    # result = result.replace('unomiliardi', 'un miliardo', 1)

    # Deal with fractional part
    if not num == int(num) and places > 0:
        result += " virgola"
        place = 10
        # NOTE(review): the loop stops at the first zero decimal digit, so
        # any digits after that zero are not spoken
        while int(num * place) % 10 > 0 and places > 0:
            result += " " + number_names[int(num * place) % 10]
            place *= 10
            places -= 1
    return result
def nice_time_it(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Italian).

    Adapted to Italian from the English version.
    For example, generate 'cinque e trenta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hour_digits = string[0:2]
        minute_digits = string[3:5]

        # Either "zero 8 zerozero" or "13 zerozero"
        if hour_digits == '00':
            speak = "zerozero"
        elif hour_digits[0] == '0':
            if int(hour_digits[1]) == 1:
                # one o'clock is the feminine "una", without a leading zero
                speak = "una"
            else:
                speak = pronounce_number_it(int(hour_digits[0])) + " "
                speak += pronounce_number_it(int(hour_digits[1]))
        else:
            speak = pronounce_number_it(int(hour_digits))

        # in Italian "13 e 25"
        speak += " e "

        if minute_digits == '00':
            speak += "zerozero"
        elif minute_digits[0] == '0':
            speak += pronounce_number_it(0) + " "
            speak += pronounce_number_it(int(minute_digits[1]))
        else:
            speak += pronounce_number_it(int(minute_digits))
        return speak

    # 12-hour speech
    if dt.minute == 0:
        if dt.hour == 0:
            return "mezzanotte"
        if dt.hour == 12:
            return "mezzogiorno"
    # TODO: "10 e un quarto", "4 e tre quarti" and other idiomatic times

    if dt.hour == 0:
        speak = "mezzanotte"
    elif dt.hour in (1, 13):
        speak = "una"
    elif dt.hour > 13:
        speak = pronounce_number_it(dt.hour-12)
    else:
        speak = pronounce_number_it(dt.hour)

    # minutes are attached with "e"; a full hour gets no "e" at all
    if dt.minute == 0:
        if not use_ampm:
            speak += " in punto"
    elif dt.minute == 15:
        speak += " e un quarto"
    elif dt.minute == 45:
        speak += " e tre quarti"
    elif dt.minute < 10:
        speak += " e zero " + pronounce_number_it(dt.minute)
    else:
        speak += " e " + pronounce_number_it(dt.minute)

    # part of the day; hours before 4 get no suffix
    if use_ampm and not dt.hour < 4:
        if dt.hour > 20:
            speak += " della notte"
        elif dt.hour > 17:
            speak += " della sera"
        elif dt.hour > 12:
            speak += " del pomeriggio"
        else:
            speak += " della mattina"
    return speak
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_it import *

View File

@ -14,382 +14,8 @@
# limitations under the License.
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
from math import floor
"""File kept for backwards compatibility.
# Dutch month names, lower case; used to detect dates in free text.
months = ['januari', 'februari', 'maart', 'april', 'mei', 'juni',
          'juli', 'augustus', 'september', 'oktober', 'november',
          'december']

# Dutch words for 0..20, the tens up to 90, and 100.
NUM_STRING_NL = {
    0: 'nul',
    1: 'één',
    2: 'twee',
    3: 'drie',
    4: 'vier',
    5: 'vijf',
    6: 'zes',
    7: 'zeven',
    8: 'acht',
    9: 'negen',
    10: 'tien',
    11: 'elf',
    12: 'twaalf',
    13: 'dertien',
    14: 'veertien',
    15: 'vijftien',
    16: 'zestien',
    17: 'zeventien',
    18: 'achttien',
    19: 'negentien',
    20: 'twintig',
    30: 'dertig',
    40: 'veertig',
    50: 'vijftig',
    60: 'zestig',
    70: 'zeventig',
    80: 'tachtig',
    90: 'negentig',
    100: 'honderd'
}

# Dutch uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but NUM_POWERS_OF_TEN can be extended to include additional number words
NUM_POWERS_OF_TEN = [
    '', 'duizend', 'miljoen', 'miljard', 'biljoen', 'biljard', 'triljoen',
    'triljard'
]

# Denominator words for fractions 1/2 .. 1/20.
FRACTION_STRING_NL = {
    2: 'half',
    3: 'derde',
    4: 'vierde',
    5: 'vijfde',
    6: 'zesde',
    7: 'zevende',
    8: 'achtste',
    9: 'negende',
    10: 'tiende',
    11: 'elfde',
    12: 'twaalfde',
    13: 'dertiende',
    14: 'veertiende',
    15: 'vijftiende',
    16: 'zestiende',
    17: 'zeventiende',
    18: 'achttiende',
    19: 'negentiende',
    20: 'twintigste'
}

# Numbers below 1 million are written in one word in Dutch, yielding very
# long words.
# In some circumstances it may be better to separate individual words:
# Set EXTRA_SPACE=" " for separating numbers below 1 million
# (orthographically incorrect)
# Set EXTRA_SPACE="" for correct spelling, this is standard
# EXTRA_SPACE = " "
EXTRA_SPACE = ""
def nice_number_nl(number, speech, denominators=range(1, 21)):
    """ Dutch helper for nice_number

    Formats a number as a mixed fraction: "4 1/2" style for display, or a
    phrase such as "4 en één half" for speech.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up: 3 decimal number, written with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    integer_part, numerator, denominator = mixed

    # TODO: Number grouping? E.g. "1,000,000"
    if numerator == 0:
        return str(integer_part)
    if not speech:
        return '{} {}/{}'.format(integer_part, numerator, denominator)

    fraction_word = FRACTION_STRING_NL[denominator]
    # a single part is spoken as "één", otherwise the count as digits
    quantity = 'één' if numerator == 1 else '{}'.format(numerator)
    spoken = '{} {}'.format(quantity, fraction_word)
    if integer_part == 0:
        return spoken
    # whole part and fraction are joined with "en"
    return '{} en {}'.format(integer_part, spoken)
def pronounce_number_nl(num, places=2):
    """
    Convert a number to its spoken Dutch equivalent

    The whole part is spelled out in groups of three digits; for a
    non-integer the word "komma" and exactly `places` decimal digits follow.

    Args:
        num(float or int): the number to pronounce (set limit below)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """

    def pronounce_triplet_nl(num):
        # Spell out a value below 1000 (hundreds, then units before tens)
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = floor(num / 100)
            if hundreds > 0:
                result += NUM_STRING_NL[
                    hundreds] + EXTRA_SPACE + 'honderd' + EXTRA_SPACE
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num <= 20:
            result += NUM_STRING_NL[num]  # + EXTRA_SPACE
        elif num > 20:
            # units come first, joined to the tens with "en"
            ones = num % 10
            tens = num - ones
            if ones > 0:
                result += NUM_STRING_NL[ones] + EXTRA_SPACE
                if tens > 0:
                    result += 'en' + EXTRA_SPACE
            if tens > 0:
                result += NUM_STRING_NL[tens] + EXTRA_SPACE
        return result

    def pronounce_fractional_nl(num,
                                places):  # fixed number of places even with
        # trailing zeros
        result = ""
        place = 10
        while places > 0:  # doesn't work with 1.0001 and places = 2: int(
            # num*place) % 10 > 0 and places > 0:
            result += " " + NUM_STRING_NL[int(num * place) % 10]
            if int(num * place) % 10 == 1:
                result += ''  # no-op: a "1" after the decimal point gets
                # no special form
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_nl(num, scale_level=0):
        # Recursively spell out the whole part, three digits per call;
        # scale_level is the index of the current group's scale word
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                # result is always '' here, so only the else branch runs
                if result != '':
                    result += '' + 'één'
                else:
                    result += "één"
            elif scale_level == 1:
                result += 'één' + EXTRA_SPACE + 'duizend' + EXTRA_SPACE
            else:
                result += "één " + NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_nl(last_triplet)
            if scale_level == 1:
                # result += EXTRA_SPACE
                result += 'duizend' + EXTRA_SPACE
            if scale_level >= 2:
                # if EXTRA_SPACE == '':
                # result += " "
                result += " " + NUM_POWERS_OF_TEN[scale_level] + ' '
            if scale_level >= 2:
                # both statements below are no-ops: no plural ending is added
                if scale_level % 2 == 0:
                    result += ""  # Miljioen
                result += ""  # Miljard, Miljoen

        # higher-order groups are spelled first and prepended
        num = floor(num / 1000)
        scale_level += 1
        return pronounce_whole_number_nl(num,
                                         scale_level) + result + ''

    result = ""
    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    elif num == 0:
        return str(NUM_STRING_NL[0])
    elif num < 0:
        return "min " + pronounce_number_nl(abs(num), places)
    else:
        if num == int(num):
            return pronounce_whole_number_nl(num)
        else:
            whole_number_part = floor(num)
            fractional_part = num - whole_number_part
            # NOTE(review): a whole part of 0 contributes an empty string,
            # so e.g. 0.5 starts directly with " komma"
            result += pronounce_whole_number_nl(whole_number_part)
            if places > 0:
                result += " komma"
                result += pronounce_fractional_nl(fractional_part, places)
            return result
def pronounce_ordinal_nl(num):
    """
    Convert a whole, non-negative number to its Dutch ordinal.

    0..3 use irregular forms; 8 and everything from 20 upwards take the
    suffix "ste", the remaining numbers take "de".

    Args:
        num (int or float): the number to convert
    Returns:
        (str): the ordinal word, or `num` itself (unchanged) when it is
               negative or not a whole number
    """
    ordinals = ["nulste", "eerste", "tweede", "derde", "vierde", "vijfde",
                "zesde", "zevende", "achtste"]

    # only for whole positive numbers including zero
    if num < 0 or num != int(num):
        return num

    # A whole-valued float such as 2.0 passes the check above but cannot be
    # used as a list index, so normalise to int first.
    num = int(num)
    if num < 4:
        return ordinals[num]
    if num < 8:
        return pronounce_number_nl(num) + "de"
    if num < 9:
        return pronounce_number_nl(num) + "ste"
    if num < 20:
        return pronounce_number_nl(num) + "de"
    return pronounce_number_nl(num) + "ste"
def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Dutch).

    For example, generate 'half zes' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        spoken = pronounce_number_nl(dt.hour) + " uur"
        # zero minutes are not pronounced, 13:00 is "13 uur"
        if dt.minute != 0:
            spoken += " " + pronounce_number_nl(dt.minute)
        return spoken  # ampm is ignored when use_24hour is true

    if dt.hour == 0 and dt.minute == 0:
        return "Middernacht"

    minute = dt.minute
    this_hour = fix_hour(dt.hour % 12)
    # hour the clock is heading towards ("half", "voor" forms)
    next_hour = fix_hour(dt.hour % 12 + 1)

    if minute == 0:
        spoken = pronounce_number_nl(this_hour) + " uur"
    elif minute == 30:
        spoken = "half " + pronounce_number_nl(next_hour)
    elif minute == 15:
        spoken = "kwart over " + pronounce_number_nl(this_hour)
    elif minute == 45:
        spoken = "kwart voor " + pronounce_number_nl(next_hour)
    elif minute > 30:
        spoken = pronounce_number_nl(60 - minute) + " voor "
        spoken += pronounce_number_nl(next_hour)
    else:
        spoken = pronounce_number_nl(minute) + " over "
        spoken += pronounce_number_nl(this_hour)

    if use_ampm:
        spoken += nice_part_of_day_nl(dt)
    return spoken
def fix_hour(hour):
    """Map an hour onto the 12-hour dial: 1..11 stay, multiples of 12 -> 12."""
    # the remainder is 0 exactly when the dial shows twelve
    return hour % 12 or 12
def nice_part_of_day_nl(dt):
    """
    Return the Dutch part-of-day suffix (with leading space) for dt.hour.

    Raises:
        Exception: if dt.hour is not below 24
    """
    # (exclusive upper hour bound, suffix), checked in ascending order
    day_parts = (
        (6, " 's nachts"),
        (12, " 's ochtends"),
        (18, " 's middags"),
        (24, " 's avonds"),
    )
    for upper_bound, suffix in day_parts:
        if dt.hour < upper_bound:
            return suffix
    raise Exception('dt.hour is bigger than 24')
def nice_response_nl(text):
    """
    Make a response text more speakable (Dutch).

    For every month name found, the text is passed through nice_ordinal_nl.
    A "^" that is followed by a numeric word is replaced with "tot de macht"
    (to the power of).  Note that the "^" replacement rebuilds the text from
    the words of the original input.

    Args:
        text (str): the text to adapt
    Returns:
        (str): the adapted text
    """
    tokens = text.split()

    for position, token in enumerate(tokens):
        if token.lower() in months:
            text = nice_ordinal_nl(text)

        if token != '^':
            continue
        following = tokens[position + 1] if position + 1 < len(tokens) else ""
        if following.isnumeric():
            tokens[position] = "tot de macht"
            text = " ".join(tokens)
    return text
def nice_ordinal_nl(text):
    """
    Spell out day numbers that directly precede a month name (Dutch).

    A numeric word followed by a month becomes an ordinal when the word
    before it is 'de' ("de 5 mei" -> "de vijfde mei") and a cardinal
    otherwise.

    Args:
        text (str): the text to adapt
    Returns:
        (str): the adapted text; unchanged when it has no numeric word
    """
    normalized_text = text
    words = text.split()

    for idx, word in enumerate(words):
        # The whole word must be decimal digits.  Checking only word[:-1]
        # skipped single-digit days and let tokens such as "12x" reach
        # int(), which raised ValueError.
        if not word.isdecimal():
            continue
        word_next = words[idx + 1] if idx + 1 < len(words) else ""
        word_prev = words[idx - 1] if idx > 0 else ""
        if word_next.lower() in months:
            if word_prev == 'de':
                words[idx] = pronounce_ordinal_nl(int(word))
            else:
                words[idx] = pronounce_number_nl(int(word))
        normalized_text = " ".join(words)
    return normalized_text
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_nl import *

View File

@ -14,209 +14,8 @@
# limitations under the License.
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
from mycroft.util.lang.common_data_pt import _FRACTION_STRING_PT, \
_NUM_STRING_PT
"""File kept for backwards compatibility.
def nice_number_pt(number, speech, denominators=range(1, 21)):
    """ Portuguese helper for nice_number

    This function formats a float to human understandable functions. Like
    4.5 becomes "4 e meio" for speech and "4 1/2" for text

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    integer_part, numerator, denominator = mixed

    # TODO: Number grouping? E.g. "1,000,000"
    if numerator == 0:
        return str(integer_part)
    if not speech:
        return '{} {}/{}'.format(integer_part, numerator, denominator)

    # denominator word
    fraction_word = _FRACTION_STRING_PT[denominator]

    if integer_part == 0:
        # pure fraction: "um <fraction>" or "<n> <fraction>"
        quantity = 'um' if numerator == 1 else '{}'.format(numerator)
        spoken = '{} {}'.format(quantity, fraction_word)
    elif numerator == 1:
        # whole number plus a single part: no article before the fraction
        spoken = '{} e {}'.format(integer_part, fraction_word)
    else:
        # whole number plus several parts
        spoken = '{} e {} {}'.format(integer_part, numerator, fraction_word)

    # plural
    return spoken + 's' if numerator > 1 else spoken
def pronounce_number_pt(num, places=2):
    """
    Convert a number to its spoken Portuguese equivalent

    For example, '5.2' would return 'cinco vírgula dois'.  Numbers whose
    absolute value is 100 or more are returned as plain digits.

    Args:
        num(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    # TODO: Support n > 100
    if abs(num) >= 100:
        return str(num)

    spoken = "menos " if num < 0 else ""
    num = abs(num)

    if num < 20:
        spoken += _NUM_STRING_PT[int(num)]
    else:
        tens = int(num - int(num) % 10)
        ones = int(num - tens)
        spoken += _NUM_STRING_PT[tens]
        if ones > 0:
            spoken += " e " + _NUM_STRING_PT[ones]

    # Decimal part: written with either a comma or a dot in Portuguese,
    # but always pronounced "vírgula"
    if num != int(num) and places > 0:
        spoken += " vírgula"
        scale = 10
        while int(num * scale) % 10 > 0 and places > 0:
            spoken += " " + _NUM_STRING_PT[int(num * scale) % 10]
            scale *= 10
            places -= 1
    return spoken
def nice_time_pt(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Portuguese).

    For example, generate 'cinco e meia' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # simply speak the numbers; one o'clock takes the feminine form
        speak = "uma" if dt.hour == 1 else pronounce_number_pt(dt.hour)
        if dt.minute > 0:
            speak += " e " + pronounce_number_pt(dt.minute)
        return speak

    # These minutes are spoken relative to the next hour ("menos ...")
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak = "meia noite"
    elif hour == 12:
        speak = "meio dia"
    # 1 and 2 are pronounced in female form when talking about hours
    elif hour in (1, 13):
        speak = "uma"
    elif hour in (2, 14):
        speak = "duas"
    elif hour < 13:
        speak = pronounce_number_pt(hour)
    else:
        speak = pronounce_number_pt(hour - 12)

    if minute == 15:
        speak += " e um quarto"
    elif minute == 30:
        speak += " e meia"
    elif minute == -15:
        speak += " menos um quarto"
    elif minute > 0:
        speak += " e " + pronounce_number_pt(minute)
    elif minute < 0:
        # a negative number is spoken with a leading "menos"
        speak += " " + pronounce_number_pt(minute)
    elif not use_ampm:
        # exact time, e.g. 3:00
        speak += " em ponto"

    # daytime identifier (equivalent to "in the morning")
    if use_ampm:
        if 0 < hour < 6:
            speak += " da madrugada"
        elif 6 <= hour < 12:
            speak += " da manhã"
        elif 13 <= hour < 21:
            speak += " da tarde"
        elif hour not in (0, 12):
            speak += " da noite"
    return speak
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_pt import *

View File

@ -14,411 +14,8 @@
# limitations under the License.
#
from mycroft.util.lang.format_common import convert_to_mixed_fraction
from math import floor
"""File kept for backwards compatibility.
# Swedish month names, lower case; used to detect dates in free text.
months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
          'juli', 'augusti', 'september', 'oktober', 'november',
          'december']

# Swedish words for 0..20, the tens up to 90, and 100.
NUM_STRING_SV = {
    0: 'noll',
    1: 'en',
    2: 'två',
    3: 'tre',
    4: 'fyra',
    5: 'fem',
    6: 'sex',
    7: 'sju',
    8: 'åtta',
    9: 'nio',
    10: 'tio',
    11: 'elva',
    12: 'tolv',
    13: 'tretton',
    14: 'fjorton',
    15: 'femton',
    16: 'sexton',
    17: 'sjutton',
    18: 'arton',
    19: 'nitton',
    20: 'tjugo',
    30: 'trettio',
    40: 'fyrtio',
    50: 'femtio',
    60: 'sextio',
    70: 'sjuttio',
    80: 'åttio',
    90: 'nittio',
    100: 'hundra'
}

# Scale words indexed by the number of three-digit groups to the right
# (index 1 = thousands, 2 = millions, ...).
# NOTE(review): pronounce_number_sv only reads indices >= 2 from this list;
# index 0 ('hundra') looks like a placeholder - confirm.
NUM_POWERS_OF_TEN = [
    'hundra',
    'tusen',
    'miljon',
    'miljard',
    'biljon',
    'biljard',
    'triljon',
    'triljard'
]

# Denominator words (singular) for fractions 1/2 .. 1/20.
FRACTION_STRING_SV = {
    2: 'halv',
    3: 'tredjedel',
    4: 'fjärdedel',
    5: 'femtedel',
    6: 'sjättedel',
    7: 'sjundedel',
    8: 'åttondel',
    9: 'niondel',
    10: 'tiondel',
    11: 'elftedel',
    12: 'tolftedel',
    13: 'trettondel',
    14: 'fjortondel',
    15: 'femtondel',
    16: 'sextondel',
    17: 'sjuttondel',
    18: 'artondel',
    19: 'nittondel',
    20: 'tjugondel'
}

# Separator appended after "tusen" and the larger scale words.
EXTRA_SPACE = " "
def nice_number_sv(number, speech, denominators=range(1, 21)):
    """ Swedish helper for nice_number

    This function formats a float to human understandable functions. Like
    4.5 becomes "4 och en halv" for speech and "4 1/2" for text

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    integer_part, numerator, denominator = mixed

    # TODO: Number grouping? E.g. "1,000,000"
    if numerator == 0:
        return str(integer_part)
    if not speech:
        return '{} {}/{}'.format(integer_part, numerator, denominator)

    fraction_word = FRACTION_STRING_SV[denominator]
    # a single part is spoken as "en", otherwise the count as digits
    quantity = 'en' if numerator == 1 else '{}'.format(numerator)
    spoken = '{} {}'.format(quantity, fraction_word)
    if integer_part != 0:
        # whole part and fraction are joined with "och"
        spoken = '{} och {}'.format(integer_part, spoken)

    # plural ending for more than one part
    if numerator > 1:
        spoken += 'ar'
    return spoken
def pronounce_number_sv(num, places=2):
    """
    Convert a number to its spoken Swedish equivalent

    The whole part is spelled out in groups of three digits; for a
    non-integer the word "komma" and exactly `places` decimal digits follow.

    Args:
        num(float or int): the number to pronounce (set limit below)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """

    def pronounce_triplet_sv(num):
        # Spell out a value below 1000
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = floor(num / 100)
            if hundreds > 0:
                if hundreds == 1:
                    result += 'ett' + 'hundra'
                else:
                    result += NUM_STRING_SV[hundreds] + 'hundra'
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num == 1:
            result += 'ett'
        elif num <= 20:
            result += NUM_STRING_SV[num]
        elif num > 20:
            # NOTE: the names are swapped relative to their contents:
            # `tens` holds the units digit and `ones` the multiple of ten.
            # The multiple of ten is appended first, then the units digit.
            tens = num % 10
            ones = num - tens
            if ones > 0:
                result += NUM_STRING_SV[ones]
            if tens > 0:
                result += NUM_STRING_SV[tens]
        return result

    def pronounce_fractional_sv(num, places):
        # fixed number of places even with trailing zeros
        result = ""
        place = 10
        while places > 0:
            # doesn't work with 1.0001 and places = 2: int(
            # num*place) % 10 > 0 and places > 0:
            result += " " + NUM_STRING_SV[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_sv(num, scale_level=0):
        # Recursively spell out the whole part, three digits per call;
        # scale_level is the index of the current group's scale word
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                # result is always '' here, so only the else branch runs
                if result != '':
                    result += '' + 'ett'
                else:
                    result += 'en'
            elif scale_level == 1:
                result += 'ettusen' + EXTRA_SPACE
            else:
                result += 'en ' + NUM_POWERS_OF_TEN[scale_level] + EXTRA_SPACE
        elif last_triplet > 1:
            result += pronounce_triplet_sv(last_triplet)
            if scale_level == 1:
                result += 'tusen' + EXTRA_SPACE
            if scale_level >= 2:
                result += NUM_POWERS_OF_TEN[scale_level]
            if scale_level >= 2:
                result += 'er' + EXTRA_SPACE  # plural ending: miljonER

        # higher-order groups are spelled first and prepended
        num = floor(num / 1000)
        scale_level += 1
        return pronounce_whole_number_sv(num, scale_level) + result

    result = ""
    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    elif num == 0:
        return str(NUM_STRING_SV[0])
    elif num < 0:
        return "minus " + pronounce_number_sv(abs(num), places)
    else:
        if num == int(num):
            return pronounce_whole_number_sv(num)
        else:
            whole_number_part = floor(num)
            fractional_part = num - whole_number_part
            # NOTE(review): a whole part of 0 contributes an empty string,
            # so e.g. 0.5 starts directly with " komma"
            result += pronounce_whole_number_sv(whole_number_part)
            if places > 0:
                result += " komma"
                result += pronounce_fractional_sv(fractional_part, places)
            return result
def pronounce_ordinal_sv(num):
    """
    Convert a whole, non-negative number to its Swedish ordinal base form.

    0..10 come straight from a lookup table.  Larger numbers are built from
    the spoken tens followed by the ordinal of the last digit, or by "de"
    when the last digit is zero.  The result is the base form; it may have
    to be adapted for genus, casus and numerus.

    Args:
        num (int or float): the number to convert
    Returns:
        (str): the ordinal word, or `num` itself (unchanged) when it is
               negative or not a whole number
    """
    # ordinals for 1, 3, 7 and 8 are irregular
    ordinals = ["noll", "första", "andra", "tredje", "fjärde", "femte",
                "sjätte", "sjunde", "åttonde", "nionde", "tionde"]

    if num < 0 or num != int(num):
        return num

    # Whole-valued floats (e.g. 3.0) must become ints to index the table.
    num = int(num)

    # Everything the table covers, including 10: previously 10 fell through
    # to the generic path and came out as the bare suffix "de".
    if num <= 10:
        return ordinals[num]

    tens_count, ones = divmod(num, 10)
    result = pronounce_number_sv(tens_count * 10).rstrip()
    if ones > 0:
        result += ordinals[ones]
    else:
        result += 'de'
    return result
def nice_time_sv(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Swedish).

    For example, generate 'halv sex' for speech or '05:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    elif use_ampm:
        # e.g. "03:01 AM" or "02:22 PM"
        string = dt.strftime("%I:%M %p")
    else:
        # e.g. "03:01" or "02:22"
        string = dt.strftime("%I:%M")

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if use_24hour:
        if dt.hour == 1:
            speak += "ett"  # 01:00 is "ett" not "en"
        else:
            speak += pronounce_number_sv(dt.hour)
        if dt.minute != 0:
            if dt.minute < 10:
                speak += ' noll'

            if dt.minute == 1:
                speak += ' ett'
            else:
                speak += " " + pronounce_number_sv(dt.minute)

        return speak  # ampm is ignored when use_24hour is true

    hour = dt.hour

    if dt.minute != 0:
        if dt.minute < 30:
            # minutes past the hour: "<n> minuter över <hour>"
            if dt.minute != 15:
                speak += pronounce_number_sv(dt.minute)
            else:
                speak += 'kvart'

            if dt.minute == 1:
                speak += ' minut över '
            elif dt.minute not in (5, 10, 15):
                speak += ' minuter över '
            else:
                speak += ' över '
        elif dt.minute > 30:
            # minutes to the next hour: "<n> minuter i <next hour>"
            minutes_left = 60 - dt.minute
            if dt.minute != 45:
                speak += pronounce_number_sv(minutes_left)
            else:
                speak += 'kvart'

            # Singular "minut" applies when one minute is left (HH:59).
            # The old test `dt.minute == 1` could never match in this
            # branch, so HH:59 was spoken as "en minuter i".
            if minutes_left == 1:
                speak += ' minut i '
            elif dt.minute not in (45, 50, 55):
                speak += ' minuter i '
            else:
                speak += ' i '

            hour = (hour + 1) % 12
        else:
            # half past is expressed relative to the next hour
            speak += 'halv '
            hour = (hour + 1) % 12

    if hour == 0 and dt.minute == 0:
        return "midnatt"
    if hour == 12 and dt.minute == 0:
        return "middag"
    # TODO: "half past 3", "a quarter of 4" and other idiomatic times

    if hour == 0:
        speak += pronounce_number_sv(12)
    elif hour <= 13:
        if hour == 1 or hour == 13:  # 01:00 and 13:00 is "ett"
            speak += 'ett'
        else:
            speak += pronounce_number_sv(hour)
    else:
        speak += pronounce_number_sv(hour - 12)

    if use_ampm:
        if dt.hour > 11:
            if dt.hour < 18:
                # 12:01 - 17:59 afternoon
                speak += " på eftermiddagen"
            elif dt.hour < 22:
                # 18:00 - 21:59 evening
                speak += " på kvällen"
            else:
                # 22:00 - 23:59 at night
                speak += " på natten"
        elif dt.hour < 3:
            # 00:01 - 02:59 at night
            speak += " på natten"
        else:
            # 03:00 - 11:59 in the morning
            speak += " på morgonen"

    return speak
def nice_response_sv(text):
    """
    Make a response text more speakable (Swedish).

    For every month name found, the text is passed through nice_ordinal_sv.
    A "^" that is followed by a numeric word is replaced with "upphöjt till"
    (to the power of).  Note that the "^" replacement rebuilds the text from
    the words of the original input.

    Args:
        text (str): the text to adapt
    Returns:
        (str): the adapted text
    """
    tokens = text.split()

    for position, token in enumerate(tokens):
        if token.lower() in months:
            text = nice_ordinal_sv(text)

        if token != '^':
            continue
        following = tokens[position + 1] if position + 1 < len(tokens) else ""
        if following.isnumeric():
            tokens[position] = "upphöjt till"
            text = " ".join(tokens)
    return text
def nice_ordinal_sv(text):
    """
    Spell out dotted day numbers ("5.") that directly precede a month name.

    The number is replaced by its ordinal; an "n" is appended when the
    preceding word is one of a fixed set of articles/prepositions, an "r"
    otherwise.

    Args:
        text (str): the text to adapt
    Returns:
        (str): the adapted text; unchanged when no word ends in "."
    """
    # preceding words after which the ordinal takes the "n" ending
    n_ending_after = ["om", "den", "från", "till",
                      "(från", "(om", "till"]

    normalized_text = text
    words = text.split()

    for idx, word in enumerate(words):
        following = words[idx + 1] if idx + 1 < len(words) else ""
        previous = words[idx - 1] if idx > 0 else ""

        if word[-1:] != ".":
            continue
        digits = word[:-1]
        if digits.isdecimal() and following.lower() in months:
            spoken = pronounce_ordinal_sv(int(digits))
            # every word outside the list above gets "r": the original
            # second test (`not in ["den"]`) could never fail, because
            # "den" is already part of that list
            if previous.lower() in n_ending_after:
                spoken += "n"
            else:
                spoken += "r"
            words[idx] = spoken
        normalized_text = " ".join(words)
    return normalized_text
TODO: Remove in 20.02
"""
from lingua_franca.lang.format_sv import *

View File

@ -14,89 +14,8 @@
# limitations under the License.
#
"""File kept for backwards compatibility.
def is_numeric(input_str):
    """
    Takes in a string and tests to see if it is a number.

    Anything float() accepts counts as a number.

    Args:
        input_str (str): string to test if a number
    Returns:
        (bool): True if a number, else False
    """
    try:
        float(input_str)
    except (TypeError, ValueError):
        # ValueError: not a numeric string; TypeError: not a string or
        # number at all (e.g. None)
        return False
    return True
def look_for_fractions(split_list):
    """
    Decide whether the pieces of a string split on '/' form a fraction.

    Args:
        split_list (list): list created by splitting on '/'
    Returns:
        (bool): False if not a fraction, otherwise True
    """
    # a fraction has exactly a numerator and a denominator ...
    if len(split_list) != 2:
        return False
    # ... and both have to be numbers
    numerator, denominator = split_list
    return bool(is_numeric(numerator) and is_numeric(denominator))
def extract_numbers_generic(text, pronounce_handler, extract_handler,
                            short_scale=True, ordinals=False):
    """
    Takes in a string and extracts a list of numbers.
    Language agnostic, per language parsers need to be provided

    The last number in the text is extracted repeatedly; after each round
    its last occurrence is blanked out so the next round finds the number
    before it.  Extraction stops as soon as extract_handler returns a falsy
    value or the text can no longer be reduced.

    Args:
        text (str): the string to extract a number from
        pronounce_handler (function): function that pronounces a number
        extract_handler (function): function that extracts the last number
            present in a string
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: list of extracted numbers as floats
    """

    def replace_last(source, target, replacement):
        # handle duplicate occurrences, replace last one only
        return replacement.join(source.rsplit(target, 1))

    found = []
    remaining = text
    current = extract_handler(text, short_scale, ordinals)

    while current:
        found.append(current)
        before = remaining

        spoken = pronounce_handler(current)
        literal = str(current)
        if literal.endswith(".0"):
            literal = literal[:-2]

        # last biggest number was replaced, recurse to handle cases like
        # test one two 3
        remaining = replace_last(remaining, spoken, literal)
        remaining = replace_last(remaining, literal, " ")

        if remaining == before:
            # avoid infinite loops, occasionally pronounced number may be
            # different from extracted text,
            # ie pronounce(0.5) != half and extract(half) == 0.5
            # TODO fix this
            break
        current = extract_handler(remaining, short_scale, ordinals)

    # numbers were collected last-to-first
    return found[::-1]
TODO: Remove in 20.02
"""
from lingua_franca.lang.parse_common import *

View File

@ -13,920 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
from dateutil.relativedelta import relativedelta
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions, \
extract_numbers_generic
from mycroft.util.lang.format_da import pronounce_number_da
# Danish number words mapped to their value.
# The keys 'elve', 'fyrrre', 'halvtres', 'halvfjers' and 'hunderede' are
# kept exactly as they were so existing lookups keep working; the standard
# spellings are listed right after each of them.
da_numbers = {
    'nul': 0,
    'en': 1,
    'et': 1,
    'to': 2,
    'tre': 3,
    'fire': 4,
    'fem': 5,
    'seks': 6,
    'syv': 7,
    'otte': 8,
    'ni': 9,
    'ti': 10,
    'elve': 11,
    'elleve': 11,
    'tolv': 12,
    'tretten': 13,
    'fjorten': 14,
    'femten': 15,
    'seksten': 16,
    'sytten': 17,
    'atten': 18,
    'nitten': 19,
    'tyve': 20,
    'enogtyve': 21,
    'toogtyve': 22,
    'treogtyve': 23,
    'fireogtyve': 24,
    'femogtyve': 25,
    'seksogtyve': 26,
    'syvogtyve': 27,
    'otteogtyve': 28,
    'niogtyve': 29,
    'tredive': 30,
    'enogtredive': 31,
    'fyrrre': 40,
    'fyrre': 40,
    'halvtres': 50,
    'halvtreds': 50,
    'tres': 60,
    'halvfjers': 70,
    'halvfjerds': 70,
    'firs': 80,
    'halvfems': 90,
    'hunderede': 100,
    'hundrede': 100,
    'tohundrede': 200,
    'trehundrede': 300,
    'firehundrede': 400,
    'femhundrede': 500,
    'sekshundrede': 600,
    'syvhundrede': 700,
    'ottehundrede': 800,
    'nihundrede': 900,
    'tusinde': 1000,
    'million': 1000000
}
"""File kept for backwards compatibility.
def extractnumber_da(text):
    """
    Extract the first number found in a Danish text.

    Digits (including decimals), fraction words, ordinal words, the words
    in da_numbers and "a/b" fractions are recognised. Two numbers joined by
    "og" are added together. The articles "den" and "det" are ignored.

    Args:
        text (str): the string to search for a number
    Returns:
        (int) or (float): the value of the extracted number, or False when
                          no (non-zero) number was found
    """
    aWords = text.split()
    aWords = [word for word in aWords if
              word not in ["den", "det"]]
    and_pass = False      # True once a number followed by "og" was seen
    valPreAnd = False     # the value found before that "og"
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            # float() instead of an isdigit() guard so that decimals such
            # as "2.5" are kept; this matches extractnumber_de.
            val = float(word)
        elif isFractional_da(word):
            val = isFractional_da(word)
        elif isOrdinal_da(word):
            val = isOrdinal_da(word)
        else:
            if word in da_numbers:
                val = da_numbers[word]
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_da(wordNext)

                if valNext:
                    # number word followed by a fraction word
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # nothing usable after "og": keep the value before it
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "og"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
def extract_datetime_da(string, currentDate, default_time):
# Extract a date/time from a Danish utterance.
#
# Args:
#   string: the text to parse
#   currentDate: reference datetime; weekday, year and all offsets are
#     computed from it (strftime() and replace() are called on it below)
#   default_time: object whose .hour/.minute are used when the text gives
#     no absolute time; may be falsy
# Returns:
#   None when string is empty or currentDate is falsy, otherwise the list
#   [extractedDate, resultStr] where resultStr is what is left of the text
#   after the words that were consumed have been blanked out.
def clean_string(s):
"""
cleans the input string of unneeded punctuation
and capitalization among other things.
'am' is a preposition, so cannot currently be used
for 12 hour date format
"""
# NOTE(review): the .replace('', ' ') call below replaces the empty string,
# which puts a space between every character of s, so the split() that
# follows yields single characters. A marker word was presumably meant
# here - confirm against the upstream file.
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
.replace(' den ', ' ').replace(' det ', ' ').replace(' om ',
' ').replace(
' om ', ' ') \
.replace('', ' ').replace(' om ', ' ')
wordList = s.split()
# ordinal words are replaced by their number written as digits
for idx, word in enumerate(wordList):
if isOrdinal_da(word) is not False:
word = str(isOrdinal_da(word))
wordList[idx] = word
return wordList
def date_found():
return found or \
(
datestr != "" or timeStr != "" or
yearOffset != 0 or monthOffset != 0 or
dayOffset is True or hrOffset != 0 or
hrAbs or minOffset != 0 or
minAbs or secOffset != 0
)
if string == "" or not currentDate:
return None
found = False
daySpecified = False
dayOffset = False
monthOffset = 0
yearOffset = 0
dateNow = currentDate
# "%w" is the weekday as a decimal number with Sunday as 0
today = dateNow.strftime("%w")
currentYear = dateNow.strftime("%Y")
fromFlag = False
datestr = ""
hasYear = False
timeQualifier = ""
timeQualifiersList = ['tidlig',
'morgen',
'morgenen',
'formidag',
'formiddagen',
'eftermiddag',
'eftermiddagen',
'aften',
'aftenen',
'nat',
'natten']
# NOTE(review): markers contains the empty string, so "x in markers" is
# also True for words that were already blanked to "".
markers = ['i', 'om', '', 'klokken', 'ved']
# index + 1 of a day in this list equals its "%w" number (mandag -> 1)
days = ['mandag', 'tirsdag', 'onsdag',
'torsdag', 'fredag', 'lørdag', 'søndag']
months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
'juli', 'august', 'september', 'oktober', 'november',
'desember']
monthsShort = ['jan', 'feb', 'mar', 'apr', 'maj', 'juni', 'juli', 'aug',
'sep', 'okt', 'nov', 'des']
validFollowups = days + months + monthsShort
validFollowups.append("i dag")
validFollowups.append("morgen")
validFollowups.append("næste")
validFollowups.append("forige")
validFollowups.append("nu")
words = clean_string(string)
# First pass: consume the date related words (relative days, weeks,
# months, years, weekday names and month names). Consumed words are
# replaced by "" in words.
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# start/used describe the slice of words to blank out for this match
start = idx
used = 0
# save timequalifier for later
if word in timeQualifiersList:
timeQualifier = word
# parse today, tomorrow, day after tomorrow
elif word == "dag" and not fromFlag:
dayOffset = 0
used += 1
elif word == "morgen" and not fromFlag and wordPrev != "om" and \
wordPrev not in days: # morgen means tomorrow if not "am
# Morgen" and not [day of the week] morgen
dayOffset = 1
used += 1
elif word == "overmorgen" and not fromFlag:
dayOffset = 2
used += 1
# parse 5 days, 10 weeks, last week, next week
# NOTE(review): wordPrev is "" for the first word (or after a blanked
# word), in which case wordPrev[0] raises IndexError; the same pattern is
# used in the week/month/year branches below.
elif word == "dag" or word == "dage":
if wordPrev[0].isdigit():
dayOffset += int(wordPrev)
start -= 1
used = 2
# NOTE(review): "and" binds tighter than "or", so "not fromFlag" only
# applies to "uger" here.
elif word == "uge" or word == "uger" and not fromFlag:
if wordPrev[0].isdigit():
dayOffset += int(wordPrev) * 7
start -= 1
used = 2
elif wordPrev[:6] == "næste":
dayOffset = 7
start -= 1
used = 2
# NOTE(review): a 5 character slice can never equal the 6 character
# "forige"; this test (repeated several times below) is always False.
elif wordPrev[:5] == "forige":
dayOffset = -7
start -= 1
used = 2
# parse 10 months, next month, last month
elif word == "måned" and not fromFlag:
if wordPrev[0].isdigit():
monthOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev[:6] == "næste":
monthOffset = 1
start -= 1
used = 2
elif wordPrev[:5] == "forige":
monthOffset = -1
start -= 1
used = 2
# parse 5 years, next year, last year
elif word == "år" and not fromFlag:
if wordPrev[0].isdigit():
yearOffset = int(wordPrev)
start -= 1
used = 2
# NOTE(review): words come from split() and never start with a space, so
# the " næste" test cannot match; "næste" then falls into the next branch
# and yields yearOffset = -1. Looks unintended - confirm.
elif wordPrev[:6] == " næste":
yearOffset = 1
start -= 1
used = 2
elif wordPrev[:6] == "næste":
yearOffset = -1
start -= 1
used = 2
# parse Monday, Tuesday, etc., and next Monday,
# last Tuesday, etc.
elif word in days and not fromFlag:
d = days.index(word)
dayOffset = (d + 1) - int(today)
used = 1
if dayOffset < 0:
dayOffset += 7
if wordNext == "morgen":
# morgen means morning if preceded by
# the day of the week
words[idx + 1] = "tidlig"
if wordPrev[:6] == "næste":
dayOffset += 7
used += 1
start -= 1
elif wordPrev[:5] == "forige":
dayOffset -= 7
used += 1
start -= 1
# parse 15 of July, June 20th, Feb 18, 19 of February
# NOTE(review): operator precedence again - "not fromFlag" only guards the
# monthsShort test.
elif word in months or word in monthsShort and not fromFlag:
try:
m = months.index(word)
except ValueError:
m = monthsShort.index(word)
used += 1
datestr = months[m]
if wordPrev and (wordPrev[0].isdigit() or
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
if wordPrev == "of" and wordPrevPrev[0].isdigit():
datestr += " " + words[idx - 2]
used += 1
start -= 1
else:
datestr += " " + wordPrev
start -= 1
used += 1
if wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
hasYear = True
else:
hasYear = False
elif wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
if wordNextNext and wordNextNext[0].isdigit():
datestr += " " + wordNextNext
used += 1
hasYear = True
else:
hasYear = False
# parse 5 days from tomorrow, 10 weeks from next thursday,
# 2 months from July
if (
word == "fra" or word == "til" or word == "om") and wordNext \
in validFollowups:
used = 2
fromFlag = True
if wordNext == "morgenen" and \
wordPrev != "om" and \
wordPrev not in days:
# morgen means tomorrow if not "am Morgen" and not
# [day of the week] morgen:
dayOffset += 1
elif wordNext in days:
d = days.index(wordNext)
tmpOffset = (d + 1) - int(today)
used = 2
if tmpOffset < 0:
tmpOffset += 7
dayOffset += tmpOffset
elif wordNextNext and wordNextNext in days:
d = days.index(wordNextNext)
tmpOffset = (d + 1) - int(today)
used = 3
if wordNext[:6] == "næste":
tmpOffset += 7
used += 1
start -= 1
elif wordNext[:5] == "forige":
tmpOffset -= 7
used += 1
start -= 1
dayOffset += tmpOffset
if used > 0:
if start - 1 > 0 and words[start - 1].startswith("denne"):
start -= 1
used += 1
# blank out everything this match consumed
for i in range(0, used):
words[i + start] = ""
if start - 1 >= 0 and words[start - 1] in markers:
words[start - 1] = ""
found = True
daySpecified = True
# parse time
timeStr = ""
hrOffset = 0
minOffset = 0
secOffset = 0
# hrAbs/minAbs: None = no absolute time found, -1 = only a relative
# offset was given (see the "!= -1" test near the end)
hrAbs = None
minAbs = None
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""
# parse noon, midnight, morning, afternoon, evening
used = 0
if word[:6] == "middag":
hrAbs = 12
used += 1
elif word[:11] == "midnat":
hrAbs = 0
used += 1
elif word == "morgenen" or (
wordPrev == "om" and word == "morgenen") or word == "tidlig":
if not hrAbs:
hrAbs = 8
used += 1
elif word[:11] == "eftermiddag":
if not hrAbs:
hrAbs = 15
used += 1
elif word[:5] == "aften":
if not hrAbs:
hrAbs = 19
used += 1
# parse half an hour, quarter hour
elif word == "time" and \
(wordPrev in markers or wordPrevPrev in markers):
if wordPrev[:4] == "halv":
minOffset = 30
elif wordPrev == "kvarter":
minOffset = 15
elif wordPrev == "trekvarter":
minOffset = 45
else:
hrOffset = 1
if wordPrevPrev in markers:
words[idx - 2] = ""
words[idx - 1] = ""
used += 1
hrAbs = -1
minAbs = -1
# parse 5:00 am, 12:00 p.m., etc
elif word[0].isdigit():
isTime = True
strHH = ""
strMM = ""
remainder = ""
if ':' in word:
# parse colons
# "3:00 in the morning"
# stage 0 collects hour digits, stage 1 minute digits, stage 2 takes the
# rest of the word as remainder
stage = 0
length = len(word)
# NOTE(review): "i -= 1" inside this for loop does not rewind the
# iteration; range() supplies the next value regardless.
for i in range(length):
if stage == 0:
if word[i].isdigit():
strHH += word[i]
elif word[i] == ":":
stage = 1
else:
stage = 2
i -= 1
elif stage == 1:
if word[i].isdigit():
strMM += word[i]
else:
stage = 2
i -= 1
elif stage == 2:
remainder = word[i:].replace(".", "")
break
if remainder == "":
nextWord = wordNext.replace(".", "")
if nextWord == "am" or nextWord == "pm":
remainder = nextWord
used += 1
elif nextWord == "aften":
remainder = "pm"
used += 1
elif wordNext == "om" and wordNextNext == "morgenen":
remainder = "am"
used += 2
elif wordNext == "om" and wordNextNext == "eftermiddagen":
remainder = "pm"
used += 2
elif wordNext == "om" and wordNextNext == "aftenen":
remainder = "pm"
used += 2
elif wordNext == "morgen":
remainder = "am"
used += 1
elif wordNext == "eftermiddag":
remainder = "pm"
used += 1
elif wordNext == "aften":
remainder = "pm"
used += 1
elif wordNext == "i" and wordNextNext == "morgen":
remainder = "am"
used = 2
elif wordNext == "i" and wordNextNext == "eftermiddag":
remainder = "pm"
used = 2
elif wordNext == "i" and wordNextNext == "aften":
remainder = "pm"
used = 2
elif wordNext == "natten":
# NOTE(review): strHH is still a str at this point (it is built by
# concatenating characters above), so comparing it with an int raises
# TypeError on Python 3; the same holds for "strHH <= 12" and
# "strHH += 12" a few lines further down.
if strHH > 4:
remainder = "pm"
else:
remainder = "am"
used += 1
else:
if timeQualifier != "":
if strHH <= 12 and \
(timeQualifier == "aftenen" or
timeQualifier == "eftermiddagen"):
strHH += 12 # what happens when strHH is 24?
else:
# try to parse # s without colons
# 5 hours, 10 minutes etc.
length = len(word)
strNum = ""
remainder = ""
for i in range(length):
if word[i].isdigit():
strNum += word[i]
else:
remainder += word[i]
if remainder == "":
remainder = wordNext.replace(".", "").lstrip().rstrip()
if (
remainder == "pm" or
wordNext == "pm" or
remainder == "p.m." or
wordNext == "p.m."):
strHH = strNum
remainder = "pm"
used = 1
elif (
remainder == "am" or
wordNext == "am" or
remainder == "a.m." or
wordNext == "a.m."):
strHH = strNum
remainder = "am"
used = 1
else:
if wordNext == "time" and int(word) < 100:
# "in 3 hours"
hrOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "minut":
# "in 10 minutes"
minOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "sekund":
# in 5 seconds
secOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "time":
strHH = word
used += 1
isTime = True
if wordNextNext == timeQualifier:
strMM = ""
if wordNextNext[:11] == "eftermiddag":
used += 1
remainder = "pm"
elif wordNextNext == "om" and wordNextNextNext == \
"eftermiddagen":
used += 2
remainder = "pm"
elif wordNextNext[:5] == "aften":
used += 1
remainder = "pm"
elif wordNextNext == "om" and wordNextNextNext == \
"aftenen":
used += 2
remainder = "pm"
elif wordNextNext[:6] == "morgen":
used += 1
remainder = "am"
elif wordNextNext == "om" and wordNextNextNext == \
"morgenen":
used += 2
remainder = "am"
elif wordNextNext == "natten":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
elif is_numeric(wordNextNext):
strMM = wordNextNext
used += 1
if wordNextNextNext == timeQualifier:
if wordNextNextNext[:11] == "eftermiddag":
used += 1
remainder = "pm"
elif wordNextNextNext == "om" and \
wordNextNextNextNext == \
"eftermiddagen":
used += 2
remainder = "pm"
elif wordNextNextNext[:6] == "natten":
used += 1
remainder = "pm"
elif wordNextNextNext == "am" and \
wordNextNextNextNext == "natten":
used += 2
remainder = "pm"
# NOTE(review): a 7 character slice never equals the 8 character
# "morgenen"; the same applies to the [:7] == "morgenen" test further
# down.
elif wordNextNextNext[:7] == "morgenen":
used += 1
remainder = "am"
elif wordNextNextNext == "om" and \
wordNextNextNextNext == "morgenen":
used += 2
remainder = "am"
elif wordNextNextNext == "natten":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
elif wordNext == timeQualifier:
strHH = word
strMM = 00
isTime = True
if wordNext[:10] == "eftermidag":
used += 1
remainder = "pm"
elif wordNext == "om" and \
wordNextNext == "eftermiddanen":
used += 2
remainder = "pm"
elif wordNext[:7] == "aftenen":
used += 1
remainder = "pm"
elif wordNext == "om" and wordNextNext == "aftenen":
used += 2
remainder = "pm"
elif wordNext[:7] == "morgenen":
used += 1
remainder = "am"
elif wordNext == "ao" and wordNextNext == "morgenen":
used += 2
remainder = "am"
elif wordNext == "natten":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
# if timeQualifier != "":
# military = True
# else:
# isTime = False
# from here on strHH/strMM hold ints; "pm" adds 12 hours below 12 and
# "am" subtracts 12 from 12 and above
strHH = int(strHH) if strHH else 0
strMM = int(strMM) if strMM else 0
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
if strHH > 24 or strMM > 59:
isTime = False
used = 0
if isTime:
hrAbs = strHH * 1
minAbs = strMM * 1
used += 1
if used > 0:
# removed parsed words from the sentence
for i in range(used):
words[idx + i] = ""
# NOTE(review): idx is the enumerate() loop variable; changing it here
# only affects the index arithmetic in the remaining lines of this
# iteration, not which word is visited next.
if wordPrev == "tidlig":
hrOffset = -1
words[idx - 1] = ""
idx -= 1
elif wordPrev == "sen":
hrOffset = 1
words[idx - 1] = ""
idx -= 1
if idx > 0 and wordPrev in markers:
words[idx - 1] = ""
if idx > 1 and wordPrevPrev in markers:
words[idx - 2] = ""
idx += used - 1
found = True
# check that we found a date
# NOTE(review): date_found is referenced without being called; a function
# object is always truthy, so this early return never triggers.
if not date_found:
return None
if dayOffset is False:
dayOffset = 0
# perform date manipulation
extractedDate = dateNow
extractedDate = extractedDate.replace(microsecond=0,
second=0,
minute=0,
hour=0)
if datestr != "":
# the Danish month names in datestr are swapped for English ones so that
# strptime("%B %d") can parse them
en_months = ['january', 'february', 'march', 'april', 'may', 'june',
'july', 'august', 'september', 'october', 'november',
'december']
en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
'aug',
'sept', 'oct', 'nov', 'dec']
for idx, en_month in enumerate(en_months):
datestr = datestr.replace(months[idx], en_month)
for idx, en_month in enumerate(en_monthsShort):
datestr = datestr.replace(monthsShort[idx], en_month)
temp = datetime.strptime(datestr, "%B %d")
if not hasYear:
# no year given: use this year if the date is still ahead, otherwise
# next year
temp = temp.replace(year=extractedDate.year)
if extractedDate < temp:
extractedDate = extractedDate.replace(year=int(currentYear),
month=int(
temp.strftime(
"%m")),
day=int(temp.strftime(
"%d")))
else:
extractedDate = extractedDate.replace(
year=int(currentYear) + 1,
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
else:
extractedDate = extractedDate.replace(
year=int(temp.strftime("%Y")),
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
# NOTE(review): timeStr is only ever assigned "" in this function, so this
# branch is not reached; datetime(timeStr) would also be an invalid call.
if timeStr != "":
temp = datetime(timeStr)
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
minute=temp.strftime("%M"),
second=temp.strftime("%S"))
if yearOffset != 0:
extractedDate = extractedDate + relativedelta(years=yearOffset)
if monthOffset != 0:
extractedDate = extractedDate + relativedelta(months=monthOffset)
if dayOffset != 0:
extractedDate = extractedDate + relativedelta(days=dayOffset)
if hrAbs is None and minAbs is None and default_time:
hrAbs = default_time.hour
minAbs = default_time.minute
if hrAbs != -1 and minAbs != -1:
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
minutes=minAbs or 0)
if (hrAbs or minAbs) and datestr == "":
# a bare time that has already passed today means tomorrow
if not daySpecified and dateNow > extractedDate:
extractedDate = extractedDate + relativedelta(days=1)
if hrOffset != 0:
extractedDate = extractedDate + relativedelta(hours=hrOffset)
if minOffset != 0:
extractedDate = extractedDate + relativedelta(minutes=minOffset)
if secOffset != 0:
extractedDate = extractedDate + relativedelta(seconds=secOffset)
# drop an "og" that is left standing between two consumed words
# NOTE(review): words[idx + 1] raises IndexError when "og" is the last
# word, and words[idx - 1] wraps around to the last word when idx is 0.
for idx, word in enumerate(words):
if words[idx] == "og" and words[idx - 1] == "" \
and words[idx + 1] == "":
words[idx] = ""
resultStr = " ".join(words)
resultStr = ' '.join(resultStr.split())
return [extractedDate, resultStr]
def isFractional_da(input_str):
    """
    This function takes the given text and checks if it is a fraction.

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    lowered = input_str.lower()

    # "a third", in the older and the current spelling
    if lowered in ("trediedel", "tredjedel"):
        return 1.0 / 3

    # <number word> + "del", e.g. "femdel" -> 1/5
    if lowered.endswith("del"):
        denominator = da_numbers.get(lowered[:-3])
        # a zero denominator ("nuldel") is not a fraction and must not
        # raise ZeroDivisionError
        if denominator:
            return 1.0 / denominator

    # Words starting with "halv" mean a half, unless the word is itself a
    # number word: "halvtres", "halvfjers" and "halvfems" are 50, 70, 90.
    if lowered.startswith("halv") and lowered not in da_numbers:
        return 0.5

    return False
def isOrdinal_da(input_str):
    """
    This function takes the given text and checks if it is an ordinal number.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (float): False if not an ordinal, otherwise the number
        corresponding to the ordinal

    Irregular ordinals are matched by prefix; every other ordinal is only
    recognised when stripping its suffix leaves a key of da_numbers.
    """
    lowerstr = input_str.lower()

    irregular = (
        ("første", 1),
        ("anden", 2),
        ("tredie", 3),
        ("tredje", 3),
        ("fjerde", 4),
        ("femte", 5),
        ("sjette", 6),
        ("elfte", 11),     # previously returned 1
        ("ellevte", 11),
        ("tolvfte", 12),
    )
    for prefix, value in irregular:
        if lowerstr.startswith(prefix):
            # An exact match is the ordinal itself. A longer word may be a
            # cardinal that merely shares the prefix ("femten" is 15, not
            # the fifth), so number words are left to the caller.
            if lowerstr != prefix and lowerstr in da_numbers:
                return False
            return value

    # Regular ordinals. The stem is shortened cumulatively, exactly as
    # before: a suffix that was stripped stays stripped for the next test.
    if lowerstr[-3:] == "nde":
        lowerstr = lowerstr[:-3]
        if lowerstr in da_numbers:
            return da_numbers[lowerstr]

    if lowerstr[-4:] in ["ende"]:
        lowerstr = lowerstr[:-4]
        if lowerstr in da_numbers:
            return da_numbers[lowerstr]

    if lowerstr[-2:] == "te":
        lowerstr = lowerstr[:-2]
        if lowerstr in da_numbers:
            return da_numbers[lowerstr]

    return False
def normalize_da(text, remove_articles):
    """ Danish string normalization """
    articles = ["den", "det"]
    kept = []
    # split() without arguments also collapses repeated whitespace
    for token in text.split():
        if remove_articles and token in articles:
            continue
        # Convert number words into digits, e.g. "to" -> "2"
        kept.append(str(da_numbers[token]) if token in da_numbers else token)
    return " ".join(kept)
def extract_numbers_da(text, short_scale=True, ordinals=False):
    """
    Takes in a string and extracts a list of numbers.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: list of extracted numbers as floats
    """

    def _extract(utterance, short_scale=True, ordinals=False):
        # extract_numbers_generic calls its extract handler with
        # (text, short_scale, ordinals) while extractnumber_da only takes
        # the text; passing it directly raises TypeError on the first
        # call. The two extra arguments are accepted and dropped here.
        return extractnumber_da(utterance)

    return extract_numbers_generic(text, pronounce_number_da, _extract,
                                   short_scale=short_scale,
                                   ordinals=ordinals)
TODO: Remove in 20.02
"""
from lingua_franca.lang.parse_da import *

View File

@ -14,938 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
from dateutil.relativedelta import relativedelta
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions, \
extract_numbers_generic
from mycroft.util.lang.format_de import pronounce_number_de
"""File kept for backwards compatibility.
# German number words and the value each of them stands for.
de_numbers = {
    # zero and the inflected forms of "one"
    "null": 0,
    "ein": 1, "eins": 1, "eine": 1, "einer": 1,
    "einem": 1, "einen": 1, "eines": 1,
    # 2 - 9
    "zwei": 2, "drei": 3, "vier": 4, "fünf": 5,
    "sechs": 6, "sieben": 7, "acht": 8, "neun": 9,
    # 10 - 19
    "zehn": 10, "elf": 11, "zwölf": 12, "dreizehn": 13, "vierzehn": 14,
    "fünfzehn": 15, "sechzehn": 16, "siebzehn": 17, "achtzehn": 18,
    "neunzehn": 19,
    # 20 - 31
    "zwanzig": 20,
    "einundzwanzig": 21, "zweiundzwanzig": 22, "dreiundzwanzig": 23,
    "vierundzwanzig": 24, "fünfundzwanzig": 25, "sechsundzwanzig": 26,
    "siebenundzwanzig": 27, "achtundzwanzig": 28, "neunundzwanzig": 29,
    "dreißig": 30, "einunddreißig": 31,
    # remaining tens
    "vierzig": 40, "fünfzig": 50, "sechzig": 60,
    "siebzig": 70, "achtzig": 80, "neunzig": 90,
    # hundreds
    "hundert": 100, "zweihundert": 200, "dreihundert": 300,
    "vierhundert": 400, "fünfhundert": 500, "sechshundert": 600,
    "siebenhundert": 700, "achthundert": 800, "neunhundert": 900,
    # larger units
    "tausend": 1000,
    "million": 1000000,
}
def extractnumber_de(text):
    """
    Extract the first number found in a German text.

    Digits, fraction words, ordinal words, the words in de_numbers and
    "a/b" fractions are recognised; two numbers joined by "und" are added
    together. Definite articles are dropped before parsing. Indefinite
    articles cannot be dropped in German: 'ein Pferd' means both
    'one horse' and 'a horse'.

    Args:
        text (str): the string to search for a number
    Returns:
        (int) or (float): the value of the extracted number, or False when
                          no (non-zero) number was found
    """
    articles = ["der", "die", "das", "des", "den", "dem"]
    tokens = [tok for tok in text.split() if tok not in articles]

    joining = False       # an "und" has been seen after a number
    before_und = False    # the number that preceded that "und"
    number = False
    pos = 0
    while pos < len(tokens):
        token = tokens[pos]
        if is_numeric(token):
            number = float(token)
        elif isFractional_de(token):
            number = isFractional_de(token)
        elif isOrdinal_de(token):
            number = isOrdinal_de(token)
        elif token in de_numbers:
            number = de_numbers[token]
            following = tokens[pos + 1] if pos < (len(tokens) - 1) else ""
            factor = isFractional_de(following)
            if factor:
                # number word followed by a fraction word
                number = number * factor
                tokens[pos + 1] = ""

        if not number:
            # maybe a written fraction such as "2/3"
            pieces = token.split('/')
            if look_for_fractions(pieces):
                number = float(pieces[0]) / float(pieces[1])
            elif joining:
                # nothing usable after "und": keep what came before it
                number = before_und
                break
            else:
                pos += 1
                continue

        tokens[pos] = ""

        if joining:
            tokens[pos - 1] = ''  # drop the "und"
            number += before_und
        elif pos + 1 < len(tokens) and tokens[pos + 1] == 'und':
            joining = True
            before_und = number
            number = False
            pos += 2
            continue
        elif pos + 2 < len(tokens) and tokens[pos + 2] == 'und':
            joining = True
            before_und = number
            number = False
            pos += 3
            continue

        break

    return number if number else False
def extract_datetime_de(string, currentDate, default_time):
def clean_string(s):
"""
cleans the input string of unneeded punctuation
and capitalization among other things.
'am' is a preposition, so cannot currently be used
for 12 hour date format
"""
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
.replace(' der ', ' ').replace(' den ', ' ').replace(' an ',
' ').replace(
' am ', ' ') \
.replace(' auf ', ' ').replace(' um ', ' ')
wordList = s.split()
for idx, word in enumerate(wordList):
if isOrdinal_de(word) is not False:
word = str(isOrdinal_de(word))
wordList[idx] = word
return wordList
def date_found():
return found or \
(
datestr != "" or timeStr != "" or
yearOffset != 0 or monthOffset != 0 or
dayOffset is True or hrOffset != 0 or
hrAbs or minOffset != 0 or
minAbs or secOffset != 0
)
if string == "" or not currentDate:
return None
found = False
daySpecified = False
dayOffset = False
monthOffset = 0
yearOffset = 0
dateNow = currentDate
today = dateNow.strftime("%w")
currentYear = dateNow.strftime("%Y")
fromFlag = False
datestr = ""
hasYear = False
timeQualifier = ""
timeQualifiersList = ['früh', 'morgens', 'vormittag', 'vormittags',
'nachmittag', 'nachmittags', 'abend', 'abends',
'nachts']
markers = ['in', 'am', 'gegen', 'bis', 'für']
days = ['montag', 'dienstag', 'mittwoch',
'donnerstag', 'freitag', 'samstag', 'sonntag']
months = ['januar', 'februar', 'märz', 'april', 'mai', 'juni',
'juli', 'august', 'september', 'october', 'november',
'dezember']
monthsShort = ['jan', 'feb', 'mär', 'apr', 'mai', 'juni', 'juli', 'aug',
'sept', 'oct', 'nov', 'dez']
validFollowups = days + months + monthsShort
validFollowups.append("heute")
validFollowups.append("morgen")
validFollowups.append("nächste")
validFollowups.append("nächster")
validFollowups.append("nächstes")
validFollowups.append("nächsten")
validFollowups.append("nächstem")
validFollowups.append("letzte")
validFollowups.append("letzter")
validFollowups.append("letztes")
validFollowups.append("letzten")
validFollowups.append("letztem")
validFollowups.append("jetzt")
words = clean_string(string)
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# this isn't in clean string because I don't want to save back to words
if word != 'morgen' and word != 'übermorgen':
if word[-2:] == "en":
word = word[:-2] # remove en
if word != 'heute':
if word[-1:] == "e":
word = word[:-1] # remove plural for most nouns
start = idx
used = 0
# save timequalifier for later
if word in timeQualifiersList:
timeQualifier = word
# parse today, tomorrow, day after tomorrow
elif word == "heute" and not fromFlag:
dayOffset = 0
used += 1
elif word == "morgen" and not fromFlag and wordPrev != "am" and \
wordPrev not in days: # morgen means tomorrow if not "am
# Morgen" and not [day of the week] morgen
dayOffset = 1
used += 1
elif word == "übermorgen" and not fromFlag:
dayOffset = 2
used += 1
# parse 5 days, 10 weeks, last week, next week
elif word == "tag" or word == "tage":
if wordPrev[0].isdigit():
dayOffset += int(wordPrev)
start -= 1
used = 2
elif word == "woch" and not fromFlag:
if wordPrev[0].isdigit():
dayOffset += int(wordPrev) * 7
start -= 1
used = 2
elif wordPrev[:6] == "nächst":
dayOffset = 7
start -= 1
used = 2
elif wordPrev[:5] == "letzt":
dayOffset = -7
start -= 1
used = 2
# parse 10 months, next month, last month
elif word == "monat" and not fromFlag:
if wordPrev[0].isdigit():
monthOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev[:6] == "nächst":
monthOffset = 1
start -= 1
used = 2
elif wordPrev[:5] == "letzt":
monthOffset = -1
start -= 1
used = 2
# parse 5 years, next year, last year
elif word == "jahr" and not fromFlag:
if wordPrev[0].isdigit():
yearOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev[:6] == "nächst":
yearOffset = 1
start -= 1
used = 2
elif wordPrev[:6] == "nächst":
yearOffset = -1
start -= 1
used = 2
# parse Monday, Tuesday, etc., and next Monday,
# last Tuesday, etc.
elif word in days and not fromFlag:
d = days.index(word)
dayOffset = (d + 1) - int(today)
used = 1
if dayOffset < 0:
dayOffset += 7
if wordNext == "morgen": # morgen means morning if preceded by
# the day of the week
words[idx + 1] = "früh"
if wordPrev[:6] == "nächst":
dayOffset += 7
used += 1
start -= 1
elif wordPrev[:5] == "letzt":
dayOffset -= 7
used += 1
start -= 1
# parse 15 of July, June 20th, Feb 18, 19 of February
elif word in months or word in monthsShort and not fromFlag:
try:
m = months.index(word)
except ValueError:
m = monthsShort.index(word)
used += 1
datestr = months[m]
if wordPrev and (wordPrev[0].isdigit() or
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
if wordPrev == "of" and wordPrevPrev[0].isdigit():
datestr += " " + words[idx - 2]
used += 1
start -= 1
else:
datestr += " " + wordPrev
start -= 1
used += 1
if wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
hasYear = True
else:
hasYear = False
elif wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
if wordNextNext and wordNextNext[0].isdigit():
datestr += " " + wordNextNext
used += 1
hasYear = True
else:
hasYear = False
# parse 5 days from tomorrow, 10 weeks from next thursday,
# 2 months from July
if (
word == "von" or word == "nach" or word == "ab") and wordNext \
in validFollowups:
used = 2
fromFlag = True
if wordNext == "morgen" and wordPrev != "am" and \
wordPrev not in days: # morgen means tomorrow if not "am
# Morgen" and not [day of the week] morgen:
dayOffset += 1
elif wordNext in days:
d = days.index(wordNext)
tmpOffset = (d + 1) - int(today)
used = 2
if tmpOffset < 0:
tmpOffset += 7
dayOffset += tmpOffset
elif wordNextNext and wordNextNext in days:
d = days.index(wordNextNext)
tmpOffset = (d + 1) - int(today)
used = 3
if wordNext[:6] == "nächst":
tmpOffset += 7
used += 1
start -= 1
elif wordNext[:5] == "letzt":
tmpOffset -= 7
used += 1
start -= 1
dayOffset += tmpOffset
if used > 0:
if start - 1 > 0 and words[start - 1].startswith("diese"):
start -= 1
used += 1
for i in range(0, used):
words[i + start] = ""
if start - 1 >= 0 and words[start - 1] in markers:
words[start - 1] = ""
found = True
daySpecified = True
# parse time
timeStr = ""
hrOffset = 0
minOffset = 0
secOffset = 0
hrAbs = None
minAbs = None
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""
# parse noon, midnight, morning, afternoon, evening
used = 0
if word[:6] == "mittag":
hrAbs = 12
used += 1
elif word[:11] == "mitternacht":
hrAbs = 0
used += 1
elif word == "morgens" or (
wordPrev == "am" and word == "morgen") or word == "früh":
if not hrAbs:
hrAbs = 8
used += 1
elif word[:10] == "nachmittag":
if not hrAbs:
hrAbs = 15
used += 1
elif word[:5] == "abend":
if not hrAbs:
hrAbs = 19
used += 1
# parse half an hour, quarter hour
elif word == "stunde" and \
(wordPrev in markers or wordPrevPrev in markers):
if wordPrev[:4] == "halb":
minOffset = 30
elif wordPrev == "viertel":
minOffset = 15
elif wordPrev == "dreiviertel":
minOffset = 45
else:
hrOffset = 1
if wordPrevPrev in markers:
words[idx - 2] = ""
words[idx - 1] = ""
used += 1
hrAbs = -1
minAbs = -1
# parse 5:00 am, 12:00 p.m., etc
elif word[0].isdigit():
isTime = True
strHH = ""
strMM = ""
remainder = ""
if ':' in word:
# parse colons
# "3:00 in the morning"
stage = 0
length = len(word)
for i in range(length):
if stage == 0:
if word[i].isdigit():
strHH += word[i]
elif word[i] == ":":
stage = 1
else:
stage = 2
i -= 1
elif stage == 1:
if word[i].isdigit():
strMM += word[i]
else:
stage = 2
i -= 1
elif stage == 2:
remainder = word[i:].replace(".", "")
break
if remainder == "":
nextWord = wordNext.replace(".", "")
if nextWord == "am" or nextWord == "pm":
remainder = nextWord
used += 1
elif nextWord == "abends":
remainder = "pm"
used += 1
elif wordNext == "am" and wordNextNext == "morgen":
remainder = "am"
used += 2
elif wordNext == "am" and wordNextNext == "nachmittag":
remainder = "pm"
used += 2
elif wordNext == "am" and wordNextNext == "abend":
remainder = "pm"
used += 2
elif wordNext == "morgens":
remainder = "am"
used += 1
elif wordNext == "nachmittags":
remainder = "pm"
used += 1
elif wordNext == "abends":
remainder = "pm"
used += 1
elif wordNext == "heute" and wordNextNext == "morgen":
remainder = "am"
used = 2
elif wordNext == "heute" and wordNextNext == "nachmittag":
remainder = "pm"
used = 2
elif wordNext == "heute" and wordNextNext == "abend":
remainder = "pm"
used = 2
elif wordNext == "nachts":
if strHH > 4:
remainder = "pm"
else:
remainder = "am"
used += 1
else:
if timeQualifier != "":
if strHH <= 12 and \
(timeQualifier == "abends" or
timeQualifier == "nachmittags"):
strHH += 12 # what happens when strHH is 24?
else:
# try to parse # s without colons
# 5 hours, 10 minutes etc.
length = len(word)
strNum = ""
remainder = ""
for i in range(length):
if word[i].isdigit():
strNum += word[i]
else:
remainder += word[i]
if remainder == "":
remainder = wordNext.replace(".", "").lstrip().rstrip()
if (
remainder == "pm" or
wordNext == "pm" or
remainder == "p.m." or
wordNext == "p.m."):
strHH = strNum
remainder = "pm"
used = 1
elif (
remainder == "am" or
wordNext == "am" or
remainder == "a.m." or
wordNext == "a.m."):
strHH = strNum
remainder = "am"
used = 1
else:
if wordNext == "stund" and int(word) < 100:
# "in 3 hours"
hrOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "minut":
# "in 10 minutes"
minOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "sekund":
# in 5 seconds
secOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "uhr":
strHH = word
used += 1
isTime = True
if wordNextNext == timeQualifier:
strMM = ""
if wordNextNext[:10] == "nachmittag":
used += 1
remainder = "pm"
elif wordNextNext == "am" and wordNextNextNext == \
"nachmittag":
used += 2
remainder = "pm"
elif wordNextNext[:5] == "abend":
used += 1
remainder = "pm"
elif wordNextNext == "am" and wordNextNextNext == \
"abend":
used += 2
remainder = "pm"
elif wordNextNext[:7] == "morgens":
used += 1
remainder = "am"
elif wordNextNext == "am" and wordNextNextNext == \
"morgen":
used += 2
remainder = "am"
elif wordNextNext == "nachts":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
elif is_numeric(wordNextNext):
strMM = wordNextNext
used += 1
if wordNextNextNext == timeQualifier:
if wordNextNextNext[:10] == "nachmittag":
used += 1
remainder = "pm"
elif wordNextNextNext == "am" and \
wordNextNextNextNext == "nachmittag":
used += 2
remainder = "pm"
elif wordNextNextNext[:5] == "abend":
used += 1
remainder = "pm"
elif wordNextNextNext == "am" and \
wordNextNextNextNext == "abend":
used += 2
remainder = "pm"
elif wordNextNextNext[:7] == "morgens":
used += 1
remainder = "am"
elif wordNextNextNext == "am" and \
wordNextNextNextNext == "morgen":
used += 2
remainder = "am"
elif wordNextNextNext == "nachts":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
elif wordNext == timeQualifier:
strHH = word
strMM = 00
isTime = True
if wordNext[:10] == "nachmittag":
used += 1
remainder = "pm"
elif wordNext == "am" and wordNextNext == "nachmittag":
used += 2
remainder = "pm"
elif wordNext[:5] == "abend":
used += 1
remainder = "pm"
elif wordNext == "am" and wordNextNext == "abend":
used += 2
remainder = "pm"
elif wordNext[:7] == "morgens":
used += 1
remainder = "am"
elif wordNext == "am" and wordNextNext == "morgen":
used += 2
remainder = "am"
elif wordNext == "nachts":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
# if timeQualifier != "":
# military = True
# else:
# isTime = False
strHH = int(strHH) if strHH else 0
strMM = int(strMM) if strMM else 0
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
if strHH > 24 or strMM > 59:
isTime = False
used = 0
if isTime:
hrAbs = strHH * 1
minAbs = strMM * 1
used += 1
if used > 0:
# removed parsed words from the sentence
for i in range(used):
words[idx + i] = ""
if wordPrev == "Uhr":
words[words.index(wordPrev)] = ""
if wordPrev == "früh":
hrOffset = -1
words[idx - 1] = ""
idx -= 1
elif wordPrev == "spät":
hrOffset = 1
words[idx - 1] = ""
idx -= 1
if idx > 0 and wordPrev in markers:
words[idx - 1] = ""
if idx > 1 and wordPrevPrev in markers:
words[idx - 2] = ""
idx += used - 1
found = True
# check that we found a date
if not date_found:
return None
if dayOffset is False:
dayOffset = 0
# perform date manipulation
extractedDate = dateNow
extractedDate = extractedDate.replace(microsecond=0,
second=0,
minute=0,
hour=0)
if datestr != "":
en_months = ['january', 'february', 'march', 'april', 'may', 'june',
'july', 'august', 'september', 'october', 'november',
'december']
en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
'aug',
'sept', 'oct', 'nov', 'dec']
for idx, en_month in enumerate(en_months):
datestr = datestr.replace(months[idx], en_month)
for idx, en_month in enumerate(en_monthsShort):
datestr = datestr.replace(monthsShort[idx], en_month)
temp = datetime.strptime(datestr, "%B %d")
if not hasYear:
temp = temp.replace(year=extractedDate.year)
if extractedDate < temp:
extractedDate = extractedDate.replace(year=int(currentYear),
month=int(
temp.strftime(
"%m")),
day=int(temp.strftime(
"%d")))
else:
extractedDate = extractedDate.replace(
year=int(currentYear) + 1,
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
else:
extractedDate = extractedDate.replace(
year=int(temp.strftime("%Y")),
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
if timeStr != "":
temp = datetime(timeStr)
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
minute=temp.strftime("%M"),
second=temp.strftime("%S"))
if yearOffset != 0:
extractedDate = extractedDate + relativedelta(years=yearOffset)
if monthOffset != 0:
extractedDate = extractedDate + relativedelta(months=monthOffset)
if dayOffset != 0:
extractedDate = extractedDate + relativedelta(days=dayOffset)
if hrAbs is None and minAbs is None and default_time:
hrAbs = default_time.hour
minAbs = default_time.minute
if hrAbs != -1 and minAbs != -1:
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
minutes=minAbs or 0)
if (hrAbs or minAbs) and datestr == "":
if not daySpecified and dateNow > extractedDate:
extractedDate = extractedDate + relativedelta(days=1)
if hrOffset != 0:
extractedDate = extractedDate + relativedelta(hours=hrOffset)
if minOffset != 0:
extractedDate = extractedDate + relativedelta(minutes=minOffset)
if secOffset != 0:
extractedDate = extractedDate + relativedelta(seconds=secOffset)
for idx, word in enumerate(words):
if words[idx] == "und" and words[idx - 1] == "" \
and words[idx + 1] == "":
words[idx] = ""
resultStr = " ".join(words)
resultStr = ' '.join(resultStr.split())
return [extractedDate, resultStr]
def isFractional_de(input_str):
    """
    Check whether a German word names a fraction.

    Words starting with "halb" map to 0.5 and "drittel" maps to 1/3. Any
    other word ending in "tel" is reduced to a number stem which is looked
    up in the module-level ``de_numbers`` mapping; the fraction is one over
    the number found there.

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    # Lower-case once so that every test below agrees on case (previously
    # the "tel" suffix test was run on the raw, un-lowered string).
    word = input_str.lower()

    if word.startswith("halb"):
        return 0.5
    if word == "drittel":
        return 1.0 / 3
    if not word.endswith("tel"):
        return False

    # Candidate stems, most specific suffix first so that words which
    # already resolved keep resolving to the same value:
    #   "-stel"  e.g. "hundertstel" -> "hundert"
    #   "-tel"   e.g. "fünftel" -> "fünf"; also rescues words such as
    #            "sechstel", whose "s" belongs to the stem
    #   "-el"    for stems that themselves end in "t", e.g. "achtel"
    # NOTE(review): the last two examples assume de_numbers has "sechs" and
    # "acht" keys -- confirm against the table.
    stems = []
    if word.endswith("stel"):
        stems.append(word[:-4])
    stems.append(word[:-3])
    stems.append(word[:-2])

    for stem in stems:
        if stem in de_numbers:
            denominator = de_numbers[stem]
            if denominator:  # a zero entry cannot be used as a denominator
                return 1.0 / denominator

    return False
def isOrdinal_de(input_str):
    """
    Check whether a German word is an ordinal number.

    The irregular ordinals for 1, 3, 7 and 8 are recognised by prefix. All
    other words are matched by stripping one ordinal suffix and looking the
    remaining stem up in the module-level ``de_numbers`` mapping, so only
    ordinals whose cardinal is a key of ``de_numbers`` are recognised.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (float): False if not an ordinal, otherwise the number
        corresponding to the ordinal
    """
    lowerstr = input_str.lower()

    # Irregular forms, matched by prefix so inflected endings are covered.
    if lowerstr.startswith("erste"):
        return 1
    if lowerstr.startswith("dritte"):
        return 3
    if lowerstr.startswith("siebte"):
        return 7
    if lowerstr.startswith("achte"):
        return 8

    # Suffixes in the order the original code tried them: "-ste*" (used
    # from 20 upwards) before "-te*" (below 20).
    suffixes = ("ste", "ster", "stes", "sten", "stem",
                "te", "ter", "tes", "ten", "tem")
    for suffix in suffixes:
        if lowerstr.endswith(suffix):
            # Always strip from the full word. Previously a failed strip
            # overwrote the word, so the later suffixes were tested against
            # an already truncated string (e.g. "sechste" lost "ste" and
            # the "-te" rule never saw the full word).
            stem = lowerstr[:-len(suffix)]
            if stem in de_numbers:
                return de_numbers[stem]

    return False
def normalize_de(text, remove_articles):
    """
    German string normalization.

    The text is split on whitespace (which also collapses repeated
    spaces). Each word is optionally dropped if it is a definite article,
    "net"/"nett" is rewritten to "nicht", and words that are keys of the
    module-level ``de_numbers`` mapping are replaced by their value as a
    string. The surviving words are joined with single spaces.

    Args:
        text (str): the string to normalize
        remove_articles (bool): drop "der", "die", "das", "des", "den", "dem"
    Returns:
        (str): the normalized string
    """
    articles = ["der", "die", "das", "des", "den", "dem"]
    # colloquial negations and their standard form
    expansions = {"net": "nicht", "nett": "nicht"}

    pieces = []
    for token in text.split():
        if remove_articles and token in articles:
            continue

        token = expansions.get(token, token)

        # number words become digits
        if token in de_numbers:
            token = str(de_numbers[token])

        pieces.append(token)

    return " ".join(pieces)
def extract_numbers_de(text, short_scale=True, ordinals=False):
    """
    Extract every number found in a string.

    Delegates to ``extract_numbers_generic``, handing it the German
    ``pronounce_number_de`` and ``extractnumber_de`` helpers.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): use "short scale" (True, the default) or "long
            scale" for large numbers -- over a million.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead
            of 1/3
    Returns:
        list: list of extracted numbers as floats
    """
    options = {"short_scale": short_scale, "ordinals": ordinals}
    return extract_numbers_generic(text, pronounce_number_de,
                                   extractnumber_de, **options)
TODO: Remove in 20.02
"""
from lingua_franca.lang.parse_de import *

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -13,765 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
from dateutil.relativedelta import relativedelta
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
"""File kept for backwards compatibility.
def extractnumber_sv(text):
"""
Extract a number from a Swedish string.
The text is split on whitespace and scanned word by word. A word can
contribute a value as a numeric token (is_numeric), one of the ordinals
"första".."sjätte", a fraction word (is_fractional_sv), one of the
cardinals "en"/"ett".."tio", or an "a/b" token (look_for_fractions).
Two values joined by "och" are added together.
NOTE(review): indentation is missing in this view, so the exact nesting
of the statements below cannot be confirmed from here.
Args:
text (str): the string to extract a number from
Returns:
(int) or (float): The value of extracted number, or False when no
number was found
"""
aWords = text.split()
# and_pass is set once an "och" has been seen; valPreAnd keeps the value
# found before it.
and_pass = False
valPreAnd = False
val = False
count = 0
while count < len(aWords):
word = aWords[count]
if is_numeric(word):
val = float(word)
# ordinals 1-6
elif word == "första":
val = 1
elif word == "andra":
val = 2
elif word == "tredje":
val = 3
elif word == "fjärde":
val = 4
elif word == "femte":
val = 5
elif word == "sjätte":
val = 6
elif is_fractional_sv(word):
val = is_fractional_sv(word)
else:
# cardinals 1-10
if word == "en":
val = 1
if word == "ett":
val = 1
elif word == "två":
val = 2
elif word == "tre":
val = 3
elif word == "fyra":
val = 4
elif word == "fem":
val = 5
elif word == "sex":
val = 6
elif word == "sju":
val = 7
elif word == "åtta":
val = 8
elif word == "nio":
val = 9
elif word == "tio":
val = 10
if val:
# a following fraction word multiplies the value just found
if count < (len(aWords) - 1):
wordNext = aWords[count + 1]
else:
wordNext = ""
valNext = is_fractional_sv(wordNext)
if valNext:
val = val * valNext
aWords[count + 1] = ""
if not val:
# look for fractions like "2/3"
aPieces = word.split('/')
if look_for_fractions(aPieces):
val = float(aPieces[0]) / float(aPieces[1])
elif and_pass:
# added to value, quit here
val = valPreAnd
break
else:
count += 1
continue
aWords[count] = ""
if and_pass:
aWords[count - 1] = '' # remove "och"
val += valPreAnd
# "och" one or two words ahead: remember the value and keep scanning
elif count + 1 < len(aWords) and aWords[count + 1] == 'och':
and_pass = True
valPreAnd = val
val = False
count += 2
continue
elif count + 2 < len(aWords) and aWords[count + 2] == 'och':
and_pass = True
valPreAnd = val
val = False
count += 3
continue
break
if not val:
return False
return val
def extract_datetime_sv(string, currentDate, default_time):
"""
Extract a date and/or time from a Swedish string.
The words of the string are scanned twice: first for date expressions
(day/week/month/year offsets, weekday names, month names), then for time
expressions. Words that were consumed are blanked out.
NOTE(review): indentation is missing in this view, so the exact nesting
of the statements below cannot be confirmed from here.
Args:
string (str): the text to parse
currentDate: reference "now"; strftime() and replace() are called on
it -- presumably a datetime, confirm against callers
default_time: object whose .hour and .minute are used when no time was
parsed; may be falsy
Returns:
None when string is empty or currentDate is falsy, otherwise the list
[extractedDate, resultStr] where resultStr is the remaining text
"""
def clean_string(s):
"""
cleans the input string of unneeded punctuation and capitalization
among other things.
"""
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
.replace(' den ', ' ').replace(' en ', ' ')
wordList = s.split()
for idx, word in enumerate(wordList):
word = word.replace("'s", "")
# NOTE(review): these are the English ordinal suffixes
ordinals = ["rd", "st", "nd", "th"]
if word[0].isdigit():
for ordinal in ordinals:
if ordinal in word:
word = word.replace(ordinal, "")
wordList[idx] = word
return wordList
def date_found():
return found or \
(
datestr != "" or timeStr != "" or
yearOffset != 0 or monthOffset != 0 or
dayOffset is True or hrOffset != 0 or
hrAbs or minOffset != 0 or
minAbs or secOffset != 0
)
if string == "" or not currentDate:
return None
found = False
daySpecified = False
dayOffset = False
monthOffset = 0
yearOffset = 0
dateNow = currentDate
today = dateNow.strftime("%w")
currentYear = dateNow.strftime("%Y")
fromFlag = False
datestr = ""
hasYear = False
timeQualifier = ""
timeQualifiersList = ['morgon', 'förmiddag', 'eftermiddag', 'kväll']
# NOTE(review): words are split on whitespace, so the two-word marker
# 'den här' can never equal a single word.
markers = ['', 'i', 'den här', 'kring', 'efter']
days = ['måndag', 'tisdag', 'onsdag', 'torsdag',
'fredag', 'lördag', 'söndag']
months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
'juli', 'augusti', 'september', 'oktober', 'november',
'december']
# NOTE(review): several of these abbreviations are English
# ('may', 'june', 'july', 'sept', 'oct').
monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
'sept', 'oct', 'nov', 'dec']
words = clean_string(string)
# first pass: date expressions
for idx, word in enumerate(words):
if word == "":
continue
# NOTE(review): wordPrev/wordPrevPrev/wordNext default to "", yet several
# branches below index [0] on them without checking for emptiness first,
# which raises IndexError on an empty string.
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# this isn't in clean string because I don't want to save back to words
word = word.rstrip('s')
start = idx
used = 0
# save timequalifier for later
if word in timeQualifiersList:
timeQualifier = word
# parse today, tomorrow, day after tomorrow
elif word == "idag" and not fromFlag:
dayOffset = 0
used += 1
elif word == "imorgon" and not fromFlag:
dayOffset = 1
used += 1
# NOTE(review): `and` binds tighter than `or`, so `not fromFlag` only
# guards the second alternative here.
elif word == "morgondagen" or word == "morgondagens" and not fromFlag:
dayOffset = 1
used += 1
elif word == "övermorgon" and not fromFlag:
dayOffset = 2
used += 1
# parse 5 days, 10 weeks, last week, next week
elif word == "dag" or word == "dagar":
if wordPrev[0].isdigit():
dayOffset += int(wordPrev)
start -= 1
used = 2
# NOTE(review): same precedence caveat -- `not fromFlag` only applies to
# "veckor".
elif word == "vecka" or word == "veckor" and not fromFlag:
if wordPrev[0].isdigit():
dayOffset += int(wordPrev) * 7
start -= 1
used = 2
elif wordPrev == "nästa":
dayOffset = 7
start -= 1
used = 2
elif wordPrev == "förra":
dayOffset = -7
start -= 1
used = 2
# parse 10 months, next month, last month
elif word == "månad" and not fromFlag:
if wordPrev[0].isdigit():
monthOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev == "nästa":
monthOffset = 1
start -= 1
used = 2
elif wordPrev == "förra":
monthOffset = -1
start -= 1
used = 2
# parse 5 years, next year, last year
elif word == "år" and not fromFlag:
if wordPrev[0].isdigit():
yearOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev == "nästa":
yearOffset = 1
start -= 1
used = 2
elif wordPrev == "förra":
yearOffset = -1
start -= 1
used = 2
# parse Monday, Tuesday, etc., and next Monday,
# last Tuesday, etc.
elif word in days and not fromFlag:
d = days.index(word)
dayOffset = (d + 1) - int(today)
used = 1
if dayOffset < 0:
dayOffset += 7
if wordPrev == "nästa":
dayOffset += 7
used += 1
start -= 1
elif wordPrev == "förra":
dayOffset -= 7
used += 1
start -= 1
# parse 15 of July, June 20th, Feb 18, 19 of February
# NOTE(review): same precedence caveat -- `not fromFlag` only applies to
# the monthsShort test. The "of" checks below are English.
elif word in months or word in monthsShort and not fromFlag:
try:
m = months.index(word)
except ValueError:
m = monthsShort.index(word)
used += 1
datestr = months[m]
if wordPrev and (wordPrev[0].isdigit() or
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
if wordPrev == "of" and wordPrevPrev[0].isdigit():
datestr += " " + words[idx - 2]
used += 1
start -= 1
else:
datestr += " " + wordPrev
start -= 1
used += 1
if wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
hasYear = True
else:
hasYear = False
elif wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
if wordNextNext and wordNextNext[0].isdigit():
datestr += " " + wordNextNext
used += 1
hasYear = True
else:
hasYear = False
# parse 5 days from tomorrow, 10 weeks from next thursday,
# 2 months from July
validFollowups = days + months + monthsShort
validFollowups.append("idag")
validFollowups.append("imorgon")
validFollowups.append("nästa")
validFollowups.append("förra")
validFollowups.append("nu")
if (word == "från" or word == "efter") and wordNext in validFollowups:
used = 2
fromFlag = True
if wordNext == "imorgon":
dayOffset += 1
elif wordNext in days:
d = days.index(wordNext)
tmpOffset = (d + 1) - int(today)
used = 2
if tmpOffset < 0:
tmpOffset += 7
dayOffset += tmpOffset
elif wordNextNext and wordNextNext in days:
d = days.index(wordNextNext)
tmpOffset = (d + 1) - int(today)
used = 3
if wordNext == "nästa":
tmpOffset += 7
used += 1
start -= 1
elif wordNext == "förra":
tmpOffset -= 7
used += 1
start -= 1
dayOffset += tmpOffset
if used > 0:
if start - 1 > 0 and words[start - 1] == "denna":
start -= 1
used += 1
# blank out the words that were consumed
for i in range(0, used):
words[i + start] = ""
if start - 1 >= 0 and words[start - 1] in markers:
words[start - 1] = ""
found = True
daySpecified = True
# parse time
# NOTE(review): timeStr is never assigned anything but "" in this function.
timeStr = ""
hrOffset = 0
minOffset = 0
secOffset = 0
# hrAbs/minAbs: None means "no time found", -1 is set by the relative
# ("in N hours/minutes") branches
hrAbs = None
minAbs = None
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# parse noon, midnight, morning, afternoon, evening
used = 0
if word == "middag":
hrAbs = 12
used += 1
elif word == "midnatt":
hrAbs = 0
used += 1
elif word == "morgon":
if not hrAbs:
hrAbs = 8
used += 1
elif word == "förmiddag":
if not hrAbs:
hrAbs = 10
used += 1
elif word == "eftermiddag":
if not hrAbs:
hrAbs = 15
used += 1
elif word == "kväll":
if not hrAbs:
hrAbs = 19
used += 1
# parse half an hour, quarter hour
# NOTE(review): markers contains '', and wordPrev/wordPrevPrev are '' at
# the start of the list or after a word was blanked, so this branch is
# taken in those cases as well.
elif wordPrev in markers or wordPrevPrev in markers:
if word == "halvtimme" or word == "halvtimma":
minOffset = 30
elif word == "kvart":
minOffset = 15
elif word == "timme" or word == "timma":
hrOffset = 1
words[idx - 1] = ""
used += 1
hrAbs = -1
minAbs = -1
# parse 5:00 am, 12:00 p.m., etc
# NOTE(review): from here on the keywords that are matched ("tonight",
# "in the morning", "hours", "minutes", "o'clock", ...) are English, not
# Swedish.
elif word[0].isdigit():
isTime = True
strHH = ""
strMM = ""
remainder = ""
if ':' in word:
# parse colons
# "3:00 in the morning"
stage = 0
length = len(word)
# NOTE(review): `i -= 1` inside a `for ... in range` loop does not rewind
# the iteration; the next pass continues with the following index.
for i in range(length):
if stage == 0:
if word[i].isdigit():
strHH += word[i]
elif word[i] == ":":
stage = 1
else:
stage = 2
i -= 1
elif stage == 1:
if word[i].isdigit():
strMM += word[i]
else:
stage = 2
i -= 1
elif stage == 2:
remainder = word[i:].replace(".", "")
break
if remainder == "":
nextWord = wordNext.replace(".", "")
if nextWord == "am" or nextWord == "pm":
remainder = nextWord
used += 1
elif nextWord == "tonight":
remainder = "pm"
used += 1
# NOTE(review): words[idx + 3] is not bounds-checked.
elif wordNext == "in" and wordNextNext == "the" and \
words[idx + 3] == "morning":
remainder = "am"
used += 3
elif wordNext == "in" and wordNextNext == "the" and \
words[idx + 3] == "afternoon":
remainder = "pm"
used += 3
elif wordNext == "in" and wordNextNext == "the" and \
words[idx + 3] == "evening":
remainder = "pm"
used += 3
elif wordNext == "in" and wordNextNext == "morning":
remainder = "am"
used += 2
elif wordNext == "in" and wordNextNext == "afternoon":
remainder = "pm"
used += 2
elif wordNext == "in" and wordNextNext == "evening":
remainder = "pm"
used += 2
elif wordNext == "this" and wordNextNext == "morning":
remainder = "am"
used = 2
elif wordNext == "this" and wordNextNext == "afternoon":
remainder = "pm"
used = 2
elif wordNext == "this" and wordNextNext == "evening":
remainder = "pm"
used = 2
elif wordNext == "at" and wordNextNext == "night":
# NOTE(review): strHH is a str at this point; comparing it with an int
# raises TypeError on Python 3.
if strHH > 5:
remainder = "pm"
else:
remainder = "am"
used += 2
else:
if timeQualifier != "":
# NOTE(review): strHH is still a str here, so both the comparison and the
# `+= 12` fail on Python 3. timeQualifier is only ever set from
# timeQualifiersList (Swedish), so the English values tested below never
# match.
if strHH <= 12 and \
(timeQualifier == "evening" or
timeQualifier == "afternoon"):
strHH += 12
else:
# try to parse # s without colons
# 5 hours, 10 minutes etc.
length = len(word)
strNum = ""
remainder = ""
for i in range(length):
if word[i].isdigit():
strNum += word[i]
else:
remainder += word[i]
if remainder == "":
remainder = wordNext.replace(".", "").lstrip().rstrip()
if (
remainder == "pm" or
wordNext == "pm" or
remainder == "p.m." or
wordNext == "p.m."):
strHH = strNum
remainder = "pm"
used = 1
elif (
remainder == "am" or
wordNext == "am" or
remainder == "a.m." or
wordNext == "a.m."):
strHH = strNum
remainder = "am"
used = 1
else:
if wordNext == "pm" or wordNext == "p.m.":
strHH = strNum
remainder = "pm"
used = 1
elif wordNext == "am" or wordNext == "a.m.":
strHH = strNum
remainder = "am"
used = 1
# NOTE(review): on Python 3 `/` is true division, so strHH becomes a
# float and strMM below evaluates to 0 (the minutes are lost); the same
# applies to the `int(word) > 100` branch further down.
elif (
int(word) > 100 and
(
wordPrev == "o" or
wordPrev == "oh"
)):
# 0800 hours (pronounced oh-eight-hundred)
strHH = int(word) / 100
strMM = int(word) - strHH * 100
if wordNext == "hours":
used += 1
# NOTE(review): `int(word) < 100 and int(word) > 2400` can never be true,
# so this branch is unreachable as written.
elif (
wordNext == "hours" and
word[0] != '0' and
(
int(word) < 100 and
int(word) > 2400
)):
# "in 3 hours"
hrOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "minutes":
# "in 10 minutes"
minOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "seconds":
# in 5 seconds
secOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif int(word) > 100:
strHH = int(word) / 100
strMM = int(word) - strHH * 100
if wordNext == "hours":
used += 1
# NOTE(review): wordNext may be "" here, in which case [0] raises
# IndexError.
elif wordNext[0].isdigit():
strHH = word
strMM = wordNext
used += 1
if wordNextNext == "hours":
used += 1
elif (
wordNext == "" or wordNext == "o'clock" or
(
wordNext == "in" and
(
wordNextNext == "the" or
wordNextNext == timeQualifier
)
)):
strHH = word
strMM = 00
if wordNext == "o'clock":
used += 1
if wordNext == "in" or wordNextNext == "in":
used += (1 if wordNext == "in" else 2)
# NOTE(review): `in timeQualifier` is a substring test against a str, and
# words.index(...) + 1 is not bounds-checked in this condition.
if (wordNextNext and
wordNextNext in timeQualifier or
(words[words.index(wordNextNext) + 1] and
words[words.index(wordNextNext) + 1] in
timeQualifier)):
if (wordNextNext == "afternoon" or
(len(words) >
words.index(wordNextNext) + 1 and
words[words.index(
wordNextNext) + 1] == "afternoon")):
remainder = "pm"
if (wordNextNext == "evening" or
(len(words) >
(words.index(wordNextNext) + 1) and
words[words.index(
wordNextNext) + 1] == "evening")):
remainder = "pm"
if (wordNextNext == "morning" or
(len(words) >
words.index(wordNextNext) + 1 and
words[words.index(
wordNextNext) + 1] == "morning")):
remainder = "am"
else:
isTime = False
# convert the collected hour/minute to ints and apply the am/pm remainder
strHH = int(strHH) if strHH else 0
strMM = int(strMM) if strMM else 0
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
if strHH > 24 or strMM > 59:
isTime = False
used = 0
if isTime:
hrAbs = strHH * 1
minAbs = strMM * 1
used += 1
if used > 0:
# removed parsed words from the sentence
for i in range(used):
words[idx + i] = ""
if wordPrev == "o" or wordPrev == "oh":
words[words.index(wordPrev)] = ""
if wordPrev == "early":
hrOffset = -1
words[idx - 1] = ""
idx -= 1
elif wordPrev == "late":
hrOffset = 1
words[idx - 1] = ""
idx -= 1
if idx > 0 and wordPrev in markers:
words[idx - 1] = ""
if idx > 1 and wordPrevPrev in markers:
words[idx - 2] = ""
idx += used - 1
found = True
# check that we found a date
# NOTE(review): date_found is referenced without being called; a function
# object is always truthy, so this early return is never taken.
if not date_found:
return None
if dayOffset is False:
dayOffset = 0
# perform date manipulation
extractedDate = dateNow
extractedDate = extractedDate.replace(microsecond=0,
second=0,
minute=0,
hour=0)
if datestr != "":
# NOTE(review): datestr starts with a Swedish month name and may carry a
# third (year) component while the format is "%B %d" -- presumably this
# only parses under a matching locale and without a year; confirm.
temp = datetime.strptime(datestr, "%B %d")
if not hasYear:
temp = temp.replace(year=extractedDate.year)
# a date that is not in the future is moved to next year
if extractedDate < temp:
extractedDate = extractedDate.replace(year=int(currentYear),
month=int(
temp.strftime(
"%m")),
day=int(temp.strftime(
"%d")))
else:
extractedDate = extractedDate.replace(
year=int(currentYear) + 1,
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
else:
extractedDate = extractedDate.replace(
year=int(temp.strftime("%Y")),
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
# NOTE(review): not reachable, since timeStr is always "" (see above).
if timeStr != "":
temp = datetime(timeStr)
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
minute=temp.strftime("%M"),
second=temp.strftime("%S"))
if yearOffset != 0:
extractedDate = extractedDate + relativedelta(years=yearOffset)
if monthOffset != 0:
extractedDate = extractedDate + relativedelta(months=monthOffset)
if dayOffset != 0:
extractedDate = extractedDate + relativedelta(days=dayOffset)
# fall back to default_time when no time was parsed
if hrAbs is None and minAbs is None and default_time:
hrAbs = default_time.hour
minAbs = default_time.minute
if hrAbs != -1 and minAbs != -1:
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
minutes=minAbs or 0)
# a bare time that has already passed today is moved to tomorrow
if (hrAbs or minAbs) and datestr == "":
if not daySpecified and dateNow > extractedDate:
extractedDate = extractedDate + relativedelta(days=1)
if hrOffset != 0:
extractedDate = extractedDate + relativedelta(hours=hrOffset)
if minOffset != 0:
extractedDate = extractedDate + relativedelta(minutes=minOffset)
if secOffset != 0:
extractedDate = extractedDate + relativedelta(seconds=secOffset)
# NOTE(review): this removes an English "and" between two consumed words
# (not "och"); words[idx + 1] is not bounds-checked.
for idx, word in enumerate(words):
if words[idx] == "and" and words[idx - 1] == "" and words[
idx + 1] == "":
words[idx] = ""
resultStr = " ".join(words)
resultStr = ' '.join(resultStr.split())
return [extractedDate, resultStr]
def is_fractional_sv(input_str):
    """
    Check whether a Swedish word names a fraction.

    Inflection endings are stripped first, then the word is looked up in a
    list of fraction names whose position gives the denominator
    ("hel" = 1/1, "halv" = 1/2, ... "tolftedel" = 1/12). "kvart" and
    "trekvart" are handled separately.

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    # Lower-case once. Previously membership was tested on the lowered
    # string but the index was looked up with the original one, so a
    # capitalised word such as "Halv" raised ValueError.
    word = input_str.lower()

    # Strip endings one after another, in this order, as the original did:
    # "femtedelars", "femtedelar", "halva", "halvs".
    for ending in ("ars", "ar", "a", "s"):
        if word.endswith(ending):
            word = word[:-len(ending)]

    fractions = ["hel", "halv", "tredjedel", "fjärdedel", "femtedel",
                 "sjättedel", "sjundedel", "åttondel", "niondel", "tiondel",
                 "elftedel", "tolftedel"]
    if word in fractions:
        return 1.0 / (fractions.index(word) + 1)
    if word == "kvart":
        return 1.0 / 4
    if word == "trekvart":
        return 3.0 / 4

    return False
def normalize_sv(text, remove_articles):
    """
    Swedish string normalization.

    Splits the text on whitespace (collapsing repeated spaces) and replaces
    the number words "noll" .. "tjugo" by their digits; "en" is treated
    like "ett". The words are joined back with single spaces.

    Args:
        text (str): the string to normalize
        remove_articles (bool): accepted for interface compatibility; it is
            not used by this function
    Returns:
        (str): the normalized string
    """
    number_words = ("noll", "ett", "två", "tre", "fyra", "fem", "sex",
                    "sju", "åtta", "nio", "tio", "elva", "tolv",
                    "tretton", "fjorton", "femton", "sexton",
                    "sjutton", "arton", "nitton", "tjugo")
    # number word -> digit string, e.g. "två" -> "2"
    digits = {name: str(value) for value, name in enumerate(number_words)}
    digits["en"] = digits["ett"]

    return " ".join(digits.get(token, token) for token in text.split())
TODO: Remove in 20.02
"""
from lingua_franca.lang.parse_sv import *