Add danish 2 - initial work to get danish to core (#2033)
* Danish formatting and parsing functionspull/2036/head
parent
3f827f7faf
commit
6c0cd8d427
|
@ -0,0 +1 @@
|
|||
og
|
|
@ -0,0 +1,4 @@
|
|||
Jeg har problemer med at kommunikere med Mycroft serverne. Giv mig et par minutter, før du prøver at tale med mig.
|
||||
Jeg har problemer med at kommunikere med Mycroft serverne. Vent et par minutter, før du prøver at tale med mig.
|
||||
Det ser ud til, at jeg ikke kan oprette forbindelse til Mycroft-serverne. Giv mig et par minutter, før du prøver at tale med mig.
|
||||
Det ser ud til, at jeg ikke kan oprette forbindelse til Mycroft-serverne. Vent et par minutter, før du prøver at tale med mig.
|
|
@ -0,0 +1,3 @@
|
|||
afbryd det
|
||||
ignorer det
|
||||
glem det
|
|
@ -0,0 +1,2 @@
|
|||
Leder efter opdateringer
|
||||
Et øjeblik, mens jeg opdaterer mig selv
|
|
@ -27,6 +27,6 @@
|
|||
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"}
|
||||
},
|
||||
"test_nice_date_time": {
|
||||
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten klokken 2017-01-31 13:22:03"}
|
||||
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten klokken et toogtyve om eftermiddagen"}
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
dag
|
|
@ -0,0 +1 @@
|
|||
dage
|
|
@ -0,0 +1 @@
|
|||
time
|
|
@ -0,0 +1 @@
|
|||
timer
|
|
@ -0,0 +1,4 @@
|
|||
Desværre, det forstod jeg ikke
|
||||
Jeg er bange for, at jeg ikke kunne forstå det
|
||||
Kan du sige det igen?
|
||||
Kan du gentage det?
|
|
@ -0,0 +1 @@
|
|||
Interaktionsdata vil ikke længere blive sendt til Mycroft AI.
|
|
@ -0,0 +1 @@
|
|||
Jeg vil nu uploade interaktionsdata til Mycroft AI, så jeg kan blive klogere. I øjeblikket omfatter dette optagelser af wake-up ord.
|
|
@ -0,0 +1 @@
|
|||
STARTER IGEN...
|
|
@ -0,0 +1 @@
|
|||
< < < SYNKRONISERE < < <
|
|
@ -0,0 +1 @@
|
|||
< < < OPDATERER < < <
|
|
@ -0,0 +1 @@
|
|||
minut
|
|
@ -0,0 +1 @@
|
|||
minutter
|
|
@ -0,0 +1 @@
|
|||
Hej, jeg er Mycroft, din nye assistent. For at hjælpe dig skal jeg være forbundet til internettet. Du kan enten forbinde mig med et netværkskabel eller bruge wifi. Følg disse instruktioner for at konfigurere Wi-Fi:
|
|
@ -0,0 +1,5 @@
|
|||
no
|
||||
nope
|
||||
nah
|
||||
negative
|
||||
nej
|
|
@ -0,0 +1,4 @@
|
|||
Det ser ud til, at jeg ikke har forbindelse til internettet
|
||||
Jeg synes ikke at være forbundet til internettet
|
||||
Jeg kan ikke nå internettet lige nu
|
||||
Jeg kan ikke nå internettet
|
|
@ -0,0 +1,5 @@
|
|||
Jeg har problemer med at kommunikere med Mycroft serverne. Giv mig et par minutter, før du prøver at tale til mig.
|
||||
Jeg har problemer med at kommunikere med Mycroft serverne. Vent et par minutter, før du prøver at tale til mig.
|
||||
Det ser ud til, at jeg ikke kan oprette forbindelse til Mycroft-serverne. Giv mig et par minutter, før du prøver at tale til mig.
|
||||
Det ser ud til, at jeg ikke kan oprette forbindelse til Mycroft-serverne. Vent et par minutter, før du prøver at tale til mig.
|
||||
Vent et øjeblik, til jeg er færdig med at starte op.
|
|
@ -0,0 +1 @@
|
|||
eller
|
|
@ -0,0 +1,9 @@
|
|||
jalepeno: hallipeenyo
|
||||
ai: A.I.
|
||||
mycroft: mejkråft
|
||||
open: åpen
|
||||
source: sårhse
|
||||
seksten: sejsten
|
||||
spotify: spåtifej
|
||||
spot-ify: spåtifej
|
||||
chat: tjat
|
|
@ -0,0 +1 @@
|
|||
Jeg er blevet nulstillet til fabriksindstillingerne.
|
|
@ -0,0 +1 @@
|
|||
sekund
|
|
@ -0,0 +1 @@
|
|||
sekunder
|
|
@ -0,0 +1,6 @@
|
|||
Jeg har problemer med at kommunikere med Mycroft serverne. Giv mig et par minutter, før du prøver at tale med mig.
|
||||
Jeg har problemer med at kommunikere med Mycroft serverne. Vent et par minutter, før du prøver at tale med mig.
|
||||
Det ser ud til, at jeg ikke kan oprette forbindelse til Mycroft-serverne. Giv mig et par minutter, før du prøver at tale med mig.
|
||||
Det ser ud til, at jeg ikke kan oprette forbindelse til Mycroft-serverne. Vent et par minutter, før du prøver at tale med mig.
|
||||
Vent et øjeblik, til jeg er færdig med at starte op.
|
||||
Der opstod en fejl under behandling af en anmodning i {{skill}}
|
|
@ -0,0 +1,2 @@
|
|||
Jeg har nu opdateret mine færdigheder. Jeg kan derfor godt hjælpe dig nu
|
||||
Mine færdigheder er nu opdateret. Jeg er klar til at hjælpe dig.
|
|
@ -0,0 +1 @@
|
|||
Der opstod en fejl under opdatering af færdigheder
|
|
@ -0,0 +1 @@
|
|||
SSH login er blevet deaktiveret
|
|
@ -0,0 +1 @@
|
|||
SSH logins er nu tilladt
|
|
@ -0,0 +1 @@
|
|||
Jeg skal genstarte efter synkronisering af mit ur med internettet, er snart tilbage.
|
|
@ -0,0 +1,6 @@
|
|||
yes
|
||||
yeah
|
||||
yep
|
||||
ja
|
||||
ja tak
|
||||
tak
|
|
@ -33,6 +33,9 @@ from mycroft.util.lang.format_fr import pronounce_number_fr
|
|||
from mycroft.util.lang.format_nl import nice_time_nl
|
||||
from mycroft.util.lang.format_nl import pronounce_number_nl
|
||||
from mycroft.util.lang.format_nl import nice_number_nl
|
||||
from mycroft.util.lang.format_da import nice_number_da
|
||||
from mycroft.util.lang.format_da import nice_time_da
|
||||
from mycroft.util.lang.format_da import pronounce_number_da
|
||||
|
||||
from collections import namedtuple
|
||||
from padatious.util import expand_parentheses
|
||||
|
@ -267,6 +270,8 @@ def nice_number(number, lang="en-us", speech=True, denominators=None):
|
|||
return nice_number_hu(number, speech, denominators)
|
||||
elif lang_lower.startswith("nl"):
|
||||
return nice_number_nl(number, speech, denominators)
|
||||
elif lang_lower.startswith("da"):
|
||||
return nice_number_da(number, speech, denominators)
|
||||
|
||||
# Default to the raw number for unsupported languages,
|
||||
# hopefully the STT engine will pronounce understandably.
|
||||
|
@ -305,6 +310,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
|
|||
return nice_time_hu(dt, speech, use_24hour, use_ampm)
|
||||
elif lang_lower.startswith("nl"):
|
||||
return nice_time_nl(dt, speech, use_24hour, use_ampm)
|
||||
elif lang_lower.startswith("da"):
|
||||
return nice_time_da(dt, speech, use_24hour, use_ampm)
|
||||
|
||||
# TODO: Other languages
|
||||
return str(dt)
|
||||
|
@ -344,6 +351,8 @@ def pronounce_number(number, lang="en-us", places=2, short_scale=True,
|
|||
return pronounce_number_hu(number, places=places)
|
||||
elif lang_lower.startswith("nl"):
|
||||
return pronounce_number_nl(number, places=places)
|
||||
elif lang_lower.startswith("da"):
|
||||
return pronounce_number_da(number, places=places)
|
||||
|
||||
# Default to just returning the numeric value
|
||||
return str(number)
|
||||
|
|
|
@ -0,0 +1,397 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from mycroft.util.lang.format_common import convert_to_mixed_fraction
|
||||
from math import floor
|
||||
|
||||
# Danish month names (lower case). nice_response_da() and nice_ordinal_da()
# compare lower-cased words of the input text against this list.
months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
          'juli', 'august', 'september', 'oktober', 'november',
          'december']

# Spoken forms of the basic cardinal numbers.
NUM_STRING_DA = {
    0: 'nul',
    1: 'en',
    2: 'to',
    3: 'tre',
    4: 'fire',
    5: 'fem',
    6: 'seks',
    7: 'syv',
    8: 'otte',
    9: 'ni',
    10: 'ti',
    11: 'elve',
    12: 'tolv',
    13: 'tretten',
    14: 'fjorten',
    15: 'femten',
    16: 'seksten',
    17: 'sytten',
    18: 'atten',
    19: 'nitten',
    20: 'tyve',
    30: 'tredive',
    40: 'fyrre',
    50: 'halvtres',
    60: 'tres',
    70: 'halvfjers',
    80: 'firs',
    90: 'halvfems',
    100: 'hundrede'
}

# Scale words. pronounce_number_da() indexes this list with the number of
# three-digit groups to the right of the current group (2 -> 'million').
NUM_POWERS_OF_TEN = [
    'hundred',
    'tusind',
    'million',
    'milliard',
    'billion',
    'billiard',
    'trillion',
    'trilliard'
]

# Singular names of fractions, keyed by denominator.
FRACTION_STRING_DA = {
    2: 'halv',
    3: 'trediedel',
    4: 'fjerdedel',
    5: 'femtedel',
    6: 'sjettedel',
    7: 'syvendedel',
    8: 'ottendedel',
    9: 'niendedel',
    10: 'tiendedel',
    11: 'elftedel',
    12: 'tolvtedel',
    13: 'trettendedel',
    14: 'fjortendedel',
    15: 'femtendedel',
    16: 'sejstendedel',
    17: 'syttendedel',
    18: 'attendedel',
    19: 'nittendedel',
    20: 'tyvendedel'
}

# Separator inserted between the parts of a compound number word below one
# million. "" (the default) glues the parts into a single word such as
# "toogtyve"; set it to " " to speak/print the parts as separate words,
# which can be easier to read for long numbers.
# NOTE(review): this convention was inherited from the German formatter;
# confirm it matches the desired Danish orthography.
EXTRA_SPACE = ""
|
||||
|
||||
|
||||
def nice_number_da(number, speech, denominators):
    """ Danish helper for nice_number

    Formats a number as a mixed fraction. When convert_to_mixed_fraction()
    yields whole=4, numerator=1, denominator=2 the result is "4 og 1 halv"
    for speech and "4 1/2" for display.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a decimal with at most three
        # places, written with a decimal comma.
        return str(round(number, 3)).replace(".", ",")

    whole, numerator, denominator = mixed
    if numerator == 0:
        # Plain integer, identical for speech and display.
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, numerator, denominator)

    fraction_word = FRACTION_STRING_DA[denominator]
    if numerator != 1:
        # plural form, e.g. "fjerdedel" -> "fjerdedele"
        fraction_word += 'e'
    spoken = '{} {}'.format(numerator, fraction_word)
    if whole != 0:
        spoken = '{} og {}'.format(whole, spoken)
    return spoken
|
||||
|
||||
|
||||
def pronounce_number_da(num, places=2):
    """
    Convert a number to its spoken Danish equivalent

    For example, 5.2 is rendered as 'fem komma to nul' with the default
    places=2, because the fractional part is always spoken with exactly
    `places` digits (trailing zeros included).

    Args:
        num(float or int): the number to pronounce; values whose magnitude
                           is 10**24 or more are returned as str(num)
        places(int): number of decimal places to speak
    Returns:
        (str): The pronounced number

    """

    def pronounce_triplet_da(num):
        # Speak one three-digit group (0..999) as a compound word.
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            if hundreds == 1:
                result += 'et' + 'hundrede' + EXTRA_SPACE
            else:
                result += NUM_STRING_DA[hundreds] + \
                    'hundrede' + EXTRA_SPACE
            num -= hundreds * 100
        if num == 1:
            result += 'et'
        elif 1 < num <= 20:
            result += NUM_STRING_DA[num] + EXTRA_SPACE
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if ones > 0:
                # ones come first in Danish: 22 -> "to" + "og" + "tyve"
                result += NUM_STRING_DA[ones] + EXTRA_SPACE
                result += 'og' + EXTRA_SPACE
            result += NUM_STRING_DA[tens] + EXTRA_SPACE
        return result

    def pronounce_fractional_da(num, places):
        # Speak exactly `places` digits, even when they are trailing zeros.
        result = ""
        place = 10
        while places > 0:
            # round() absorbs binary floating point noise before truncating;
            # without it e.g. 2.3 - 2 == 0.2999999999999998 would be spoken
            # as "to ni" instead of "tre nul".
            digit = int(round(num * place, 9)) % 10
            result += " " + NUM_STRING_DA[digit]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_da(num, scale_level=0):
        # Recursively speak the number one three-digit group at a time,
        # starting from the least significant group (scale_level 0).
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "en"
            elif scale_level == 1:
                result += 'et' + EXTRA_SPACE + 'tusinde' + EXTRA_SPACE
            else:
                # singular: "en million", "en milliard", ...
                result += "en " + NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_da(last_triplet)
            if scale_level == 1:
                result += 'tusinde' + EXTRA_SPACE
            if scale_level >= 2:
                # plural: "to millioner", "tre milliarder", ...
                result += " " + NUM_POWERS_OF_TEN[scale_level] + "er "

        # Integer division keeps large ints exact (float division would
        # lose precision above 2**53).
        return pronounce_whole_number_da(num // 1000,
                                         scale_level + 1) + result + \
            EXTRA_SPACE

    if abs(num) >= 1000000000000000000000000:  # cannot do more than this
        return str(num)
    if num == 0:
        return str(NUM_STRING_DA[0])
    if num < 0:
        return "minus " + pronounce_number_da(abs(num), places)
    if num == int(num):
        return pronounce_whole_number_da(num).rstrip()

    whole_number_part = floor(num)
    fractional_part = num - whole_number_part
    # A value between 0 and 1 has no whole part to speak; say "nul" so the
    # result reads "nul komma ..." instead of starting with " komma".
    result = pronounce_whole_number_da(whole_number_part).rstrip() or \
        NUM_STRING_DA[0]
    if places > 0:
        result += " komma"
        result += pronounce_fractional_da(fractional_part, places)
    return result
|
||||
|
||||
|
||||
def pronounce_ordinal_da(num):
    """
    Convert a whole, non-negative number to its spoken Danish ordinal

    Args:
        num(int or float): the number to convert
    Returns:
        (str): the ordinal word, e.g. "fjerde" for 4. Negative or
               non-whole input is returned unchanged (not as a string).
    """
    # 0..12 are irregular and looked up directly
    ordinals = ["nulte", "første", "anden", "tredie", "fjerde", "femte",
                "sjette", "syvende", "ottende", "niende", "tiende",
                "ellevte", "tolvte"]

    # only for whole positive numbers including zero
    if num < 0 or num != int(num):
        return num
    # a whole-valued float such as 3.0 must become an int before indexing
    num = int(num)
    if num < len(ordinals):
        return ordinals[num]

    cardinal = pronounce_number_da(num).strip()
    if num < 20:
        # tretten -> trettende, fjorten -> fjortende, ...
        return cardinal + "de"
    if 30 <= num < 40:
        # tredive -> tredivte, enogtredive -> enogtredivte
        if cardinal.endswith("e"):
            return cardinal[:-1] + "te"
        return cardinal + "te"
    # 20..29: tyve -> tyvende, enogtyve -> enogtyvende.
    # NOTE(review): from 40 upwards Danish ordinals are irregular
    # (e.g. "fyrretyvende"); the generic suffix below is kept from the
    # previous implementation and is probably not correct for them.
    if cardinal.endswith("e"):
        return cardinal + "nde"
    return cardinal + "ende"
|
||||
|
||||
|
||||
def nice_time_da(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For 13:22 this yields 'et toogtyve' for speech or '01:22' for
    text display (12-hour format, no am/pm).

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if not speech:
        # Display form; hours keep their leading zero.
        if use_24hour:
            return dt.strftime("%H:%M")  # e.g. "03:01" or "14:22"
        if use_ampm:
            return dt.strftime("%I:%M %p")  # e.g. "03:01 AM"
        return dt.strftime("%I:%M")  # e.g. "03:01" or "02:22"

    hour = dt.hour
    minute = dt.minute

    def spoken_minutes():
        # Nothing on the full hour; single-digit minutes get a leading
        # "nul".
        if minute == 0:
            return ""
        prefix = ' nul' if minute < 10 else ''
        return prefix + " " + pronounce_number_da(minute)

    if use_24hour:
        # 01:xx is spoken "et", not "en"; am/pm is ignored in 24-hour mode
        spoken_hour = "et" if hour == 1 else pronounce_number_da(hour)
        return spoken_hour + spoken_minutes()

    if minute == 0:
        if hour == 0:
            return "midnat"
        if hour == 12:
            return "middag"
    # TODO: "half past 3", "a quarter of 4" and other idiomatic times

    if hour == 0:
        spoken_hour = pronounce_number_da(12)
    elif hour in (1, 13):
        # 01:xx and 13:xx are both spoken "et"
        spoken_hour = 'et'
    elif hour < 13:
        spoken_hour = pronounce_number_da(hour)
    else:
        spoken_hour = pronounce_number_da(hour - 12)

    speak = spoken_hour + spoken_minutes()
    if not use_ampm:
        return speak

    if hour < 3 or hour >= 22:
        # 22:00 - 02:59 at night
        return speak + " om natten"
    if hour < 12:
        # 03:00 - 11:59 in the morning
        return speak + " om morgenen"
    if hour < 18:
        # 12:01 - 17:59 in the afternoon
        return speak + " om eftermiddagen"
    # 18:00 - 21:59 in the evening
    return speak + " om aftenen"
|
||||
|
||||
|
||||
def nice_response_da(text):
    """
    Make a response text easier to speak in Danish

    A "^" that is followed by a numeric word is replaced with
    "opløftet i" (to the power of). If the text mentions a month name,
    nice_ordinal_da() is then applied to the result so that both
    rewrites are kept when they occur in the same text.

    Args:
        text (str): the text to adapt
    Returns:
        (str): the adapted text
    """
    words = text.split()
    mentions_month = False
    replaced_power = False

    for idx, word in enumerate(words):
        if word.lower() in months:
            mentions_month = True

        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "opløftet i"
                replaced_power = True

    if replaced_power:
        text = " ".join(words)
    if mentions_month:
        # Run once, after the "^" rewrite, so neither change overwrites
        # the other.
        text = nice_ordinal_da(text)
    return text
|
||||
|
||||
|
||||
def nice_ordinal_da(text):
    """
    Spell out day ordinals that directly precede a month name

    A token made of decimal digits and a trailing "." (such as "4.") that
    is followed by a word from `months` is replaced with the result of
    pronounce_ordinal_da(). Danish ordinals are not declined, so the word
    is inserted as is, whatever article or preposition precedes it.

    Args:
        text (str): the text to adapt
    Returns:
        (str): the text with ordinals spelled out; returned unchanged if
               nothing was replaced
    """
    words = text.split()
    replaced = False

    for idx, word in enumerate(words):
        if not word.endswith("."):
            continue
        number = word[:-1]
        if not number.isdecimal():
            continue
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        if wordNext.lower() in months:
            words[idx] = str(pronounce_ordinal_da(int(number)))
            replaced = True

    return " ".join(words) if replaced else text
|
|
@ -0,0 +1,933 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from datetime import datetime
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions, \
|
||||
extract_numbers_generic
|
||||
from mycroft.util.lang.format_da import pronounce_number_da
|
||||
|
||||
# Danish number words understood by the parser, mapped to their value.
# Both the standard spellings and the variants originally listed here
# ('elve', 'fyrrre', 'halvtres', 'halvfjers', 'hunderede') are accepted;
# the latter are kept so existing behaviour does not change.
da_numbers = {
    'nul': 0,
    'en': 1,
    'et': 1,
    'to': 2,
    'tre': 3,
    'fire': 4,
    'fem': 5,
    'seks': 6,
    'syv': 7,
    'otte': 8,
    'ni': 9,
    'ti': 10,
    'elve': 11,
    'elleve': 11,
    'tolv': 12,
    'tretten': 13,
    'fjorten': 14,
    'femten': 15,
    'seksten': 16,
    'sytten': 17,
    'atten': 18,
    'nitten': 19,
    'tyve': 20,
    'enogtyve': 21,
    'toogtyve': 22,
    'treogtyve': 23,
    'fireogtyve': 24,
    'femogtyve': 25,
    'seksogtyve': 26,
    'syvogtyve': 27,
    'otteogtyve': 28,
    'niogtyve': 29,
    'tredive': 30,
    'enogtredive': 31,
    'fyrrre': 40,
    'fyrre': 40,
    'halvtres': 50,
    'halvtreds': 50,
    'tres': 60,
    'halvfjers': 70,
    'halvfjerds': 70,
    'firs': 80,
    'halvfems': 90,
    'hunderede': 100,
    'hundrede': 100,
    'tohundrede': 200,
    'trehundrede': 300,
    'firehundrede': 400,
    'femhundrede': 500,
    'sekshundrede': 600,
    'syvhundrede': 700,
    'ottehundrede': 800,
    'nihundrede': 900,
    'tusinde': 1000,
    'tusind': 1000,
    'million': 1000000
}
|
||||
|
||||
|
||||
def extractnumber_da(text):
    """
    Extract the first number found in a Danish text

    Recognises numeric tokens, the words listed in da_numbers, whatever
    isFractional_da() / isOrdinal_da() accept, fractions written as "2/3",
    and two numbers joined by "og" (which are added together).

    The words "den" and "det" are dropped before parsing. "en"/"et" are
    not dropped, because they mean "one" as well as "a".

    Args:
        text (str): the string to search
    Returns:
        (int) or (float): The value of extracted number, or False if no
                          number was found. A value of zero is also
                          reported as False.
    """
    aWords = text.split()
    aWords = [word for word in aWords if
              word not in ["den", "det"]]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            # Convert every numeric token, including decimals like "4.5";
            # str.isdigit() would reject those.
            # NOTE(review): assumes is_numeric() is True only for strings
            # float() can parse - confirm against parse_common.
            val = float(word)
        elif isFractional_da(word):
            val = isFractional_da(word)
        elif isOrdinal_da(word):
            val = isOrdinal_da(word)
        else:
            if word in da_numbers:
                val = da_numbers[word]
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_da(wordNext)

                if valNext:
                    # a number word followed by a fraction word is
                    # multiplied with it
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # nothing usable after "og": keep the value before it
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "og"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
            # "<number> og <number>": remember this value and parse the
            # word after "og"
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
|
||||
|
||||
|
||||
def extract_datetime_da(string, currentDate, default_time):
|
||||
def clean_string(s):
|
||||
"""
|
||||
cleans the input string of unneeded punctuation
|
||||
and capitalization among other things.
|
||||
|
||||
'am' is a preposition, so cannot currently be used
|
||||
for 12 hour date format
|
||||
"""
|
||||
|
||||
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
|
||||
.replace(' den ', ' ').replace(' det ', ' ').replace(' om ',
|
||||
' ').replace(
|
||||
' om ', ' ') \
|
||||
.replace(' på ', ' ').replace(' om ', ' ')
|
||||
wordList = s.split()
|
||||
|
||||
for idx, word in enumerate(wordList):
|
||||
if isOrdinal_da(word) is not False:
|
||||
word = str(isOrdinal_da(word))
|
||||
wordList[idx] = word
|
||||
|
||||
return wordList
|
||||
|
||||
def date_found():
|
||||
return found or \
|
||||
(
|
||||
datestr != "" or timeStr != "" or
|
||||
yearOffset != 0 or monthOffset != 0 or
|
||||
dayOffset is True or hrOffset != 0 or
|
||||
hrAbs or minOffset != 0 or
|
||||
minAbs or secOffset != 0
|
||||
)
|
||||
|
||||
if string == "" or not currentDate:
|
||||
return None
|
||||
|
||||
found = False
|
||||
daySpecified = False
|
||||
dayOffset = False
|
||||
monthOffset = 0
|
||||
yearOffset = 0
|
||||
dateNow = currentDate
|
||||
today = dateNow.strftime("%w")
|
||||
currentYear = dateNow.strftime("%Y")
|
||||
fromFlag = False
|
||||
datestr = ""
|
||||
hasYear = False
|
||||
timeQualifier = ""
|
||||
|
||||
timeQualifiersList = ['tidlig',
|
||||
'morgen',
|
||||
'morgenen',
|
||||
'formidag',
|
||||
'formiddagen',
|
||||
'eftermiddag',
|
||||
'eftermiddagen',
|
||||
'aften',
|
||||
'aftenen',
|
||||
'nat',
|
||||
'natten']
|
||||
markers = ['i', 'om', 'på', 'klokken', 'ved']
|
||||
days = ['mandag', 'tirsdag', 'onsdag',
|
||||
'torsdag', 'fredag', 'lørdag', 'søndag']
|
||||
months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
|
||||
'juli', 'august', 'september', 'oktober', 'november',
|
||||
'desember']
|
||||
monthsShort = ['jan', 'feb', 'mar', 'apr', 'maj', 'juni', 'juli', 'aug',
|
||||
'sep', 'okt', 'nov', 'des']
|
||||
|
||||
validFollowups = days + months + monthsShort
|
||||
validFollowups.append("i dag")
|
||||
validFollowups.append("morgen")
|
||||
validFollowups.append("næste")
|
||||
validFollowups.append("forige")
|
||||
validFollowups.append("nu")
|
||||
|
||||
words = clean_string(string)
|
||||
|
||||
for idx, word in enumerate(words):
|
||||
if word == "":
|
||||
continue
|
||||
wordPrevPrev = words[idx - 2] if idx > 1 else ""
|
||||
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
|
||||
|
||||
start = idx
|
||||
used = 0
|
||||
# save timequalifier for later
|
||||
if word in timeQualifiersList:
|
||||
timeQualifier = word
|
||||
# parse today, tomorrow, day after tomorrow
|
||||
elif word == "dag" and not fromFlag:
|
||||
dayOffset = 0
|
||||
used += 1
|
||||
elif word == "morgen" and not fromFlag and wordPrev != "om" and \
|
||||
wordPrev not in days: # morgen means tomorrow if not "am
|
||||
# Morgen" and not [day of the week] morgen
|
||||
dayOffset = 1
|
||||
used += 1
|
||||
elif word == "overmorgen" and not fromFlag:
|
||||
dayOffset = 2
|
||||
used += 1
|
||||
# parse 5 days, 10 weeks, last week, next week
|
||||
elif word == "dag" or word == "dage":
|
||||
if wordPrev[0].isdigit():
|
||||
dayOffset += int(wordPrev)
|
||||
start -= 1
|
||||
used = 2
|
||||
elif word == "uge" or word == "uger" and not fromFlag:
|
||||
if wordPrev[0].isdigit():
|
||||
dayOffset += int(wordPrev) * 7
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordPrev[:6] == "næste":
|
||||
dayOffset = 7
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordPrev[:5] == "forige":
|
||||
dayOffset = -7
|
||||
start -= 1
|
||||
used = 2
|
||||
# parse 10 months, next month, last month
|
||||
elif word == "måned" and not fromFlag:
|
||||
if wordPrev[0].isdigit():
|
||||
monthOffset = int(wordPrev)
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordPrev[:6] == "næste":
|
||||
monthOffset = 1
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordPrev[:5] == "forige":
|
||||
monthOffset = -1
|
||||
start -= 1
|
||||
used = 2
|
||||
# parse 5 years, next year, last year
|
||||
elif word == "år" and not fromFlag:
|
||||
if wordPrev[0].isdigit():
|
||||
yearOffset = int(wordPrev)
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordPrev[:6] == " næste":
|
||||
yearOffset = 1
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordPrev[:6] == "næste":
|
||||
yearOffset = -1
|
||||
start -= 1
|
||||
used = 2
|
||||
# parse Monday, Tuesday, etc., and next Monday,
|
||||
# last Tuesday, etc.
|
||||
elif word in days and not fromFlag:
|
||||
d = days.index(word)
|
||||
dayOffset = (d + 1) - int(today)
|
||||
used = 1
|
||||
if dayOffset < 0:
|
||||
dayOffset += 7
|
||||
if wordNext == "morgen":
|
||||
# morgen means morning if preceded by
|
||||
# the day of the week
|
||||
words[idx + 1] = "tidlig"
|
||||
if wordPrev[:6] == "næste":
|
||||
dayOffset += 7
|
||||
used += 1
|
||||
start -= 1
|
||||
elif wordPrev[:5] == "forige":
|
||||
dayOffset -= 7
|
||||
used += 1
|
||||
start -= 1
|
||||
# parse 15 of July, June 20th, Feb 18, 19 of February
|
||||
elif word in months or word in monthsShort and not fromFlag:
|
||||
try:
|
||||
m = months.index(word)
|
||||
except ValueError:
|
||||
m = monthsShort.index(word)
|
||||
used += 1
|
||||
datestr = months[m]
|
||||
if wordPrev and (wordPrev[0].isdigit() or
|
||||
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
|
||||
if wordPrev == "of" and wordPrevPrev[0].isdigit():
|
||||
datestr += " " + words[idx - 2]
|
||||
used += 1
|
||||
start -= 1
|
||||
else:
|
||||
datestr += " " + wordPrev
|
||||
start -= 1
|
||||
used += 1
|
||||
if wordNext and wordNext[0].isdigit():
|
||||
datestr += " " + wordNext
|
||||
used += 1
|
||||
hasYear = True
|
||||
else:
|
||||
hasYear = False
|
||||
|
||||
elif wordNext and wordNext[0].isdigit():
|
||||
datestr += " " + wordNext
|
||||
used += 1
|
||||
if wordNextNext and wordNextNext[0].isdigit():
|
||||
datestr += " " + wordNextNext
|
||||
used += 1
|
||||
hasYear = True
|
||||
else:
|
||||
hasYear = False
|
||||
# parse 5 days from tomorrow, 10 weeks from next thursday,
|
||||
# 2 months from July
|
||||
|
||||
if (
|
||||
word == "fra" or word == "til" or word == "om") and wordNext \
|
||||
in validFollowups:
|
||||
used = 2
|
||||
fromFlag = True
|
||||
if wordNext == "morgenen" and \
|
||||
wordPrev != "om" and \
|
||||
wordPrev not in days:
|
||||
# morgen means tomorrow if not "am Morgen" and not
|
||||
# [day of the week] morgen:
|
||||
dayOffset += 1
|
||||
elif wordNext in days:
|
||||
d = days.index(wordNext)
|
||||
tmpOffset = (d + 1) - int(today)
|
||||
used = 2
|
||||
if tmpOffset < 0:
|
||||
tmpOffset += 7
|
||||
dayOffset += tmpOffset
|
||||
elif wordNextNext and wordNextNext in days:
|
||||
d = days.index(wordNextNext)
|
||||
tmpOffset = (d + 1) - int(today)
|
||||
used = 3
|
||||
if wordNext[:6] == "næste":
|
||||
tmpOffset += 7
|
||||
used += 1
|
||||
start -= 1
|
||||
elif wordNext[:5] == "forige":
|
||||
tmpOffset -= 7
|
||||
used += 1
|
||||
start -= 1
|
||||
dayOffset += tmpOffset
|
||||
if used > 0:
|
||||
if start - 1 > 0 and words[start - 1].startswith("denne"):
|
||||
start -= 1
|
||||
used += 1
|
||||
|
||||
for i in range(0, used):
|
||||
words[i + start] = ""
|
||||
|
||||
if start - 1 >= 0 and words[start - 1] in markers:
|
||||
words[start - 1] = ""
|
||||
found = True
|
||||
daySpecified = True
|
||||
|
||||
# parse time
|
||||
timeStr = ""
|
||||
hrOffset = 0
|
||||
minOffset = 0
|
||||
secOffset = 0
|
||||
hrAbs = None
|
||||
minAbs = None
|
||||
|
||||
for idx, word in enumerate(words):
|
||||
if word == "":
|
||||
continue
|
||||
|
||||
wordPrevPrev = words[idx - 2] if idx > 1 else ""
|
||||
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
|
||||
wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
|
||||
wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""
|
||||
|
||||
# parse noon, midnight, morning, afternoon, evening
|
||||
used = 0
|
||||
if word[:6] == "middag":
|
||||
hrAbs = 12
|
||||
used += 1
|
||||
elif word[:11] == "midnat":
|
||||
hrAbs = 0
|
||||
used += 1
|
||||
elif word == "morgenen" or (
|
||||
wordPrev == "om" and word == "morgenen") or word == "tidlig":
|
||||
if not hrAbs:
|
||||
hrAbs = 8
|
||||
used += 1
|
||||
elif word[:11] == "eftermiddag":
|
||||
if not hrAbs:
|
||||
hrAbs = 15
|
||||
used += 1
|
||||
elif word[:5] == "aften":
|
||||
if not hrAbs:
|
||||
hrAbs = 19
|
||||
used += 1
|
||||
# parse half an hour, quarter hour
|
||||
elif word == "time" and \
|
||||
(wordPrev in markers or wordPrevPrev in markers):
|
||||
if wordPrev[:4] == "halv":
|
||||
minOffset = 30
|
||||
elif wordPrev == "kvarter":
|
||||
minOffset = 15
|
||||
elif wordPrev == "trekvarter":
|
||||
minOffset = 45
|
||||
else:
|
||||
hrOffset = 1
|
||||
if wordPrevPrev in markers:
|
||||
words[idx - 2] = ""
|
||||
words[idx - 1] = ""
|
||||
used += 1
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
# parse 5:00 am, 12:00 p.m., etc
|
||||
elif word[0].isdigit():
|
||||
isTime = True
|
||||
strHH = ""
|
||||
strMM = ""
|
||||
remainder = ""
|
||||
if ':' in word:
|
||||
# parse colons
|
||||
# "3:00 in the morning"
|
||||
stage = 0
|
||||
length = len(word)
|
||||
for i in range(length):
|
||||
if stage == 0:
|
||||
if word[i].isdigit():
|
||||
strHH += word[i]
|
||||
elif word[i] == ":":
|
||||
stage = 1
|
||||
else:
|
||||
stage = 2
|
||||
i -= 1
|
||||
elif stage == 1:
|
||||
if word[i].isdigit():
|
||||
strMM += word[i]
|
||||
else:
|
||||
stage = 2
|
||||
i -= 1
|
||||
elif stage == 2:
|
||||
remainder = word[i:].replace(".", "")
|
||||
break
|
||||
if remainder == "":
|
||||
nextWord = wordNext.replace(".", "")
|
||||
if nextWord == "am" or nextWord == "pm":
|
||||
remainder = nextWord
|
||||
used += 1
|
||||
elif nextWord == "aften":
|
||||
remainder = "pm"
|
||||
used += 1
|
||||
elif wordNext == "om" and wordNextNext == "morgenen":
|
||||
remainder = "am"
|
||||
used += 2
|
||||
elif wordNext == "om" and wordNextNext == "eftermiddagen":
|
||||
remainder = "pm"
|
||||
used += 2
|
||||
elif wordNext == "om" and wordNextNext == "aftenen":
|
||||
remainder = "pm"
|
||||
used += 2
|
||||
elif wordNext == "morgen":
|
||||
remainder = "am"
|
||||
used += 1
|
||||
elif wordNext == "eftermiddag":
|
||||
remainder = "pm"
|
||||
used += 1
|
||||
elif wordNext == "aften":
|
||||
remainder = "pm"
|
||||
used += 1
|
||||
elif wordNext == "i" and wordNextNext == "morgen":
|
||||
remainder = "am"
|
||||
used = 2
|
||||
elif wordNext == "i" and wordNextNext == "eftermiddag":
|
||||
remainder = "pm"
|
||||
used = 2
|
||||
elif wordNext == "i" and wordNextNext == "aften":
|
||||
remainder = "pm"
|
||||
used = 2
|
||||
elif wordNext == "natten":
|
||||
if strHH > 4:
|
||||
remainder = "pm"
|
||||
else:
|
||||
remainder = "am"
|
||||
used += 1
|
||||
else:
|
||||
if timeQualifier != "":
|
||||
if strHH <= 12 and \
|
||||
(timeQualifier == "aftenen" or
|
||||
timeQualifier == "eftermiddagen"):
|
||||
strHH += 12 # what happens when strHH is 24?
|
||||
else:
|
||||
# try to parse # s without colons
|
||||
# 5 hours, 10 minutes etc.
|
||||
length = len(word)
|
||||
strNum = ""
|
||||
remainder = ""
|
||||
for i in range(length):
|
||||
if word[i].isdigit():
|
||||
strNum += word[i]
|
||||
else:
|
||||
remainder += word[i]
|
||||
|
||||
if remainder == "":
|
||||
remainder = wordNext.replace(".", "").lstrip().rstrip()
|
||||
|
||||
if (
|
||||
remainder == "pm" or
|
||||
wordNext == "pm" or
|
||||
remainder == "p.m." or
|
||||
wordNext == "p.m."):
|
||||
strHH = strNum
|
||||
remainder = "pm"
|
||||
used = 1
|
||||
elif (
|
||||
remainder == "am" or
|
||||
wordNext == "am" or
|
||||
remainder == "a.m." or
|
||||
wordNext == "a.m."):
|
||||
strHH = strNum
|
||||
remainder = "am"
|
||||
used = 1
|
||||
else:
|
||||
if wordNext == "time" and int(word) < 100:
|
||||
# "in 3 hours"
|
||||
hrOffset = int(word)
|
||||
used = 2
|
||||
isTime = False
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
elif wordNext == "minut":
|
||||
# "in 10 minutes"
|
||||
minOffset = int(word)
|
||||
used = 2
|
||||
isTime = False
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
elif wordNext == "sekund":
|
||||
# in 5 seconds
|
||||
secOffset = int(word)
|
||||
used = 2
|
||||
isTime = False
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
|
||||
elif wordNext == "time":
|
||||
strHH = word
|
||||
used += 1
|
||||
isTime = True
|
||||
if wordNextNext == timeQualifier:
|
||||
strMM = ""
|
||||
if wordNextNext[:11] == "eftermiddag":
|
||||
used += 1
|
||||
remainder = "pm"
|
||||
elif wordNextNext == "om" and wordNextNextNext == \
|
||||
"eftermiddagen":
|
||||
used += 2
|
||||
remainder = "pm"
|
||||
elif wordNextNext[:5] == "aften":
|
||||
used += 1
|
||||
remainder = "pm"
|
||||
elif wordNextNext == "om" and wordNextNextNext == \
|
||||
"aftenen":
|
||||
used += 2
|
||||
remainder = "pm"
|
||||
elif wordNextNext[:6] == "morgen":
|
||||
used += 1
|
||||
remainder = "am"
|
||||
elif wordNextNext == "om" and wordNextNextNext == \
|
||||
"morgenen":
|
||||
used += 2
|
||||
remainder = "am"
|
||||
elif wordNextNext == "natten":
|
||||
used += 1
|
||||
if 8 <= int(word) <= 12:
|
||||
remainder = "pm"
|
||||
else:
|
||||
remainder = "am"
|
||||
|
||||
elif is_numeric(wordNextNext):
|
||||
strMM = wordNextNext
|
||||
used += 1
|
||||
if wordNextNextNext == timeQualifier:
|
||||
if wordNextNextNext[:11] == "eftermiddag":
|
||||
used += 1
|
||||
remainder = "pm"
|
||||
elif wordNextNextNext == "om" and \
|
||||
wordNextNextNextNext == \
|
||||
"eftermiddagen":
|
||||
used += 2
|
||||
remainder = "pm"
|
||||
elif wordNextNextNext[:6] == "natten":
|
||||
used += 1
|
||||
remainder = "pm"
|
||||
elif wordNextNextNext == "am" and \
|
||||
wordNextNextNextNext == "natten":
|
||||
used += 2
|
||||
remainder = "pm"
|
||||
elif wordNextNextNext[:7] == "morgenen":
|
||||
used += 1
|
||||
remainder = "am"
|
||||
elif wordNextNextNext == "om" and \
|
||||
wordNextNextNextNext == "morgenen":
|
||||
used += 2
|
||||
remainder = "am"
|
||||
elif wordNextNextNext == "natten":
|
||||
used += 1
|
||||
if 8 <= int(word) <= 12:
|
||||
remainder = "pm"
|
||||
else:
|
||||
remainder = "am"
|
||||
|
||||
elif wordNext == timeQualifier:
|
||||
strHH = word
|
||||
strMM = 00
|
||||
isTime = True
|
||||
if wordNext[:10] == "eftermidag":
|
||||
used += 1
|
||||
remainder = "pm"
|
||||
elif wordNext == "om" and \
|
||||
wordNextNext == "eftermiddanen":
|
||||
used += 2
|
||||
remainder = "pm"
|
||||
elif wordNext[:7] == "aftenen":
|
||||
used += 1
|
||||
remainder = "pm"
|
||||
elif wordNext == "om" and wordNextNext == "aftenen":
|
||||
used += 2
|
||||
remainder = "pm"
|
||||
elif wordNext[:7] == "morgenen":
|
||||
used += 1
|
||||
remainder = "am"
|
||||
elif wordNext == "ao" and wordNextNext == "morgenen":
|
||||
used += 2
|
||||
remainder = "am"
|
||||
elif wordNext == "natten":
|
||||
used += 1
|
||||
if 8 <= int(word) <= 12:
|
||||
remainder = "pm"
|
||||
else:
|
||||
remainder = "am"
|
||||
|
||||
# if timeQualifier != "":
|
||||
# military = True
|
||||
# else:
|
||||
# isTime = False
|
||||
|
||||
strHH = int(strHH) if strHH else 0
|
||||
strMM = int(strMM) if strMM else 0
|
||||
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
|
||||
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
|
||||
if strHH > 24 or strMM > 59:
|
||||
isTime = False
|
||||
used = 0
|
||||
if isTime:
|
||||
hrAbs = strHH * 1
|
||||
minAbs = strMM * 1
|
||||
used += 1
|
||||
if used > 0:
|
||||
# removed parsed words from the sentence
|
||||
for i in range(used):
|
||||
words[idx + i] = ""
|
||||
|
||||
if wordPrev == "tidlig":
|
||||
hrOffset = -1
|
||||
words[idx - 1] = ""
|
||||
idx -= 1
|
||||
elif wordPrev == "sen":
|
||||
hrOffset = 1
|
||||
words[idx - 1] = ""
|
||||
idx -= 1
|
||||
if idx > 0 and wordPrev in markers:
|
||||
words[idx - 1] = ""
|
||||
if idx > 1 and wordPrevPrev in markers:
|
||||
words[idx - 2] = ""
|
||||
|
||||
idx += used - 1
|
||||
found = True
|
||||
|
||||
# check that we found a date
|
||||
if not date_found:
|
||||
return None
|
||||
|
||||
if dayOffset is False:
|
||||
dayOffset = 0
|
||||
|
||||
# perform date manipulation
|
||||
|
||||
extractedDate = dateNow
|
||||
extractedDate = extractedDate.replace(microsecond=0,
|
||||
second=0,
|
||||
minute=0,
|
||||
hour=0)
|
||||
if datestr != "":
|
||||
en_months = ['january', 'february', 'march', 'april', 'may', 'june',
|
||||
'july', 'august', 'september', 'october', 'november',
|
||||
'december']
|
||||
en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
|
||||
'aug',
|
||||
'sept', 'oct', 'nov', 'dec']
|
||||
for idx, en_month in enumerate(en_months):
|
||||
datestr = datestr.replace(months[idx], en_month)
|
||||
for idx, en_month in enumerate(en_monthsShort):
|
||||
datestr = datestr.replace(monthsShort[idx], en_month)
|
||||
|
||||
temp = datetime.strptime(datestr, "%B %d")
|
||||
if not hasYear:
|
||||
temp = temp.replace(year=extractedDate.year)
|
||||
if extractedDate < temp:
|
||||
extractedDate = extractedDate.replace(year=int(currentYear),
|
||||
month=int(
|
||||
temp.strftime(
|
||||
"%m")),
|
||||
day=int(temp.strftime(
|
||||
"%d")))
|
||||
else:
|
||||
extractedDate = extractedDate.replace(
|
||||
year=int(currentYear) + 1,
|
||||
month=int(temp.strftime("%m")),
|
||||
day=int(temp.strftime("%d")))
|
||||
else:
|
||||
extractedDate = extractedDate.replace(
|
||||
year=int(temp.strftime("%Y")),
|
||||
month=int(temp.strftime("%m")),
|
||||
day=int(temp.strftime("%d")))
|
||||
|
||||
if timeStr != "":
|
||||
temp = datetime(timeStr)
|
||||
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
|
||||
minute=temp.strftime("%M"),
|
||||
second=temp.strftime("%S"))
|
||||
|
||||
if yearOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(years=yearOffset)
|
||||
if monthOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(months=monthOffset)
|
||||
if dayOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(days=dayOffset)
|
||||
|
||||
if hrAbs is None and minAbs is None and default_time:
|
||||
hrAbs = default_time.hour
|
||||
minAbs = default_time.minute
|
||||
|
||||
if hrAbs != -1 and minAbs != -1:
|
||||
|
||||
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
|
||||
minutes=minAbs or 0)
|
||||
if (hrAbs or minAbs) and datestr == "":
|
||||
if not daySpecified and dateNow > extractedDate:
|
||||
extractedDate = extractedDate + relativedelta(days=1)
|
||||
if hrOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(hours=hrOffset)
|
||||
if minOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(minutes=minOffset)
|
||||
if secOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(seconds=secOffset)
|
||||
for idx, word in enumerate(words):
|
||||
if words[idx] == "og" and words[idx - 1] == "" \
|
||||
and words[idx + 1] == "":
|
||||
words[idx] = ""
|
||||
|
||||
resultStr = " ".join(words)
|
||||
resultStr = ' '.join(resultStr.split())
|
||||
|
||||
return [extractedDate, resultStr]
|
||||
|
||||
|
||||
def isFractional_da(input_str):
    """
    Check whether the given Danish word denotes a fraction.

    Recognised forms are words starting with "halv", the word
    "trediedel", and words made of a number stem followed by "del".

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    lowered = input_str.lower()

    # NOTE(review): this prefix test also matches the tens "halvtreds",
    # "halvfjerds" and "halvfems"; presumably callers resolve those through
    # da_numbers before asking here -- confirm against extractnumber_da.
    if lowered.startswith("halv"):
        return 0.5

    if lowered == "trediedel":
        return 1.0 / 3

    # Compare on the lowercased word so the suffix test is case-insensitive
    # like the checks above.
    if lowered.endswith("del"):
        stem = lowered[:-3]  # e.g. "femtedel" -> "femte"
        if stem in da_numbers:
            denominator = da_numbers[stem]
        else:
            # Danish forms the denominator from the ordinal
            # ("femte" + "del"), so fall back to the ordinal lookup.
            denominator = isOrdinal_da(stem)
        # A zero or unknown denominator is not a fraction; also avoids
        # dividing by zero.
        if denominator:
            return 1.0 / denominator

    return False
|
||||
|
||||
|
||||
def isOrdinal_da(input_str):
    """
    Check whether the given Danish word is an ordinal number.

    A fixed set of ordinals is matched by prefix; anything else is
    recognised by stripping an ordinal suffix ("nde", "ende" or "te") and
    looking the remaining stem up in da_numbers.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (float): False if not an ordinal, otherwise the number
            corresponding to the ordinal

    only works for ordinals corresponding to the numbers in da_numbers

    """
    lowerstr = input_str.lower()

    # Ordinals that cannot be derived by stripping a suffix; matched by
    # prefix, in this order.
    irregular = (
        ("første", 1),
        ("anden", 2),
        ("tredie", 3),
        ("fjerde", 4),
        ("femte", 5),
        ("sjette", 6),
        ("elfte", 11),
        ("tolvfte", 12),
    )
    for prefix, value in irregular:
        if lowerstr.startswith(prefix):
            return value

    # Try every suffix against the unmodified word. "tyvende" needs only
    # "nde" removed ("tyve") whereas "syvende" needs "ende" removed
    # ("syv"), so a failed attempt must not shorten the word for the next.
    for suffix in ("nde", "ende", "te"):
        if lowerstr.endswith(suffix):
            stem = lowerstr[:-len(suffix)]
            if stem in da_numbers:
                return da_numbers[stem]

    return False
|
||||
|
||||
|
||||
def normalize_da(text, remove_articles):
    """
    Danish string normalization.

    Splits the text on whitespace, optionally drops the articles "den" and
    "det", replaces every word found in da_numbers by its value as a
    string, and joins the result with single spaces.

    Args:
        text (str): the text to normalize
        remove_articles (bool): whether "den" and "det" are dropped
    Returns:
        (str): the normalized text
    """
    articles = ("den", "det")
    pieces = []
    for token in text.split():  # split() also collapses repeated spaces
        if remove_articles and token in articles:
            continue
        # Number words become digits, e.g. "en" -> "1"
        pieces.append(str(da_numbers[token]) if token in da_numbers
                      else token)
    return " ".join(pieces)
|
||||
|
||||
|
||||
def extract_numbers_da(text, short_scale=True, ordinals=False):
    """
    Extract every number found in a Danish string.

    Delegates to extract_numbers_generic, handing it the Danish
    pronounce_number_da and extractnumber_da helpers.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. The default is short scale.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead
            of 1/3
    Returns:
        list: list of extracted numbers as floats
    """
    options = {"short_scale": short_scale, "ordinals": ordinals}
    numbers = extract_numbers_generic(text, pronounce_number_da,
                                      extractnumber_da, **options)
    return numbers
|
|
@ -22,6 +22,7 @@ from mycroft.util.lang.parse_pt import *
|
|||
from mycroft.util.lang.parse_es import *
|
||||
from mycroft.util.lang.parse_it import *
|
||||
from mycroft.util.lang.parse_sv import *
|
||||
|
||||
from mycroft.util.lang.parse_de import extractnumber_de
|
||||
from mycroft.util.lang.parse_de import extract_numbers_de
|
||||
from mycroft.util.lang.parse_de import extract_datetime_de
|
||||
|
@ -30,6 +31,10 @@ from mycroft.util.lang.parse_fr import extractnumber_fr
|
|||
from mycroft.util.lang.parse_fr import extract_numbers_fr
|
||||
from mycroft.util.lang.parse_fr import extract_datetime_fr
|
||||
from mycroft.util.lang.parse_fr import normalize_fr
|
||||
from mycroft.util.lang.parse_da import extractnumber_da
|
||||
from mycroft.util.lang.parse_da import extract_numbers_da
|
||||
from mycroft.util.lang.parse_da import extract_datetime_da
|
||||
from mycroft.util.lang.parse_da import normalize_da
|
||||
|
||||
from .log import LOG
|
||||
|
||||
|
@ -111,6 +116,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang="en-us"):
|
|||
return extract_numbers_fr(text, short_scale, ordinals)
|
||||
elif lang.startswith("it"):
|
||||
return extract_numbers_it(text, short_scale, ordinals)
|
||||
elif lang.startswith("da"):
|
||||
return extract_numbers_da(text, short_scale, ordinals)
|
||||
return []
|
||||
|
||||
|
||||
|
@ -146,9 +153,11 @@ def extract_number(text, short_scale=True, ordinals=False, lang="en-us"):
|
|||
return extractnumber_sv(text)
|
||||
elif lang_lower.startswith("de"):
|
||||
return extractnumber_de(text)
|
||||
elif lang_lower.startswith("da"):
|
||||
return extractnumber_da(text)
|
||||
# TODO: extractnumber_xx for other languages
|
||||
_log_unsupported_language(lang_lower,
|
||||
['en', 'es', 'pt', 'it', 'fr', 'sv', 'de'])
|
||||
['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da'])
|
||||
return text
|
||||
|
||||
|
||||
|
@ -259,9 +268,11 @@ def extract_datetime(text, anchorDate=None, lang="en-us", default_time=None):
|
|||
return extract_datetime_sv(text, anchorDate, default_time)
|
||||
elif lang_lower.startswith("de"):
|
||||
return extract_datetime_de(text, anchorDate, default_time)
|
||||
elif lang_lower.startswith("da"):
|
||||
return extract_datetime_da(text, anchorDate, default_time)
|
||||
# TODO: extract_datetime for other languages
|
||||
_log_unsupported_language(lang_lower,
|
||||
['en', 'es', 'pt', 'it', 'fr', 'sv', 'de'])
|
||||
['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da'])
|
||||
return text
|
||||
# ==============================================================
|
||||
|
||||
|
@ -295,9 +306,11 @@ def normalize(text, lang="en-us", remove_articles=True):
|
|||
return normalize_sv(text, remove_articles)
|
||||
elif lang_lower.startswith("de"):
|
||||
return normalize_de(text, remove_articles)
|
||||
elif lang_lower.startswith("da"):
|
||||
return normalize_da(text, remove_articles)
|
||||
# TODO: Normalization for other languages
|
||||
_log_unsupported_language(lang_lower,
|
||||
['en', 'es', 'pt', 'it', 'fr', 'sv', 'de'])
|
||||
['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da'])
|
||||
return text
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,354 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import unittest
|
||||
import datetime
|
||||
|
||||
from mycroft.util.format import nice_number
|
||||
from mycroft.util.format import nice_time
|
||||
from mycroft.util.format import pronounce_number
|
||||
# from mycroft.util.lang.format_da import nice_response_da
|
||||
from mycroft.util.lang.format_da import pronounce_ordinal_da
|
||||
|
||||
# fractions are not capitalized for now
|
||||
# Maps an input number to the exact string that
# nice_number(number, lang="da-dk") is expected to return; iterated by
# TestNiceNumberFormat.test_convert_float_to_nice_number.
NUMBERS_FIXTURE_da = {
    1.435634: '1,436',
    2: '2',
    5.0: '5',
    1234567890: '1234567890',
    12345.67890: '12345,679',
    0.027: '0,027',
    0.5: '1 halv',
    1.333: '1 og 1 trediedel',
    2.666: '2 og 2 trediedele',
    0.25: '1 fjerdedel',
    1.25: '1 og 1 fjerdedel',
    0.75: '3 fjerdedele',
    1.75: '1 og 3 fjerdedele',
    3.4: '3 og 2 femtedele',
    16.8333: '16 og 5 sjettedele',
    12.5714: '12 og 4 syvendedele',
    9.625: '9 og 5 ottendedele',
    6.777: '6 og 7 niendedele',
    3.1: '3 og 1 tiendedel',
    2.272: '2 og 3 elftedele',
    5.583: '5 og 7 tolvtedele',
    8.384: '8 og 5 trettendedele',
    0.071: '1 fjortendedel',
    6.466: '6 og 7 femtendedele',
    8.312: '8 og 5 sejstendedele',
    2.176: '2 og 3 syttendedele',
    200.722: '200 og 13 attendedele',
    7.421: '7 og 8 nittendedele',
    0.05: '1 tyvendedel'
}
|
||||
|
||||
|
||||
# class TestNiceResponse(unittest.TestCase):
|
||||
# def test_replace_ordinal(self):
|
||||
# self.assertEqual(nice_response_da("det er den 31. maj"),
|
||||
# "det er den enogtredifte maj")
|
||||
# self.assertEqual(nice_response_da("Det begynder den 31. maj"),
|
||||
# "Det begynder den enogtrefte maj")
|
||||
# self.assertEqual(nice_response_da("den 31. mai"),
|
||||
# "den enogtrefte maj")
|
||||
# self.assertEqual(nice_response_da("10 ^ 2"), "ti to")
|
||||
|
||||
|
||||
class TestNiceNumberFormat(unittest.TestCase):
    """Tests for nice_number() with Danish ("da-dk") output."""

    def test_convert_float_to_nice_number(self):
        """Each fixture number is rendered as its expected string."""
        for number, number_str in NUMBERS_FIXTURE_da.items():
            actual = nice_number(number, lang="da-dk")
            self.assertEqual(actual, number_str,
                             'should format {} as {} and not {}'.format(
                                 number, number_str, actual))

    def test_specify_danominator(self):
        """An explicit denominator list limits the fractions that are used."""
        # The failure message reports the value that was actually compared
        # (the Danish rendering), not the default-language one.
        actual = nice_number(5.5, lang="da-dk", denominators=[1, 2, 3])
        self.assertEqual(actual, '5 og 1 halv',
                         'should format 5.5 as 5 og 1 halv not {}'.format(
                             actual))

        actual = nice_number(2.333, lang="da-dk", denominators=[1, 2])
        self.assertEqual(actual, '2,333',
                         'should format 2,333 as 2,333 not {}'.format(
                             actual))

    def test_no_speech(self):
        """With speech=False the number is written with digits."""
        # NOTE(review): these calls do not pass lang="da-dk", so they
        # exercise the default language -- confirm whether that is intended.
        actual = nice_number(6.777, speech=False)
        self.assertEqual(actual, '6 7/9',
                         'should format 6.777 as 6 7/9 not {}'.format(
                             actual))

        actual = nice_number(6.0, speech=False)
        self.assertEqual(actual, '6',
                         'should format 6.0 as 6 not {}'.format(actual))
|
||||
|
||||
|
||||
class TestPronounceOrdinal(unittest.TestCase):
    """Tests for pronounce_ordinal_da()."""

    def test_convert_int_da(self):
        """Integers are spelled out as Danish ordinals."""
        expectations = (
            (0, "nulte"),
            (1, "første"),
            (3, "tredie"),
            (5, "femte"),
            (21, "enogtyvende"),
            (2000, "totusindende"),
            (1000, "ettusindende"),
        )
        for value, spoken in expectations:
            self.assertEqual(pronounce_ordinal_da(value), spoken)
        # Disabled case:
        # self.assertEqual(pronounce_ordinal_da(123456),
        #                  "ethundredetreogtyvetusindefirehundredeseksog\
        #                   halvtresende")
|
||||
|
||||
|
||||
class TestPronounceNumber(unittest.TestCase):
    """Tests for pronounce_number() with lang="da-dk"."""

    def _check(self, cases):
        # cases: iterable of (number, extra keyword arguments, expected text)
        for number, options, spoken in cases:
            self.assertEqual(
                pronounce_number(number, lang="da-dk", **options), spoken)

    def test_convert_int_da(self):
        """Positive integers."""
        # Disabled case:
        # self.assertEqual(pronounce_number(123456789123456789, lang="da-dk"),
        #                  "ethundredetreogtyvebilliarder"
        #                  "firehundredeseksoghalvtresbillioner"
        #                  "syvhundredeogfirsmiliarder"
        #                  "ethundredetreogtyvemillioner"
        #                  "firehundredeseksoghalvtrestusindesyvhundredeniog \
        #                   firs")
        self._check((
            (1, {}, "en"),
            (10, {}, "ti"),
            (15, {}, "femten"),
            (20, {}, "tyve"),
            (27, {}, "syvogtyve"),
            (30, {}, "tredive"),
            (33, {}, "treogtredive"),
            (71, {}, "enoghalvfjers"),
            (80, {}, "firs"),
            (74, {}, "fireoghalvfjers"),
            (79, {}, "nioghalvfjers"),
            (91, {}, "enoghalvfems"),
            (97, {}, "syvoghalvfems"),
            (300, {}, "trehundrede"),
        ))

    def test_convert_negative_int_da(self):
        """Negative integers are prefixed with "minus"."""
        self._check((
            (-1, {}, "minus en"),
            (-10, {}, "minus ti"),
            (-15, {}, "minus femten"),
            (-20, {}, "minus tyve"),
            (-27, {}, "minus syvogtyve"),
            (-30, {}, "minus tredive"),
            (-33, {}, "minus treogtredive"),
        ))

    def test_convert_dacimals_da(self):
        """Decimals, with and without an explicit number of places."""
        self._check((
            (1.234, {}, "en komma to tre"),
            (21.234, {}, "enogtyve komma to tre"),
            (21.234, {"places": 1}, "enogtyve komma to"),
            (21.234, {"places": 0}, "enogtyve"),
            (21.234, {"places": 3}, "enogtyve komma to tre fire"),
            (21.234, {"places": 4}, "enogtyve komma to tre fire nul"),
            (21.234, {"places": 5}, "enogtyve komma to tre fire nul nul"),
            (-1.234, {}, "minus en komma to tre"),
            (-21.234, {}, "minus enogtyve komma to tre"),
            (-21.234, {"places": 1}, "minus enogtyve komma to"),
            (-21.234, {"places": 0}, "minus enogtyve"),
            (-21.234, {"places": 3}, "minus enogtyve komma to tre fire"),
            (-21.234, {"places": 4},
             "minus enogtyve komma to tre fire nul"),
            (-21.234, {"places": 5},
             "minus enogtyve komma to tre fire nul nul"),
        ))
|
||||
|
||||
|
||||
# def nice_time(dt, lang="da-dk", speech=True, use_24hour=False,
|
||||
# use_ampm=False):
|
||||
class TestNiceDateFormat_da(unittest.TestCase):
    """Tests for nice_time() with lang="da-dk"."""

    def test_convert_times_da(self):
        """Times are rendered for every speech / 24-hour / am-pm setting."""
        # Keyword combinations tried, in this order, for each time in
        # `matrix`; the expected strings are listed in the same order.
        variants = (
            {},
            {"use_ampm": True},
            {"speech": False},
            {"speech": False, "use_ampm": True},
            {"speech": False, "use_24hour": True},
            {"speech": False, "use_24hour": True, "use_ampm": True},
            {"use_24hour": True, "use_ampm": True},
            {"use_24hour": True, "use_ampm": False},
        )
        # (hour, minute, second) on 2017-01-31 -> expected text per variant
        matrix = (
            ((13, 22, 3),
             ("et toogtyve", "et toogtyve om eftermiddagen",
              "01:22", "01:22 PM", "13:22", "13:22",
              "tretten toogtyve", "tretten toogtyve")),
            ((13, 0, 3),
             ("et", "et om eftermiddagen",
              "01:00", "01:00 PM", "13:00", "13:00",
              "tretten", "tretten")),
            ((13, 2, 3),
             ("et nul to", "et nul to om eftermiddagen",
              "01:02", "01:02 PM", "13:02", "13:02",
              "tretten nul to", "tretten nul to")),
            ((0, 2, 3),
             ("tolv nul to", "tolv nul to om natten",
              "12:02", "12:02 AM", "00:02", "00:02",
              "nul nul to", "nul nul to")),
            ((12, 15, 9),
             ("tolv femten", "tolv femten om eftermiddagen",
              "12:15", "12:15 PM", "12:15", "12:15",
              "tolv femten", "tolv femten")),
            ((19, 40, 49),
             ("syv fyrre", "syv fyrre om aftenen",
              "07:40", "07:40 PM", "19:40", "19:40",
              "nitten fyrre", "nitten fyrre")),
        )
        for (hour, minute, second), expected in matrix:
            dt = datetime.datetime(2017, 1, 31, hour, minute, second)
            for options, wanted in zip(variants, expected):
                self.assertEqual(nice_time(dt, lang="da-dk", **options),
                                 wanted)

        # Times that are checked with a single setting only.
        singles = (
            ((1, 15, 0), {"use_24hour": True}, "et femten"),
            ((1, 35, 0), {}, "et femogtredive"),
            ((1, 45, 0), {}, "et femogfyrre"),
            ((4, 50, 0), {}, "fire halvtres"),
            ((5, 55, 0), {}, "fem femoghalvtres"),
            ((5, 30, 0), {"use_ampm": True}, "fem tredive om morgenen"),
        )
        for (hour, minute, second), options, wanted in singles:
            dt = datetime.datetime(2017, 1, 31, hour, minute, second)
            self.assertEqual(nice_time(dt, lang="da-dk", **options), wanted)
||||
|
||||
# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@ -0,0 +1,189 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import unittest
|
||||
from datetime import datetime, time
|
||||
|
||||
from mycroft.util.parse import extract_datetime
|
||||
from mycroft.util.parse import extract_number
|
||||
from mycroft.util.parse import normalize
|
||||
|
||||
|
||||
class TestNormalize(unittest.TestCase):
|
||||
def test_articles(self):
|
||||
self.assertEqual(
|
||||
normalize("dette er en test", lang="da-dk", remove_articles=True),
|
||||
"dette er 1 test")
|
||||
self.assertEqual(
|
||||
normalize("og endnu en test", lang="da-dk", remove_articles=True),
|
||||
"og endnu 1 test")
|
||||
self.assertEqual(normalize("dette er en extra-test",
|
||||
lang="da-dk", remove_articles=False),
|
||||
"dette er 1 extra-test")
|
||||
|
||||
def test_extract_number(self):
|
||||
self.assertEqual(extract_number("dette er den første test",
|
||||
lang="da-dk"), 1)
|
||||
# self.assertEqual(extract_number("dette er den 1. test",
|
||||
# lang="da-dk"),
|
||||
# 1)
|
||||
self.assertEqual(extract_number("dette er den anden test",
|
||||
lang="da-dk"), 2)
|
||||
# self.assertEqual(extract_number("dette er den 2. test",
|
||||
# lang="da-dk"),
|
||||
# 2)
|
||||
self.assertEqual(
|
||||
extract_number("dette er den tredie test", lang="da-dk"), 3)
|
||||
self.assertEqual(
|
||||
extract_number("dette er test nummer fire", lang="da-dk"), 4)
|
||||
self.assertEqual(
|
||||
extract_number("en trediedel af en kop", lang="da-dk"), 1.0 / 3.0)
|
||||
self.assertEqual(extract_number("tre kopper", lang="da-dk"), 3)
|
||||
self.assertEqual(extract_number("1/3 kop", lang="da-dk"),
|
||||
1.0 / 3.0)
|
||||
# self.assertEqual(extract_number("en fjerdelel kop", lang="da-dk"),
|
||||
# 0.25)
|
||||
# self.assertEqual(extract_number("1/4 kop", lang="da-dk"), 0.25)
|
||||
# self.assertEqual(extract_number("kvart kop", lang="da-dk"), 0.25)
|
||||
# self.assertEqual(extract_number("2/3 kop", lang="da-dk"), 2.0 / 3.0)
|
||||
# self.assertEqual(extract_number("3/4 kop", lang="da-dk"), 3.0 / 4.0)
|
||||
# self.assertEqual(extract_number("1 og 3/4 kop", lang="da-dk"), 1.75)
|
||||
# self.assertEqual(extract_number("1 og en halv kop", lang="da-dk"),
|
||||
# 1.5)
|
||||
# self.assertEqual(
|
||||
# extract_number("en og en halv kop", lang="da-dk"), 1.5)
|
||||
# self.assertEqual(extract_number("tre fjerdele kop", lang="da-dk"),
|
||||
# 3.0 / 4.0)
|
||||
# self.assertEqual(extract_number("tre fjerdedel kop", lang="da-dk"),
|
||||
# 3.0 / 4.0)
|
||||
|
||||
def test_extractdatetime_de(self):
|
||||
def extractWithFormat(text):
|
||||
date = datetime(2017, 6, 27, 0, 0)
|
||||
[extractedDate, leftover] = extract_datetime(text, date,
|
||||
lang="da-dk", )
|
||||
extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S")
|
||||
return [extractedDate, leftover]
|
||||
|
||||
def testExtract(text, expected_date, expected_leftover):
|
||||
res = extractWithFormat(text)
|
||||
self.assertEqual(res[0], expected_date)
|
||||
self.assertEqual(res[1], expected_leftover)
|
||||
|
||||
testExtract("sæt frisøraftale på fredag",
|
||||
"2017-06-30 00:00:00", "sæt frisøraftale")
|
||||
testExtract("hvordan er vejret i overmorgen?",
|
||||
"2017-06-29 00:00:00", "hvordan er vejret")
|
||||
testExtract("mind mig om det 10:45 i aften",
|
||||
"2017-06-27 22:45:00", "mind mig")
|
||||
testExtract("hvordan er vejret fredag om morgenen",
|
||||
"2017-06-30 08:00:00", "hvordan er vejret")
|
||||
# testExtract("hvordan er vejret i morgen",
|
||||
# "2017-06-28 00:00:00", "hvordan er vejret")
|
||||
testExtract(
|
||||
"påmind mig at ringe min mor om 8 uger og 2 dage",
|
||||
"2017-08-24 00:00:00", "påmind mig at ringe min mor")
|
||||
testExtract("afspil rick astley musik 2 dage fra fredag",
|
||||
"2017-07-02 00:00:00", "afspil rick astley musik")
|
||||
testExtract("start inversionen 3:45 pm på torsdag",
|
||||
"2017-06-29 15:45:00", "start inversionen")
|
||||
testExtract("på mandag bestil kager fra bageren",
|
||||
"2017-07-03 00:00:00", "bestil kager fra bageren")
|
||||
testExtract("spil happy birthday musik om 5 år fra nu",
|
||||
"2022-06-27 00:00:00", "spil happy birthday musik")
|
||||
testExtract("skype mor klokken 12:45 pm næste torsdag",
|
||||
"2017-07-06 12:45:00", "skype mor")
|
||||
testExtract("hvordan er vejret på næste torsdag",
|
||||
"2017-07-06 00:00:00", "hvordan er vejret")
|
||||
testExtract("hvordan er vejret næste fredag morgen",
|
||||
"2017-07-07 08:00:00", "hvordan er vejret")
|
||||
testExtract("hvordan er vejret næste fredag aften",
|
||||
"2017-07-07 19:00:00", "hvordan er vejret")
|
||||
testExtract("hvordan er vejret næste fredag eftermiddag",
|
||||
"2017-07-07 15:00:00", "hvordan er vejret")
|
||||
testExtract("påmind mig at ringe min mor den tredie august",
|
||||
"2017-08-03 00:00:00", "påmind mig at ringe min mor")
|
||||
testExtract("køb fyrværkeri den enogtyvende juli",
|
||||
"2017-07-21 00:00:00", "køb fyrværkeri")
|
||||
testExtract("hvordan er vejret 2 uger fra næste fredag",
|
||||
"2017-07-21 00:00:00", "hvordan er vejret")
|
||||
testExtract("hvordan er vejret på onsdag klokken 07:00",
|
||||
"2017-06-28 07:00:00", "hvordan er vejret")
|
||||
testExtract("hvordan er vejret på onsdag klokken 7",
|
||||
"2017-06-28 07:00:00", "hvordan er vejret")
|
||||
testExtract("marker en termin klokken 12:45 på næste torsdag",
|
||||
"2017-07-06 12:45:00", "marker en termin")
|
||||
testExtract("hvordan er vejret på torsdag",
|
||||
"2017-06-29 00:00:00", "hvordan er vejret")
|
||||
testExtract("forbered et besøg på 2 uger og 6 dage fra på lørdag",
|
||||
"2017-07-21 00:00:00", "forbered et besøg")
|
||||
testExtract("begynd invasionen klokken 03:45 på torsdag",
|
||||
"2017-06-29 03:45:00", "begynd invasionen")
|
||||
testExtract("begynd invasionen klokken 3 om natten på torsdag",
|
||||
"2017-06-29 03:00:00", "begynd invasionen")
|
||||
testExtract("begynd invasionen klokken 8 am på torsdag",
|
||||
"2017-06-29 08:00:00", "begynd invasionen")
|
||||
testExtract("start festen klokken 8 om aftenen på torsdag",
|
||||
"2017-06-29 20:00:00", "start festen")
|
||||
testExtract("start invasionen klokken 8 om aftenen på torsdag",
|
||||
"2017-06-29 20:00:00", "start invasionen")
|
||||
testExtract("start invasionen på torsdag ved middag",
|
||||
"2017-06-29 12:00:00", "start invasionen")
|
||||
# testExtract("start invasionen på torsdag om eftermiddagen",
|
||||
# "2017-06-29 00:00:00", "start invasionen")
|
||||
testExtract("start invasionen på torsdag klokken 5",
|
||||
"2017-06-29 05:00:00", "start invasionen")
|
||||
testExtract("husk at vågne op om 4 år",
|
||||
"2021-06-27 00:00:00", "husk at vågne op")
|
||||
testExtract("husk at vågne op om 4 år og 4 dage",
|
||||
"2021-07-01 00:00:00", "husk at vågne op")
|
||||
# testExtract("hvordan er vejret om 3 dage fra i morgen",
|
||||
# "2017-07-01 00:00:00", "hvordan er vejret")
|
||||
# testExtract("tredie december",
|
||||
# "2017-12-03 00:00:00", "")
|
||||
# testExtract("lad os mødes klokken 8:00 om aftenen",
|
||||
# "2017-06-27 20:00:00", "lad os mødes")
|
||||
|
||||
def test_extractdatetime_default_da(self):
|
||||
default = time(9, 0, 0)
|
||||
anchor = datetime(2017, 6, 27, 0, 0)
|
||||
res = extract_datetime("lad os mødes på fredag klokken 9 om morgenen",
|
||||
anchor, lang='da-dk', default_time=default)
|
||||
self.assertEqual(default, res[0].time())
|
||||
|
||||
def test_spaces(self):
|
||||
self.assertEqual(normalize(" dette er en test", lang="da-dk"),
|
||||
"dette er 1 test")
|
||||
self.assertEqual(normalize(" dette er en test ",
|
||||
lang="da-dk"), "dette er 1 test")
|
||||
|
||||
def test_numbers(self):
|
||||
self.assertEqual(
|
||||
normalize("dette er en to tre test", lang="da-dk"),
|
||||
"dette er 1 2 3 test")
|
||||
self.assertEqual(
|
||||
normalize("dette er fire fem seks test", lang="da-dk"),
|
||||
"dette er 4 5 6 test")
|
||||
self.assertEqual(
|
||||
normalize("dette er syv otte ni test", lang="da-dk"),
|
||||
"dette er 7 8 9 test")
|
||||
self.assertEqual(
|
||||
normalize("dette er ti elve tolv test", lang="da-dk"),
|
||||
"dette er 10 11 12 test")
|
||||
|
||||
|
||||
# Script entry point: run this module's unittest suite when the file is
# executed directly rather than imported by a test runner.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Reference in New Issue