Issue-1375 - Fix and complete french translation
parent
acbebac87f
commit
3a11f39d7d
|
@ -0,0 +1,3 @@
|
|||
annule tout
|
||||
oublie ça
|
||||
laisse tomber
|
|
@ -1,2 +1,2 @@
|
|||
Recherche de mise à jours
|
||||
Un instant, le temps que je me mette à jour
|
||||
Recherche de mises à jour
|
||||
Un instant, je me mets à jour
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
Je suis réveillé
|
||||
Je suis maintenant réveillé
|
|
@ -1,4 +1,5 @@
|
|||
Désolé, je n'ai pas compris
|
||||
Je crains ne pas avoir compris
|
||||
J'ai bien peur de ne pas avoir compris
|
||||
Peux-tu répéter ?
|
||||
Peux-tu répéter s'il te plait ?
|
||||
Peux-tu répéter, s'il te plaît ?
|
||||
Aurais-tu l'obligeance de répéter ?
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Les données d'interaction ne seront plus envoyées à Mycroft A.I.
|
|
@ -0,0 +1 @@
|
|||
Dorénavant, je vais envoyer les données d'interaction à Mycroft A.I. pour être plus intelligent. Pour l'instant, cela inclut les enregistrements des activations par mot d'éveil.
|
|
@ -0,0 +1 @@
|
|||
REDÉMARRAGE...
|
|
@ -0,0 +1 @@
|
|||
< < < SYNC < < <
|
|
@ -0,0 +1 @@
|
|||
< < < MISE À JOUR < < <
|
|
@ -0,0 +1 @@
|
|||
Bonjour, je suis Mycroft, ton nouvel assistant. Pour t'assister, j'ai besoin d'être connecté à Internet. Tu peux me connecter avec un câble réseau, ou bien utiliser le wi-fi. Suis ces instructions pour configurer le wi-fi :
|
|
@ -1,4 +1,4 @@
|
|||
Il semblerait que je ne sois pas connecté à internet
|
||||
Je ne pense pas être connecté à internet
|
||||
Je n'arrive pas à accéder à internet
|
||||
Je ne peux pas accèder à internet
|
||||
On dirait que je ne suis pas connecté à Internet
|
||||
Je ne pense pas être connecté à Internet
|
||||
Je n'arrive pas à accéder à Internet
|
||||
Je ne peux pas accéder à Internet
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
mycroft: maycroft
|
|
@ -1 +1 @@
|
|||
J'ai été remis à zéro (valeurs d'usine)
|
||||
J'ai été réinitialisé.
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
Je suis actuellement à jour
|
||||
Compétences à jour. Je suis prêt à t'aider
|
||||
Je suis à jour, maintenant.
|
||||
Compétences mises à jour. Je suis prêt à t'aider.
|
||||
|
|
|
@ -1,2 +1 @@
|
|||
Désolé, je n'ai pas pu installer les compétences par défaut
|
||||
Une erreur s'est produite lors de l'installation des compétences par défaut
|
||||
Une erreur est survenue lors de la mise à jour des compétences.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Je dois redémarrer après avoir synchronisé mon horloge avec internet, à tout de suite.
|
|
@ -16,6 +16,7 @@
|
|||
from mycroft.util.lang.format_en import *
|
||||
from mycroft.util.lang.format_pt import *
|
||||
from mycroft.util.lang.format_it import *
|
||||
from mycroft.util.lang.format_fr import *
|
||||
|
||||
|
||||
def nice_number(number, lang="en-us", speech=True, denominators=None):
|
||||
|
@ -52,6 +53,8 @@ def nice_number(number, lang="en-us", speech=True, denominators=None):
|
|||
return nice_number_pt(result)
|
||||
elif lang_lower.startswith("it"):
|
||||
return nice_number_it(result)
|
||||
elif lang_lower.startswith("fr"):
|
||||
return nice_number_fr(result)
|
||||
|
||||
# Default to the raw number for unsupported languages,
|
||||
# hopefully the STT engine will pronounce understandably.
|
||||
|
@ -80,6 +83,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
|
|||
return nice_time_en(dt, speech, use_24hour, use_ampm)
|
||||
elif lang_lower.startswith("it"):
|
||||
return nice_time_it(dt, speech, use_24hour, use_ampm)
|
||||
elif lang_lower.startswith("fr"):
|
||||
return nice_time_fr(dt, speech, use_24hour, use_ampm)
|
||||
|
||||
# TODO: Other languages
|
||||
return str(dt)
|
||||
|
@ -101,6 +106,8 @@ def pronounce_number(number, lang="en-us", places=2):
|
|||
return pronounce_number_en(number, places=places)
|
||||
elif lang_lower.startswith("it"):
|
||||
return pronounce_number_it(number, places=places)
|
||||
elif lang_lower.startswith("fr"):
|
||||
return pronounce_number_fr(number, places=places)
|
||||
|
||||
# Default to just returning the numeric value
|
||||
return str(number)
|
||||
|
|
|
@ -0,0 +1,278 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
""" Format functions for french (fr)
|
||||
|
||||
Todo:
|
||||
* nice_number should leave number formatting to nice_number_fr
|
||||
"""
|
||||
|
||||
# Spoken form of every integer that has a dedicated entry. The values
# missing from this table (17-19, 21-29, ...) are composed from these
# entries by pronounce_number_fr.
NUM_STRING_FR = {
    # 0-9
    0: 'zéro', 1: 'un', 2: 'deux', 3: 'trois', 4: 'quatre',
    5: 'cinq', 6: 'six', 7: 'sept', 8: 'huit', 9: 'neuf',
    # 10-16
    10: 'dix', 11: 'onze', 12: 'douze', 13: 'treize',
    14: 'quatorze', 15: 'quinze', 16: 'seize',
    # round tens
    20: 'vingt', 30: 'trente', 40: 'quarante', 50: 'cinquante',
    60: 'soixante', 70: 'soixante-dix', 80: 'quatre-vingt',
    90: 'quatre-vingt-dix',
}

# Singular name of a fraction, keyed by its denominator (2 to 20).
FRACTION_STRING_FR = {
    2: 'demi', 3: 'tiers', 4: 'quart',
    5: 'cinquième', 6: 'sixième', 7: 'septième', 8: 'huitième',
    9: 'neuvième', 10: 'dixième', 11: 'onzième', 12: 'douzième',
    13: 'treizième', 14: 'quatorzième', 15: 'quinzième',
    16: 'seizième', 17: 'dix-septième', 18: 'dix-huitième',
    19: 'dix-neuvième', 20: 'vingtième',
}
|
||||
|
||||
|
||||
def nice_number_fr(result):
    """
    Helper for nice_number

    Turn a mixed number such as (1 1/3) into a speakable string such as
    "1 et 1 tiers".

    Args:
        result (int,int,int): the mixed number; whole, numerator,
                              denominator
    Return:
        (str): spoken version of the number
    """
    whole, num, den = result
    if num == 0:
        # plain integer, there is no fraction to name
        return str(whole)

    fraction_word = FRACTION_STRING_FR[den]
    if num > 1 and den != 3:
        # several parts take a plural "s", except for 3 ("tiers")
        fraction_word += 's'

    if whole == 0:
        # fraction only: "un demi", "4 tiers"
        numerator = 'un' if num == 1 else num
        return '{} {}'.format(numerator, fraction_word)

    if num == 1:
        if den == 2:
            # "1 et demi"
            return '{} et {}'.format(whole, fraction_word)
        # "1 et 1 tiers"
        return '{} et 1 {}'.format(whole, fraction_word)

    # "2 et 3 quarts"
    return '{} et {} {}'.format(whole, num, fraction_word)
|
||||
|
||||
|
||||
def pronounce_number_fr(num, places=2):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'cinq virgule deux'

    Args:
        num(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak; extra digits are
                     dropped, not rounded
    Returns:
        (str): The pronounced number, or str(num) when abs(num) >= 100
    """
    if abs(num) >= 100:
        # TODO: Support for numbers over 100
        return str(num)

    result = ""
    if num < 0:
        result = "moins "
    num = abs(num)

    # Spell the integer part on its own so that a fractional part cannot
    # leak into the tens/ones split (16.5 is "seize virgule cinq").
    whole = int(num)
    if whole > 16:
        tens, ones = divmod(whole, 10)
        tens *= 10
        if ones == 0:
            # "quatre-vingts" takes its s when no other numeral follows
            result += "quatre-vingts" if tens == 80 else NUM_STRING_FR[tens]
        elif ones == 1 and 20 <= tens <= 60:
            # vingt-et-un, trente-et-un, ... soixante-et-un
            result += NUM_STRING_FR[tens] + "-et-" + NUM_STRING_FR[ones]
        elif whole == 71:
            result += "soixante-et-onze"
        elif tens in (70, 90):
            # 70s and 90s count on from 60 and 80: soixante-douze,
            # quatre-vingt-dix-sept
            result += NUM_STRING_FR[tens - 10] + "-"
            if ones < 7:
                result += NUM_STRING_FR[10 + ones]
            else:
                result += NUM_STRING_FR[10] + "-" + NUM_STRING_FR[ones]
        else:
            # dix-sept, vingt-deux, quatre-vingt-un, ...
            result += NUM_STRING_FR[tens] + "-" + NUM_STRING_FR[ones]
    else:
        result += NUM_STRING_FR[whole]

    # Deal with decimal part
    if places > 0 and num != whole:
        # A fixed-point rendering avoids binary float noise (0.29 * 100
        # evaluates to 28.999...) and keeps zeros inside the fraction
        # (5.05 is "cinq virgule zéro cinq").
        decimals = "{:.{}f}".format(num, max(places, 10)).split(".")[1]
        decimals = decimals[:places].rstrip("0")
        if decimals:
            result += " virgule"
            for digit in decimals:
                result += " " + NUM_STRING_FR[int(digit)]
    return result
|
||||
|
||||
|
||||
def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'cinq heures et demi' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" or "2:22 PM" with am/pm, "3:01" or "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # "treize heures trente"
        if dt.hour == 0:
            speak = "minuit"
        elif dt.hour == 12:
            speak = "midi"
        elif dt.hour == 1:
            speak = "une heure"
        else:
            speak = pronounce_number_fr(dt.hour) + " heures"

        if dt.minute != 0:
            speak += " " + pronounce_number_fr(dt.minute)
        return speak

    # 12-hour speech. From 35 past onwards, five-minute marks are read
    # relative to the next hour ("trois heures moins le quart").
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        # wrap so that 23:45 is announced against "minuit", not hour 24
        hour = (dt.hour + 1) % 24
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak = "minuit"
    elif hour == 12:
        speak = "midi"
    elif hour in (1, 13):
        speak = "une heure"
    elif hour < 13:
        speak = pronounce_number_fr(hour) + " heures"
    else:
        speak = pronounce_number_fr(hour - 12) + " heures"

    if minute == 15:
        speak += " et quart"
    elif minute == 30:
        speak += " et demi"
    elif minute == -15:
        speak += " moins le quart"
    elif minute != 0:
        # a negative minute is spoken as "moins ..." by the helper
        speak += " " + pronounce_number_fr(minute)

    if use_ampm:
        # midi and minuit need no qualifier
        if hour > 17:
            speak += " du soir"
        elif hour > 12:
            speak += " de l'après-midi"
        elif 0 < hour < 12:
            speak += " du matin"

    return speak
|
|
@ -0,0 +1,950 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
""" Parse functions for french (fr)
|
||||
|
||||
Todo:
|
||||
* extractnumber_fr: ordinal numbers ("le cinquième")
|
||||
* extractnumber_fr: numbers greater than 999 999 ("cinq millions")
|
||||
* extract_datetime_fr: "quatrième lundi de janvier"
|
||||
* extract_datetime_fr: "5 heures moins le quart"
|
||||
* extract_datetime_fr: "troisième lundi de juillet"
|
||||
* get_gender_fr
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions
|
||||
|
||||
# Only definite and partitive articles are listed. The indefinite
# articles "un" and "une" cannot be suppressed because in French
# "un cheval" means both "a horse" and "one horse".
articles_fr = ["le", "la", "du", "de", "les", "des"]

# Value of every number word that has its own entry, including the
# Belgian/Swiss variants (septante, octante, huitante, nonante).
numbers_fr = {
    "zéro": 0,
    "un": 1, "une": 1, "premier": 1, "première": 1,
    "deux": 2, "second": 2,
    "trois": 3, "quatre": 4, "cinq": 5, "six": 6, "sept": 7,
    "huit": 8, "neuf": 9, "dix": 10, "onze": 11, "douze": 12,
    "treize": 13, "quatorze": 14, "quinze": 15, "seize": 16,
    # round tens
    "vingt": 20, "trente": 30, "quarante": 40, "cinquante": 50,
    "soixante": 60,
    "soixante-dix": 70, "septante": 70,
    "quatre-vingt": 80, "quatre-vingts": 80, "octante": 80,
    "huitante": 80,
    "quatre-vingt-dix": 90, "nonante": 90,
    # powers of ten
    "cent": 100, "cents": 100,
    "mille": 1000, "mil": 1000, "millier": 1000, "milliers": 1000,
    "million": 1000000, "millions": 1000000,
    "milliard": 1000000000, "milliards": 1000000000,
}
|
||||
|
||||
|
||||
def number_parse_fr(words, i):
    """
    Takes in a list of words (strings without whitespace) and
    extracts a number that starts at the given index.

    Args:
        words (array): the list to extract a number from
        i (int): the index in words where to look for the number
    Returns:
        tuple with number, index of next word after the number.

        Returns None if no number was found.
    """
    def cte_fr(i, s):
        # Return (s, index of next word) when words[i] equals s,
        # otherwise None.
        if i < len(words) and s == words[i]:
            return s, i + 1
        return None

    def number_word_fr(i, low, high):
        # Return (number, index of next word) when words[i] is a single
        # (possibly hyphenated) number word whose value lies within
        # [low, high], otherwise None.
        if i >= len(words):
            return None

        # Numbers with their own entry
        # [0-16,20,30,40,50,60,70,80,90,100,1000,...]
        val = numbers_fr.get(words[i])
        if val is not None:
            if low <= val <= high:
                return val, i + 1
            return None

        # The number may be hyphenated (numbers [17-99])
        parts = words[i].split('-')
        if len(parts) < 2:
            return None
        val1 = numbers_fr.get(parts[0])
        if not val1:
            return None

        i1 = 1
        # For [81-99], e.g. "quatre-vingt-deux"
        if parts[0] == "quatre" and parts[1] == "vingt":
            val1 = 80
            i1 = 2
        if i1 >= len(parts):
            return None

        if parts[i1] in ("et", "dix"):
            # "et" introduces [21,31,41,51,61,71], "dix" introduces
            # [77-79],[97-99] e.g. "soixante-dix-sept"; both need one
            # more part, so a dangling "vingt-et" is not a number.
            if i1 + 1 >= len(parts):
                return None
            val2 = numbers_fr.get(parts[i1 + 1])
            if val2 is None:
                return None
            if parts[i1] == "dix":
                val2 += 10
        else:
            val2 = numbers_fr.get(parts[i1])

        if not val2:
            return None
        val = val1 + val2
        if low <= val <= high:
            return val, i + 1
        return None

    def number_1_99_fr(i):
        # Return (number, index of next word) when a number between
        # 1 and 99 starts at words[i], otherwise None.

        # Is it a number between 1 and 16?
        found = number_word_fr(i, 1, 16)
        if found:
            return found

        # Is it a number between 10 and 99?
        found = number_word_fr(i, 10, 99)
        if not found:
            return None
        val1, i1 = found
        # The number may be written without hyphens: "vingt et un",
        # "soixante et onze"
        conjunction = cte_fr(i1, "et")
        if conjunction:
            unit = number_word_fr(conjunction[1], 1, 11)
            if unit:
                val3, i3 = unit
                return val1 + val3, i3
        return found

    def number_1_999_fr(i):
        # Return (number, index of next word) when a number between
        # 1 and 999 starts at words[i], otherwise None.

        # Is it 100 ?
        hundreds = number_word_fr(i, 100, 100)

        # Is it [200,300,400,500,600,700,800,900]?
        if not hundreds:
            multiplier = number_word_fr(i, 2, 9)
            if multiplier:
                val_mult, i_mult = multiplier
                cent = number_word_fr(i_mult, 100, 100)
                if cent:
                    hundreds = val_mult * 100, cent[1]

        if hundreds:
            val1, i1 = hundreds
            rest = number_1_99_fr(i1)
            if rest:
                val2, i2 = rest
                return val1 + val2, i2
            return hundreds

        # [1-99]
        return number_1_99_fr(i)

    def number_fr(i):
        # Return (number, index of next word) when a number between
        # 0 and 999,999 starts at words[i], otherwise None.

        # check for zero
        zero = number_word_fr(i, 0, 0)
        if zero:
            return zero

        # check for [1-999]; a bare "mille" counts as one thousand
        below_thousand = number_1_999_fr(i)
        if below_thousand:
            val1, i1 = below_thousand
        else:
            val1, i1 = 1, i

        # check for 1000
        thousand = number_word_fr(i1, 1000, 1000)
        if thousand:
            # it's [1000-999000]
            i2 = thousand[1]
            # check again for [1-999]
            rest = number_1_999_fr(i2)
            if rest:
                val3, i3 = rest
                return val1 * 1000 + val3, i3
            return val1 * 1000, i2

        # either the [1-999] match or None
        return below_thousand

    return number_fr(i)
|
||||
|
||||
|
||||
def extractnumber_fr(text):
    """Takes in a string and extracts a number.

    The text is first passed through normalize_fr. Values separated by
    "et", "plus" or "+" are summed; otherwise the last number found wins.

    Args:
        text (str): the string to extract a number from
    Returns:
        (int, float or str): The number extracted, or the normalized text
                             when no non-zero number was found.
    """
    # normalize text, remove articles
    text = normalize_fr(text, True)
    # split words by whitespace
    aWords = text.split()
    count = 0
    result = None
    add = False
    while count < len(aWords):
        val = None
        word = aWords[count]
        wordNext = None
        if count < (len(aWords) - 1):
            wordNext = aWords[count + 1]

        if word in ["et", "plus", "+"]:
            count += 1
            add = True
            continue

        # is current word a numeric number?
        if word.isdigit():
            val = int(word)
            count += 1
        elif is_numeric(word):
            val = float(word)
            count += 1
        # is current word the denominator of a fraction?
        # NOTE(review): isFractional_fr is defined elsewhere; presumably
        # it returns the fraction's value or a falsy value - confirm.
        elif isFractional_fr(word):
            val = isFractional_fr(word)
            count += 1

        # is current word the numerator of a fraction?
        if val and wordNext:
            valNext = isFractional_fr(wordNext)
            if valNext:
                val = float(val) * valNext
                count += 1

        # "is None" rather than "not val": a literal 0 is a number and
        # must not advance the cursor a second time.
        if val is None:
            count += 1
            # is current word a numeric fraction like "2/3"?
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        # is current word followed by a decimal value?
        if wordNext == "virgule":
            zeros = 0
            newWords = aWords[count + 1:]
            # count the number of zeros after the decimal sign
            for decimalWord in newWords:
                if decimalWord == "zéro" or decimalWord == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = None
            # extract the number after the zeros; there may be none when
            # "virgule" ends the sentence or only zeros follow it
            if zeros < len(newWords) and newWords[zeros].isdigit():
                afterDotVal = newWords[zeros]
                countDot = count + zeros + 2
            # if a number was extracted (since comma is also a
            # punctuation sign)
            if afterDotVal:
                count = countDot
                if not val:
                    val = 0
                # add the zeros
                afterDotString = zeros * "0" + afterDotVal
                val = float(str(val) + "." + afterDotString)

        if val:
            if add and result is not None:
                result += val
            else:
                # first number found, even after a leading "et"/"plus"
                result = val
            add = False

    if not result:
        return text

    return result
|
||||
|
||||
|
||||
def extract_datetime_fr(string, currentDate=None):
|
||||
def clean_string(s):
|
||||
"""
|
||||
cleans the input string of unneeded punctuation and capitalization
|
||||
among other things.
|
||||
"""
|
||||
s = normalize_fr(s, True)
|
||||
wordList = s.split()
|
||||
for idx, word in enumerate(wordList):
|
||||
# remove comma and dot if it's not a number
|
||||
if word[-1] in [",", "."]:
|
||||
word = word[:-1]
|
||||
wordList[idx] = word
|
||||
|
||||
return wordList
|
||||
|
||||
def date_found():
|
||||
return found or \
|
||||
(
|
||||
datestr != "" or
|
||||
yearOffset != 0 or monthOffset != 0 or
|
||||
dayOffset is True or hrOffset != 0 or
|
||||
hrAbs != 0 or minOffset != 0 or
|
||||
minAbs != 0 or secOffset != 0
|
||||
)
|
||||
|
||||
def is_ordinal(word):
|
||||
if word:
|
||||
ordinals = ["er", "ère", "ème", "e", "nd", "nde"]
|
||||
for ordinal in ordinals:
|
||||
if word[0].isdigit() and ordinal in word:
|
||||
word = word.replace(ordinal, "")
|
||||
return int(word)
|
||||
|
||||
return None
|
||||
|
||||
if string == "":
|
||||
return None
|
||||
if currentDate is None:
|
||||
currentDate = datetime.now()
|
||||
|
||||
found = False
|
||||
daySpecified = False
|
||||
dayOffset = False
|
||||
monthOffset = 0
|
||||
yearOffset = 0
|
||||
dateNow = currentDate
|
||||
today = dateNow.strftime("%w")
|
||||
currentYear = dateNow.strftime("%Y")
|
||||
fromFlag = False
|
||||
datestr = ""
|
||||
hasYear = False
|
||||
timeQualifier = ""
|
||||
|
||||
timeQualifiersList = ["matin", "après-midi", "soir"]
|
||||
markers = ["à", "dès", "autour", "vers", "environs", "dans",
|
||||
"ce", "cette", "après"]
|
||||
days = ["lundi", "mardi", "mercredi",
|
||||
"jeudi", "vendredi", "samedi", "dimanche"]
|
||||
months = ["janvier", "février", "mars", "avril", "mai", "juin",
|
||||
"juillet", "août", "septembre", "octobre", "novembre",
|
||||
"décembre"]
|
||||
monthsShort = ["jan", "fév", "mar", "avr", "mai", "juin", "juil", "aoû",
|
||||
"sept", "oct", "nov", "déc"]
|
||||
# needed for format functions
|
||||
months_en = ['january', 'february', 'march', 'april', 'may', 'june',
|
||||
'july', 'august', 'september', 'october', 'november',
|
||||
'december']
|
||||
|
||||
words = clean_string(string)
|
||||
|
||||
for idx, word in enumerate(words):
|
||||
if word == "":
|
||||
continue
|
||||
wordPrevPrevPrev = words[idx - 3] if idx > 2 else ""
|
||||
wordPrevPrev = words[idx - 2] if idx > 1 else ""
|
||||
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
|
||||
|
||||
start = idx
|
||||
used = 0
|
||||
# save timequalifier for later
|
||||
if word in timeQualifiersList:
|
||||
timeQualifier = word
|
||||
# parse aujourd'hui, demain, après-demain
|
||||
elif word == "aujourd'hui" and not fromFlag:
|
||||
dayOffset = 0
|
||||
used += 1
|
||||
elif word == "demain" and not fromFlag:
|
||||
dayOffset = 1
|
||||
used += 1
|
||||
elif word == "après-demain" and not fromFlag:
|
||||
dayOffset = 2
|
||||
used += 1
|
||||
# parse 5 jours, 10 semaines, semaine dernière, semaine prochaine
|
||||
elif word in ["jour", "jours"]:
|
||||
if wordPrev.isdigit():
|
||||
dayOffset += int(wordPrev)
|
||||
start -= 1
|
||||
used = 2
|
||||
# "3e jour"
|
||||
elif is_ordinal(wordPrev) is not None:
|
||||
dayOffset += is_ordinal(wordPrev) - 1
|
||||
start -= 1
|
||||
used = 2
|
||||
elif word in ["semaine", "semaines"] and not fromFlag:
|
||||
if wordPrev[0].isdigit():
|
||||
dayOffset += int(wordPrev) * 7
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordNext in ["prochaine", "suivante"]:
|
||||
dayOffset = 7
|
||||
used = 2
|
||||
elif wordNext in ["dernière", "précédente"]:
|
||||
dayOffset = -7
|
||||
used = 2
|
||||
# parse 10 mois, mois prochain, mois dernier
|
||||
elif word == "mois" and not fromFlag:
|
||||
if wordPrev[0].isdigit():
|
||||
monthOffset = int(wordPrev)
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordNext in ["prochain", "suivant"]:
|
||||
monthOffset = 1
|
||||
used = 2
|
||||
elif wordNext in ["dernier", "précédent"]:
|
||||
monthOffset = -1
|
||||
used = 2
|
||||
# parse 5 ans, an prochain, année dernière
|
||||
elif word in ["an", "ans", "année", "années"] and not fromFlag:
|
||||
if wordPrev[0].isdigit():
|
||||
yearOffset = int(wordPrev)
|
||||
start -= 1
|
||||
used = 2
|
||||
elif wordNext in ["prochain", "prochaine", "suivant", "suivante"]:
|
||||
yearOffset = 1
|
||||
used = 2
|
||||
elif wordNext in ["dernier", "dernière", "précédent",
|
||||
"précédente"]:
|
||||
yearOffset = -1
|
||||
used = 2
|
||||
# parse lundi, mardi etc., and lundi prochain, mardi dernier, etc.
|
||||
elif word in days and not fromFlag:
|
||||
d = days.index(word)
|
||||
dayOffset = (d + 1) - int(today)
|
||||
used = 1
|
||||
if dayOffset < 0:
|
||||
dayOffset += 7
|
||||
if wordNext in ["prochain", "suivant"]:
|
||||
dayOffset += 7
|
||||
used += 1
|
||||
elif wordNext in ["dernier", "précédent"]:
|
||||
dayOffset -= 7
|
||||
used += 1
|
||||
# parse 15 juillet, 15 juil
|
||||
elif word in months or word in monthsShort and not fromFlag:
|
||||
try:
|
||||
m = months.index(word)
|
||||
except ValueError:
|
||||
m = monthsShort.index(word)
|
||||
used += 1
|
||||
datestr = months_en[m]
|
||||
if wordPrev and (wordPrev[0].isdigit()):
|
||||
datestr += " " + wordPrev
|
||||
start -= 1
|
||||
used += 1
|
||||
else:
|
||||
datestr += " 1"
|
||||
if wordNext and wordNext[0].isdigit():
|
||||
datestr += " " + wordNext
|
||||
used += 1
|
||||
hasYear = True
|
||||
else:
|
||||
hasYear = False
|
||||
# parse 5 jours après demain, 10 semaines après jeudi prochain,
|
||||
# 2 mois après juillet
|
||||
validFollowups = days + months + monthsShort
|
||||
validFollowups.append("aujourd'hui")
|
||||
validFollowups.append("demain")
|
||||
validFollowups.append("prochain")
|
||||
validFollowups.append("prochaine")
|
||||
validFollowups.append("suivant")
|
||||
validFollowups.append("suivante")
|
||||
validFollowups.append("dernier")
|
||||
validFollowups.append("dernière")
|
||||
validFollowups.append("précédent")
|
||||
validFollowups.append("précédente")
|
||||
validFollowups.append("maintenant")
|
||||
if word in ["après", "depuis"] and wordNext in validFollowups:
|
||||
used = 2
|
||||
fromFlag = True
|
||||
if wordNext == "demain":
|
||||
dayOffset += 1
|
||||
elif wordNext in days:
|
||||
d = days.index(wordNext)
|
||||
tmpOffset = (d + 1) - int(today)
|
||||
used = 2
|
||||
if wordNextNext == "prochain":
|
||||
tmpOffset += 7
|
||||
used += 1
|
||||
elif wordNextNext == "dernier":
|
||||
tmpOffset -= 7
|
||||
used += 1
|
||||
elif tmpOffset < 0:
|
||||
tmpOffset += 7
|
||||
dayOffset += tmpOffset
|
||||
if used > 0:
|
||||
if start - 1 > 0 and words[start - 1] in ["ce", "cette"]:
|
||||
start -= 1
|
||||
used += 1
|
||||
|
||||
for i in range(0, used):
|
||||
words[i + start] = ""
|
||||
|
||||
if start - 1 >= 0 and words[start - 1] in markers:
|
||||
words[start - 1] = ""
|
||||
found = True
|
||||
daySpecified = True
|
||||
|
||||
# parse time
|
||||
hrOffset = 0
|
||||
minOffset = 0
|
||||
secOffset = 0
|
||||
hrAbs = 0
|
||||
minAbs = 0
|
||||
|
||||
for idx, word in enumerate(words):
|
||||
if word == "":
|
||||
continue
|
||||
|
||||
wordPrevPrev = words[idx - 2] if idx > 1 else ""
|
||||
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
|
||||
# parse midi, minuit, matin, après-midi, soir
|
||||
used = 0
|
||||
if word == "midi":
|
||||
hrAbs = 12
|
||||
used += 1
|
||||
elif word == "minuit":
|
||||
hrAbs = 0
|
||||
used += 1
|
||||
elif word == "matin":
|
||||
if hrAbs == 0:
|
||||
hrAbs = 8
|
||||
used += 1
|
||||
elif word == "après-midi":
|
||||
if hrAbs == 0:
|
||||
hrAbs = 15
|
||||
used += 1
|
||||
elif word in ["soir", "soirée"]:
|
||||
if hrAbs == 0:
|
||||
hrAbs = 19
|
||||
used += 1
|
||||
# parse midi et quart, minuit et demi, midi 10, minuit moins 20
|
||||
if word in ["midi", "minuit"]:
|
||||
if wordNext.isdigit():
|
||||
minAbs = int(wordNext)
|
||||
used += 1
|
||||
elif wordNext == "et":
|
||||
if wordNextNext == "quart":
|
||||
minAbs = 15
|
||||
used += 2
|
||||
elif wordNextNext == "demi":
|
||||
minAbs = 30
|
||||
used += 2
|
||||
elif wordNext == "moins":
|
||||
if wordNextNext.isdigit():
|
||||
minAbs = 60 - int(wordNextNext)
|
||||
if hrAbs == 0:
|
||||
hrAbs = 23
|
||||
else:
|
||||
hrAbs -= 1
|
||||
used += 2
|
||||
if wordNextNext == "quart":
|
||||
minAbs = 45
|
||||
if hrAbs == 0:
|
||||
hrAbs = 23
|
||||
else:
|
||||
hrAbs -= 1
|
||||
used += 2
|
||||
# parse une demi-heure, un quart d'heure
|
||||
elif word == "demi-heure" or word == "heure" and \
|
||||
(wordPrevPrev in markers or wordPrevPrevPrev in markers):
|
||||
if word == "demi-heure":
|
||||
minOffset = 30
|
||||
elif wordPrev == "quart":
|
||||
minOffset = 15
|
||||
elif wordPrev == "quarts" and wordPrevPrev.isdigit():
|
||||
minOffset = int(wordPrevPrev) * 15
|
||||
words[idx - 2] = ""
|
||||
# else:
|
||||
# hrOffset = 1
|
||||
if wordPrevPrevPrev in markers:
|
||||
words[idx - 3] = ""
|
||||
words[idx - 2] = ""
|
||||
words[idx - 1] = ""
|
||||
used += 1
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
# parse 5:00 du matin, 12:00, etc
|
||||
elif word[0].isdigit() and is_ordinal(word) is None:
|
||||
isTime = True
|
||||
strHH = ""
|
||||
strMM = ""
|
||||
remainder = ""
|
||||
if ":" in word or "h" in word:
|
||||
# parse hours on short format
|
||||
# "3:00 du matin", "4h14", "3h15min"
|
||||
stage = 0
|
||||
length = len(word)
|
||||
for i in range(length):
|
||||
if stage == 0:
|
||||
if word[i].isdigit():
|
||||
strHH += word[i]
|
||||
used = 1
|
||||
elif word[i] in [":", "h"]:
|
||||
stage = 1
|
||||
else:
|
||||
stage = 2
|
||||
i -= 1
|
||||
elif stage == 1:
|
||||
if word[i].isdigit():
|
||||
strMM += word[i]
|
||||
used = 1
|
||||
else:
|
||||
stage = 2
|
||||
if word[i:i+3] == "min":
|
||||
i += 1
|
||||
elif stage == 2:
|
||||
break
|
||||
if remainder == "":
|
||||
if wordNext == "matin":
|
||||
remainder = "am"
|
||||
used += 1
|
||||
elif wordNext == "après-midi":
|
||||
remainder = "pm"
|
||||
used += 1
|
||||
elif wordNext == "soir":
|
||||
remainder = "pm"
|
||||
used += 1
|
||||
elif wordNext == "ce" and wordNextNext == "matin":
|
||||
remainder = "am"
|
||||
used = 2
|
||||
elif wordNext in ["cet", "cette"] and \
|
||||
wordNextNext == "après-midi":
|
||||
remainder = "pm"
|
||||
used = 2
|
||||
elif wordNext == "ce" and wordNextNext == "soir":
|
||||
remainder = "pm"
|
||||
used = 2
|
||||
elif wordNext == "cette" and wordNextNext == "nuit":
|
||||
if int(strHH) > 8:
|
||||
remainder = "pm"
|
||||
else:
|
||||
remainder = "am"
|
||||
used += 2
|
||||
else:
|
||||
# try to parse time without colons
|
||||
# 5 hours, 10 minutes etc.
|
||||
length = len(word)
|
||||
strNum = ""
|
||||
remainder = ""
|
||||
for i in range(length):
|
||||
if word[i].isdigit():
|
||||
strNum += word[i]
|
||||
|
||||
if (
|
||||
wordNext in ["heures", "heure"] and word != "0" and
|
||||
(
|
||||
int(word) < 100 or
|
||||
int(word) > 2400
|
||||
)):
|
||||
# "dans 3 heures", "à 3 heures"
|
||||
if wordPrev in ["dans", "après"]:
|
||||
hrOffset = int(word)
|
||||
isTime = False
|
||||
else:
|
||||
strHH = strNum
|
||||
used = 2
|
||||
idxHr = idx + 2
|
||||
# "dans 1 heure 40", "à 1 heure 40"
|
||||
if idxHr < len(words):
|
||||
# "3 heures 45"
|
||||
if words[idxHr].isdigit():
|
||||
if wordPrev in ["dans", "après"]:
|
||||
minOffset = int(words[idxHr])
|
||||
else:
|
||||
strMM = int(words[idxHr])
|
||||
used += 1
|
||||
idxHr += 1
|
||||
# "3 heures et quart", "4 heures et demi"
|
||||
elif words[idxHr] == "et" and idxHr + 1 < len(words):
|
||||
if words[idxHr + 1] == "quart":
|
||||
if wordPrev in ["dans", "après"]:
|
||||
minOffset = 15
|
||||
else:
|
||||
strMM = 15
|
||||
used += 2
|
||||
idxHr += 2
|
||||
elif words[idxHr + 1] == "demi":
|
||||
if wordPrev in ["dans", "après"]:
|
||||
minOffset = 30
|
||||
else:
|
||||
strMM = 30
|
||||
used += 2
|
||||
idxHr += 2
|
||||
# "5 heures moins 20", "6 heures moins le quart"
|
||||
elif words[idxHr] == "moins" and \
|
||||
idxHr + 1 < len(words):
|
||||
if words[idxHr + 1].isdigit():
|
||||
if wordPrev in ["dans", "après"]:
|
||||
hrOffset -= 1
|
||||
minOffset = 60 - int(words[idxHr + 1])
|
||||
else:
|
||||
strHH = int(strHH) - 1
|
||||
strMM = 60 - int(words[idxHr + 1])
|
||||
used += 2
|
||||
idxHr += 2
|
||||
elif words[idxHr + 1] == "quart":
|
||||
if wordPrev in ["dans", "après"]:
|
||||
hrOffset -= 1
|
||||
minOffset = 45
|
||||
else:
|
||||
strHH = int(strHH) - 1
|
||||
strMM = 45
|
||||
used += 2
|
||||
idxHr += 2
|
||||
# remove word minutes if present
|
||||
if idxHr < len(words) and \
|
||||
words[idxHr] in ["minutes", "minute"]:
|
||||
used += 1
|
||||
idxHr += 1
|
||||
# handle am/pm
|
||||
if idxHr < len(words) and \
|
||||
words[idxHr] in timeQualifiersList:
|
||||
if words[idxHr] == "matin":
|
||||
remainder = "am"
|
||||
else:
|
||||
remainder = "pm"
|
||||
used += 1
|
||||
elif wordNext == "minutes":
|
||||
# "dans 10 minutes"
|
||||
if wordPrev in ["dans", "après"]:
|
||||
minOffset = int(word)
|
||||
isTime = False
|
||||
else:
|
||||
strMM = int(word)
|
||||
used = 2
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
elif wordNext == "secondes":
|
||||
# "dans 5 secondes"
|
||||
secOffset = int(word)
|
||||
isTime = False
|
||||
used = 2
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
elif int(word) > 100:
|
||||
# format militaire
|
||||
strHH = int(word) / 100
|
||||
strMM = int(word) - strHH * 100
|
||||
used = 1
|
||||
if wordNext == "heures":
|
||||
used += 1
|
||||
|
||||
strHH = int(strHH) if strHH else 0
|
||||
strMM = int(strMM) if strMM else 0
|
||||
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
|
||||
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
|
||||
if strHH > 24 or strMM > 59:
|
||||
isTime = False
|
||||
used = 0
|
||||
if isTime:
|
||||
hrAbs = strHH * 1
|
||||
minAbs = strMM * 1
|
||||
# used += 1
|
||||
if used > 0:
|
||||
# removed parsed words from the sentence
|
||||
for i in range(used):
|
||||
words[idx + i] = ""
|
||||
|
||||
if idx > 0 and wordPrev in markers:
|
||||
words[idx - 1] = ""
|
||||
if idx > 1 and wordPrevPrev in markers:
|
||||
words[idx - 2] = ""
|
||||
|
||||
idx += used - 1
|
||||
found = True
|
||||
|
||||
# check that we found a date
|
||||
if not date_found():
|
||||
return None
|
||||
|
||||
if dayOffset is False:
|
||||
dayOffset = 0
|
||||
|
||||
# perform date manipulation
|
||||
|
||||
extractedDate = dateNow
|
||||
extractedDate = extractedDate.replace(microsecond=0,
|
||||
second=0,
|
||||
minute=0,
|
||||
hour=0)
|
||||
if datestr != "":
|
||||
if not hasYear:
|
||||
temp = datetime.strptime(datestr, "%B %d")
|
||||
temp = temp.replace(year=extractedDate.year)
|
||||
if extractedDate < temp:
|
||||
extractedDate = extractedDate.replace(year=int(currentYear),
|
||||
month=int(
|
||||
temp.strftime(
|
||||
"%m")),
|
||||
day=int(temp.strftime(
|
||||
"%d")))
|
||||
else:
|
||||
extractedDate = extractedDate.replace(
|
||||
year=int(currentYear) + 1,
|
||||
month=int(temp.strftime("%m")),
|
||||
day=int(temp.strftime("%d")))
|
||||
else:
|
||||
temp = datetime.strptime(datestr, "%B %d %Y")
|
||||
extractedDate = extractedDate.replace(
|
||||
year=int(temp.strftime("%Y")),
|
||||
month=int(temp.strftime("%m")),
|
||||
day=int(temp.strftime("%d")))
|
||||
|
||||
if yearOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(years=yearOffset)
|
||||
if monthOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(months=monthOffset)
|
||||
if dayOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(days=dayOffset)
|
||||
if hrAbs != -1 and minAbs != -1:
|
||||
|
||||
extractedDate = extractedDate + relativedelta(hours=hrAbs,
|
||||
minutes=minAbs)
|
||||
if (hrAbs != 0 or minAbs != 0) and datestr == "":
|
||||
if not daySpecified and dateNow > extractedDate:
|
||||
extractedDate = extractedDate + relativedelta(days=1)
|
||||
if hrOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(hours=hrOffset)
|
||||
if minOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(minutes=minOffset)
|
||||
if secOffset != 0:
|
||||
extractedDate = extractedDate + relativedelta(seconds=secOffset)
|
||||
for idx, word in enumerate(words):
|
||||
if words[idx] == "et" and words[idx - 1] == "" and words[
|
||||
idx + 1] == "":
|
||||
words[idx] = ""
|
||||
|
||||
resultStr = " ".join(words)
|
||||
resultStr = ' '.join(resultStr.split())
|
||||
return [extractedDate, resultStr]
|
||||
|
||||
|
||||
def isFractional_fr(input_str):
    """
    Tell whether a French word names a fraction and, if so, which one.

    The word is lower-cased first. A trailing "s" is dropped (so "quarts"
    is handled like "quart"), except for "tiers", which always ends in "s".

    Args:
        input_str (str): the word to examine
    Returns:
        (bool) or (float): False when the word is not a known fraction name,
                           otherwise the value of that fraction (1/n)
    """
    word = input_str.lower()

    # Reduce plural forms to the singular, e.g. "quarts" -> "quart".
    if word.endswith('s') and word != "tiers":
        word = word[:-1]

    # Names for 1/1 up to 1/20, listed in order of their denominator.
    ordered_names = ("entier", "demi", "tiers", "quart", "cinquième",
                     "sixième", "septième", "huitième", "neuvième",
                     "dixième", "onzième", "douzième", "treizième",
                     "quatorzième", "quinzième", "seizième", "dix-septième",
                     "dix-huitième", "dix-neuvième", "vingtième")

    denominators = {}
    for denominator, name in enumerate(ordered_names, 1):
        denominators[name] = denominator

    # A few larger denominators that are recognised individually.
    denominators["trentième"] = 30
    denominators["centième"] = 100
    denominators["millième"] = 1000

    if word not in denominators:
        return False
    return 1.0 / denominators[word]
|
||||
|
||||
|
||||
def normalize_fr(text, remove_articles):
    """
    French string normalization.

    The text is lower-cased and split on whitespace, then rebuilt with
    single spaces between the tokens that are kept.

    Args:
        text (str): the utterance to normalize
        remove_articles (bool): when true, tokens found in ``articles_fr``
            are dropped and a leading "l'" or "d'" is cut off a token
    Returns:
        (str): the normalized text
    """
    tokens = text.lower().split()  # splitting also discards extra spaces
    kept = []
    pos = 0
    while pos < len(tokens):
        if remove_articles:
            # Whole-word articles disappear entirely.
            if tokens[pos] in articles_fr:
                pos += 1
                continue
            # Elided articles/prepositions are cut off the following word.
            # The list is updated in place so the number parser below sees
            # the shortened token.
            if tokens[pos][:2] in ["l'", "d'"]:
                tokens[pos] = tokens[pos][2:]

        # Stand-alone punctuation carries no meaning here.
        if tokens[pos] in ["?", "!", ";", "…"]:
            pos += 1
            continue

        # Convert numbers into digits. A non-None result is unpacked as
        # (value, index); scanning resumes at that index.
        parsed = number_parse_fr(tokens, pos)
        if parsed is None:
            kept.append(tokens[pos])
            pos += 1
        else:
            value, pos = parsed
            kept.append(str(value))

    return " ".join(kept)
|
|
@ -20,6 +20,7 @@ from mycroft.util.lang.parse_en import *
|
|||
from mycroft.util.lang.parse_pt import *
|
||||
from mycroft.util.lang.parse_es import *
|
||||
from mycroft.util.lang.parse_it import *
|
||||
from mycroft.util.lang.parse_fr import *
|
||||
from mycroft.util.lang.parse_common import *
|
||||
|
||||
|
||||
|
@ -78,7 +79,8 @@ def extractnumber(text, lang="en-us"):
|
|||
return extractnumber_pt(text)
|
||||
elif lang_lower.startswith("it"):
|
||||
return extractnumber_it(text)
|
||||
|
||||
elif lang_lower.startswith("fr"):
|
||||
return extractnumber_fr(text)
|
||||
# TODO: Normalization for other languages
|
||||
return text
|
||||
|
||||
|
@ -136,6 +138,8 @@ def extract_datetime(text, anchorDate=None, lang="en-us"):
|
|||
return extract_datetime_pt(text, anchorDate)
|
||||
elif lang_lower.startswith("it"):
|
||||
return extract_datetime_it(text, anchorDate)
|
||||
elif lang_lower.startswith("fr"):
|
||||
return extract_datetime_fr(text, anchorDate)
|
||||
|
||||
return text
|
||||
# ==============================================================
|
||||
|
@ -163,6 +167,8 @@ def normalize(text, lang="en-us", remove_articles=True):
|
|||
return normalize_pt(text, remove_articles)
|
||||
elif lang_lower.startswith("it"):
|
||||
return normalize_it(text, remove_articles)
|
||||
elif lang_lower.startswith("fr"):
|
||||
return normalize_fr(text, remove_articles)
|
||||
# TODO: Normalization for other languages
|
||||
return text
|
||||
|
||||
|
|
|
@ -0,0 +1,335 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import unittest
|
||||
import datetime
|
||||
|
||||
from mycroft.util.format import nice_number
|
||||
from mycroft.util.format import nice_time
|
||||
from mycroft.util.format import pronounce_number
|
||||
|
||||
|
||||
# Input number -> text expected from nice_number(number, lang="fr-fr").
# Consumed by TestNiceNumberFormat_fr.test_convert_float_to_nice_number_fr.
NUMBERS_FIXTURE_FR = {
    1.435634: '1.436',
    2: '2',
    5.0: '5',
    0.027: '0.027',
    0.5: 'un demi',
    1.333: '1 et 1 tiers',
    2.666: '2 et 2 tiers',
    0.25: 'un quart',
    1.25: '1 et 1 quart',
    0.75: '3 quarts',
    1.75: '1 et 3 quarts',
    3.4: '3 et 2 cinquièmes',
    16.8333: '16 et 5 sixièmes',
    12.5714: '12 et 4 septièmes',
    9.625: '9 et 5 huitièmes',
    6.777: '6 et 7 neuvièmes',
    3.1: '3 et 1 dixième',
    2.272: '2 et 3 onzièmes',
    5.583: '5 et 7 douzièmes',
    8.384: '8 et 5 treizièmes',
    0.071: 'un quatorzième',
    6.466: '6 et 7 quinzièmes',
    8.312: '8 et 5 seizièmes',
    2.176: '2 et 3 dix-septièmes',
    200.722: '200 et 13 dix-huitièmes',
    7.421: '7 et 8 dix-neuvièmes',
    0.05: 'un vingtième'
}
|
||||
|
||||
|
||||
class TestNiceNumberFormat_fr(unittest.TestCase):
    """Checks the text produced by ``nice_number`` for ``lang="fr-fr"``."""

    def test_convert_float_to_nice_number_fr(self):
        """Each fixture number is rendered as its expected French text."""
        for number, number_str in NUMBERS_FIXTURE_FR.items():
            self.assertEqual(nice_number(number, lang="fr-fr"), number_str,
                             'should format {} as {} and not {}'.format(
                                 number, number_str, nice_number(
                                     number, lang="fr-fr")))

    def test_specify_denominator_fr(self):
        """A restricted denominator list limits which fractions are used."""
        self.assertEqual(nice_number(5.5, lang="fr-fr",
                                     denominators=[1, 2, 3]),
                         '5 et demi',
                         'should format 5.5 as 5 and a half not {}'.format(
                             nice_number(5.5, lang="fr-fr",
                                         denominators=[1, 2, 3])))
        # lang must be passed here too; without it the call silently checks
        # the default-language formatter instead of the French one.
        self.assertEqual(nice_number(2.333, lang="fr-fr",
                                     denominators=[1, 2]),
                         '2.333',
                         'should format 2.333 as 2.333 not {}'.format(
                             nice_number(2.333, lang="fr-fr",
                                         denominators=[1, 2])))

    def test_no_speech_fr(self):
        """With speech=False the number is written, not spelled out."""
        # Same as above: the asserted call has to request French output,
        # matching the call used to build the failure message.
        self.assertEqual(nice_number(6.777, lang="fr-fr", speech=False),
                         '6 7/9',
                         'should format 6.777 as 6 7/9 not {}'.format(
                             nice_number(6.777, lang="fr-fr", speech=False)))
        self.assertEqual(nice_number(6.0, lang="fr-fr", speech=False),
                         '6',
                         'should format 6.0 as 6 not {}'.format(
                             nice_number(6.0, lang="fr-fr", speech=False)))
|
||||
|
||||
|
||||
# def pronounce_number(number, lang="en-us", places=2):
|
||||
class TestPronounceNumber_fr(unittest.TestCase):
    """Checks the French wording returned by ``pronounce_number``."""

    def _check(self, cases):
        """Assert each (number, extra keyword arguments, expected) triple."""
        for number, options, spoken in cases:
            self.assertEqual(
                pronounce_number(number, lang="fr-fr", **options), spoken)

    def test_convert_int_fr(self):
        """Non-negative integers."""
        self._check([
            (0, {}, "zéro"),
            (1, {}, "un"),
            (10, {}, "dix"),
            (15, {}, "quinze"),
            (20, {}, "vingt"),
            (27, {}, "vingt-sept"),
            (30, {}, "trente"),
            (33, {}, "trente-trois"),
            (71, {}, "soixante-et-onze"),
            (80, {}, "quatre-vingts"),
            (74, {}, "soixante-quatorze"),
            (79, {}, "soixante-dix-neuf"),
            (91, {}, "quatre-vingt-onze"),
            (97, {}, "quatre-vingt-dix-sept"),
            (300, {}, "300"),
        ])

    def test_convert_negative_int_fr(self):
        """Negative integers are prefixed with "moins"."""
        self._check([
            (-1, {}, "moins un"),
            (-10, {}, "moins dix"),
            (-15, {}, "moins quinze"),
            (-20, {}, "moins vingt"),
            (-27, {}, "moins vingt-sept"),
            (-30, {}, "moins trente"),
            (-33, {}, "moins trente-trois"),
        ])

    def test_convert_decimals_fr(self):
        """Decimals, with and without an explicit ``places`` argument."""
        self._check([
            (1.234, {}, "un virgule deux trois"),
            (21.234, {}, "vingt-et-un virgule deux trois"),
            (21.234, {"places": 1}, "vingt-et-un virgule deux"),
            (21.234, {"places": 0}, "vingt-et-un"),
            (21.234, {"places": 3},
             "vingt-et-un virgule deux trois quatre"),
            (21.234, {"places": 4},
             "vingt-et-un virgule deux trois quatre"),
            (21.234, {"places": 5},
             "vingt-et-un virgule deux trois quatre"),
            (-1.234, {}, "moins un virgule deux trois"),
            (-21.234, {}, "moins vingt-et-un virgule deux trois"),
            (-21.234, {"places": 1}, "moins vingt-et-un virgule deux"),
            (-21.234, {"places": 0}, "moins vingt-et-un"),
            (-21.234, {"places": 3},
             "moins vingt-et-un virgule deux trois quatre"),
            (-21.234, {"places": 4},
             "moins vingt-et-un virgule deux trois quatre"),
            (-21.234, {"places": 5},
             "moins vingt-et-un virgule deux trois quatre"),
        ])
|
||||
|
||||
|
||||
# def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
|
||||
# use_ampm=False):
|
||||
class TestNiceDateFormat_fr(unittest.TestCase):
    """Checks the French rendering of clock times by ``nice_time``."""

    def test_convert_times_fr(self):
        """Compare ``nice_time`` output with the expected French text."""
        # Keyword combinations applied, in this order, to every time listed
        # in ``full_cases``; each expectation list follows the same order.
        option_sets = [
            {},
            {"use_ampm": True},
            {"speech": False},
            {"speech": False, "use_ampm": True},
            {"speech": False, "use_24hour": True},
            {"speech": False, "use_24hour": True, "use_ampm": True},
            {"use_24hour": True, "use_ampm": True},
            {"use_24hour": True, "use_ampm": False},
        ]

        # (hour, minute, second) on 2017-01-31 -> one text per option set.
        full_cases = [
            ((13, 22, 3), ["une heure vingt-deux",
                           "une heure vingt-deux de l'après-midi",
                           "1:22",
                           "1:22 PM",
                           "13:22",
                           "13:22",
                           "treize heures vingt-deux",
                           "treize heures vingt-deux"]),
            ((13, 0, 3), ["une heure",
                          "une heure de l'après-midi",
                          "1:00",
                          "1:00 PM",
                          "13:00",
                          "13:00",
                          "treize heures",
                          "treize heures"]),
            ((13, 2, 3), ["une heure deux",
                          "une heure deux de l'après-midi",
                          "1:02",
                          "1:02 PM",
                          "13:02",
                          "13:02",
                          "treize heures deux",
                          "treize heures deux"]),
            ((0, 2, 3), ["minuit deux",
                         "minuit deux",
                         "12:02",
                         "12:02 AM",
                         "00:02",
                         "00:02",
                         "minuit deux",
                         "minuit deux"]),
            ((12, 15, 9), ["midi et quart",
                           "midi et quart",
                           "12:15",
                           "12:15 PM",
                           "12:15",
                           "12:15",
                           "midi quinze",
                           "midi quinze"]),
            ((19, 40, 49), ["huit heures moins vingt",
                            "huit heures moins vingt du soir",
                            "7:40",
                            "7:40 PM",
                            "19:40",
                            "19:40",
                            "dix-neuf heures quarante",
                            "dix-neuf heures quarante"]),
        ]
        for clock, expected_texts in full_cases:
            moment = datetime.datetime(2017, 1, 31, *clock)
            for options, expected in zip(option_sets, expected_texts):
                self.assertEqual(
                    nice_time(moment, lang="fr-fr", **options), expected)

        # Times checked with a single keyword combination each.
        single_cases = [
            ((1, 15, 0), {"use_24hour": True}, "une heure quinze"),
            ((1, 35, 0), {}, "deux heures moins vingt-cinq"),
            ((1, 45, 0), {}, "deux heures moins le quart"),
            ((4, 50, 0), {}, "cinq heures moins dix"),
            ((5, 55, 0), {}, "six heures moins cinq"),
            ((5, 30, 0), {"use_ampm": True}, "cinq heures et demi du matin"),
        ]
        for clock, options, expected in single_cases:
            moment = datetime.datetime(2017, 1, 31, *clock)
            self.assertEqual(
                nice_time(moment, lang="fr-fr", **options), expected)
|
||||
|
||||
|
||||
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@ -0,0 +1,361 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2017 Mycroft AI Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import unittest
|
||||
from datetime import datetime
|
||||
|
||||
from mycroft.util.parse import get_gender
|
||||
from mycroft.util.parse import extract_datetime
|
||||
from mycroft.util.parse import extractnumber
|
||||
from mycroft.util.parse import normalize
|
||||
from mycroft.util.parse import fuzzy_match
|
||||
from mycroft.util.parse import match_one
|
||||
|
||||
|
||||
class TestNormalize_fr(unittest.TestCase):
|
||||
def test_articles_fr(self):
    """Articles are dropped only when ``remove_articles`` is true."""
    # (utterance, remove_articles flag, expected normalized text)
    cases = [
        ("c'est le test", True, "c'est test"),
        ("et l'autre test", True, "et autre test"),
        ("et la tentative", True, "et tentative"),
        ("la dernière tentative", False, "la dernière tentative"),
    ]
    for utterance, strip_articles, expected in cases:
        self.assertEqual(
            normalize(utterance, remove_articles=strip_articles,
                      lang="fr-fr"),
            expected)
|
||||
|
||||
def test_extractnumber_fr(self):
    """``extractnumber`` finds the number expressed in a French phrase."""
    # (phrase, expected result); the "point du tout" entry expects a
    # string rather than a number.
    cases = [
        ("voici le premier test", 1),
        ("c'est 2 tests", 2),
        ("voici le second test", 2),
        ("voici trois tests", 3),
        ("voici le test numéro 4", 4),
        ("un tiers de litre", 1.0 / 3.0),
        ("3 cuillères", 3),
        ("1/3 de litre", 1.0 / 3.0),
        ("un quart de bol", 0.25),
        ("1/4 de verre", 0.25),
        ("2/3 de bol", 2.0 / 3.0),
        ("3/4 de bol", 3.0 / 4.0),
        ("1 et 3/4 de bol", 1.75),
        ("1 bol et demi", 1.5),
        ("un bol et demi", 1.5),
        ("un et demi bols", 1.5),
        ("un bol et un demi", 1.5),
        ("trois quarts de bol", 3.0 / 4.0),
        ("32.2 degrés", 32.2),
        ("2 virgule 2 cm", 2.2),
        ("2 virgule 0 2 cm", 2.02),
        ("ça fait virgule 2 cm", 0.2),
        ("point du tout", "point tout"),
        ("32.00 secondes", 32),
        ("mange trente-et-une bougies", 31),
        ("un trentième", 1.0 / 30.0),
        ("un centième", 0.01),
        ("un millième", 0.001),
    ]
    for phrase, expected in cases:
        self.assertEqual(extractnumber(phrase, lang="fr-fr"), expected)
|
||||
|
||||
def test_extractdatetime_fr(self):
    """Check French date/time extraction with extract_datetime().

    Most cases are evaluated against the anchor 2017-06-27 00:00; two
    cases use the anchor 2017-06-30 17:00 and one case uses no anchor at
    all (extract_datetime is called without a date argument).  For each
    utterance both the extracted date, formatted as
    "%Y-%m-%d %H:%M:%S", and the leftover text are compared with the
    expected values.  The final assertions check that None is returned
    for an empty string, for a sentence without any date, and for the
    utterance containing "37 heures".
    """
    date_format = "%Y-%m-%d %H:%M:%S"

    def checkResult(res, expected_date, expected_leftover):
        # Shared assertions for every [formatted date, leftover] pair.
        self.assertEqual(res[0], expected_date)
        self.assertEqual(res[1], expected_leftover)

    def extractWithFormat_fr(text):
        # Plain decimal literals: a leading-zero literal such as "06" is
        # Python 2 octal syntax and a SyntaxError on Python 3.
        date = datetime(2017, 6, 27, 0, 0)
        [extractedDate, leftover] = extract_datetime(text, date,
                                                     lang="fr-fr")
        extractedDate = extractedDate.strftime(date_format)
        return [extractedDate, leftover]

    def testExtract_fr(text, expected_date, expected_leftover):
        checkResult(extractWithFormat_fr(text),
                    expected_date, expected_leftover)

    def extractWithFormatDate2_fr(text):
        # Second anchor: 2017-06-30 at 17:00.
        date = datetime(2017, 6, 30, 17, 0)
        [extractedDate, leftover] = extract_datetime(text, date,
                                                     lang="fr-fr")
        extractedDate = extractedDate.strftime(date_format)
        return [extractedDate, leftover]

    def testExtractDate2_fr(text, expected_date, expected_leftover):
        checkResult(extractWithFormatDate2_fr(text),
                    expected_date, expected_leftover)

    def extractWithFormatNoDate_fr(text):
        # No anchor date is passed to extract_datetime here.
        [extractedDate, leftover] = extract_datetime(text, lang="fr-fr")
        extractedDate = extractedDate.strftime(date_format)
        return [extractedDate, leftover]

    def testExtractNoDate_fr(text, expected_date, expected_leftover):
        checkResult(extractWithFormatNoDate_fr(text),
                    expected_date, expected_leftover)

    testExtract_fr("Planifier l'embûche dans 5 jours",
                   "2017-07-02 00:00:00", "planifier embûche")
    testExtract_fr("Quel temps fera-t-il après-demain ?",
                   "2017-06-29 00:00:00", "quel temps fera-t-il")
    testExtract_fr("Met un rappel à 10:45 du soir",
                   "2017-06-27 22:45:00", "met 1 rappel")
    testExtract_fr("quel temps est prévu pour vendredi matin ?",
                   "2017-06-30 08:00:00", "quel temps est prévu pour")
    testExtract_fr("quel temps fait-il demain",
                   "2017-06-28 00:00:00", "quel temps fait-il")
    testExtract_fr("rappelle-moi d'appeler maman dans 8 semaines et"
                   " 2 jours", "2017-08-24 00:00:00",
                   "rappelle-moi appeler maman")
    testExtract_fr("Jouer des musiques de Beyonce 2 jours après vendredi",
                   "2017-07-02 00:00:00", "jouer musiques beyonce")
    testExtract_fr("Commencer l'invasion à 15 heures 45 jeudi",
                   "2017-06-29 15:45:00", "commencer invasion")
    testExtract_fr("Lundi, commander le gâteau à la boulangerie",
                   "2017-07-03 00:00:00", "commander gâteau à boulangerie")
    testExtract_fr("Jouer la chanson Joyeux anniversaire dans 5 ans",
                   "2022-06-27 00:00:00", "jouer chanson joyeux"
                   " anniversaire")
    testExtract_fr("Skyper Maman à 12 heures 45 jeudi prochain",
                   "2017-07-06 12:45:00", "skyper maman")
    testExtract_fr("Quel temps fera-t-il jeudi prochain ?",
                   "2017-07-06 00:00:00", "quel temps fera-t-il")
    testExtract_fr("Quel temps fera-t-il vendredi matin ?",
                   "2017-06-30 08:00:00", "quel temps fera-t-il")
    testExtract_fr("Quel temps fera-t-il vendredi soir",
                   "2017-06-30 19:00:00", "quel temps fera-t-il")
    testExtract_fr("Quel temps fera-t-il vendredi après-midi",
                   "2017-06-30 15:00:00", "quel temps fera-t-il")
    testExtract_fr("rappelle-moi d'appeler maman le 3 août",
                   "2017-08-03 00:00:00", "rappelle-moi appeler maman")
    testExtract_fr("Acheter des feux d'artifice pour le 14 juil",
                   "2017-07-14 00:00:00", "acheter feux artifice pour")
    testExtract_fr("Quel temps fera-t-il 2 semaines après vendredi",
                   "2017-07-14 00:00:00", "quel temps fera-t-il")
    testExtract_fr("Quel temps fera-t-il mercredi à 7 heures",
                   "2017-06-28 07:00:00", "quel temps fera-t-il")
    testExtract_fr("Prendre rendez-vous à 12:45 jeudi prochain",
                   "2017-07-06 12:45:00", "prendre rendez-vous")
    testExtract_fr("Quel temps fait-il ce jeudi ?",
                   "2017-06-29 00:00:00", "quel temps fait-il")
    testExtract_fr("Organiser une visite 2 semaines et 6 jours après"
                   " samedi",
                   "2017-07-21 00:00:00", "organiser 1 visite")
    testExtract_fr("Commencer l'invasion à 3 heures 45 jeudi",
                   "2017-06-29 03:45:00", "commencer invasion")
    testExtract_fr("Commencer l'invasion à 20 heures jeudi",
                   "2017-06-29 20:00:00", "commencer invasion")
    testExtract_fr("Lancer la fête jeudi à 8 heures du soir",
                   "2017-06-29 20:00:00", "lancer fête")
    testExtract_fr("Commencer l'invasion à 4 heures de l'après-midi jeudi",
                   "2017-06-29 16:00:00", "commencer invasion")
    testExtract_fr("Commencer l'invasion jeudi à midi",
                   "2017-06-29 12:00:00", "commencer invasion")
    testExtract_fr("Commencer l'invasion jeudi à minuit",
                   "2017-06-29 00:00:00", "commencer invasion")
    testExtract_fr("Commencer l'invasion jeudi à dix-sept heures",
                   "2017-06-29 17:00:00", "commencer invasion")
    testExtract_fr("rappelle-moi de me réveiller dans 4 années",
                   "2021-06-27 00:00:00", "rappelle-moi me réveiller")
    testExtract_fr("rappelle-moi de me réveiller dans 4 ans et 4 jours",
                   "2021-07-01 00:00:00", "rappelle-moi me réveiller")
    testExtract_fr("Quel temps fera-t-il 3 jours après demain ?",
                   "2017-07-01 00:00:00", "quel temps fera-t-il")
    testExtract_fr("3 décembre",
                   "2017-12-03 00:00:00", "")
    testExtract_fr("retrouvons-nous à 8:00 ce soir",
                   "2017-06-27 20:00:00", "retrouvons-nous")
    testExtract_fr("retrouvons-nous demain à minuit et demi",
                   "2017-06-28 00:30:00", "retrouvons-nous")
    testExtract_fr("retrouvons-nous à midi et quart",
                   "2017-06-27 12:15:00", "retrouvons-nous")
    testExtract_fr("retrouvons-nous à midi moins le quart",
                   "2017-06-27 11:45:00", "retrouvons-nous")
    testExtract_fr("retrouvons-nous à midi moins dix",
                   "2017-06-27 11:50:00", "retrouvons-nous")
    testExtract_fr("retrouvons-nous à midi dix",
                   "2017-06-27 12:10:00", "retrouvons-nous")
    testExtract_fr("retrouvons-nous à minuit moins 23",
                   "2017-06-27 23:37:00", "retrouvons-nous")
    testExtract_fr("mangeons à 3 heures moins 23 minutes",
                   "2017-06-27 02:37:00", "mangeons")
    testExtract_fr("mangeons aussi à 4 heures moins le quart du matin",
                   "2017-06-27 03:45:00", "mangeons aussi")
    testExtract_fr("mangeons encore à minuit moins le quart",
                   "2017-06-27 23:45:00", "mangeons encore")
    testExtract_fr("buvons à 4 heures et quart",
                   "2017-06-27 04:15:00", "buvons")
    testExtract_fr("buvons également à 18 heures et demi",
                   "2017-06-27 18:30:00", "buvons également")
    testExtract_fr("dormons à 20 heures moins le quart",
                   "2017-06-27 19:45:00", "dormons")
    testExtract_fr("buvons le dernier verre à 10 heures moins 12 du soir",
                   "2017-06-27 21:48:00", "buvons dernier verre")
    testExtract_fr("s'échapper de l'île à 15h45",
                   "2017-06-27 15:45:00", "s'échapper île")
    testExtract_fr("s'échapper de l'île à 3h45min de l'après-midi",
                   "2017-06-27 15:45:00", "s'échapper île")
    testExtract_fr("décale donc ça à 3h48min cet après-midi",
                   "2017-06-27 15:48:00", "décale donc ça")
    testExtract_fr("construire un bunker à 9h42min du matin",
                   "2017-06-27 09:42:00", "construire 1 bunker")
    testExtract_fr("ou plutôt à 9h43 ce matin",
                   "2017-06-27 09:43:00", "ou plutôt")
    testExtract_fr("faire un feu à 8h du soir",
                   "2017-06-27 20:00:00", "faire 1 feu")
    testExtract_fr("faire la fête jusqu'à 18h cette nuit",
                   "2017-06-27 18:00:00", "faire fête jusqu'à")
    testExtract_fr("cuver jusqu'à 4h cette nuit",
                   "2017-06-27 04:00:00", "cuver jusqu'à")
    testExtract_fr("réveille-moi dans 20 secondes aujourd'hui",
                   "2017-06-27 00:00:20", "réveille-moi")
    testExtract_fr("réveille-moi dans 33 minutes",
                   "2017-06-27 00:33:00", "réveille-moi")
    testExtract_fr("tais-toi dans 12 heures et 3 minutes",
                   "2017-06-27 12:03:00", "tais-toi")
    testExtract_fr("ouvre-la dans 1 heure 3",
                   "2017-06-27 01:03:00", "ouvre-la")
    testExtract_fr("ferme-la dans 1 heure et quart",
                   "2017-06-27 01:15:00", "ferme-la")
    testExtract_fr("scelle-la dans 1 heure et demi",
                   "2017-06-27 01:30:00", "scelle-la")
    testExtract_fr("zippe-la dans 2 heures moins 12",
                   "2017-06-27 01:48:00", "zippe-la")
    testExtract_fr("soude-la dans 3 heures moins le quart",
                   "2017-06-27 02:45:00", "soude-la")
    testExtract_fr("mange la semaine prochaine",
                   "2017-07-04 00:00:00", "mange")
    testExtract_fr("bois la semaine dernière",
                   "2017-06-20 00:00:00", "bois")
    testExtract_fr("mange le mois prochain",
                   "2017-07-27 00:00:00", "mange")
    testExtract_fr("bois le mois dernier",
                   "2017-05-27 00:00:00", "bois")
    testExtract_fr("mange l'an prochain",
                   "2018-06-27 00:00:00", "mange")
    testExtract_fr("bois l'année dernière",
                   "2016-06-27 00:00:00", "bois")
    testExtract_fr("reviens à lundi dernier",
                   "2017-06-26 00:00:00", "reviens")
    testExtract_fr("capitule le 8 mai 1945",
                   "1945-05-08 00:00:00", "capitule")
    testExtract_fr("rédige le contrat 3 jours après jeudi prochain",
                   "2017-07-09 00:00:00", "rédige contrat")
    testExtract_fr("signe le contrat 2 semaines après jeudi dernier",
                   "2017-07-06 00:00:00", "signe contrat")
    testExtract_fr("lance le four dans un quart d'heure",
                   "2017-06-27 00:15:00", "lance four")
    testExtract_fr("enfourne la pizza dans une demi-heure",
                   "2017-06-27 00:30:00", "enfourne pizza")
    testExtract_fr("arrête le four dans trois quarts d'heure",
                   "2017-06-27 00:45:00", "arrête four")
    testExtract_fr("mange la pizza dans une heure",
                   "2017-06-27 01:00:00", "mange pizza")
    testExtract_fr("faire les plantations le 3ème jour de mars",
                   "2018-03-03 00:00:00", "faire plantations")
    testExtract_fr("récolter dans 10 mois",
                   "2018-04-27 00:00:00", "récolter")
    testExtract_fr("point 6a: dans 10 mois",
                   "2018-04-27 06:00:00", "point")
    testExtract_fr("l'après-midi démissionner à 16:59",
                   "2017-06-27 16:59:00", "démissionner")
    testExtract_fr("ranger son bureau à 1700 heures",
                   "2017-06-27 17:00:00", "ranger son bureau")

    testExtractDate2_fr("range le contrat 2 semaines après lundi",
                        "2017-07-17 00:00:00", "range contrat")
    testExtractDate2_fr("achète-toi de l'humour à 15h",
                        "2017-07-01 15:00:00", "achète-toi humour")

    # NOTE(review): the expected value is built from datetime.now(), so
    # this comparison presumably can fail if the test runs across
    # midnight -- confirm against extract_datetime's default anchor.
    testExtractNoDate_fr("tais-toi aujourd'hui",
                         datetime.now().strftime("%Y-%m-%d") + " 00:00:00",
                         "tais-toi")

    # Utterances for which extract_datetime is expected to return None.
    self.assertIsNone(extract_datetime("", lang="fr-fr"))
    self.assertIsNone(extract_datetime("phrase inutile", lang="fr-fr"))
    self.assertIsNone(extract_datetime(
        "apprendre à compter à 37 heures", lang="fr-fr"))
|
||||
|
||||
def test_spaces_fr(self):
    """Check French normalize() output for inputs padded with spaces.

    Each expected string has no leading or trailing space; the article
    "le" is absent from it and "un" appears as "1".
    """
    cases = (
        (" c'est le test", "c'est test"),
        (" c'est le test ", "c'est test"),
        (" c'est un test", "c'est 1 test"),
    )
    for utterance, expected in cases:
        self.assertEqual(normalize(utterance, lang="fr-fr"), expected)
|
||||
|
||||
def test_numbers_fr(self):
    """Check that French normalize() rewrites spelled-out numbers as digits.

    Each entry pairs an utterance with the exact string normalize() must
    return for lang="fr-fr".  The cases run from single number words up
    to compound numbers such as 360598; the last one expects the
    hyphenated word "quatre-quart" to stay as text.
    """
    cases = [
        ("c'est un deux trois test", "c'est 1 2 3 test"),
        (" c'est le quatre cinq six test", "c'est 4 5 6 test"),
        ("c'est le sept huit neuf test", "c'est 7 8 9 test"),
        # Same case checked a second time, as in the original suite.
        ("c'est le sept huit neuf test", "c'est 7 8 9 test"),
        ("voilà le test dix onze douze", "voilà test 10 11 12"),
        ("voilà le treize quatorze test", "voilà 13 14 test"),
        ("ça fait quinze seize dix-sept", "ça fait 15 16 17"),
        ("ça fait dix-huit dix-neuf vingt", "ça fait 18 19 20"),
        ("ça fait mille cinq cents", "ça fait 1500"),
        ("voilà cinq cents trente et un mille euros",
         "voilà 531000 euros"),
        ("voilà trois cents soixante mille cinq"
         " cents quatre-vingt-dix-huit euros",
         "voilà 360598 euros"),
        ("voilà vingt et un euros", "voilà 21 euros"),
        ("joli zéro sur vingt", "joli 0 sur 20"),
        ("je veux du quatre-quart", "je veux quatre-quart"),
    ]
    for utterance, expected in cases:
        self.assertEqual(normalize(utterance, lang="fr-fr"), expected)
|
||||
|
||||
def test_gender_fr(self):
    """Check that get_gender() yields False for "personne" in French."""
    gender = get_gender("personne", lang="fr-fr")
    # Compared with assertEqual against False, exactly as before.
    self.assertEqual(gender, False)
|
||||
|
||||
|
||||
# Run the whole test module when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Reference in New Issue