From 3a11f39d7d881189cdadd757373b0fa95d523049 Mon Sep 17 00:00:00 2001 From: Cakeh Date: Thu, 8 Feb 2018 21:46:24 +0100 Subject: [PATCH] Issue-1375 - Fix and complete french translation --- mycroft/res/text/fr-fr/cancel.voc | 3 + .../text/fr-fr/checking for updates.dialog | 4 +- mycroft/res/text/fr-fr/i am awake.dialog | 2 - .../res/text/fr-fr/i didn't catch that.dialog | 5 +- .../res/text/fr-fr/learning disabled.dialog | 1 + .../res/text/fr-fr/learning enabled.dialog | 1 + .../res/text/fr-fr/message_rebooting.dialog | 1 + .../text/fr-fr/message_synching.clock.dialog | 1 + .../res/text/fr-fr/message_updating.dialog | 1 + mycroft/res/text/fr-fr/mycroft.intro.dialog | 1 + .../not connected to the internet.dialog | 8 +- mycroft/res/text/fr-fr/phonetic_spellings.txt | 1 + .../fr-fr/reset to factory defaults.dialog | 2 +- mycroft/res/text/fr-fr/skills updated.dialog | 4 +- ...y I couldn't install default skills.dialog | 3 +- .../res/text/fr-fr/time.changed.reboot.dialog | 1 + mycroft/util/format.py | 7 + mycroft/util/lang/format_fr.py | 278 +++++ mycroft/util/lang/parse_fr.py | 950 ++++++++++++++++++ mycroft/util/parse.py | 8 +- test/unittests/util/test_format_fr.py | 335 ++++++ test/unittests/util/test_parse_fr.py | 361 +++++++ 22 files changed, 1962 insertions(+), 16 deletions(-) create mode 100644 mycroft/res/text/fr-fr/cancel.voc delete mode 100644 mycroft/res/text/fr-fr/i am awake.dialog create mode 100644 mycroft/res/text/fr-fr/learning disabled.dialog create mode 100644 mycroft/res/text/fr-fr/learning enabled.dialog create mode 100644 mycroft/res/text/fr-fr/message_rebooting.dialog create mode 100644 mycroft/res/text/fr-fr/message_synching.clock.dialog create mode 100644 mycroft/res/text/fr-fr/message_updating.dialog create mode 100644 mycroft/res/text/fr-fr/mycroft.intro.dialog create mode 100644 mycroft/res/text/fr-fr/phonetic_spellings.txt create mode 100644 mycroft/res/text/fr-fr/time.changed.reboot.dialog create mode 100644 mycroft/util/lang/format_fr.py 
create mode 100644 mycroft/util/lang/parse_fr.py create mode 100644 test/unittests/util/test_format_fr.py create mode 100644 test/unittests/util/test_parse_fr.py diff --git a/mycroft/res/text/fr-fr/cancel.voc b/mycroft/res/text/fr-fr/cancel.voc new file mode 100644 index 0000000000..d54376687f --- /dev/null +++ b/mycroft/res/text/fr-fr/cancel.voc @@ -0,0 +1,3 @@ +annule tout +oublie ça +laisse tomber diff --git a/mycroft/res/text/fr-fr/checking for updates.dialog b/mycroft/res/text/fr-fr/checking for updates.dialog index 6772391d53..1a03facb42 100644 --- a/mycroft/res/text/fr-fr/checking for updates.dialog +++ b/mycroft/res/text/fr-fr/checking for updates.dialog @@ -1,2 +1,2 @@ -Recherche de mise à jours -Un instant, le temps que je me mette à jour +Recherche de mises à jour +Un instant, je me mets à jour diff --git a/mycroft/res/text/fr-fr/i am awake.dialog b/mycroft/res/text/fr-fr/i am awake.dialog deleted file mode 100644 index 1a37acb22a..0000000000 --- a/mycroft/res/text/fr-fr/i am awake.dialog +++ /dev/null @@ -1,2 +0,0 @@ -Je suis réveillé -Je suis maintenant réveillé diff --git a/mycroft/res/text/fr-fr/i didn't catch that.dialog b/mycroft/res/text/fr-fr/i didn't catch that.dialog index c3fdf1c0ee..98f40ae701 100644 --- a/mycroft/res/text/fr-fr/i didn't catch that.dialog +++ b/mycroft/res/text/fr-fr/i didn't catch that.dialog @@ -1,4 +1,5 @@ Désolé, je n'ai pas compris -Je crains ne pas avoir compris +J'ai bien peur de ne pas avoir compris Peux-tu répéter ? -Peux-tu répéter s'il te plait ? +Peux-tu répéter, s'il te plaît ? +Aurais-tu l'obligeance de répéter ? diff --git a/mycroft/res/text/fr-fr/learning disabled.dialog b/mycroft/res/text/fr-fr/learning disabled.dialog new file mode 100644 index 0000000000..5cdc3c0a60 --- /dev/null +++ b/mycroft/res/text/fr-fr/learning disabled.dialog @@ -0,0 +1 @@ +Les données d'interaction ne seront plus envoyées à Mycroft A.I. 
diff --git a/mycroft/res/text/fr-fr/learning enabled.dialog b/mycroft/res/text/fr-fr/learning enabled.dialog new file mode 100644 index 0000000000..0856c41c20 --- /dev/null +++ b/mycroft/res/text/fr-fr/learning enabled.dialog @@ -0,0 +1 @@ +Dorénavant, je vais envoyer les données d'interaction à Mycroft A.I. pour être plus intelligent. Pour l'instant, cela inclut les enregistrements des activations par mot d'éveil. diff --git a/mycroft/res/text/fr-fr/message_rebooting.dialog b/mycroft/res/text/fr-fr/message_rebooting.dialog new file mode 100644 index 0000000000..e113b6c67f --- /dev/null +++ b/mycroft/res/text/fr-fr/message_rebooting.dialog @@ -0,0 +1 @@ +REDÉMARRAGE... diff --git a/mycroft/res/text/fr-fr/message_synching.clock.dialog b/mycroft/res/text/fr-fr/message_synching.clock.dialog new file mode 100644 index 0000000000..7bd303eb04 --- /dev/null +++ b/mycroft/res/text/fr-fr/message_synching.clock.dialog @@ -0,0 +1 @@ +< < < SYNC < < < diff --git a/mycroft/res/text/fr-fr/message_updating.dialog b/mycroft/res/text/fr-fr/message_updating.dialog new file mode 100644 index 0000000000..f7044aae3b --- /dev/null +++ b/mycroft/res/text/fr-fr/message_updating.dialog @@ -0,0 +1 @@ +< < < MISE À JOUR < < < diff --git a/mycroft/res/text/fr-fr/mycroft.intro.dialog b/mycroft/res/text/fr-fr/mycroft.intro.dialog new file mode 100644 index 0000000000..0964c0c2fa --- /dev/null +++ b/mycroft/res/text/fr-fr/mycroft.intro.dialog @@ -0,0 +1 @@ +Bonjour, je suis Mycroft, ton nouvel assistant. Pour t'assister, j'ai besoin d'être connecté à Internet. Tu peux me connecter avec un câble réseau, ou bien utiliser le wi-fi. 
Suis ces instructions pour configurer le wi-fi : diff --git a/mycroft/res/text/fr-fr/not connected to the internet.dialog b/mycroft/res/text/fr-fr/not connected to the internet.dialog index 3588558025..2c41679dd3 100644 --- a/mycroft/res/text/fr-fr/not connected to the internet.dialog +++ b/mycroft/res/text/fr-fr/not connected to the internet.dialog @@ -1,4 +1,4 @@ -Il semblerait que je ne sois pas connecté à internet -Je ne pense pas être connecté à internet -Je n'arrive pas à accéder à internet -Je ne peux pas accèder à internet +On dirait que je ne suis pas connecté à Internet +Je ne pense pas être connecté à Internet +Je n'arrive pas à accéder à Internet +Je ne peux pas accéder à Internet diff --git a/mycroft/res/text/fr-fr/phonetic_spellings.txt b/mycroft/res/text/fr-fr/phonetic_spellings.txt new file mode 100644 index 0000000000..a8b1d71195 --- /dev/null +++ b/mycroft/res/text/fr-fr/phonetic_spellings.txt @@ -0,0 +1 @@ +mycroft: maycroft diff --git a/mycroft/res/text/fr-fr/reset to factory defaults.dialog b/mycroft/res/text/fr-fr/reset to factory defaults.dialog index e5533af377..8826362399 100644 --- a/mycroft/res/text/fr-fr/reset to factory defaults.dialog +++ b/mycroft/res/text/fr-fr/reset to factory defaults.dialog @@ -1 +1 @@ -J'ai été remis à zéro (valeurs d'usine) +J'ai été réinitialisé. diff --git a/mycroft/res/text/fr-fr/skills updated.dialog b/mycroft/res/text/fr-fr/skills updated.dialog index 444a1e20da..ce7c14b565 100644 --- a/mycroft/res/text/fr-fr/skills updated.dialog +++ b/mycroft/res/text/fr-fr/skills updated.dialog @@ -1,2 +1,2 @@ -Je suis actuellement à jour -Compétences à jour. Je suis prêt à t'aider +Je suis à jour, maintenant. +Compétences mises à jour. Je suis prêt à t'aider. 
diff --git a/mycroft/res/text/fr-fr/sorry I couldn't install default skills.dialog b/mycroft/res/text/fr-fr/sorry I couldn't install default skills.dialog index 3024c238fd..59caa2965b 100644 --- a/mycroft/res/text/fr-fr/sorry I couldn't install default skills.dialog +++ b/mycroft/res/text/fr-fr/sorry I couldn't install default skills.dialog @@ -1,2 +1 @@ -Désolé, je n'ai pas pu installer les compétences par défaut -Une erreur s'est produite lors de l'installation des compétences par défaut +Une erreur est survenue lors de la mise à jour des compétences. diff --git a/mycroft/res/text/fr-fr/time.changed.reboot.dialog b/mycroft/res/text/fr-fr/time.changed.reboot.dialog new file mode 100644 index 0000000000..41dbcc2b4e --- /dev/null +++ b/mycroft/res/text/fr-fr/time.changed.reboot.dialog @@ -0,0 +1 @@ +Je dois redémarrer après avoir synchronisé mon horloge avec internet, à tout de suite. diff --git a/mycroft/util/format.py b/mycroft/util/format.py index 5e8f83e472..f8c019a539 100755 --- a/mycroft/util/format.py +++ b/mycroft/util/format.py @@ -16,6 +16,7 @@ from mycroft.util.lang.format_en import * from mycroft.util.lang.format_pt import * from mycroft.util.lang.format_it import * +from mycroft.util.lang.format_fr import * def nice_number(number, lang="en-us", speech=True, denominators=None): @@ -52,6 +53,8 @@ def nice_number(number, lang="en-us", speech=True, denominators=None): return nice_number_pt(result) elif lang_lower.startswith("it"): return nice_number_it(result) + elif lang_lower.startswith("fr"): + return nice_number_fr(result) # Default to the raw number for unsupported languages, # hopefully the STT engine will pronounce understandably. 
@@ -80,6 +83,8 @@ def nice_time(dt, lang="en-us", speech=True, use_24hour=False, return nice_time_en(dt, speech, use_24hour, use_ampm) elif lang_lower.startswith("it"): return nice_time_it(dt, speech, use_24hour, use_ampm) + elif lang_lower.startswith("fr"): + return nice_time_fr(dt, speech, use_24hour, use_ampm) # TODO: Other languages return str(dt) @@ -101,6 +106,8 @@ def pronounce_number(number, lang="en-us", places=2): return pronounce_number_en(number, places=places) elif lang_lower.startswith("it"): return pronounce_number_it(number, places=places) + elif lang_lower.startswith("fr"): + return pronounce_number_fr(number, places=places) # Default to just returning the numeric value return str(number) diff --git a/mycroft/util/lang/format_fr.py b/mycroft/util/lang/format_fr.py new file mode 100644 index 0000000000..41d286985f --- /dev/null +++ b/mycroft/util/lang/format_fr.py @@ -0,0 +1,278 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +""" Format functions for french (fr) + + Todo: + * nice_number should leave number formatting to nice_number_fr +""" + +NUM_STRING_FR = { + 0: 'zéro', + 1: 'un', + 2: 'deux', + 3: 'trois', + 4: 'quatre', + 5: 'cinq', + 6: 'six', + 7: 'sept', + 8: 'huit', + 9: 'neuf', + 10: 'dix', + 11: 'onze', + 12: 'douze', + 13: 'treize', + 14: 'quatorze', + 15: 'quinze', + 16: 'seize', + 20: 'vingt', + 30: 'trente', + 40: 'quarante', + 50: 'cinquante', + 60: 'soixante', + 70: 'soixante-dix', + 80: 'quatre-vingt', + 90: 'quatre-vingt-dix' +} + +FRACTION_STRING_FR = { + 2: 'demi', + 3: 'tiers', + 4: 'quart', + 5: 'cinquième', + 6: 'sixième', + 7: 'septième', + 8: 'huitième', + 9: 'neuvième', + 10: 'dixième', + 11: 'onzième', + 12: 'douzième', + 13: 'treizième', + 14: 'quatorzième', + 15: 'quinzième', + 16: 'seizième', + 17: 'dix-septième', + 18: 'dix-huitième', + 19: 'dix-neuvième', + 20: 'vingtième' +} + + +def nice_number_fr(result): + """ + Helper for nice_number + + Convert (1 1/3) to spoken value like "1 et 1 tiers" + + Args: + mixed (int,int,int): the mixed number; whole, numerator, denominator + Return: + (str): spoken version of the number + """ + whole, num, den = result + if num == 0: + # if the number is an integer, nothing to do + return str(whole) + den_str = FRACTION_STRING_FR[den] + # if it is not an integer + if whole == 0: + # if there is no whole number + if num == 1: + # if numerator is 1, return "un demi", for example + return_string = 'un {}'.format(den_str) + else: + # else return "quatre tiers", for example + return_string = '{} {}'.format(num, den_str) + elif num == 1: + # if there is a whole number and numerator is 1 + if den == 2: + # if denominator is 2, return "1 et demi", for example + return_string = '{} et {}'.format(whole, den_str) + else: + # else return "1 et 1 tiers", for example + return_string = '{} et 1 {}'.format(whole, den_str) + else: + # else return "2 et 3 quart", for example + return_string = '{} et {} {}'.format(whole, num, den_str) 
+ if num > 1 and den != 3: + # if the numerator is greater than 1 and the denominator + # is not 3 ("tiers"), add an s for plural + return_string += 's' + + return return_string + + +def pronounce_number_fr(num, places=2): + """ + Convert a number to its spoken equivalent + + For example, '5.2' would return 'cinq virgule deux' + + Args: + num(float or int): the number to pronounce (under 100) + places(int): maximum decimal places to speak + Returns: + (str): The pronounced number + """ + if abs(num) >= 100: + # TODO: Support for numbers over 100 + return str(num) + + result = "" + if num < 0: + result = "moins " + num = abs(num) + + if num > 16: + tens = int(num-int(num) % 10) + ones = int(num-tens) + if ones != 0: + if tens > 10 and tens <= 60 and int(num-tens) == 1: + result += NUM_STRING_FR[tens] + "-et-" + NUM_STRING_FR[ones] + elif num == 71: + result += "soixante-et-onze" + elif tens == 70: + result += NUM_STRING_FR[60] + "-" + if ones < 7: + result += NUM_STRING_FR[10 + ones] + else: + result += NUM_STRING_FR[10] + "-" + NUM_STRING_FR[ones] + elif tens == 90: + result += NUM_STRING_FR[80] + "-" + if ones < 7: + result += NUM_STRING_FR[10 + ones] + else: + result += NUM_STRING_FR[10] + "-" + NUM_STRING_FR[ones] + else: + result += NUM_STRING_FR[tens] + "-" + NUM_STRING_FR[ones] + else: + if num == 80: + result += "quatre-vingts" + else: + result += NUM_STRING_FR[tens] + else: + result += NUM_STRING_FR[int(num)] + + # Deal with decimal part + if not num == int(num) and places > 0: + result += " virgule" + place = 10 + while int(num*place) % 10 > 0 and places > 0: + result += " " + NUM_STRING_FR[int(num*place) % 10] + place *= 10 + places -= 1 + return result + + +def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False): + """ + Format a time to a comfortable human format + + For example, generate 'cinq heures trente' for speech or '5:30' for + text display.
+ + Args: + dt (datetime): date to format (assumes already in local timezone) + speech (bool): format for speech (default/True) or display (False) + use_24hour (bool): output in 24-hour/military or 12-hour format + use_ampm (bool): include the am/pm for 12-hour format + Returns: + (str): The formatted time string + """ + if use_24hour: + # e.g. "03:01" or "14:22" + string = dt.strftime("%H:%M") + else: + if use_ampm: + # e.g. "3:01 AM" or "2:22 PM" + string = dt.strftime("%I:%M %p") + else: + # e.g. "3:01" or "2:22" + string = dt.strftime("%I:%M") + if string[0] == '0': + string = string[1:] # strip leading zeros + + if not speech: + return string + + # Generate a speakable version of the time + speak = "" + if use_24hour: + + # "13 heures trente" + if dt.hour == 0: + speak += "minuit" + elif dt.hour == 12: + speak += "midi" + elif dt.hour == 1: + speak += "une heure" + else: + speak += pronounce_number_fr(dt.hour) + " heures" + + if dt.minute != 0: + speak += " " + pronounce_number_fr(dt.minute) + + else: + # Prepare for "trois heures moins le quart" + if dt.minute == 35: + minute = -25 + hour = dt.hour + 1 + elif dt.minute == 40: + minute = -20 + hour = dt.hour + 1 + elif dt.minute == 45: + minute = -15 + hour = dt.hour + 1 + elif dt.minute == 50: + minute = -10 + hour = dt.hour + 1 + elif dt.minute == 55: + minute = -5 + hour = dt.hour + 1 + else: + minute = dt.minute + hour = dt.hour + + if hour == 0: + speak += "minuit" + elif hour == 12: + speak += "midi" + elif hour == 1 or hour == 13: + speak += "une heure" + elif hour < 13: + speak = pronounce_number_fr(hour) + " heures" + else: + speak = pronounce_number_fr(hour-12) + " heures" + + if minute != 0: + if minute == 15: + speak += " et quart" + elif minute == 30: + speak += " et demi" + elif minute == -15: + speak += " moins le quart" + else: + speak += " " + pronounce_number_fr(minute) + + if use_ampm: + if hour > 17: + speak += " du soir" + elif hour > 12: + speak += " de l'après-midi" + elif hour > 0
and hour < 12: + speak += " du matin" + + return speak diff --git a/mycroft/util/lang/parse_fr.py b/mycroft/util/lang/parse_fr.py new file mode 100644 index 0000000000..ad13bbc4c5 --- /dev/null +++ b/mycroft/util/lang/parse_fr.py @@ -0,0 +1,950 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" Parse functions for French (fr) + + Todo: + * extractnumber_fr: ordinal numbers ("le cinquième") + * extractnumber_fr: numbers greater than 999 999 ("cinq millions") + * extract_datetime_fr: "quatrième lundi de janvier" + * extract_datetime_fr: "5 heures moins le quart" + * extract_datetime_fr: "troisième lundi de juillet" + * get_gender_fr +""" + +from datetime import datetime +from dateutil.relativedelta import relativedelta +from mycroft.util.lang.parse_common import is_numeric, look_for_fractions + +# Indefinite articles ["un", "une"] cannot be suppressed, +# in French, "un cheval" means "a horse" or "one horse".
+articles_fr = ["le", "la", "du", "de", "les", "des"] + +numbers_fr = { + "zéro": 0, + "un": 1, + "une": 1, + "premier": 1, + "première": 1, + "deux": 2, + "second": 2, + "trois": 3, + "quatre": 4, + "cinq": 5, + "six": 6, + "sept": 7, + "huit": 8, + "neuf": 9, + "dix": 10, + "onze": 11, + "douze": 12, + "treize": 13, + "quatorze": 14, + "quinze": 15, + "seize": 16, + "vingt": 20, + "trente": 30, + "quarante": 40, + "cinquante": 50, + "soixante": 60, + "soixante-dix": 70, + "septante": 70, + "quatre-vingt": 80, + "quatre-vingts": 80, + "octante": 80, + "huitante": 80, + "quatre-vingt-dix": 90, + "nonante": 90, + "cent": 100, + "cents": 100, + "mille": 1000, + "mil": 1000, + "millier": 1000, + "milliers": 1000, + "million": 1000000, + "millions": 1000000, + "milliard": 1000000000, + "milliards": 1000000000} + + +def number_parse_fr(words, i): + """ + Takes in a list of words (strings without whitespace) and + extracts a number that starts at the given index. + Args: + words (array): the list to extract a number from + i (int): the index in words where to look for the number + Returns: + tuple with number, index of next word after the number. + + Returns None if no number was found. + """ + def cte_fr(i, s): + # Check if string s is equal to words[i]. + # If it is return tuple with s, index of next word. + # If it is not return None. + if i < len(words) and s == words[i]: + return s, i + 1 + return None + + def number_word_fr(i, min, max): + # Check if words[i] is a number in numbers_fr between min and max. + # If it is return tuple with number, index of next word. + # If it is not return None. 
+ if i < len(words): + val = numbers_fr.get(words[i]) + # Numbers [1-16,20,30,40,50,60,70,80,90,100,1000] + if val is not None: + if val >= min and val <= max: + return val, i + 1 + else: + return None + # The number may be hyphenated (numbers [17-99]) + splitWord = words[i].split('-') + if len(splitWord) > 1: + val1 = numbers_fr.get(splitWord[0]) + if val1: + i1 = 1 + val2 = 0 + + # For [81-99], e.g. "quatre-vingt-deux" + if splitWord[0] == "quatre" and splitWord[1] == "vingt": + val1 = 80 + i1 = 2 + + # For [21,31,41,51,61,71] + if splitWord[i1] == "et" and len(splitWord) > i1: + val2 = numbers_fr.get(splitWord[i1 + 1]) + # For [77-79],[97-99] e.g. "soixante-dix-sept" + elif splitWord[i1] == "dix" and len(splitWord) > i1: + val2 = 10 + numbers_fr.get(splitWord[i1 + 1]) + else: + val2 = numbers_fr.get(splitWord[i1]) + + if val2: + val = val1 + val2 + else: + return None + if val and val >= min and val <= max: + return val, i + 1 + + return None + + def number_1_99_fr(i): + # Check if words[i] is a number between 1 and 99. + # If it is return tuple with number, index of next word. + # If it is not return None. + + # Is it a number between 1 and 16? + result1 = number_word_fr(i, 1, 16) + if result1: + return result1 + + # Is it a number between 10 and 99? + result1 = number_word_fr(i, 10, 99) + if result1: + val1, i1 = result1 + result2 = cte_fr(i1, "et") + # If the number is not hyphenated [21,31,41,51,61,71] + if result2: + i2 = result2[1] + result3 = number_word_fr(i2, 1, 11) + if result3: + val3, i3 = result3 + return val1 + val3, i3 + return result1 + + # It is not a number + return None + + def number_1_999_fr(i): + # Check if words[i] is a number between 1 and 999. + # If it is return tuple with number, index of next word. + # If it is not return None. + + # Is it 100 ? + result1 = number_word_fr(i, 100, 100) + + # Is it [200,300,400,500,600,700,800,900]? 
+ if not result1: + resultH1 = number_word_fr(i, 2, 9) + if resultH1: + valH1, iH1 = resultH1 + resultH2 = number_word_fr(iH1, 100, 100) + if resultH2: + iH2 = resultH2[1] + result1 = valH1 * 100, iH2 + + if result1: + val1, i1 = result1 + result2 = number_1_99_fr(i1) + if result2: + val2, i2 = result2 + return val1 + val2, i2 + else: + return result1 + + # [1-99] + result1 = number_1_99_fr(i) + if result1: + return result1 + + return None + + def number_fr(i): + # Check if words[i] is a number between 1 and 999,999. + # If it is return tuple with number, index of next word. + # If it is not return None. + + # check for zero + result1 = number_word_fr(i, 0, 0) + if result1: + return result1 + + # check for [1-999] + result1 = number_1_999_fr(i) + if result1: + val1, i1 = result1 + else: + val1 = 1 + i1 = i + # check for 1000 + result2 = number_word_fr(i1, 1000, 1000) + if result2: + # it's [1000-999000] + i2 = result2[1] + # check again for [1-999] + result3 = number_1_999_fr(i2) + if result3: + val3, i3 = result3 + return val1 * 1000 + val3, i3 + else: + return val1 * 1000, i2 + elif result1: + return result1 + return None + + return number_fr(i) + + +def extractnumber_fr(text): + """Takes in a string and extracts a number. + Args: + text (str): the string to extract a number from + Returns: + (str): The number extracted or the original text. + """ + # normalize text, remove articles + text = normalize_fr(text, True) + # split words by whitespace + aWords = text.split() + count = 0 + result = None + add = False + while count < len(aWords): + val = None + word = aWords[count] + wordNext = None + wordNextNext = None + if count < (len(aWords) - 1): + wordNext = aWords[count + 1] + + if word in ["et", "plus", "+"]: + count += 1 + add = True + continue + + # is current word a numeric number? + if word.isdigit(): + val = int(word) + count += 1 + elif is_numeric(word): + val = float(word) + count += 1 + # is current word the denominator of a fraction? 
+ elif isFractional_fr(word): + val = isFractional_fr(word) + count += 1 + + # is current word the numerator of a fraction? + if val and wordNext: + valNext = isFractional_fr(wordNext) + if valNext: + val = float(val) * valNext + count += 1 + + if not val: + count += 1 + # is current word a numeric fraction like "2/3"? + aPieces = word.split('/') + # if (len(aPieces) == 2 and is_numeric(aPieces[0]) + # and is_numeric(aPieces[1])): + if look_for_fractions(aPieces): + val = float(aPieces[0]) / float(aPieces[1]) + + # is current word followed by a decimal value? + if wordNext == "virgule": + zeros = 0 + newWords = aWords[count + 1:] + # count the number of zeros after the decimal sign + for word in newWords: + if word == "zéro" or word == "0": + zeros += 1 + else: + break + afterDotVal = None + # extract the number after the zeros + if newWords[zeros].isdigit(): + afterDotVal = newWords[zeros] + countDot = count + zeros + 2 + # if a number was extracted (since comma is also a + # punctuation sign) + if afterDotVal: + count = countDot + if not val: + val = 0 + # add the zeros + afterDotString = zeros * "0" + afterDotVal + val = float(str(val) + "." + afterDotString) + if val: + if add: + result += val + add = False + else: + result = val + + # if result == False: + if not result: + return text + + return result + + +def extract_datetime_fr(string, currentDate=None): + def clean_string(s): + """ + cleans the input string of unneeded punctuation and capitalization + among other things. 
+ """ + s = normalize_fr(s, True) + wordList = s.split() + for idx, word in enumerate(wordList): + # remove comma and dot if it's not a number + if word[-1] in [",", "."]: + word = word[:-1] + wordList[idx] = word + + return wordList + + def date_found(): + return found or \ + ( + datestr != "" or + yearOffset != 0 or monthOffset != 0 or + dayOffset is True or hrOffset != 0 or + hrAbs != 0 or minOffset != 0 or + minAbs != 0 or secOffset != 0 + ) + + def is_ordinal(word): + if word: + ordinals = ["er", "ère", "ème", "e", "nd", "nde"] + for ordinal in ordinals: + if word[0].isdigit() and ordinal in word: + word = word.replace(ordinal, "") + return int(word) + + return None + + if string == "": + return None + if currentDate is None: + currentDate = datetime.now() + + found = False + daySpecified = False + dayOffset = False + monthOffset = 0 + yearOffset = 0 + dateNow = currentDate + today = dateNow.strftime("%w") + currentYear = dateNow.strftime("%Y") + fromFlag = False + datestr = "" + hasYear = False + timeQualifier = "" + + timeQualifiersList = ["matin", "après-midi", "soir"] + markers = ["à", "dès", "autour", "vers", "environs", "dans", + "ce", "cette", "après"] + days = ["lundi", "mardi", "mercredi", + "jeudi", "vendredi", "samedi", "dimanche"] + months = ["janvier", "février", "mars", "avril", "mai", "juin", + "juillet", "août", "septembre", "octobre", "novembre", + "décembre"] + monthsShort = ["jan", "fév", "mar", "avr", "mai", "juin", "juil", "aoû", + "sept", "oct", "nov", "déc"] + # needed for format functions + months_en = ['january', 'february', 'march', 'april', 'may', 'june', + 'july', 'august', 'september', 'october', 'november', + 'december'] + + words = clean_string(string) + + for idx, word in enumerate(words): + if word == "": + continue + wordPrevPrevPrev = words[idx - 3] if idx > 2 else "" + wordPrevPrev = words[idx - 2] if idx > 1 else "" + wordPrev = words[idx - 1] if idx > 0 else "" + wordNext = words[idx + 1] if idx + 1 < len(words) else "" + 
wordNextNext = words[idx + 2] if idx + 2 < len(words) else "" + + start = idx + used = 0 + # save timequalifier for later + if word in timeQualifiersList: + timeQualifier = word + # parse aujourd'hui, demain, après-demain + elif word == "aujourd'hui" and not fromFlag: + dayOffset = 0 + used += 1 + elif word == "demain" and not fromFlag: + dayOffset = 1 + used += 1 + elif word == "après-demain" and not fromFlag: + dayOffset = 2 + used += 1 + # parse 5 jours, 10 semaines, semaine dernière, semaine prochaine + elif word in ["jour", "jours"]: + if wordPrev.isdigit(): + dayOffset += int(wordPrev) + start -= 1 + used = 2 + # "3e jour" + elif is_ordinal(wordPrev) is not None: + dayOffset += is_ordinal(wordPrev) - 1 + start -= 1 + used = 2 + elif word in ["semaine", "semaines"] and not fromFlag: + if wordPrev[0].isdigit(): + dayOffset += int(wordPrev) * 7 + start -= 1 + used = 2 + elif wordNext in ["prochaine", "suivante"]: + dayOffset = 7 + used = 2 + elif wordNext in ["dernière", "précédente"]: + dayOffset = -7 + used = 2 + # parse 10 mois, mois prochain, mois dernier + elif word == "mois" and not fromFlag: + if wordPrev[0].isdigit(): + monthOffset = int(wordPrev) + start -= 1 + used = 2 + elif wordNext in ["prochain", "suivant"]: + monthOffset = 1 + used = 2 + elif wordNext in ["dernier", "précédent"]: + monthOffset = -1 + used = 2 + # parse 5 ans, an prochain, année dernière + elif word in ["an", "ans", "année", "années"] and not fromFlag: + if wordPrev[0].isdigit(): + yearOffset = int(wordPrev) + start -= 1 + used = 2 + elif wordNext in ["prochain", "prochaine", "suivant", "suivante"]: + yearOffset = 1 + used = 2 + elif wordNext in ["dernier", "dernière", "précédent", + "précédente"]: + yearOffset = -1 + used = 2 + # parse lundi, mardi etc., and lundi prochain, mardi dernier, etc. 
+ elif word in days and not fromFlag: + d = days.index(word) + dayOffset = (d + 1) - int(today) + used = 1 + if dayOffset < 0: + dayOffset += 7 + if wordNext in ["prochain", "suivant"]: + dayOffset += 7 + used += 1 + elif wordNext in ["dernier", "précédent"]: + dayOffset -= 7 + used += 1 + # parse 15 juillet, 15 juil + elif word in months or word in monthsShort and not fromFlag: + try: + m = months.index(word) + except ValueError: + m = monthsShort.index(word) + used += 1 + datestr = months_en[m] + if wordPrev and (wordPrev[0].isdigit()): + datestr += " " + wordPrev + start -= 1 + used += 1 + else: + datestr += " 1" + if wordNext and wordNext[0].isdigit(): + datestr += " " + wordNext + used += 1 + hasYear = True + else: + hasYear = False + # parse 5 jours après demain, 10 semaines après jeudi prochain, + # 2 mois après juillet + validFollowups = days + months + monthsShort + validFollowups.append("aujourd'hui") + validFollowups.append("demain") + validFollowups.append("prochain") + validFollowups.append("prochaine") + validFollowups.append("suivant") + validFollowups.append("suivante") + validFollowups.append("dernier") + validFollowups.append("dernière") + validFollowups.append("précédent") + validFollowups.append("précédente") + validFollowups.append("maintenant") + if word in ["après", "depuis"] and wordNext in validFollowups: + used = 2 + fromFlag = True + if wordNext == "demain": + dayOffset += 1 + elif wordNext in days: + d = days.index(wordNext) + tmpOffset = (d + 1) - int(today) + used = 2 + if wordNextNext == "prochain": + tmpOffset += 7 + used += 1 + elif wordNextNext == "dernier": + tmpOffset -= 7 + used += 1 + elif tmpOffset < 0: + tmpOffset += 7 + dayOffset += tmpOffset + if used > 0: + if start - 1 > 0 and words[start - 1] in ["ce", "cette"]: + start -= 1 + used += 1 + + for i in range(0, used): + words[i + start] = "" + + if start - 1 >= 0 and words[start - 1] in markers: + words[start - 1] = "" + found = True + daySpecified = True + + # parse time + 
hrOffset = 0 + minOffset = 0 + secOffset = 0 + hrAbs = 0 + minAbs = 0 + + for idx, word in enumerate(words): + if word == "": + continue + + wordPrevPrev = words[idx - 2] if idx > 1 else "" + wordPrev = words[idx - 1] if idx > 0 else "" + wordNext = words[idx + 1] if idx + 1 < len(words) else "" + wordNextNext = words[idx + 2] if idx + 2 < len(words) else "" + # parse midi, minuit, matin, après-midi, soir + used = 0 + if word == "midi": + hrAbs = 12 + used += 1 + elif word == "minuit": + hrAbs = 0 + used += 1 + elif word == "matin": + if hrAbs == 0: + hrAbs = 8 + used += 1 + elif word == "après-midi": + if hrAbs == 0: + hrAbs = 15 + used += 1 + elif word in ["soir", "soirée"]: + if hrAbs == 0: + hrAbs = 19 + used += 1 + # parse midi et quart, minuit et demi, midi 10, minuit moins 20 + if word in ["midi", "minuit"]: + if wordNext.isdigit(): + minAbs = int(wordNext) + used += 1 + elif wordNext == "et": + if wordNextNext == "quart": + minAbs = 15 + used += 2 + elif wordNextNext == "demi": + minAbs = 30 + used += 2 + elif wordNext == "moins": + if wordNextNext.isdigit(): + minAbs = 60 - int(wordNextNext) + if hrAbs == 0: + hrAbs = 23 + else: + hrAbs -= 1 + used += 2 + if wordNextNext == "quart": + minAbs = 45 + if hrAbs == 0: + hrAbs = 23 + else: + hrAbs -= 1 + used += 2 + # parse une demi-heure, un quart d'heure + elif word == "demi-heure" or word == "heure" and \ + (wordPrevPrev in markers or wordPrevPrevPrev in markers): + if word == "demi-heure": + minOffset = 30 + elif wordPrev == "quart": + minOffset = 15 + elif wordPrev == "quarts" and wordPrevPrev.isdigit(): + minOffset = int(wordPrevPrev) * 15 + words[idx - 2] = "" + # else: + # hrOffset = 1 + if wordPrevPrevPrev in markers: + words[idx - 3] = "" + words[idx - 2] = "" + words[idx - 1] = "" + used += 1 + hrAbs = -1 + minAbs = -1 + # parse 5:00 du matin, 12:00, etc + elif word[0].isdigit() and is_ordinal(word) is None: + isTime = True + strHH = "" + strMM = "" + remainder = "" + if ":" in word or "h" in word: + 
# parse hours on short format + # "3:00 du matin", "4h14", "3h15min" + stage = 0 + length = len(word) + for i in range(length): + if stage == 0: + if word[i].isdigit(): + strHH += word[i] + used = 1 + elif word[i] in [":", "h"]: + stage = 1 + else: + stage = 2 + i -= 1 + elif stage == 1: + if word[i].isdigit(): + strMM += word[i] + used = 1 + else: + stage = 2 + if word[i:i+3] == "min": + i += 1 + elif stage == 2: + break + if remainder == "": + if wordNext == "matin": + remainder = "am" + used += 1 + elif wordNext == "après-midi": + remainder = "pm" + used += 1 + elif wordNext == "soir": + remainder = "pm" + used += 1 + elif wordNext == "ce" and wordNextNext == "matin": + remainder = "am" + used = 2 + elif wordNext in ["cet", "cette"] and \ + wordNextNext == "après-midi": + remainder = "pm" + used = 2 + elif wordNext == "ce" and wordNextNext == "soir": + remainder = "pm" + used = 2 + elif wordNext == "cette" and wordNextNext == "nuit": + if int(strHH) > 8: + remainder = "pm" + else: + remainder = "am" + used += 2 + else: + # try to parse time without colons + # 5 hours, 10 minutes etc. 
+ length = len(word) + strNum = "" + remainder = "" + for i in range(length): + if word[i].isdigit(): + strNum += word[i] + + if ( + wordNext in ["heures", "heure"] and word != "0" and + ( + int(word) < 100 or + int(word) > 2400 + )): + # "dans 3 heures", "à 3 heures" + if wordPrev in ["dans", "après"]: + hrOffset = int(word) + isTime = False + else: + strHH = strNum + used = 2 + idxHr = idx + 2 + # "dans 1 heure 40", "à 1 heure 40" + if idxHr < len(words): + # "3 heures 45" + if words[idxHr].isdigit(): + if wordPrev in ["dans", "après"]: + minOffset = int(words[idxHr]) + else: + strMM = int(words[idxHr]) + used += 1 + idxHr += 1 + # "3 heures et quart", "4 heures et demi" + elif words[idxHr] == "et" and idxHr + 1 < len(words): + if words[idxHr + 1] == "quart": + if wordPrev in ["dans", "après"]: + minOffset = 15 + else: + strMM = 15 + used += 2 + idxHr += 2 + elif words[idxHr + 1] == "demi": + if wordPrev in ["dans", "après"]: + minOffset = 30 + else: + strMM = 30 + used += 2 + idxHr += 2 + # "5 heures moins 20", "6 heures moins le quart" + elif words[idxHr] == "moins" and \ + idxHr + 1 < len(words): + if words[idxHr + 1].isdigit(): + if wordPrev in ["dans", "après"]: + hrOffset -= 1 + minOffset = 60 - int(words[idxHr + 1]) + else: + strHH = int(strHH) - 1 + strMM = 60 - int(words[idxHr + 1]) + used += 2 + idxHr += 2 + elif words[idxHr + 1] == "quart": + if wordPrev in ["dans", "après"]: + hrOffset -= 1 + minOffset = 45 + else: + strHH = int(strHH) - 1 + strMM = 45 + used += 2 + idxHr += 2 + # remove word minutes if present + if idxHr < len(words) and \ + words[idxHr] in ["minutes", "minute"]: + used += 1 + idxHr += 1 + # handle am/pm + if idxHr < len(words) and \ + words[idxHr] in timeQualifiersList: + if words[idxHr] == "matin": + remainder = "am" + else: + remainder = "pm" + used += 1 + elif wordNext == "minutes": + # "dans 10 minutes" + if wordPrev in ["dans", "après"]: + minOffset = int(word) + isTime = False + else: + strMM = int(word) + used = 2 + hrAbs = 
-1 + minAbs = -1 + elif wordNext == "secondes": + # "dans 5 secondes" + secOffset = int(word) + isTime = False + used = 2 + hrAbs = -1 + minAbs = -1 + elif int(word) > 100: + # format militaire + strHH = int(word) / 100 + strMM = int(word) - strHH * 100 + used = 1 + if wordNext == "heures": + used += 1 + + strHH = int(strHH) if strHH else 0 + strMM = int(strMM) if strMM else 0 + strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH + strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH + if strHH > 24 or strMM > 59: + isTime = False + used = 0 + if isTime: + hrAbs = strHH * 1 + minAbs = strMM * 1 + # used += 1 + if used > 0: + # removed parsed words from the sentence + for i in range(used): + words[idx + i] = "" + + if idx > 0 and wordPrev in markers: + words[idx - 1] = "" + if idx > 1 and wordPrevPrev in markers: + words[idx - 2] = "" + + idx += used - 1 + found = True + + # check that we found a date + if not date_found(): + return None + + if dayOffset is False: + dayOffset = 0 + + # perform date manipulation + + extractedDate = dateNow + extractedDate = extractedDate.replace(microsecond=0, + second=0, + minute=0, + hour=0) + if datestr != "": + if not hasYear: + temp = datetime.strptime(datestr, "%B %d") + temp = temp.replace(year=extractedDate.year) + if extractedDate < temp: + extractedDate = extractedDate.replace(year=int(currentYear), + month=int( + temp.strftime( + "%m")), + day=int(temp.strftime( + "%d"))) + else: + extractedDate = extractedDate.replace( + year=int(currentYear) + 1, + month=int(temp.strftime("%m")), + day=int(temp.strftime("%d"))) + else: + temp = datetime.strptime(datestr, "%B %d %Y") + extractedDate = extractedDate.replace( + year=int(temp.strftime("%Y")), + month=int(temp.strftime("%m")), + day=int(temp.strftime("%d"))) + + if yearOffset != 0: + extractedDate = extractedDate + relativedelta(years=yearOffset) + if monthOffset != 0: + extractedDate = extractedDate + relativedelta(months=monthOffset) + if 
def isFractional_fr(input_str):
    """
    Check whether a French word names a fraction and return its value.

    The word is lower-cased and a trailing plural "s" is dropped before the
    lookup, so "Quarts" is handled like "quart".  Both spellings of one
    half are accepted: "demi" and the feminine "demie".

    Args:
        input_str (str): the string to check if fractional
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    input_str = input_str.lower()

    # "tiers" ends with an "s" in the singular as well; every other
    # fraction word handled here only carries one in the plural
    # (e.g. "quarts").
    if input_str != "tiers" and input_str.endswith('s'):
        input_str = input_str[:-1]

    # feminine spelling of one half, e.g. "une demie"
    if input_str == "demie":
        input_str = "demi"

    # position in the list + 1 is the denominator: "entier" -> 1/1,
    # "demi" -> 1/2, ... "vingtième" -> 1/20
    aFrac = ["entier", "demi", "tiers", "quart", "cinquième", "sixième",
             "septième", "huitième", "neuvième", "dixième", "onzième",
             "douzième", "treizième", "quatorzième", "quinzième", "seizième",
             "dix-septième", "dix-huitième", "dix-neuvième", "vingtième"]
    if input_str in aFrac:
        return 1.0 / (aFrac.index(input_str) + 1)

    # denominators that do not follow the 1..20 sequence above
    otherDenominators = {"trentième": 30, "centième": 100, "millième": 1000}
    if input_str in otherDenominators:
        return 1.0 / otherDenominators[input_str]

    return False


def normalize_fr(text, remove_articles):
    """
    French string normalization.

    Lower-cases the text, collapses runs of whitespace, drops the
    stand-alone punctuation tokens "?", "!", ";" and "…", and replaces
    every word sequence recognised by number_parse_fr with the value that
    function returns.  When remove_articles is true, words listed in
    articles_fr are removed and an elided "l'" or "d'" prefix is stripped
    from the word carrying it.

    Args:
        text (str): the sentence to normalize
        remove_articles (bool): whether articles should be removed
    Returns:
        (str): the normalized sentence, words separated by single spaces
    """
    words = text.lower().split()  # split() also removes extra spaces
    normalized = []
    i = 0
    while i < len(words):
        # remove articles
        if remove_articles and words[i] in articles_fr:
            i += 1
            continue
        if remove_articles and words[i][:2] in ["l'", "d'"]:
            # stored back into the list because number_parse_fr below
            # reads the words from it
            words[i] = words[i][2:]
        # remove useless punctuation signs
        if words[i] in ["?", "!", ";", "…"]:
            i += 1
            continue
        # Convert numbers into digits; a match yields the value and the
        # index of the first word after the number
        result = number_parse_fr(words, i)
        if result is not None:
            val, i = result
            normalized.append(str(val))
            continue

        normalized.append(words[i])
        i += 1

    return " ".join(normalized)
lang="en-us"): return extract_datetime_pt(text, anchorDate) elif lang_lower.startswith("it"): return extract_datetime_it(text, anchorDate) + elif lang_lower.startswith("fr"): + return extract_datetime_fr(text, anchorDate) return text # ============================================================== @@ -163,6 +167,8 @@ def normalize(text, lang="en-us", remove_articles=True): return normalize_pt(text, remove_articles) elif lang_lower.startswith("it"): return normalize_it(text, remove_articles) + elif lang_lower.startswith("fr"): + return normalize_fr(text, remove_articles) # TODO: Normalization for other languages return text diff --git a/test/unittests/util/test_format_fr.py b/test/unittests/util/test_format_fr.py new file mode 100644 index 0000000000..0a9e9a0473 --- /dev/null +++ b/test/unittests/util/test_format_fr.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
import unittest
import datetime

from mycroft.util.format import nice_number
from mycroft.util.format import nice_time
from mycroft.util.format import pronounce_number


# number -> expected French rendering by nice_number(number, lang="fr-fr")
NUMBERS_FIXTURE_FR = {
    1.435634: '1.436',
    2: '2',
    5.0: '5',
    0.027: '0.027',
    0.5: 'un demi',
    1.333: '1 et 1 tiers',
    2.666: '2 et 2 tiers',
    0.25: 'un quart',
    1.25: '1 et 1 quart',
    0.75: '3 quarts',
    1.75: '1 et 3 quarts',
    3.4: '3 et 2 cinquièmes',
    16.8333: '16 et 5 sixièmes',
    12.5714: '12 et 4 septièmes',
    9.625: '9 et 5 huitièmes',
    6.777: '6 et 7 neuvièmes',
    3.1: '3 et 1 dixième',
    2.272: '2 et 3 onzièmes',
    5.583: '5 et 7 douzièmes',
    8.384: '8 et 5 treizièmes',
    0.071: 'un quatorzième',
    6.466: '6 et 7 quinzièmes',
    8.312: '8 et 5 seizièmes',
    2.176: '2 et 3 dix-septièmes',
    200.722: '200 et 13 dix-huitièmes',
    7.421: '7 et 8 dix-neuvièmes',
    0.05: 'un vingtième'
}


class TestNiceNumberFormat_fr(unittest.TestCase):
    """ Checks of nice_number() for French (lang="fr-fr") """

    def test_convert_float_to_nice_number_fr(self):
        """ Every entry of NUMBERS_FIXTURE_FR is rendered as expected """
        for number, number_str in NUMBERS_FIXTURE_FR.items():
            result = nice_number(number, lang="fr-fr")
            self.assertEqual(result, number_str,
                             'should format {} as {} and not {}'.format(
                                 number, number_str, result))

    def test_specify_denominator_fr(self):
        """ Only the denominators passed by the caller may be used """
        result = nice_number(5.5, lang="fr-fr", denominators=[1, 2, 3])
        self.assertEqual(result, '5 et demi',
                         'should format 5.5 as 5 and a half not {}'.format(
                             result))
        # lang="fr-fr" is passed here too, so that the value under test
        # is the French one reported in the failure message
        result = nice_number(2.333, lang="fr-fr", denominators=[1, 2])
        self.assertEqual(result, '2.333',
                         'should format 2.333 as 2.333 not {}'.format(
                             result))

    def test_no_speech_fr(self):
        """ speech=False gives the written form """
        result = nice_number(6.777, lang="fr-fr", speech=False)
        self.assertEqual(result, '6 7/9',
                         'should format 6.777 as 6 7/9 not {}'.format(
                             result))
        result = nice_number(6.0, lang="fr-fr", speech=False)
        self.assertEqual(result, '6',
                         'should format 6.0 as 6 not {}'.format(result))


# def pronounce_number(number, lang="en-us", places=2):
class TestPronounceNumber_fr(unittest.TestCase):
    """ Checks of pronounce_number() for French (lang="fr-fr") """

    def test_convert_int_fr(self):
        """ Integers from 0 to 97 are spelled out, 300 is left as digits """
        cases = [
            (0, "zéro"),
            (1, "un"),
            (10, "dix"),
            (15, "quinze"),
            (20, "vingt"),
            (27, "vingt-sept"),
            (30, "trente"),
            (33, "trente-trois"),
            (71, "soixante-et-onze"),
            (80, "quatre-vingts"),
            (74, "soixante-quatorze"),
            (79, "soixante-dix-neuf"),
            (91, "quatre-vingt-onze"),
            (97, "quatre-vingt-dix-sept"),
            (300, "300"),
        ]
        for number, expected in cases:
            self.assertEqual(pronounce_number(number, lang="fr-fr"),
                             expected)

    def test_convert_negative_int_fr(self):
        """ Negative integers are prefixed with "moins" """
        cases = [
            (-1, "moins un"),
            (-10, "moins dix"),
            (-15, "moins quinze"),
            (-20, "moins vingt"),
            (-27, "moins vingt-sept"),
            (-30, "moins trente"),
            (-33, "moins trente-trois"),
        ]
        for number, expected in cases:
            self.assertEqual(pronounce_number(number, lang="fr-fr"),
                             expected)

    def test_convert_decimals_fr(self):
        """ Decimals are read digit by digit after "virgule" """
        self.assertEqual(pronounce_number(1.234, lang="fr-fr"),
                         "un virgule deux trois")
        self.assertEqual(pronounce_number(-1.234, lang="fr-fr"),
                         "moins un virgule deux trois")
        self.assertEqual(pronounce_number(21.234, lang="fr-fr"),
                         "vingt-et-un virgule deux trois")
        self.assertEqual(pronounce_number(-21.234, lang="fr-fr"),
                         "moins vingt-et-un virgule deux trois")

        # places -> expected reading of 21.234; asking for more places
        # than the number has must not add digits
        by_places = [
            (1, "vingt-et-un virgule deux"),
            (0, "vingt-et-un"),
            (3, "vingt-et-un virgule deux trois quatre"),
            (4, "vingt-et-un virgule deux trois quatre"),
            (5, "vingt-et-un virgule deux trois quatre"),
        ]
        for places, expected in by_places:
            self.assertEqual(
                pronounce_number(21.234, lang="fr-fr", places=places),
                expected)
            self.assertEqual(
                pronounce_number(-21.234, lang="fr-fr", places=places),
                "moins " + expected)


# def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
#               use_ampm=False):
class TestNiceDateFormat_fr(unittest.TestCase):
    """ Checks of nice_time() for French (lang="fr-fr") """

    def _check_time_formats(self, dt, spoken, spoken_ampm, digits,
                            digits_ampm, digits_24h, spoken_24h):
        """
        Check one datetime against every nice_time() option combination.

        Args:
            dt (datetime): the time to format
            spoken (str): expected 12-hour spoken form
            spoken_ampm (str): expected 12-hour spoken form, use_ampm=True
            digits (str): expected 12-hour written form
            digits_ampm (str): expected 12-hour written form, use_ampm=True
            digits_24h (str): expected 24-hour written form
            spoken_24h (str): expected 24-hour spoken form
        """
        self.assertEqual(nice_time(dt, lang="fr-fr"), spoken)
        self.assertEqual(nice_time(dt, lang="fr-fr", use_ampm=True),
                         spoken_ampm)
        self.assertEqual(nice_time(dt, lang="fr-fr", speech=False), digits)
        self.assertEqual(nice_time(dt, lang="fr-fr", speech=False,
                                   use_ampm=True),
                         digits_ampm)
        # in 24-hour mode the result must not depend on use_ampm
        for use_ampm in (False, True):
            self.assertEqual(nice_time(dt, lang="fr-fr", speech=False,
                                       use_24hour=True, use_ampm=use_ampm),
                             digits_24h)
            self.assertEqual(nice_time(dt, lang="fr-fr", use_24hour=True,
                                       use_ampm=use_ampm),
                             spoken_24h)

    def test_convert_times_fr(self):
        """ Spoken and written times, 12-hour and 24-hour """
        self._check_time_formats(
            datetime.datetime(2017, 1, 31, 13, 22, 3),
            "une heure vingt-deux",
            "une heure vingt-deux de l'après-midi",
            "1:22", "1:22 PM", "13:22",
            "treize heures vingt-deux")
        self._check_time_formats(
            datetime.datetime(2017, 1, 31, 13, 0, 3),
            "une heure",
            "une heure de l'après-midi",
            "1:00", "1:00 PM", "13:00",
            "treize heures")
        self._check_time_formats(
            datetime.datetime(2017, 1, 31, 13, 2, 3),
            "une heure deux",
            "une heure deux de l'après-midi",
            "1:02", "1:02 PM", "13:02",
            "treize heures deux")
        self._check_time_formats(
            datetime.datetime(2017, 1, 31, 0, 2, 3),
            "minuit deux",
            "minuit deux",
            "12:02", "12:02 AM", "00:02",
            "minuit deux")
        self._check_time_formats(
            datetime.datetime(2017, 1, 31, 12, 15, 9),
            "midi et quart",
            "midi et quart",
            "12:15", "12:15 PM", "12:15",
            "midi quinze")
        self._check_time_formats(
            datetime.datetime(2017, 1, 31, 19, 40, 49),
            "huit heures moins vingt",
            "huit heures moins vingt du soir",
            "7:40", "7:40 PM", "19:40",
            "dix-neuf heures quarante")

        dt = datetime.datetime(2017, 1, 31, 1, 15, 0)
        self.assertEqual(nice_time(dt, lang="fr-fr", use_24hour=True),
                         "une heure quinze")

        dt = datetime.datetime(2017, 1, 31, 1, 35, 0)
        self.assertEqual(nice_time(dt, lang="fr-fr"),
                         "deux heures moins vingt-cinq")

        dt = datetime.datetime(2017, 1, 31, 1, 45, 0)
        self.assertEqual(nice_time(dt, lang="fr-fr"),
                         "deux heures moins le quart")

        dt = datetime.datetime(2017, 1, 31, 4, 50, 0)
        self.assertEqual(nice_time(dt, lang="fr-fr"),
                         "cinq heures moins dix")

        dt = datetime.datetime(2017, 1, 31, 5, 55, 0)
        self.assertEqual(nice_time(dt, lang="fr-fr"),
                         "six heures moins cinq")

        dt = datetime.datetime(2017, 1, 31, 5, 30, 0)
        self.assertEqual(nice_time(dt, lang="fr-fr", use_ampm=True),
                         "cinq heures et demi du matin")


if __name__ == "__main__":
    unittest.main()
#
import unittest
from datetime import datetime

from mycroft.util.parse import get_gender
from mycroft.util.parse import extract_datetime
from mycroft.util.parse import extractnumber
from mycroft.util.parse import normalize
from mycroft.util.parse import fuzzy_match
from mycroft.util.parse import match_one


class TestNormalize_fr(unittest.TestCase):
    """ Checks of the French (lang="fr-fr") parsing helpers """

    def test_articles_fr(self):
        """ Articles are removed only when remove_articles is true """
        self.assertEqual(normalize("c'est le test", remove_articles=True,
                                   lang="fr-fr"),
                         "c'est test")
        self.assertEqual(normalize("et l'autre test", remove_articles=True,
                                   lang="fr-fr"),
                         "et autre test")
        self.assertEqual(normalize("et la tentative", remove_articles=True,
                                   lang="fr-fr"),
                         "et tentative")
        self.assertEqual(normalize("la dernière tentative",
                                   remove_articles=False, lang="fr-fr"),
                         "la dernière tentative")

    def test_extractnumber_fr(self):
        """ Digits, spelled-out numbers, fractions and decimals """
        cases = [
            ("voici le premier test", 1),
            ("c'est 2 tests", 2),
            ("voici le second test", 2),
            ("voici trois tests", 3),
            ("voici le test numéro 4", 4),
            ("un tiers de litre", 1.0 / 3.0),
            ("3 cuillères", 3),
            ("1/3 de litre", 1.0 / 3.0),
            ("un quart de bol", 0.25),
            ("1/4 de verre", 0.25),
            ("2/3 de bol", 2.0 / 3.0),
            ("3/4 de bol", 3.0 / 4.0),
            ("1 et 3/4 de bol", 1.75),
            ("1 bol et demi", 1.5),
            ("un bol et demi", 1.5),
            ("un et demi bols", 1.5),
            ("un bol et un demi", 1.5),
            ("trois quarts de bol", 3.0 / 4.0),
            ("32.2 degrés", 32.2),
            ("2 virgule 2 cm", 2.2),
            ("2 virgule 0 2 cm", 2.02),
            ("ça fait virgule 2 cm", 0.2),
            # no number in the sentence: a normalized text is expected
            ("point du tout", "point tout"),
            ("32.00 secondes", 32),
            ("mange trente-et-une bougies", 31),
            ("un trentième", 1.0 / 30.0),
            ("un centième", 0.01),
            ("un millième", 0.001),
        ]
        for text, expected in cases:
            self.assertEqual(extractnumber(text, lang="fr-fr"), expected,
                             "unexpected number for '{}'".format(text))

    def test_extractdatetime_fr(self):
        """ Dates and times relative to a fixed anchor date """
        def checkExtract(anchor, text, expected_date, expected_leftover):
            # anchor=None lets extract_datetime use its default anchor
            [extractedDate, leftover] = extract_datetime(text, anchor,
                                                         lang="fr-fr")
            extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S")
            self.assertEqual(extractedDate, expected_date)
            self.assertEqual(leftover, expected_leftover)

        # Plain decimal literals: a leading zero as in "06" is a syntax
        # error on Python 3.
        def testExtract_fr(text, expected_date, expected_leftover):
            checkExtract(datetime(2017, 6, 27, 0, 0), text,
                         expected_date, expected_leftover)

        def testExtractDate2_fr(text, expected_date, expected_leftover):
            checkExtract(datetime(2017, 6, 30, 17, 0), text,
                         expected_date, expected_leftover)

        def testExtractNoDate_fr(text, expected_date, expected_leftover):
            checkExtract(None, text, expected_date, expected_leftover)

        testExtract_fr("Planifier l'embûche dans 5 jours",
                       "2017-07-02 00:00:00", "planifier embûche")
        testExtract_fr("Quel temps fera-t-il après-demain ?",
                       "2017-06-29 00:00:00", "quel temps fera-t-il")
        testExtract_fr("Met un rappel à 10:45 du soir",
                       "2017-06-27 22:45:00", "met 1 rappel")
        testExtract_fr("quel temps est prévu pour vendredi matin ?",
                       "2017-06-30 08:00:00", "quel temps est prévu pour")
        testExtract_fr("quel temps fait-il demain",
                       "2017-06-28 00:00:00", "quel temps fait-il")
        testExtract_fr("rappelle-moi d'appeler maman dans 8 semaines et"
                       " 2 jours", "2017-08-24 00:00:00",
                       "rappelle-moi appeler maman")
        testExtract_fr("Jouer des musiques de Beyonce 2 jours après vendredi",
                       "2017-07-02 00:00:00", "jouer musiques beyonce")
        testExtract_fr("Commencer l'invasion à 15 heures 45 jeudi",
                       "2017-06-29 15:45:00", "commencer invasion")
        testExtract_fr("Lundi, commander le gâteau à la boulangerie",
                       "2017-07-03 00:00:00", "commander gâteau à boulangerie")
        testExtract_fr("Jouer la chanson Joyeux anniversaire dans 5 ans",
                       "2022-06-27 00:00:00", "jouer chanson joyeux"
                       " anniversaire")
        testExtract_fr("Skyper Maman à 12 heures 45 jeudi prochain",
                       "2017-07-06 12:45:00", "skyper maman")
        testExtract_fr("Quel temps fera-t-il jeudi prochain ?",
                       "2017-07-06 00:00:00", "quel temps fera-t-il")
        testExtract_fr("Quel temps fera-t-il vendredi matin ?",
                       "2017-06-30 08:00:00", "quel temps fera-t-il")
        testExtract_fr("Quel temps fera-t-il vendredi soir",
                       "2017-06-30 19:00:00", "quel temps fera-t-il")
        testExtract_fr("Quel temps fera-t-il vendredi après-midi",
                       "2017-06-30 15:00:00", "quel temps fera-t-il")
        testExtract_fr("rappelle-moi d'appeler maman le 3 août",
                       "2017-08-03 00:00:00", "rappelle-moi appeler maman")
        testExtract_fr("Acheter des feux d'artifice pour le 14 juil",
                       "2017-07-14 00:00:00", "acheter feux artifice pour")
        testExtract_fr("Quel temps fera-t-il 2 semaines après vendredi",
                       "2017-07-14 00:00:00", "quel temps fera-t-il")
        testExtract_fr("Quel temps fera-t-il mercredi à 7 heures",
                       "2017-06-28 07:00:00", "quel temps fera-t-il")
        testExtract_fr("Prendre rendez-vous à 12:45 jeudi prochain",
                       "2017-07-06 12:45:00", "prendre rendez-vous")
        testExtract_fr("Quel temps fait-il ce jeudi ?",
                       "2017-06-29 00:00:00", "quel temps fait-il")
        testExtract_fr("Organiser une visite 2 semaines et 6 jours après"
                       " samedi",
                       "2017-07-21 00:00:00", "organiser 1 visite")
        testExtract_fr("Commencer l'invasion à 3 heures 45 jeudi",
                       "2017-06-29 03:45:00", "commencer invasion")
        testExtract_fr("Commencer l'invasion à 20 heures jeudi",
                       "2017-06-29 20:00:00", "commencer invasion")
        testExtract_fr("Lancer la fête jeudi à 8 heures du soir",
                       "2017-06-29 20:00:00", "lancer fête")
        testExtract_fr("Commencer l'invasion à 4 heures de l'après-midi jeudi",
                       "2017-06-29 16:00:00", "commencer invasion")
        testExtract_fr("Commencer l'invasion jeudi à midi",
                       "2017-06-29 12:00:00", "commencer invasion")
        testExtract_fr("Commencer l'invasion jeudi à minuit",
                       "2017-06-29 00:00:00", "commencer invasion")
        testExtract_fr("Commencer l'invasion jeudi à dix-sept heures",
                       "2017-06-29 17:00:00", "commencer invasion")
        testExtract_fr("rappelle-moi de me réveiller dans 4 années",
                       "2021-06-27 00:00:00", "rappelle-moi me réveiller")
        testExtract_fr("rappelle-moi de me réveiller dans 4 ans et 4 jours",
                       "2021-07-01 00:00:00", "rappelle-moi me réveiller")
        testExtract_fr("Quel temps fera-t-il 3 jours après demain ?",
                       "2017-07-01 00:00:00", "quel temps fera-t-il")
        testExtract_fr("3 décembre",
                       "2017-12-03 00:00:00", "")
        testExtract_fr("retrouvons-nous à 8:00 ce soir",
                       "2017-06-27 20:00:00", "retrouvons-nous")
        testExtract_fr("retrouvons-nous demain à minuit et demi",
                       "2017-06-28 00:30:00", "retrouvons-nous")
        testExtract_fr("retrouvons-nous à midi et quart",
                       "2017-06-27 12:15:00", "retrouvons-nous")
        testExtract_fr("retrouvons-nous à midi moins le quart",
                       "2017-06-27 11:45:00", "retrouvons-nous")
        testExtract_fr("retrouvons-nous à midi moins dix",
                       "2017-06-27 11:50:00", "retrouvons-nous")
        testExtract_fr("retrouvons-nous à midi dix",
                       "2017-06-27 12:10:00", "retrouvons-nous")
        testExtract_fr("retrouvons-nous à minuit moins 23",
                       "2017-06-27 23:37:00", "retrouvons-nous")
        testExtract_fr("mangeons à 3 heures moins 23 minutes",
                       "2017-06-27 02:37:00", "mangeons")
        testExtract_fr("mangeons aussi à 4 heures moins le quart du matin",
                       "2017-06-27 03:45:00", "mangeons aussi")
        testExtract_fr("mangeons encore à minuit moins le quart",
                       "2017-06-27 23:45:00", "mangeons encore")
        testExtract_fr("buvons à 4 heures et quart",
                       "2017-06-27 04:15:00", "buvons")
        testExtract_fr("buvons également à 18 heures et demi",
                       "2017-06-27 18:30:00", "buvons également")
        testExtract_fr("dormons à 20 heures moins le quart",
                       "2017-06-27 19:45:00", "dormons")
        testExtract_fr("buvons le dernier verre à 10 heures moins 12 du soir",
                       "2017-06-27 21:48:00", "buvons dernier verre")
        testExtract_fr("s'échapper de l'île à 15h45",
                       "2017-06-27 15:45:00", "s'échapper île")
        testExtract_fr("s'échapper de l'île à 3h45min de l'après-midi",
                       "2017-06-27 15:45:00", "s'échapper île")
        testExtract_fr("décale donc ça à 3h48min cet après-midi",
                       "2017-06-27 15:48:00", "décale donc ça")
        testExtract_fr("construire un bunker à 9h42min du matin",
                       "2017-06-27 09:42:00", "construire 1 bunker")
        testExtract_fr("ou plutôt à 9h43 ce matin",
                       "2017-06-27 09:43:00", "ou plutôt")
        testExtract_fr("faire un feu à 8h du soir",
                       "2017-06-27 20:00:00", "faire 1 feu")
        testExtract_fr("faire la fête jusqu'à 18h cette nuit",
                       "2017-06-27 18:00:00", "faire fête jusqu'à")
        testExtract_fr("cuver jusqu'à 4h cette nuit",
                       "2017-06-27 04:00:00", "cuver jusqu'à")
        testExtract_fr("réveille-moi dans 20 secondes aujourd'hui",
                       "2017-06-27 00:00:20", "réveille-moi")
        testExtract_fr("réveille-moi dans 33 minutes",
                       "2017-06-27 00:33:00", "réveille-moi")
        testExtract_fr("tais-toi dans 12 heures et 3 minutes",
                       "2017-06-27 12:03:00", "tais-toi")
        testExtract_fr("ouvre-la dans 1 heure 3",
                       "2017-06-27 01:03:00", "ouvre-la")
        testExtract_fr("ferme-la dans 1 heure et quart",
                       "2017-06-27 01:15:00", "ferme-la")
        testExtract_fr("scelle-la dans 1 heure et demi",
                       "2017-06-27 01:30:00", "scelle-la")
        testExtract_fr("zippe-la dans 2 heures moins 12",
                       "2017-06-27 01:48:00", "zippe-la")
        testExtract_fr("soude-la dans 3 heures moins le quart",
                       "2017-06-27 02:45:00", "soude-la")
        testExtract_fr("mange la semaine prochaine",
                       "2017-07-04 00:00:00", "mange")
        testExtract_fr("bois la semaine dernière",
                       "2017-06-20 00:00:00", "bois")
        testExtract_fr("mange le mois prochain",
                       "2017-07-27 00:00:00", "mange")
        testExtract_fr("bois le mois dernier",
                       "2017-05-27 00:00:00", "bois")
        testExtract_fr("mange l'an prochain",
                       "2018-06-27 00:00:00", "mange")
        testExtract_fr("bois l'année dernière",
                       "2016-06-27 00:00:00", "bois")
        testExtract_fr("reviens à lundi dernier",
                       "2017-06-26 00:00:00", "reviens")
        testExtract_fr("capitule le 8 mai 1945",
                       "1945-05-08 00:00:00", "capitule")
        testExtract_fr("rédige le contrat 3 jours après jeudi prochain",
                       "2017-07-09 00:00:00", "rédige contrat")
        testExtract_fr("signe le contrat 2 semaines après jeudi dernier",
                       "2017-07-06 00:00:00", "signe contrat")
        testExtract_fr("lance le four dans un quart d'heure",
                       "2017-06-27 00:15:00", "lance four")
        testExtract_fr("enfourne la pizza dans une demi-heure",
                       "2017-06-27 00:30:00", "enfourne pizza")
        testExtract_fr("arrête le four dans trois quarts d'heure",
                       "2017-06-27 00:45:00", "arrête four")
        testExtract_fr("mange la pizza dans une heure",
                       "2017-06-27 01:00:00", "mange pizza")
        testExtract_fr("faire les plantations le 3ème jour de mars",
                       "2018-03-03 00:00:00", "faire plantations")
        testExtract_fr("récolter dans 10 mois",
                       "2018-04-27 00:00:00", "récolter")
        testExtract_fr("point 6a: dans 10 mois",
                       "2018-04-27 06:00:00", "point")
        testExtract_fr("l'après-midi démissionner à 16:59",
                       "2017-06-27 16:59:00", "démissionner")
        testExtract_fr("ranger son bureau à 1700 heures",
                       "2017-06-27 17:00:00", "ranger son bureau")

        testExtractDate2_fr("range le contrat 2 semaines après lundi",
                            "2017-07-17 00:00:00", "range contrat")
        testExtractDate2_fr("achète-toi de l'humour à 15h",
                            "2017-07-01 15:00:00", "achète-toi humour")
        testExtractNoDate_fr("tais-toi aujourd'hui",
                             datetime.now().strftime("%Y-%m-%d") + " 00:00:00",
                             "tais-toi")
        self.assertEqual(extract_datetime("", lang="fr-fr"), None)
        self.assertEqual(extract_datetime("phrase inutile", lang="fr-fr"),
                         None)
        self.assertEqual(extract_datetime(
            "apprendre à compter à 37 heures", lang="fr-fr"), None)

    def test_spaces_fr(self):
        """ Leading and trailing spaces do not reach the result """
        self.assertEqual(normalize(" c'est le test", lang="fr-fr"),
                         "c'est test")
        self.assertEqual(normalize(" c'est le test ", lang="fr-fr"),
                         "c'est test")
        self.assertEqual(normalize(" c'est un test", lang="fr-fr"),
                         "c'est 1 test")

    def test_numbers_fr(self):
        """ Spelled-out numbers are replaced by digits """
        cases = [
            ("c'est un deux trois test", "c'est 1 2 3 test"),
            (" c'est le quatre cinq six test", "c'est 4 5 6 test"),
            ("c'est le sept huit neuf test", "c'est 7 8 9 test"),
            ("voilà le test dix onze douze", "voilà test 10 11 12"),
            ("voilà le treize quatorze test", "voilà 13 14 test"),
            ("ça fait quinze seize dix-sept", "ça fait 15 16 17"),
            ("ça fait dix-huit dix-neuf vingt", "ça fait 18 19 20"),
            ("ça fait mille cinq cents", "ça fait 1500"),
            ("voilà cinq cents trente et un mille euros",
             "voilà 531000 euros"),
            ("voilà trois cents soixante mille cinq"
             " cents quatre-vingt-dix-huit euros",
             "voilà 360598 euros"),
            ("voilà vingt et un euros", "voilà 21 euros"),
            ("joli zéro sur vingt", "joli 0 sur 20"),
            ("je veux du quatre-quart", "je veux quatre-quart"),
        ]
        for text, expected in cases:
            self.assertEqual(normalize(text, lang="fr-fr"), expected)

    def test_gender_fr(self):
        """ get_gender() returns False for a French word """
        self.assertEqual(get_gender("personne", lang="fr-fr"),
                         False)


if __name__ == "__main__":
    unittest.main()