From bf04383e7437faaf28906b93669de87b34bcdf96 Mon Sep 17 00:00:00 2001 From: WeberJulian Date: Fri, 5 Mar 2021 19:56:50 +0100 Subject: [PATCH] fix french_cleaners --- TTS/tts/utils/text/abbreviations.py | 76 ++++++++++++++++------------- TTS/tts/utils/text/cleaners.py | 4 +- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/TTS/tts/utils/text/abbreviations.py b/TTS/tts/utils/text/abbreviations.py index fe4c1cdc..bc2f4830 100644 --- a/TTS/tts/utils/text/abbreviations.py +++ b/TTS/tts/utils/text/abbreviations.py @@ -24,38 +24,44 @@ abbreviations_en = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) ]] # List of (regular expression, replacement) pairs for abbreviations in french: -abbreviations_fr = [(re.compile('\\b%s\\.?' % x[0], re.IGNORECASE), x[1]) - for x in [ - ('M', 'monsieur'), - ('Mlle', 'mademoiselle'), - ('Mlles', 'mesdemoiselles'), - ('Mme', 'Madame'), - ('Mmes', 'Mesdames'), - ('N.B', 'nota bene'), - ('M', 'monsieur'), - ('p.c.q', 'parce que'), - ('Pr', 'professeur'), - ('qqch', 'quelque chose'), - ('rdv', 'rendez-vous'), - ('max', 'maximum'), - ('min', 'minimum'), - ('no', 'numéro'), - ('adr', 'adresse'), - ('dr', 'docteur'), - ('st', 'saint'), - ('co', 'companie'), - ('jr', 'junior'), - ('sgt', 'sergent'), - ('capt', 'capitain'), - ('col', 'colonel'), - ('av', 'avenue'), - ('av. J.-C', 'avant Jésus-Christ'), - ('apr. J.-C', 'après Jésus-Christ'), - ('art', 'article'), - ('boul', 'boulevard'), - ('c.-à-d', 'c’est-à-dire'), - ('etc', 'et cetera'), - ('ex', 'exemple'), - ('excl', 'exclusivement'), - ('boul', 'boulevard'), - ]] +abbreviations_fr = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) + for x in [ + ('M', 'monsieur'), + ('Mlle', 'mademoiselle'), + ('Mlles', 'mesdemoiselles'), + ('Mme', 'Madame'), + ('Mmes', 'Mesdames'), + ('N.B', 'nota bene'), + ('M', 'monsieur'), + ('p.c.q', 'parce que'), + ('Pr', 'professeur'), + ('qqch', 'quelque chose'), + ('rdv', 'rendez-vous'), + ('max', 'maximum'), + ('min', 'minimum'), + ('no', 'numéro'), + ('adr', 'adresse'), + ('dr', 'docteur'), + ('st', 'saint'), + ('co', 'companie'), + ('jr', 'junior'), + ('sgt', 'sergent'), + ('capt', 'capitain'), + ('col', 'colonel'), + ('av', 'avenue'), + ('av. J.-C', 'avant Jésus-Christ'), + ('apr. J.-C', 'après Jésus-Christ'), + ('art', 'article'), + ('boul', 'boulevard'), + ('c.-à-d', 'c’est-à-dire'), + ('etc', 'et cetera'), + ('ex', 'exemple'), + ('excl', 'exclusivement'), + ('boul', 'boulevard'), + ]] + [(re.compile('\\b%s' % x[0]), x[1]) + for x in [ + ('Mlle', 'mademoiselle'), + ('Mlles', 'mesdemoiselles'), + ('Mme', 'Madame'), + ('Mmes', 'Mesdames'), + ]] diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index 49a25557..c7a2b91a 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -108,8 +108,8 @@ def english_cleaners(text): def french_cleaners(text): '''Pipeline for French text. There is no need to expand numbers, phonemizer already does that''' - text = lowercase(text) text = expand_abbreviations(text, lang='fr') + text = lowercase(text) text = replace_symbols(text, lang='fr') text = remove_aux_symbols(text) text = collapse_whitespace(text) @@ -129,8 +129,6 @@ def chinese_mandarin_cleaners(text: str) -> str: text = replace_numbers_to_characters_in_text(text) return text - - def phoneme_cleaners(text): '''Pipeline for phonemes mode, including number and abbreviation expansion.''' text = expand_numbers(text)