[AbstractRuleBasedInterpreter] Fix Spanish tokenization (#2889)

Signed-off-by: Miguel Álvarez Díez <miguelwork92@gmail.com>
pull/2895/head
GiviMAD 2022-04-03 12:14:09 +02:00 committed by GitHub
parent b5de891ebd
commit 7442220830
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 1 deletion

View File

@@ -600,7 +600,8 @@ public abstract class AbstractRuleBasedInterpreter implements HumanLanguageInter
split = text.toLowerCase(locale).replaceAll("[\\']", " ").replaceAll("[^\\w\\sàâäçéèêëîïôùûü]", " ")
.split("\\s");
} else if ("es".equalsIgnoreCase(locale.getLanguage())) {
split = text.toLowerCase(locale).replaceAll("[\\']", " ").replaceAll("[^\\w\\sáéíóúü]", " ").split("\\s");
split = text.toLowerCase(locale).replaceAll("[\\']", " ").replaceAll("[^\\w\\sáéíóúïüñç]", " ")
.split("\\s");
} else {
split = text.toLowerCase(locale).replaceAll("[\\']", "").replaceAll("[^\\w\\s]", " ").split("\\s");
}