[AbstractRuleBasedInterpreter] Fix Spanish tokenization (#2889)
Signed-off-by: Miguel Álvarez Díez <miguelwork92@gmail.com>
pull/2895/head
parent
b5de891ebd
commit
7442220830
|
@@ -600,7 +600,8 @@ public abstract class AbstractRuleBasedInterpreter implements HumanLanguageInter
|
|||
split = text.toLowerCase(locale).replaceAll("[\\']", " ").replaceAll("[^\\w\\sàâäçéèêëîïôùûü]", " ")
|
||||
.split("\\s");
|
||||
} else if ("es".equalsIgnoreCase(locale.getLanguage())) {
|
||||
- split = text.toLowerCase(locale).replaceAll("[\\']", " ").replaceAll("[^\\w\\sáéíóúü]", " ").split("\\s");
|
||||
+ split = text.toLowerCase(locale).replaceAll("[\\']", " ").replaceAll("[^\\w\\sáéíóúïüñç]", " ")
|
||||
+ .split("\\s");
|
||||
} else {
|
||||
split = text.toLowerCase(locale).replaceAll("[\\']", "").replaceAll("[^\\w\\s]", " ").split("\\s");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue