diff --git a/modules/filter/filter.module b/modules/filter/filter.module index 295a1783772..7c8b2b88cfa 100644 --- a/modules/filter/filter.module +++ b/modules/filter/filter.module @@ -1480,9 +1480,12 @@ function _filter_url($text, $filter) { // Prepare domain name pattern. // The ICANN seems to be on track towards accepting more diverse top level - // domains, so this pattern has been "future-proofed" to allow for TLDs - // of length 2-64. + // domains (TLDs), so this pattern has been "future-proofed" to allow for + // TLDs of length 2-64. $domain = '(?:[A-Za-z0-9._+-]+\.)?[A-Za-z]{2,64}\b'; + // Mail domains differ from the generic domain pattern, specifically: + // A . character must be present in the string that follows the @ character. + $email_domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)+[\p{L}\p{M}]{2,64}\b'; $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}'; $auth = '[a-zA-Z0-9:%_+*~#?&=.,/;-]+@'; $trail = '[a-zA-Z0-9:%_+*~#&\[\]=/;?!\.,-]*[a-zA-Z0-9:%_+*~#&\[\]=/;-]'; @@ -1499,7 +1502,7 @@ function _filter_url($text, $filter) { $tasks['_filter_url_parse_full_links'] = $pattern; // Match e-mail addresses. - $url_pattern = "[A-Za-z0-9._+-]{1,254}@(?:$domain)"; + $url_pattern = "[\p{L}\p{M}\p{N}._+-]{1,254}@(?:$email_domain)"; $pattern = "`($url_pattern)`"; $tasks['_filter_url_parse_email_links'] = $pattern; diff --git a/modules/filter/tests/filter.url-input.txt b/modules/filter/tests/filter.url-input.txt index 7b33af56ca9..32f4c5e89e5 100644 --- a/modules/filter/tests/filter.url-input.txt +++ b/modules/filter/tests/filter.url-input.txt @@ -9,6 +9,7 @@ This is just a www.test.com. paragraph with person@test.com. some http://www.tes http://www.test.com www.test.com person@test.com +person@test www.test.com What about tags that don't exist like x say www.test.com? And what about tag beginning www.test.com with p? @@ -25,6 +26,7 @@ The old URL filter has problems with www.test.com. paragraph with http://www.test.com www.test.com person@test.com +person@test www.test.com What about tags that don't exist like x say www.test.com? And what about tag beginning www.test.com with p? @@ -25,6 +26,7 @@ The old URL filter has problems with www.test.com
http://www.test.com
person@test.com
+
person@test
check www.test.com
this with some text around: http://www.test.com not so easy person@test.com now?