Valid email address should only start with alphanumeric (#28174)

This fixes issue #27847, where the regular expression allowed an email
address to start with special symbols. A valid email address should start
with an alphanumeric character, and only such an address is rendered as an email link.

Added the test cases from the bug report to validate that such input is no
longer rendered as an email address.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
pull/32564/head^2
Dejan Kitic 2025-04-20 12:18:14 +01:00 committed by GitHub
parent 6d3c6741ec
commit af6be75adb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 43 additions and 10 deletions

View File

@ -71,7 +71,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
// At the moment, we use a stricter rule for rendering purposes: the "name" part is only allowed to start after a word boundary
v.emailRegex = regexp.MustCompile(`\b([-\w.!#$%&'*+/=?^{|}~]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)\b`)
// emojiShortCodeRegex find emoji by alias like :smile:
v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)

View File

@ -3,7 +3,11 @@
package markup
import "golang.org/x/net/html"
import (
"strings"
"golang.org/x/net/html"
)
// emailAddressProcessor replaces raw email addresses with a mailto: link.
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
@ -14,6 +18,14 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
return
}
var nextByte byte
if len(node.Data) > m[3] {
nextByte = node.Data[m[3]]
}
if strings.IndexByte(":/", nextByte) != -1 {
// for cases: "git@gitea.com:owner/repo.git", "https://git@gitea.com/owner/repo.git"
return
}
mail := node.Data[m[2]:m[3]]
replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/))
node = node.NextSibling.NextSibling

View File

@ -225,10 +225,10 @@ func TestRender_email(t *testing.T) {
test := func(input, expected string) {
res, err := markup.RenderString(markup.NewTestRenderContext().WithRelativePath("a.md"), input)
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res))
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res), "input: %s", input)
}
// Text that should be turned into email link
// Text that should be turned into email link
test(
"info@gitea.com",
`<p><a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a></p>`)
@ -260,28 +260,48 @@ func TestRender_email(t *testing.T) {
<a href="mailto:j.doe@example.com" rel="nofollow">j.doe@example.com</a>?
<a href="mailto:j.doe@example.com" rel="nofollow">j.doe@example.com</a>!</p>`)
// match GitHub behavior
test("email@domain@domain.com", `<p>email@<a href="mailto:domain@domain.com" rel="nofollow">domain@domain.com</a></p>`)
// match GitHub behavior
test(`"info@gitea.com"`, `<p>&#34;<a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>&#34;</p>`)
// Text that should *not* be turned into email links
test(
"\"info@gitea.com\"",
`<p>&#34;info@gitea.com&#34;</p>`)
test(
"/home/gitea/mailstore/info@gitea/com",
`<p>/home/gitea/mailstore/info@gitea/com</p>`)
test(
"git@try.gitea.io:go-gitea/gitea.git",
`<p>git@try.gitea.io:go-gitea/gitea.git</p>`)
test(
"https://foo:bar@gitea.io",
`<p><a href="https://foo:bar@gitea.io" rel="nofollow">https://foo:bar@gitea.io</a></p>`)
test(
"gitea@3",
`<p>gitea@3</p>`)
test(
"gitea@gmail.c",
`<p>gitea@gmail.c</p>`)
test(
"email@domain@domain.com",
`<p>email@domain@domain.com</p>`)
test(
"email@domain..com",
`<p>email@domain..com</p>`)
cases := []struct {
input, expected string
}{
// match GitHub behavior
{"?a@d.zz", `<p>?<a href="mailto:a@d.zz" rel="nofollow">a@d.zz</a></p>`},
{"*a@d.zz", `<p>*<a href="mailto:a@d.zz" rel="nofollow">a@d.zz</a></p>`},
{"~a@d.zz", `<p>~<a href="mailto:a@d.zz" rel="nofollow">a@d.zz</a></p>`},
// the following cases don't match GitHub behavior, but they are valid email addresses ...
// maybe we should reduce the candidate characters for the "name" part in the future
{"a*a@d.zz", `<p><a href="mailto:a*a@d.zz" rel="nofollow">a*a@d.zz</a></p>`},
{"a~a@d.zz", `<p><a href="mailto:a~a@d.zz" rel="nofollow">a~a@d.zz</a></p>`},
}
for _, c := range cases {
test(c.input, c.expected)
}
}
func TestRender_emoji(t *testing.T) {