Refactor commit reader (#34542)

pull/34541/head^2
wxiaoguang 2025-05-28 00:49:05 +08:00 committed by GitHub
parent 688da55f54
commit 9f10885b21
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 71 additions and 87 deletions

View File

@ -34,7 +34,7 @@ type Commit struct {
// CommitSignature represents a git commit signature part.
type CommitSignature struct {
Signature string
Payload string // TODO check if can be reconstruct from the rest of commit information to not have duplicate data
Payload string
}
// Message returns the commit message. Same as retrieving CommitMessage directly.

View File

@ -6,10 +6,44 @@ package git
import (
"bufio"
"bytes"
"fmt"
"io"
"strings"
)
const (
commitHeaderGpgsig = "gpgsig"
commitHeaderGpgsigSha256 = "gpgsig-sha256"
)
func assignCommitFields(gitRepo *Repository, commit *Commit, headerKey string, headerValue []byte) error {
if len(headerValue) > 0 && headerValue[len(headerValue)-1] == '\n' {
headerValue = headerValue[:len(headerValue)-1] // remove trailing newline
}
switch headerKey {
case "tree":
objID, err := NewIDFromString(string(headerValue))
if err != nil {
return fmt.Errorf("invalid tree ID %q: %w", string(headerValue), err)
}
commit.Tree = *NewTree(gitRepo, objID)
case "parent":
objID, err := NewIDFromString(string(headerValue))
if err != nil {
return fmt.Errorf("invalid parent ID %q: %w", string(headerValue), err)
}
commit.Parents = append(commit.Parents, objID)
case "author":
commit.Author.Decode(headerValue)
case "committer":
commit.Committer.Decode(headerValue)
case commitHeaderGpgsig, commitHeaderGpgsigSha256:
// if there are duplicate "gpgsig" and "gpgsig-sha256" headers, then the signature must have already been invalid
// so we don't need to handle duplicate headers here
commit.Signature = &CommitSignature{Signature: string(headerValue)}
}
return nil
}
// CommitFromReader will generate a Commit from a provided reader
// We need this to interpret commits from cat-file or cat-file --batch
//
@ -21,90 +55,46 @@ func CommitFromReader(gitRepo *Repository, objectID ObjectID, reader io.Reader)
Committer: &Signature{},
}
payloadSB := new(strings.Builder)
signatureSB := new(strings.Builder)
messageSB := new(strings.Builder)
message := false
pgpsig := false
bufReader, ok := reader.(*bufio.Reader)
if !ok {
bufReader = bufio.NewReader(reader)
}
readLoop:
bufReader := bufio.NewReader(reader)
inHeader := true
var payloadSB, messageSB bytes.Buffer
var headerKey string
var headerValue []byte
for {
line, err := bufReader.ReadBytes('\n')
if err != nil {
if err == io.EOF {
if message {
_, _ = messageSB.Write(line)
if err != nil && err != io.EOF {
return nil, fmt.Errorf("unable to read commit %q: %w", objectID.String(), err)
}
if len(line) == 0 {
break
}
if inHeader {
inHeader = !(len(line) == 1 && line[0] == '\n') // still in header if line is not just a newline
k, v, _ := bytes.Cut(line, []byte{' '})
if len(k) != 0 || !inHeader {
if headerKey != "" {
if err = assignCommitFields(gitRepo, commit, headerKey, headerValue); err != nil {
return nil, fmt.Errorf("unable to parse commit %q: %w", objectID.String(), err)
}
}
_, _ = payloadSB.Write(line)
break readLoop
headerKey = string(k) // it also resets the headerValue to empty string if not inHeader
headerValue = v
} else {
headerValue = append(headerValue, v...)
}
return nil, err
}
if pgpsig {
if len(line) > 0 && line[0] == ' ' {
_, _ = signatureSB.Write(line[1:])
continue
}
pgpsig = false
}
if !message {
// This is probably not correct but is copied from go-gits interpretation...
trimmed := bytes.TrimSpace(line)
if len(trimmed) == 0 {
message = true
if headerKey != commitHeaderGpgsig && headerKey != commitHeaderGpgsigSha256 {
_, _ = payloadSB.Write(line)
continue
}
split := bytes.SplitN(trimmed, []byte{' '}, 2)
var data []byte
if len(split) > 1 {
data = split[1]
}
switch string(split[0]) {
case "tree":
commit.Tree = *NewTree(gitRepo, MustIDFromString(string(data)))
_, _ = payloadSB.Write(line)
case "parent":
commit.Parents = append(commit.Parents, MustIDFromString(string(data)))
_, _ = payloadSB.Write(line)
case "author":
commit.Author = &Signature{}
commit.Author.Decode(data)
_, _ = payloadSB.Write(line)
case "committer":
commit.Committer = &Signature{}
commit.Committer.Decode(data)
_, _ = payloadSB.Write(line)
case "encoding":
_, _ = payloadSB.Write(line)
case "gpgsig":
fallthrough
case "gpgsig-sha256": // FIXME: no intertop, so only 1 exists at present.
_, _ = signatureSB.Write(data)
_ = signatureSB.WriteByte('\n')
pgpsig = true
}
} else {
_, _ = messageSB.Write(line)
_, _ = payloadSB.Write(line)
}
}
commit.CommitMessage = messageSB.String()
commit.Signature = &CommitSignature{
Signature: signatureSB.String(),
Payload: payloadSB.String(),
}
if len(commit.Signature.Signature) == 0 {
commit.Signature = nil
}
commit.CommitMessage = messageSB.String()
if commit.Signature != nil {
commit.Signature.Payload = payloadSB.String()
}
return commit, nil
}

View File

@ -60,8 +60,7 @@ func TestGetFullCommitIDErrorSha256(t *testing.T) {
}
func TestCommitFromReaderSha256(t *testing.T) {
commitString := `9433b2a62b964c17a4485ae180f45f595d3e69d31b786087775e28c6b6399df0 commit 1114
tree e7f9e96dd79c09b078cac8b303a7d3b9d65ff9b734e86060a4d20409fd379f9e
commitString := `tree e7f9e96dd79c09b078cac8b303a7d3b9d65ff9b734e86060a4d20409fd379f9e
parent 26e9ccc29fad747e9c5d9f4c9ddeb7eff61cc45ef6a8dc258cbeb181afc055e8
author Adam Majer <amajer@suse.de> 1698676906 +0100
committer Adam Majer <amajer@suse.de> 1698676906 +0100
@ -112,8 +111,7 @@ VAEUo6ecdDxSpyt2naeg9pKus/BRi7P6g4B1hkk/zZstUX/QP4IQuAJbXjkvsC+X
HKRr3NlRM/DygzTyj0gN74uoa0goCIbyAQhiT42nm0cuhM7uN/W0ayrlZjGF1cbR
8NCJUL2Nwj0ywKIavC99Ipkb8AsFwpVT6U6effs6
=xybZ
-----END PGP SIGNATURE-----
`, commitFromReader.Signature.Signature)
-----END PGP SIGNATURE-----`, commitFromReader.Signature.Signature)
assert.Equal(t, `tree e7f9e96dd79c09b078cac8b303a7d3b9d65ff9b734e86060a4d20409fd379f9e
parent 26e9ccc29fad747e9c5d9f4c9ddeb7eff61cc45ef6a8dc258cbeb181afc055e8
author Adam Majer <amajer@suse.de> 1698676906 +0100

View File

@ -59,8 +59,7 @@ func TestGetFullCommitIDError(t *testing.T) {
}
func TestCommitFromReader(t *testing.T) {
commitString := `feaf4ba6bc635fec442f46ddd4512416ec43c2c2 commit 1074
tree f1a6cb52b2d16773290cefe49ad0684b50a4f930
commitString := `tree f1a6cb52b2d16773290cefe49ad0684b50a4f930
parent 37991dec2c8e592043f47155ce4808d4580f9123
author silverwind <me@silverwind.io> 1563741793 +0200
committer silverwind <me@silverwind.io> 1563741793 +0200
@ -108,8 +107,7 @@ sD53z/f0J+We4VZjY+pidvA9BGZPFVdR3wd3xGs8/oH6UWaLJAMGkLG6dDb3qDLm
mfeFhT57UbE4qukTDIQ0Y0WM40UYRTakRaDY7ubhXgLgx09Cnp9XTVMsHgT6j9/i
1pxsB104XLWjQHTjr1JtiaBQEwFh9r2OKTcpvaLcbNtYpo7CzOs=
=FRsO
-----END PGP SIGNATURE-----
`, commitFromReader.Signature.Signature)
-----END PGP SIGNATURE-----`, commitFromReader.Signature.Signature)
assert.Equal(t, `tree f1a6cb52b2d16773290cefe49ad0684b50a4f930
parent 37991dec2c8e592043f47155ce4808d4580f9123
author silverwind <me@silverwind.io> 1563741793 +0200
@ -126,8 +124,7 @@ empty commit`, commitFromReader.Signature.Payload)
}
func TestCommitWithEncodingFromReader(t *testing.T) {
commitString := `feaf4ba6bc635fec442f46ddd4512416ec43c2c2 commit 1074
tree ca3fad42080dd1a6d291b75acdfc46e5b9b307e5
commitString := `tree ca3fad42080dd1a6d291b75acdfc46e5b9b307e5
parent 47b24e7ab977ed31c5a39989d570847d6d0052af
author KN4CK3R <admin@oldschoolhack.me> 1711702962 +0100
committer KN4CK3R <admin@oldschoolhack.me> 1711702962 +0100
@ -172,8 +169,7 @@ SONRzusmu5n3DgV956REL7x62h7JuqmBz/12HZkr0z0zgXkcZ04q08pSJATX5N1F
yN+tWxTsWg+zhDk96d5Esdo9JMjcFvPv0eioo30GAERaz1hoD7zCMT4jgUFTQwgz
jw4YcO5u
=r3UU
-----END PGP SIGNATURE-----
`, commitFromReader.Signature.Signature)
-----END PGP SIGNATURE-----`, commitFromReader.Signature.Signature)
assert.Equal(t, `tree ca3fad42080dd1a6d291b75acdfc46e5b9b307e5
parent 47b24e7ab977ed31c5a39989d570847d6d0052af
author KN4CK3R <admin@oldschoolhack.me> 1711702962 +0100