Split big insert message into serveral smaller

Signed-off-by: dragondriver <jiquan.long@zilliz.com>
pull/4973/head^2
dragondriver 2021-03-09 16:18:58 +08:00 committed by yefu.chen
parent af1900b42a
commit af32f442bb
9 changed files with 180 additions and 4 deletions

View File

@ -38,6 +38,8 @@ minio:
pulsar:
address: localhost
port: 6650
rest-port: 18080 # keep same with pulsar container
maxMessageSize: 5242880 # 5 * 1024 * 1024
authentication: false
user: user-default
token: eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY

1
go.mod
View File

@ -16,6 +16,7 @@ require (
github.com/golang/mock v1.3.1
github.com/golang/protobuf v1.3.2
github.com/google/btree v1.0.0
github.com/jarcoal/httpmock v1.0.8
github.com/klauspost/compress v1.10.11 // indirect
github.com/minio/minio-go/v7 v7.0.5
github.com/mitchellh/mapstructure v1.1.2

2
go.sum
View File

@ -177,6 +177,8 @@ github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0m
github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I=
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jarcoal/httpmock v1.0.8 h1:8kI16SoO6LQKgPE7PvQuV+YuD/inwHd7fOOe2zMbo4k=
github.com/jarcoal/httpmock v1.0.8/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik=
github.com/jawher/mow.cli v1.0.4/go.mod h1:5hQj2V8g+qYmLUVWqu4Wuja1pI57M83EChYLVZ0sMKk=
github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
github.com/jawher/mow.cli v1.2.0/go.mod h1:y+pcA3jBAdo/GIZx/0rFjw/K2bVEODP9rfZOfaiq8Ko=

View File

@ -19,7 +19,9 @@ import (
)
const (
StartParamsKey = "START_PARAMS"
StartParamsKey = "START_PARAMS"
PulsarMaxMessageSizeKey = "maxMessageSize"
SuggestPulsarMaxMessageSizeKey = 5 * 1024 * 1024
)
type ParamTable struct {
@ -55,7 +57,8 @@ type ParamTable struct {
DefaultPartitionTag string
DefaultIndexName string
Log log.Config
PulsarMaxMessageSize int
Log log.Config
}
var Params ParamTable
@ -154,6 +157,7 @@ func (pt *ParamTable) initParams() {
pt.initDefaultIndexName()
pt.initLogCfg()
pt.initPulsarMaxMessageSize()
}
func (pt *ParamTable) initPulsarAddress() {
@ -404,6 +408,42 @@ func (pt *ParamTable) initDefaultIndexName() {
pt.DefaultIndexName = name
}
func (pt *ParamTable) initPulsarMaxMessageSize() {
// pulsarHost, err := pt.Load("pulsar.address")
// if err != nil {
// panic(err)
// }
// pulsarRestPort, err := pt.Load("pulsar.rest-port")
// if err != nil {
// panic(err)
// }
// protocol := "http"
// url := "/admin/v2/brokers/configuration/runtime"
// runtimeConfig, err := GetPulsarConfig(protocol, pulsarHost, pulsarRestPort, url)
// if err != nil {
// panic(err)
// }
// maxMessageSizeStr := fmt.Sprintf("%v", runtimeConfig[PulsarMaxMessageSizeKey])
// pt.PulsarMaxMessageSize, err = strconv.Atoi(maxMessageSizeStr)
// if err != nil {
// panic(err)
// }
maxMessageSizeStr, err := pt.Load("pulsar.maxMessageSize")
if err != nil {
pt.PulsarMaxMessageSize = SuggestPulsarMaxMessageSizeKey
} else {
maxMessageSize, err := strconv.Atoi(maxMessageSizeStr)
if err != nil {
pt.PulsarMaxMessageSize = SuggestPulsarMaxMessageSizeKey
} else {
pt.PulsarMaxMessageSize = maxMessageSize
}
}
}
func (pt *ParamTable) initLogCfg() {
pt.Log = log.Config{}
format, err := pt.Load("log.format")

View File

@ -4,6 +4,7 @@ import (
"errors"
"log"
"sort"
"unsafe"
"github.com/zilliztech/milvus-distributed/internal/msgstream"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
@ -170,6 +171,53 @@ func insertRepackFunc(tsMsgs []msgstream.TsMsg,
return 0
}
factor := 10
threshold := Params.PulsarMaxMessageSize / factor
log.Println("threshold of message size: ", threshold)
// not accurate
getSizeOfInsertMsg := func(msg *msgstream.InsertMsg) int {
// if real struct, call unsafe.Sizeof directly,
// if reference, dereference and then call unsafe.Sizeof,
// if slice, todo: a common function to calculate size of slice,
// if map, a little complicated
size := 0
size += int(unsafe.Sizeof(msg.BeginTimestamp))
size += int(unsafe.Sizeof(msg.EndTimestamp))
size += int(unsafe.Sizeof(msg.HashValues))
size += len(msg.HashValues) * 4
size += int(unsafe.Sizeof(*msg.MsgPosition))
size += int(unsafe.Sizeof(*msg.Base))
size += int(unsafe.Sizeof(msg.DbName))
size += int(unsafe.Sizeof(msg.CollectionName))
size += int(unsafe.Sizeof(msg.PartitionName))
size += int(unsafe.Sizeof(msg.DbID))
size += int(unsafe.Sizeof(msg.CollectionID))
size += int(unsafe.Sizeof(msg.PartitionID))
size += int(unsafe.Sizeof(msg.SegmentID))
size += int(unsafe.Sizeof(msg.ChannelID))
size += int(unsafe.Sizeof(msg.Timestamps))
size += int(unsafe.Sizeof(msg.RowIDs))
size += len(msg.RowIDs) * 8
for _, blob := range msg.RowData {
size += int(unsafe.Sizeof(blob.Value))
size += len(blob.Value)
}
log.Println("size of insert message: ", size)
return size
}
// not accurate
// getSizeOfMsgPack := func(mp *msgstream.MsgPack) int {
// size := 0
// for _, msg := range mp.Msgs {
// insertMsg, ok := msg.(*msgstream.InsertMsg)
// if !ok {
// log.Panic("only insert message is supported!")
// }
// size += getSizeOfInsertMsg(insertMsg)
// }
// return size
// }
for i, request := range tsMsgs {
insertRequest := request.(*msgstream.InsertMsg)
keys := hashKeys[i]
@ -214,10 +262,13 @@ func insertRepackFunc(tsMsgs []msgstream.TsMsg,
InsertRequest: sliceRequest,
}
if together { // all rows with same hash value are accumulated to only one message
msgNums := len(result[key].Msgs)
if len(result[key].Msgs) <= 0 {
result[key].Msgs = append(result[key].Msgs, insertMsg)
} else if getSizeOfInsertMsg(result[key].Msgs[msgNums-1].(*msgstream.InsertMsg)) >= threshold {
result[key].Msgs = append(result[key].Msgs, insertMsg)
} else {
accMsgs, _ := result[key].Msgs[0].(*msgstream.InsertMsg)
accMsgs, _ := result[key].Msgs[msgNums-1].(*msgstream.InsertMsg)
accMsgs.Timestamps = append(accMsgs.Timestamps, ts)
accMsgs.RowIDs = append(accMsgs.RowIDs, rowID)
accMsgs.RowData = append(accMsgs.RowData, row)

View File

@ -0,0 +1,41 @@
package proxynode
import (
"encoding/json"
"io/ioutil"
"log"
"net/http"
"time"
"github.com/zilliztech/milvus-distributed/internal/util/retry"
)
func GetPulsarConfig(protocol, ip, port, url string) (map[string]interface{}, error) {
var resp *http.Response
var err error
getResp := func() error {
log.Println("GET: ", protocol+"://"+ip+":"+port+url)
resp, err = http.Get(protocol + "://" + ip + ":" + port + url)
return err
}
err = retry.Retry(10, time.Second, getResp)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}
ret := make(map[string]interface{})
err = json.Unmarshal(body, &ret)
if err != nil {
return nil, err
}
return ret, nil
}

View File

@ -0,0 +1,38 @@
package proxynode
import (
"fmt"
"net/http"
"strconv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/jarcoal/httpmock"
)
func TestGetPulsarConfig(t *testing.T) {
httpmock.Activate()
defer httpmock.DeactivateAndReset()
runtimeConfig := make(map[string]interface{})
runtimeConfig[PulsarMaxMessageSizeKey] = strconv.FormatInt(5*1024*1024, 10)
protocol := "http"
ip := "pulsar"
port := "18080"
url := "/admin/v2/brokers/configuration/runtime"
httpmock.RegisterResponder("GET", protocol+"://"+ip+":"+port+url,
func(req *http.Request) (*http.Response, error) {
return httpmock.NewJsonResponse(200, runtimeConfig)
},
)
ret, err := GetPulsarConfig(protocol, ip, port, url)
assert.Equal(t, nil, err)
assert.Equal(t, len(ret), len(runtimeConfig))
assert.Equal(t, len(ret), 1)
for key, value := range ret {
assert.Equal(t, fmt.Sprintf("%v", value), fmt.Sprintf("%v", runtimeConfig[key]))
}
}

View File

@ -702,6 +702,7 @@ class TestInsertAsync:
assert len(ids) == nb
@pytest.mark.level(2)
@pytest.mark.tags("0331")
def test_insert_async_long(self, connect, collection):
'''
target: test insert vectors with different length of vectors

View File

@ -2,7 +2,7 @@ grpcio==1.26.0
grpcio-tools==1.26.0
numpy==1.18.1
pytest-cov==2.8.1
pymilvus-distributed==0.0.39
pymilvus-distributed==0.0.40
sklearn==0.0
pytest==6.2.2
pytest-timeout==1.3.3