2022-12-08 10:37:19 +00:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package proxy
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-07-26 06:15:01 +00:00
|
|
|
"strconv"
|
|
|
|
"time"
|
2022-12-08 10:37:19 +00:00
|
|
|
|
2023-03-04 15:21:50 +00:00
|
|
|
"go.uber.org/zap"
|
2023-06-06 02:24:34 +00:00
|
|
|
"golang.org/x/sync/errgroup"
|
2023-03-04 15:21:50 +00:00
|
|
|
|
2023-06-08 17:28:37 +00:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2023-06-06 02:24:34 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/allocator"
|
2023-04-06 11:14:32 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
2023-07-26 06:15:01 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/metrics"
|
2023-04-06 11:14:32 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/mq/msgstream"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
|
2023-07-26 06:15:01 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
2023-04-06 11:14:32 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/retry"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
2022-12-08 10:37:19 +00:00
|
|
|
)
|
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
func genInsertMsgsByPartition(ctx context.Context,
|
|
|
|
segmentID UniqueID,
|
|
|
|
partitionID UniqueID,
|
|
|
|
partitionName string,
|
|
|
|
rowOffsets []int,
|
|
|
|
channelName string,
|
|
|
|
insertMsg *msgstream.InsertMsg) ([]msgstream.TsMsg, error) {
|
2022-12-08 10:37:19 +00:00
|
|
|
threshold := Params.PulsarCfg.MaxMessageSize.GetAsInt()
|
|
|
|
|
|
|
|
// create empty insert message
|
2023-06-06 02:24:34 +00:00
|
|
|
createInsertMsg := func(segmentID UniqueID, channelName string) *msgstream.InsertMsg {
|
2023-03-04 15:21:50 +00:00
|
|
|
insertReq := msgpb.InsertRequest{
|
2022-12-08 10:37:19 +00:00
|
|
|
Base: commonpbutil.NewMsgBase(
|
|
|
|
commonpbutil.WithMsgType(commonpb.MsgType_Insert),
|
|
|
|
commonpbutil.WithTimeStamp(insertMsg.BeginTimestamp), // entity's timestamp was set to equal it.BeginTimestamp in preExecute()
|
|
|
|
commonpbutil.WithSourceID(insertMsg.Base.SourceID),
|
|
|
|
),
|
|
|
|
CollectionID: insertMsg.CollectionID,
|
2023-06-06 02:24:34 +00:00
|
|
|
PartitionID: partitionID,
|
2022-12-08 10:37:19 +00:00
|
|
|
CollectionName: insertMsg.CollectionName,
|
2023-06-06 02:24:34 +00:00
|
|
|
PartitionName: partitionName,
|
2022-12-08 10:37:19 +00:00
|
|
|
SegmentID: segmentID,
|
|
|
|
ShardName: channelName,
|
2023-03-04 15:21:50 +00:00
|
|
|
Version: msgpb.InsertDataVersion_ColumnBased,
|
2022-12-08 10:37:19 +00:00
|
|
|
}
|
|
|
|
insertReq.FieldsData = make([]*schemapb.FieldData, len(insertMsg.GetFieldsData()))
|
|
|
|
|
|
|
|
msg := &msgstream.InsertMsg{
|
|
|
|
BaseMsg: msgstream.BaseMsg{
|
|
|
|
Ctx: ctx,
|
|
|
|
},
|
|
|
|
InsertRequest: insertReq,
|
|
|
|
}
|
|
|
|
|
|
|
|
return msg
|
|
|
|
}
|
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
repackedMsgs := make([]msgstream.TsMsg, 0)
|
|
|
|
requestSize := 0
|
|
|
|
msg := createInsertMsg(segmentID, channelName)
|
|
|
|
for _, offset := range rowOffsets {
|
|
|
|
curRowMessageSize, err := typeutil.EstimateEntitySize(insertMsg.GetFieldsData(), offset)
|
2022-12-08 10:37:19 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
// if insertMsg's size is greater than the threshold, split into multiple insertMsgs
|
|
|
|
if requestSize+curRowMessageSize >= threshold {
|
|
|
|
repackedMsgs = append(repackedMsgs, msg)
|
|
|
|
msg = createInsertMsg(segmentID, channelName)
|
|
|
|
requestSize = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
typeutil.AppendFieldData(msg.FieldsData, insertMsg.GetFieldsData(), int64(offset))
|
|
|
|
msg.HashValues = append(msg.HashValues, insertMsg.HashValues[offset])
|
|
|
|
msg.Timestamps = append(msg.Timestamps, insertMsg.Timestamps[offset])
|
|
|
|
msg.RowIDs = append(msg.RowIDs, insertMsg.RowIDs[offset])
|
|
|
|
msg.NumRows++
|
|
|
|
requestSize += curRowMessageSize
|
|
|
|
}
|
|
|
|
repackedMsgs = append(repackedMsgs, msg)
|
|
|
|
|
|
|
|
return repackedMsgs, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func repackInsertDataByPartition(ctx context.Context,
|
|
|
|
partitionName string,
|
|
|
|
rowOffsets []int,
|
|
|
|
channelName string,
|
|
|
|
insertMsg *msgstream.InsertMsg,
|
|
|
|
segIDAssigner *segIDAssigner) ([]msgstream.TsMsg, error) {
|
|
|
|
res := make([]msgstream.TsMsg, 0)
|
2022-12-08 10:37:19 +00:00
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
maxTs := Timestamp(0)
|
|
|
|
for _, offset := range rowOffsets {
|
|
|
|
ts := insertMsg.Timestamps[offset]
|
|
|
|
if maxTs < ts {
|
|
|
|
maxTs = ts
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-25 09:20:43 +00:00
|
|
|
partitionID, err := globalMetaCache.GetPartitionID(ctx, insertMsg.GetDbName(), insertMsg.CollectionName, partitionName)
|
2023-06-06 02:24:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-07-26 06:15:01 +00:00
|
|
|
beforeAssign := time.Now()
|
2023-06-06 02:24:34 +00:00
|
|
|
assignedSegmentInfos, err := segIDAssigner.GetSegmentID(insertMsg.CollectionID, partitionID, channelName, uint32(len(rowOffsets)), maxTs)
|
2023-07-26 06:15:01 +00:00
|
|
|
metrics.ProxyAssignSegmentIDLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(time.Since(beforeAssign).Milliseconds()))
|
2023-06-06 02:24:34 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Error("allocate segmentID for insert data failed",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
|
|
|
zap.String("channelName", channelName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.Int("allocate count", len(rowOffsets)),
|
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
startPos := 0
|
|
|
|
for segmentID, count := range assignedSegmentInfos {
|
|
|
|
subRowOffsets := rowOffsets[startPos : startPos+int(count)]
|
|
|
|
msgs, err := genInsertMsgsByPartition(ctx, segmentID, partitionID, partitionName, subRowOffsets, channelName, insertMsg)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("repack insert data to insert msgs failed",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.Int64("partitionID", partitionID),
|
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
2022-12-08 10:37:19 +00:00
|
|
|
}
|
2023-06-06 02:24:34 +00:00
|
|
|
res = append(res, msgs...)
|
|
|
|
startPos += int(count)
|
|
|
|
}
|
|
|
|
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func setMsgID(ctx context.Context,
|
|
|
|
msgs []msgstream.TsMsg,
|
|
|
|
idAllocator *allocator.IDAllocator) error {
|
|
|
|
var idBegin int64
|
|
|
|
var err error
|
|
|
|
|
|
|
|
err = retry.Do(ctx, func() error {
|
|
|
|
idBegin, _, err = idAllocator.Alloc(uint32(len(msgs)))
|
|
|
|
return err
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to allocate msg id", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, msg := range msgs {
|
|
|
|
msg.SetID(idBegin + UniqueID(i))
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2022-12-08 10:37:19 +00:00
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
func repackInsertData(ctx context.Context,
|
|
|
|
channelNames []string,
|
|
|
|
insertMsg *msgstream.InsertMsg,
|
|
|
|
result *milvuspb.MutationResult,
|
|
|
|
idAllocator *allocator.IDAllocator,
|
|
|
|
segIDAssigner *segIDAssigner) (*msgstream.MsgPack, error) {
|
|
|
|
msgPack := &msgstream.MsgPack{
|
|
|
|
BeginTs: insertMsg.BeginTs(),
|
|
|
|
EndTs: insertMsg.EndTs(),
|
2022-12-08 10:37:19 +00:00
|
|
|
}
|
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
channel2RowOffsets := assignChannelsByPK(result.IDs, channelNames, insertMsg)
|
|
|
|
for channel, rowOffsets := range channel2RowOffsets {
|
|
|
|
partitionName := insertMsg.PartitionName
|
|
|
|
msgs, err := repackInsertDataByPartition(ctx, partitionName, rowOffsets, channel, insertMsg, segIDAssigner)
|
2022-12-08 10:37:19 +00:00
|
|
|
if err != nil {
|
2023-06-06 02:24:34 +00:00
|
|
|
log.Warn("repack insert data to msg pack failed",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.String("partition name", partitionName),
|
2022-12-08 10:37:19 +00:00
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-06-06 02:24:34 +00:00
|
|
|
msgPack.Msgs = append(msgPack.Msgs, msgs...)
|
|
|
|
}
|
|
|
|
|
|
|
|
err := setMsgID(ctx, msgPack.Msgs, idAllocator)
|
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to set msgID when repack insert data",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.String("partition name", insertMsg.PartitionName),
|
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return msgPack, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func repackInsertDataWithPartitionKey(ctx context.Context,
|
|
|
|
channelNames []string,
|
|
|
|
partitionKeys *schemapb.FieldData,
|
|
|
|
insertMsg *msgstream.InsertMsg,
|
|
|
|
result *milvuspb.MutationResult,
|
|
|
|
idAllocator *allocator.IDAllocator,
|
|
|
|
segIDAssigner *segIDAssigner) (*msgstream.MsgPack, error) {
|
|
|
|
msgPack := &msgstream.MsgPack{
|
|
|
|
BeginTs: insertMsg.BeginTs(),
|
|
|
|
EndTs: insertMsg.EndTs(),
|
|
|
|
}
|
|
|
|
|
|
|
|
channel2RowOffsets := assignChannelsByPK(result.IDs, channelNames, insertMsg)
|
2023-06-25 09:20:43 +00:00
|
|
|
partitionNames, err := getDefaultPartitionNames(ctx, insertMsg.GetDbName(), insertMsg.CollectionName)
|
2023-06-06 02:24:34 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Warn("get default partition names failed in partition key mode",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
hashValues, err := typeutil.HashKey2Partitions(partitionKeys, partitionNames)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("has partition keys to partitions failed",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for channel, rowOffsets := range channel2RowOffsets {
|
|
|
|
partition2RowOffsets := make(map[string][]int)
|
|
|
|
for _, idx := range rowOffsets {
|
|
|
|
partitionName := partitionNames[hashValues[idx]]
|
|
|
|
if _, ok := partition2RowOffsets[partitionName]; !ok {
|
|
|
|
partition2RowOffsets[partitionName] = []int{}
|
2022-12-08 10:37:19 +00:00
|
|
|
}
|
2023-06-06 02:24:34 +00:00
|
|
|
partition2RowOffsets[partitionName] = append(partition2RowOffsets[partitionName], idx)
|
2022-12-08 10:37:19 +00:00
|
|
|
}
|
2023-06-06 02:24:34 +00:00
|
|
|
|
|
|
|
errGroup, _ := errgroup.WithContext(ctx)
|
2023-07-24 02:23:01 +00:00
|
|
|
partition2Msgs := typeutil.NewConcurrentMap[string, []msgstream.TsMsg]()
|
2023-06-06 02:24:34 +00:00
|
|
|
for partitionName, offsets := range partition2RowOffsets {
|
|
|
|
partitionName := partitionName
|
|
|
|
offsets := offsets
|
|
|
|
errGroup.Go(func() error {
|
|
|
|
msgs, err := repackInsertDataByPartition(ctx, partitionName, offsets, channel, insertMsg, segIDAssigner)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-07-24 02:23:01 +00:00
|
|
|
partition2Msgs.Insert(partitionName, msgs)
|
2023-06-06 02:24:34 +00:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
err = errGroup.Wait()
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("repack insert data into insert msg pack failed",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
|
|
|
zap.String("channelName", channel),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-07-24 02:23:01 +00:00
|
|
|
partition2Msgs.Range(func(name string, msgs []msgstream.TsMsg) bool {
|
2023-06-06 02:24:34 +00:00
|
|
|
msgPack.Msgs = append(msgPack.Msgs, msgs...)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
err = setMsgID(ctx, msgPack.Msgs, idAllocator)
|
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to set msgID when repack insert data",
|
2023-07-14 07:56:31 +00:00
|
|
|
zap.String("collectionName", insertMsg.CollectionName),
|
2023-06-06 02:24:34 +00:00
|
|
|
zap.Error(err))
|
|
|
|
return nil, err
|
2022-12-08 10:37:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return msgPack, nil
|
|
|
|
}
|