Sort out the log of IndexCoord (#16810)

Signed-off-by: Cai.Zhang <cai.zhang@zilliz.com>
pull/16852/head
cai.zhang 2022-05-09 12:05:52 +08:00 committed by GitHub
parent 8fcf349609
commit b0c9f25352
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 101 additions and 13 deletions

View File

@ -889,6 +889,10 @@ func (i *IndexCoord) assignTaskLoop() {
return
case <-timeTicker.C:
serverIDs := i.nodeManager.ListNode()
if len(serverIDs) == 0 {
log.Warn("there is no indexnode online")
continue
}
metas := i.metaTable.GetUnassignedTasks(serverIDs)
sort.Slice(metas, func(i, j int) bool {
return metas[i].indexMeta.Version <= metas[j].indexMeta.Version
@ -906,10 +910,14 @@ func (i *IndexCoord) assignTaskLoop() {
log.Debug("The version of the task has been updated", zap.Int64("indexBuildID", indexBuildID))
nodeID, builderClient := i.nodeManager.PeekClient(meta)
if builderClient == nil {
log.Warn("IndexCoord assignmentTasksLoop can not find available IndexNode")
if builderClient == nil && nodeID == -1 {
log.Warn("there is no indexnode online")
break
}
if builderClient == nil && nodeID == 0 {
log.Warn("The memory of all indexnodes does not meet the requirements")
continue
}
log.Debug("IndexCoord PeekClient success", zap.Int64("nodeID", nodeID))
req := &indexpb.CreateIndexRequest{
IndexBuildID: indexBuildID,

View File

@ -76,6 +76,33 @@ func TestIndexCoord(t *testing.T) {
err = inm0.Stop()
assert.Nil(t, err)
t.Run("create index without indexnodes", func(t *testing.T) {
indexID := int64(rand.Int())
req := &indexpb.BuildIndexRequest{
IndexID: indexID,
DataPaths: []string{"NoIndexNode-1", "NoIndexNode-2"},
NumRows: 10,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "dim",
Value: "128",
},
},
FieldSchema: &schemapb.FieldSchema{
DataType: schemapb.DataType_FloatVector,
},
}
resp, err := ic.BuildIndex(ctx, req)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.Status.ErrorCode)
time.Sleep(time.Second)
status, err := ic.DropIndex(ctx, &indexpb.DropIndexRequest{
IndexID: indexID,
})
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, status.ErrorCode)
})
in, err := grpcindexnode.NewServer(ctx, factory)
assert.Nil(t, err)
assert.NotNil(t, in)
@ -123,6 +150,24 @@ func TestIndexCoord(t *testing.T) {
assert.Equal(t, commonpb.ErrorCode_Success, resp.Status.ErrorCode)
assert.Equal(t, indexBuildID, resp2.IndexBuildID)
assert.Equal(t, "already have same index", resp2.Status.Reason)
req2 := &indexpb.BuildIndexRequest{
IndexID: indexID,
DataPaths: []string{"DataPath-3", "DataPath-4"},
NumRows: 1000,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "dim",
Value: "128",
},
},
FieldSchema: &schemapb.FieldSchema{
DataType: schemapb.DataType_FloatVector,
},
}
resp3, err := ic.BuildIndex(ctx, req2)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp3.Status.ErrorCode)
})
t.Run("Get Index State", func(t *testing.T) {

View File

@ -113,8 +113,13 @@ func (nm *NodeManager) PeekClient(meta Meta) (UniqueID, types.IndexNode) {
log.Debug("IndexCoord peek IndexNode client from pq", zap.Uint64("data size", dataSize))
nodeID := nm.pq.Peek(dataSize*indexSizeFactor, meta.indexMeta.Req.IndexParams, meta.indexMeta.Req.TypeParams)
if nodeID == -1 {
log.Error("there is no indexnode online")
return nodeID, nil
}
if nodeID == 0 {
log.Error("No IndexNode available", zap.Uint64("data size", dataSize),
zap.Uint64("IndexNode must have memory size", dataSize*indexSizeFactor))
return nodeID, nil
}
nm.lock.Lock()
defer nm.lock.Unlock()

View File

@ -17,11 +17,41 @@
package indexcoord
import (
"context"
"testing"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/schemapb"
"github.com/stretchr/testify/assert"
)
func TestNodeManager_getMetrics(t *testing.T) {
log.Info("TestNodeManager_getMetrics, todo")
// TestNodeManager_PeekClient exercises NodeManager.PeekClient in two states:
// on a freshly created manager with no node added, and after one node has been
// added and its memory set through the priority queue. In both cases the test
// expects a nil client; the returned node ID distinguishes the two situations.
func TestNodeManager_PeekClient(t *testing.T) {
nm := NewNodeManager(context.Background())
// Minimal task description: two data paths, 1000 rows and a 128-dim float
// vector field.
meta := Meta{
indexMeta: &indexpb.IndexMeta{
Req: &indexpb.BuildIndexRequest{
DataPaths: []string{"PeekClient-1", "PeekClient-2"},
NumRows: 1000,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "dim",
Value: "128",
},
},
FieldSchema: &schemapb.FieldSchema{
DataType: schemapb.DataType_FloatVector,
},
},
},
}
// No node has been added yet: the expected node ID is -1 and no client.
nodeID, client := nm.PeekClient(meta)
assert.Equal(t, int64(-1), nodeID)
assert.Nil(t, client)
err := nm.AddNode(1, "indexnode-1")
assert.Nil(t, err)
// NOTE(review): presumably this sets node 1's memory to a value (100) too
// small for the request above, so that no node qualifies — confirm against
// SetMemory and the peek policy.
nm.pq.SetMemory(1, 100)
// A node exists but is not selected: the expected node ID is 0 and no client.
nodeID2, client2 := nm.PeekClient(meta)
assert.Equal(t, int64(0), nodeID2)
assert.Nil(t, client2)
}

View File

@ -37,5 +37,5 @@ func PeekClientV1(memorySize uint64, indexParams []*commonpb.KeyValuePair,
return pq.items[i].key
}
}
return UniqueID(-1)
return UniqueID(0)
}

View File

@ -26,14 +26,14 @@ import (
// TestPeekClientV0 calls PeekClientV0 on the queue built by newPriorityQueue
// with a memory size of 10 and empty parameter lists, then asserts on the
// returned key.
func TestPeekClientV0(t *testing.T) {
pq := newPriorityQueue()
key := PeekClientV0(10, []*commonpb.KeyValuePair{}, []*commonpb.KeyValuePair{}, pq)
// NOTE(review): the next two assertions expect different values (0 and 1) for
// the same key, so they cannot both pass. This looks like the removed and the
// added line of a diff hunk shown together — confirm which one is current.
assert.Equal(t, UniqueID(0), key)
assert.Equal(t, UniqueID(1), key)
}
// TestPeekClientV1 calls PeekClientV1 on the queue built by newPriorityQueue
// twice, first with a memory size of 10 and then with 10000, and asserts on the
// key returned by each call.
func TestPeekClientV1(t *testing.T) {
pq := newPriorityQueue()
key := PeekClientV1(10, []*commonpb.KeyValuePair{}, []*commonpb.KeyValuePair{}, pq)
// NOTE(review): the next two assertions expect different values (0 and 1) for
// the same key, so they cannot both pass. This looks like the removed and the
// added line of a diff hunk shown together — confirm which one is current.
assert.Equal(t, UniqueID(0), key)
assert.Equal(t, UniqueID(1), key)
// NOTE(review): presumably 10000 is larger than any item's memory in the
// fixture, so this call exercises the "nothing fits" return — verify against
// newPriorityQueue and PeekClientV1.
key2 := PeekClientV1(10000, []*commonpb.KeyValuePair{}, []*commonpb.KeyValuePair{}, pq)
// NOTE(review): as above, -1 and 0 are both asserted for key2; only one of
// these two lines can be the current expectation — confirm.
assert.Equal(t, UniqueID(-1), key2)
assert.Equal(t, UniqueID(0), key2)
}

View File

@ -30,11 +30,11 @@ func newPriorityQueue() *PriorityQueue {
ret := &PriorityQueue{
policy: PeekClientV0,
}
for i := 0; i < QueueLen; i++ {
for i := 1; i <= QueueLen; i++ {
item := &PQItem{
key: UniqueID(i),
priority: i,
index: i,
index: i - 1,
totalMem: 1000,
}
ret.items = append(ret.items, item)
@ -66,7 +66,7 @@ func TestPriorityQueue_Push(t *testing.T) {
func TestPriorityQueue_Remove(t *testing.T) {
pq := newPriorityQueue()
cnt := 0
for i := 0; i < QueueLen; i++ {
for i := 1; i <= QueueLen; i++ {
if i%2 == 0 {
continue
}

View File

@ -342,7 +342,7 @@ func getMockSystemInfoMetrics(
HardwareInfos: metricsinfo.HardwareMetrics{
CPUCoreCount: metricsinfo.GetCPUCoreCount(false),
CPUCoreUsage: metricsinfo.GetCPUUsage(),
Memory: metricsinfo.GetMemoryCount(),
Memory: 1000,
MemoryUsage: metricsinfo.GetUsedMemoryCount(),
Disk: metricsinfo.GetDiskCount(),
DiskUsage: metricsinfo.GetDiskUsage(),