mirror of https://github.com/milvus-io/milvus.git
Use GroupChecker for flowgraph node input check (#14916)
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>pull/14952/head
parent
2fdd74affa
commit
da4182a90f
|
@ -17,7 +17,6 @@
|
|||
package flowgraph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
@ -75,15 +74,14 @@ func (nodeCtx *nodeCtx) Start(wg *sync.WaitGroup) {
|
|||
// 2. invoke node.Operate
|
||||
// 3. deliver the Operate result to downstream nodes
|
||||
func (nodeCtx *nodeCtx) work() {
|
||||
// TODO: necessary to check every node?
|
||||
name := fmt.Sprintf("nodeCtxTtChecker-%s", nodeCtx.node.Name())
|
||||
warn := fmt.Sprintf("node %s haven't received input for %f minutes",
|
||||
nodeCtx.node.Name(), nodeCtxTtInterval.Minutes())
|
||||
var checker *timerecord.LongTermChecker
|
||||
var checker *timerecord.GroupChecker
|
||||
if enableTtChecker {
|
||||
checker = timerecord.NewLongTermChecker(context.Background(), name, nodeCtxTtInterval, warn)
|
||||
checker.Start()
|
||||
defer checker.Stop()
|
||||
checker = timerecord.GetGroupChecker("fgNode", nodeCtxTtInterval, func(list []string) {
|
||||
log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", nodeCtxTtInterval))
|
||||
})
|
||||
checker.Check(name)
|
||||
defer checker.Remove(name)
|
||||
}
|
||||
|
||||
for {
|
||||
|
@ -102,7 +100,7 @@ func (nodeCtx *nodeCtx) work() {
|
|||
res = n.Operate(inputs)
|
||||
|
||||
if enableTtChecker {
|
||||
checker.Check()
|
||||
checker.Check(name)
|
||||
}
|
||||
|
||||
downstreamLength := len(nodeCtx.downstreamInputChanIdx)
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package timerecord
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// groups maps a group name (string) to its *GroupChecker instance
|
||||
var groups sync.Map
|
||||
|
||||
// GroupChecker watches the members of one named group and reports the ones
// that have been silent (no Check call) for longer than the check duration.
// The report is delivered through the callback fn.
type GroupChecker struct {
	// Identity and configuration, set once at construction.
	groupName string              // name this checker is registered under
	d         time.Duration       // check duration: tick period and silence threshold
	fn        func(list []string) // callback receiving the names of silent members

	// Worker goroutine state.
	t  *time.Ticker  // internal ticker, created by the worker
	ch chan struct{} // closing signal for the worker

	// Member name (string) => latest report time (time.Time).
	lastest sync.Map

	initOnce sync.Once // makes init run at most once
	stopOnce sync.Once // makes Stop run at most once
}
|
||||
|
||||
// init start worker goroutine
|
||||
// protected by initOnce
|
||||
func (gc *GroupChecker) init() {
|
||||
gc.initOnce.Do(func() {
|
||||
gc.ch = make(chan struct{})
|
||||
go gc.work()
|
||||
})
|
||||
}
|
||||
|
||||
// work is the main procedure logic
|
||||
func (gc *GroupChecker) work() {
|
||||
gc.t = time.NewTicker(gc.d)
|
||||
var name string
|
||||
var ts time.Time
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-gc.t.C:
|
||||
case <-gc.ch:
|
||||
return
|
||||
}
|
||||
|
||||
var list []string
|
||||
gc.lastest.Range(func(k, v interface{}) bool {
|
||||
name = k.(string)
|
||||
ts = v.(time.Time)
|
||||
if time.Since(ts) > gc.d {
|
||||
list = append(list, name)
|
||||
}
|
||||
return true
|
||||
})
|
||||
if len(list) > 0 && gc.fn != nil {
|
||||
gc.fn(list)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check updates the latest timestamp for provided name
|
||||
func (gc *GroupChecker) Check(name string) {
|
||||
gc.lastest.Store(name, time.Now())
|
||||
}
|
||||
|
||||
// Remove deletes name from watch list
|
||||
func (gc *GroupChecker) Remove(name string) {
|
||||
gc.lastest.Delete(name)
|
||||
}
|
||||
|
||||
// Stop closes the GroupChecker
|
||||
func (gc *GroupChecker) Stop() {
|
||||
gc.stopOnce.Do(func() {
|
||||
close(gc.ch)
|
||||
groups.Delete(gc.groupName)
|
||||
})
|
||||
}
|
||||
|
||||
// GetGroupChecker returns the GroupChecker with related group name
|
||||
// if no exist GroupChecker has the provided name, a new instance will be created with provided params
|
||||
// otherwise the params will be ignored
|
||||
func GetGroupChecker(groupName string, duration time.Duration, fn func([]string)) *GroupChecker {
|
||||
gc := &GroupChecker{
|
||||
groupName: groupName,
|
||||
d: duration,
|
||||
fn: fn,
|
||||
}
|
||||
actual, loaded := groups.LoadOrStore(groupName, gc)
|
||||
if !loaded {
|
||||
gc.init()
|
||||
}
|
||||
|
||||
gc = actual.(*GroupChecker)
|
||||
return gc
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package timerecord
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGroupChecker(t *testing.T) {
|
||||
groupName := `test_group`
|
||||
signal := make(chan []string, 1)
|
||||
gc1 := GetGroupChecker(groupName, 10*time.Millisecond, func(list []string) {
|
||||
signal <- list
|
||||
})
|
||||
gc1.Check("1")
|
||||
gc2 := GetGroupChecker(groupName, time.Second, func(list []string) {
|
||||
t.FailNow()
|
||||
})
|
||||
gc2.Check("2")
|
||||
|
||||
assert.Equal(t, 10*time.Millisecond, gc2.d)
|
||||
|
||||
list := <-signal
|
||||
assert.Equal(t, []string{"1", "2"}, list)
|
||||
|
||||
gc2.Remove("2")
|
||||
|
||||
list = <-signal
|
||||
assert.Equal(t, []string{"1"}, list)
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
gc1.Stop()
|
||||
gc2.Stop()
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue