mirror of https://github.com/milvus-io/milvus.git
enhance: Add logs for check health failed (#39208)
Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/39295/head
parent
27a99f6b9d
commit
d2834a1812
|
@ -318,7 +318,7 @@ func (s *Server) Init() error {
|
|||
log.Info("DataCoord startup success")
|
||||
return nil
|
||||
}
|
||||
s.stateCode.Store(commonpb.StateCode_StandBy)
|
||||
s.UpdateStateCode(commonpb.StateCode_StandBy)
|
||||
log.Info("DataCoord enter standby mode successfully")
|
||||
return nil
|
||||
}
|
||||
|
@ -328,7 +328,7 @@ func (s *Server) Init() error {
|
|||
|
||||
func (s *Server) initDataCoord() error {
|
||||
log := log.Ctx(s.ctx)
|
||||
s.stateCode.Store(commonpb.StateCode_Initializing)
|
||||
s.UpdateStateCode(commonpb.StateCode_Initializing)
|
||||
var err error
|
||||
if err = s.initRootCoordClient(); err != nil {
|
||||
return err
|
||||
|
@ -463,7 +463,7 @@ func (s *Server) startDataCoord() {
|
|||
// })
|
||||
|
||||
s.afterStart()
|
||||
s.stateCode.Store(commonpb.StateCode_Healthy)
|
||||
s.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
sessionutil.SaveServerInfo(typeutil.DataCoordRole, s.session.GetServerID())
|
||||
}
|
||||
|
||||
|
|
|
@ -688,6 +688,12 @@ func (s *Server) GetStateCode() commonpb.StateCode {
|
|||
return code.(commonpb.StateCode)
|
||||
}
|
||||
|
||||
// UpdateStateCode stores code as the server's current state code and logs the new state at Info level.
|
||||
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
|
||||
s.stateCode.Store(code)
|
||||
log.Ctx(s.ctx).Info("update datacoord state", zap.String("state", code.String()))
|
||||
}
|
||||
|
||||
// GetComponentStates returns DataCoord's current state
|
||||
func (s *Server) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) {
|
||||
code := s.GetStateCode()
|
||||
|
|
|
@ -90,14 +90,14 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
|
|||
resp := &HealthResponse{
|
||||
State: "OK",
|
||||
}
|
||||
|
||||
unhealthyComponent := make([]string, 0)
|
||||
ctx := context.Background()
|
||||
healthNum := 0
|
||||
for _, in := range handler.indicators {
|
||||
handler.unregisterLock.RLock()
|
||||
_, unregistered := handler.unregisteredRoles[in.GetName()]
|
||||
handler.unregisterLock.RUnlock()
|
||||
if unregistered {
|
||||
healthNum++
|
||||
continue
|
||||
}
|
||||
code := in.Health(ctx)
|
||||
|
@ -105,13 +105,15 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
|
|||
Name: in.GetName(),
|
||||
Code: code,
|
||||
})
|
||||
if code == commonpb.StateCode_Healthy || code == commonpb.StateCode_StandBy {
|
||||
healthNum++
|
||||
|
||||
if code != commonpb.StateCode_Healthy && code != commonpb.StateCode_StandBy {
|
||||
unhealthyComponent = append(unhealthyComponent, in.GetName())
|
||||
}
|
||||
}
|
||||
|
||||
if healthNum != handler.indicatorNum {
|
||||
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", healthNum, handler.indicatorNum)
|
||||
if len(unhealthyComponent) > 0 {
|
||||
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", handler.indicatorNum-len(unhealthyComponent), handler.indicatorNum)
|
||||
log.Info("check health failed", zap.Strings("UnhealthyComponent", unhealthyComponent))
|
||||
}
|
||||
|
||||
if resp.State == "OK" {
|
||||
|
|
|
@ -671,6 +671,7 @@ func (s *Server) Stop() error {
|
|||
// UpdateStateCode updates the status of the coord (e.g. healthy, unhealthy): it stores code as an int32
// in s.status and logs the new state at Info level.
|
||||
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
|
||||
s.status.Store(int32(code))
|
||||
log.Ctx(s.ctx).Info("update querycoord state", zap.String("state", code.String()))
|
||||
}
|
||||
|
||||
func (s *Server) State() commonpb.StateCode {
|
||||
|
|
Loading…
Reference in New Issue