enhance: Add logs for check health failed (#39208)

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/39295/head
wei liu 2025-01-15 17:31:00 +08:00 committed by GitHub
parent 27a99f6b9d
commit d2834a1812
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 9 deletions

View File

@ -318,7 +318,7 @@ func (s *Server) Init() error {
log.Info("DataCoord startup success") log.Info("DataCoord startup success")
return nil return nil
} }
s.stateCode.Store(commonpb.StateCode_StandBy) s.UpdateStateCode(commonpb.StateCode_StandBy)
log.Info("DataCoord enter standby mode successfully") log.Info("DataCoord enter standby mode successfully")
return nil return nil
} }
@ -328,7 +328,7 @@ func (s *Server) Init() error {
func (s *Server) initDataCoord() error { func (s *Server) initDataCoord() error {
log := log.Ctx(s.ctx) log := log.Ctx(s.ctx)
s.stateCode.Store(commonpb.StateCode_Initializing) s.UpdateStateCode(commonpb.StateCode_Initializing)
var err error var err error
if err = s.initRootCoordClient(); err != nil { if err = s.initRootCoordClient(); err != nil {
return err return err
@ -463,7 +463,7 @@ func (s *Server) startDataCoord() {
// }) // })
s.afterStart() s.afterStart()
s.stateCode.Store(commonpb.StateCode_Healthy) s.UpdateStateCode(commonpb.StateCode_Healthy)
sessionutil.SaveServerInfo(typeutil.DataCoordRole, s.session.GetServerID()) sessionutil.SaveServerInfo(typeutil.DataCoordRole, s.session.GetServerID())
} }

View File

@ -688,6 +688,12 @@ func (s *Server) GetStateCode() commonpb.StateCode {
return code.(commonpb.StateCode) return code.(commonpb.StateCode)
} }
// UpdateStateCode records code as the server's current state and logs the
// change.
//
// The value is written to s.stateCode first. An Info-level entry with the
// message "update datacoord state" is then emitted through the logger
// returned by log.Ctx(s.ctx), carrying code.String() in the "state" field.
// The entry is written on every call; there is no check against the
// previously stored value.
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
s.stateCode.Store(code)
log.Ctx(s.ctx).Info("update datacoord state", zap.String("state", code.String()))
}
// GetComponentStates returns DataCoord's current state // GetComponentStates returns DataCoord's current state
func (s *Server) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) { func (s *Server) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) {
code := s.GetStateCode() code := s.GetStateCode()

View File

@ -90,14 +90,14 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
resp := &HealthResponse{ resp := &HealthResponse{
State: "OK", State: "OK",
} }
unhealthyComponent := make([]string, 0)
ctx := context.Background() ctx := context.Background()
healthNum := 0
for _, in := range handler.indicators { for _, in := range handler.indicators {
handler.unregisterLock.RLock() handler.unregisterLock.RLock()
_, unregistered := handler.unregisteredRoles[in.GetName()] _, unregistered := handler.unregisteredRoles[in.GetName()]
handler.unregisterLock.RUnlock() handler.unregisterLock.RUnlock()
if unregistered { if unregistered {
healthNum++
continue continue
} }
code := in.Health(ctx) code := in.Health(ctx)
@ -105,13 +105,15 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
Name: in.GetName(), Name: in.GetName(),
Code: code, Code: code,
}) })
if code == commonpb.StateCode_Healthy || code == commonpb.StateCode_StandBy {
healthNum++ if code != commonpb.StateCode_Healthy && code != commonpb.StateCode_StandBy {
unhealthyComponent = append(unhealthyComponent, in.GetName())
} }
} }
if healthNum != handler.indicatorNum { if len(unhealthyComponent) > 0 {
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", healthNum, handler.indicatorNum) resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", handler.indicatorNum-len(unhealthyComponent), handler.indicatorNum)
log.Info("check health failed", zap.Strings("UnhealthyComponent", unhealthyComponent))
} }
if resp.State == "OK" { if resp.State == "OK" {

View File

@ -671,6 +671,7 @@ func (s *Server) Stop() error {
// UpdateStateCode updates the status of the coord, including healthy, unhealthy // UpdateStateCode updates the status of the coord, including healthy, unhealthy
func (s *Server) UpdateStateCode(code commonpb.StateCode) { func (s *Server) UpdateStateCode(code commonpb.StateCode) {
s.status.Store(int32(code)) s.status.Store(int32(code))
log.Ctx(s.ctx).Info("update querycoord state", zap.String("state", code.String()))
} }
func (s *Server) State() commonpb.StateCode { func (s *Server) State() commonpb.StateCode {