From 4bb969ef8ffbe87f756c612dd3ef6495d7f15045 Mon Sep 17 00:00:00 2001 From: wei liu Date: Tue, 30 Jul 2024 14:37:51 +0800 Subject: [PATCH] enhance: Skip manual stopped component during health check (#34953) After a component is manually stopped through the management RESTful API, `healthz` may return an unhealthy state. k8s may then restart the pod to recover from the unhealthy state, so the manual stop operation gets an unexpected result. To solve this, we make the `healthz` API skip manually stopped components. --------- Signed-off-by: Wei Liu --- cmd/roles/roles.go | 3 +++ internal/http/healthz/healthz_handler.go | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go index 09e0ebeb82..2c04c4eac7 100644 --- a/cmd/roles/roles.go +++ b/cmd/roles/roles.go @@ -445,6 +445,9 @@ func (mr *MilvusRoles) Run() { if len(role) == 0 || componentMap[role] == nil { return fmt.Errorf("stop component [%s] in [%s] is not supported", role, mr.ServerType) } + + log.Info("unregister component before stop", zap.String("role", role)) + healthz.UnRegister(role) return componentMap[role].Stop() }) diff --git a/internal/http/healthz/healthz_handler.go b/internal/http/healthz/healthz_handler.go index 7509710851..3848eb4c44 100644 --- a/internal/http/healthz/healthz_handler.go +++ b/internal/http/healthz/healthz_handler.go @@ -21,6 +21,7 @@ import ( "encoding/json" "fmt" "net/http" + "sync" "go.uber.org/zap" @@ -52,6 +53,10 @@ type HealthResponse struct { type HealthHandler struct { indicators []Indicator + + // unregister role when call stop by restful api + unregisterLock sync.RWMutex + unregisteredRoles map[string]struct{} } var _ http.Handler = (*HealthHandler)(nil) @@ -62,6 +67,16 @@ func Register(indicator Indicator) { defaultHandler.indicators = append(defaultHandler.indicators, indicator) } +func UnRegister(role string) { + defaultHandler.unregisterLock.Lock() + defer defaultHandler.unregisterLock.Unlock() + + if defaultHandler.unregisteredRoles == nil { + 
defaultHandler.unregisteredRoles = make(map[string]struct{}) + } + defaultHandler.unregisteredRoles[role] = struct{}{} +} + func Handler() *HealthHandler { return &defaultHandler } @@ -72,6 +87,12 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) } ctx := context.Background() for _, in := range handler.indicators { + handler.unregisterLock.RLock() + _, unregistered := handler.unregisteredRoles[in.GetName()] + handler.unregisterLock.RUnlock() + if unregistered { + continue + } code := in.Health(ctx) resp.Detail = append(resp.Detail, &IndicatorState{ Name: in.GetName(),