From b4703566a80f9c30aecec0faf0787f8be1ca090c Mon Sep 17 00:00:00 2001 From: Brett Buddin Date: Thu, 26 Dec 2019 14:51:27 -0500 Subject: [PATCH 1/2] feat(kit/check): Adds manual override capabilities to readiness endpoint. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With `/health`, it is possible to override the overall status reported. This change adds the same functionality to `/ready`. This allows incident responders to take an unhealthy pod out of a service without killing it—giving them time to gather meaningful forensic data from the pod. The new contract is: Force not ready: GET /ready?force=true&ready=false Force ready: GET /ready?force=true&ready=true Disable override: GET /ready?force=false --- kit/check/check.go | 141 ++++++++++++++++++++++--------- kit/check/check_test.go | 182 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 282 insertions(+), 41 deletions(-) diff --git a/kit/check/check.go b/kit/check/check.go index 79ae3d3720..0e75c5f0ad 100644 --- a/kit/check/check.go +++ b/kit/check/check.go @@ -8,7 +8,7 @@ import ( "fmt" "net/http" "sort" - "sync/atomic" + "sync" ) // Status string to indicate the overall status of the check. @@ -26,10 +26,10 @@ const ( // Check wraps a map of service names to status checkers. type Check struct { - healthChecks []Checker - readyChecks []Checker - manualOverride atomic.Value - manualHealthState atomic.Value + healthChecks []Checker + readyChecks []Checker + healthOverride override + readyOverride override passthroughHandler http.Handler } @@ -42,8 +42,8 @@ type Checker interface { // NewCheck returns a Health with a default checker. func NewCheck() *Check { ch := &Check{} - ch.manualOverride.Store(false) - ch.manualHealthState.Store(false) + ch.healthOverride.disable() + ch.readyOverride.disable() return ch } @@ -74,13 +74,10 @@ func (c *Check) CheckHealth(ctx context.Context) Response { Status: StatusPass, Checks: make(Responses, len(c.healthChecks)), } - override := c.manualOverride.Load().(bool) - if override { - if c.manualHealthState.Load().(bool) { - response.Status = StatusPass - } else { - response.Status = StatusFail - } + + status, overriding := c.healthOverride.get() + if overriding { + response.Status = status overrideResponse := Response{ Name: "manual-override", Message: "health manually overridden", @@ -89,7 +86,7 @@ func (c *Check) CheckHealth(ctx context.Context) Response { } for i, ch := range c.healthChecks { resp := ch.Check(ctx) - if resp.Status != StatusPass && !override { + if resp.Status != StatusPass && !overriding { response.Status = resp.Status } response.Checks[i] = resp @@ -105,9 +102,19 @@ func (c *Check) CheckReady(ctx context.Context) Response { Status: StatusPass, Checks: make(Responses, len(c.readyChecks)), } + + status, overriding := c.readyOverride.get() + if overriding { + response.Status = status + overrideResponse := Response{ + Name: "manual-override", + Message: "ready manually overridden", + } + response.Checks = append(response.Checks, overrideResponse) + } for i, c := range c.readyChecks { resp := c.Check(ctx) - if resp.Status != StatusPass { + if resp.Status != StatusPass && !overriding { response.Status = resp.Status } response.Checks[i] = resp @@ -124,54 +131,108 @@ func (c *Check) SetPassthrough(h http.Handler) { // ServeHTTP serves /ready and /health requests with the respective checks. func (c *Check) ServeHTTP(w http.ResponseWriter, r *http.Request) { - // allow requests not intended for checks to pass through. - if r.URL.Path != "/ready" && r.URL.Path != "/health" { + const ( + pathReady = "/ready" + pathHealth = "/health" + queryForce = "force" + ) + + path := r.URL.Path + + // Allow requests not intended for checks to pass through. + if path != pathReady && path != pathHealth { if c.passthroughHandler != nil { c.passthroughHandler.ServeHTTP(w, r) return } - // We cant handle this request. + // We can't handle this request. w.WriteHeader(http.StatusNotFound) return } - msg := "" - status := http.StatusOK + ctx := r.Context() + query := r.URL.Query() - var resp Response - switch r.URL.Path { - case "/ready": - resp = c.CheckReady(r.Context()) - case "/health": - query := r.URL.Query() - switch query.Get("force") { + switch path { + case pathReady: + switch query.Get(queryForce) { case "true": - c.manualOverride.Store(true) - switch query.Get("healthy") { + switch query.Get("ready") { case "true": - c.manualHealthState.Store(true) + c.readyOverride.enable(StatusPass) case "false": - c.manualHealthState.Store(false) + c.readyOverride.enable(StatusFail) } case "false": - c.manualOverride.Store(false) + c.readyOverride.disable() } - resp = c.CheckHealth(r.Context()) + writeResponse(w, c.CheckReady(ctx)) + case pathHealth: + switch query.Get(queryForce) { + case "true": + switch query.Get("healthy") { + case "true": + c.healthOverride.enable(StatusPass) + case "false": + c.healthOverride.enable(StatusFail) + } + case "false": + c.healthOverride.disable() + } + writeResponse(w, c.CheckHealth(ctx)) } +} - // Set the HTTP status if the check failed +// writeResponse writes a Response to the wire as JSON. The HTTP status code +// accompanying the payload is the primary means for signaling the status of the +// checks. The possible status codes are: +// +// - 200 OK: All checks pass. +// - 503 Service Unavailable: Some checks are failing. +// - 500 Internal Server Error: There was a problem serializing the Response. +func writeResponse(w http.ResponseWriter, resp Response) { + status := http.StatusOK if resp.Status == StatusFail { - // Normal state, the HTTP response status reflects the status-reported health. status = http.StatusServiceUnavailable } - b, err := json.MarshalIndent(resp, "", " ") + msg, err := json.MarshalIndent(resp, "", " ") if err != nil { - b = []byte(`{"message": "error marshaling response", "status": "fail"}`) + msg = []byte(`{"message": "error marshaling response", "status": "fail"}`) status = http.StatusInternalServerError } - msg = string(b) w.WriteHeader(status) - fmt.Fprintln(w, msg) + fmt.Fprintln(w, string(msg)) +} + +// override is a manual override for an entire group of checks. +type override struct { + mtx sync.Mutex + status Status + active bool +} + +// get returns the Status of an override as well as whether or not an override +// is currently active. +func (m *override) get() (Status, bool) { + m.mtx.Lock() + defer m.mtx.Unlock() + return m.status, m.active +} + +// disable disables the override. +func (m *override) disable() { + m.mtx.Lock() + m.active = false + m.status = StatusFail + m.mtx.Unlock() +} + +// enable turns on the override and establishes a specific Status for which to. +func (m *override) enable(s Status) { + m.mtx.Lock() + m.active = true + m.status = s + m.mtx.Unlock() } diff --git a/kit/check/check_test.go b/kit/check/check_test.go index 610c914374..90b15073df 100644 --- a/kit/check/check_test.go +++ b/kit/check/check_test.go @@ -165,7 +165,67 @@ func TestHealthSorting(t *testing.T) { } } -func TestForceHealth(t *testing.T) { +func TestForceHealthy(t *testing.T) { + c, ts := buildCheckWithServer() + defer ts.Close() + + c.AddHealthCheck(mockFail("a")) + + _, err := http.Get(ts.URL + "/health?force=true&healthy=true") + if err != nil { + t.Fatal(err) + } + + resp, err := http.Get(ts.URL + "/health") + if err != nil { + t.Fatal(err) + } + actual, err := respBuilder(resp.Body) + if err != nil { + t.Fatal(err) + } + + expected := &Response{ + Name: "Health", + Status: "pass", + Checks: Responses{ + Response{Name: "manual-override", Message: "health manually overridden"}, + Response{Name: "a", Status: "fail"}, + }, + } + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("unexpected response. expected %v, actual %v", expected, actual) + } + + _, err = http.Get(ts.URL + "/health?force=false") + if err != nil { + t.Fatal(err) + } + + expected = &Response{ + Name: "Health", + Status: "fail", + Checks: Responses{ + Response{Name: "a", Status: "fail"}, + }, + } + + resp, err = http.Get(ts.URL + "/health") + if err != nil { + t.Fatal(err) + } + actual, err = respBuilder(resp.Body) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("unexpected response. expected %v, actual %v", expected, actual) + } +} + +func TestForceUnhealthy(t *testing.T) { c, ts := buildCheckWithServer() defer ts.Close() @@ -225,6 +285,126 @@ func TestForceHealth(t *testing.T) { } } +func TestForceReady(t *testing.T) { + c, ts := buildCheckWithServer() + defer ts.Close() + + c.AddReadyCheck(mockFail("a")) + + _, err := http.Get(ts.URL + "/ready?force=true&ready=true") + if err != nil { + t.Fatal(err) + } + + resp, err := http.Get(ts.URL + "/ready") + if err != nil { + t.Fatal(err) + } + actual, err := respBuilder(resp.Body) + if err != nil { + t.Fatal(err) + } + + expected := &Response{ + Name: "Ready", + Status: "pass", + Checks: Responses{ + Response{Name: "manual-override", Message: "ready manually overridden"}, + Response{Name: "a", Status: "fail"}, + }, + } + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("unexpected response. expected %v, actual %v", expected, actual) + } + + _, err = http.Get(ts.URL + "/ready?force=false") + if err != nil { + t.Fatal(err) + } + + expected = &Response{ + Name: "Ready", + Status: "fail", + Checks: Responses{ + Response{Name: "a", Status: "fail"}, + }, + } + + resp, err = http.Get(ts.URL + "/ready") + if err != nil { + t.Fatal(err) + } + actual, err = respBuilder(resp.Body) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("unexpected response. expected %v, actual %v", expected, actual) + } +} + +func TestForceNotReady(t *testing.T) { + c, ts := buildCheckWithServer() + defer ts.Close() + + c.AddReadyCheck(mockPass("a")) + + _, err := http.Get(ts.URL + "/ready?force=true&ready=false") + if err != nil { + t.Fatal(err) + } + + resp, err := http.Get(ts.URL + "/ready") + if err != nil { + t.Fatal(err) + } + actual, err := respBuilder(resp.Body) + if err != nil { + t.Fatal(err) + } + + expected := &Response{ + Name: "Ready", + Status: "fail", + Checks: Responses{ + Response{Name: "manual-override", Message: "ready manually overridden"}, + Response{Name: "a", Status: "pass"}, + }, + } + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("unexpected response. expected %v, actual %v", expected, actual) + } + + _, err = http.Get(ts.URL + "/ready?force=false") + if err != nil { + t.Fatal(err) + } + + expected = &Response{ + Name: "Ready", + Status: "pass", + Checks: Responses{ + Response{Name: "a", Status: "pass"}, + }, + } + + resp, err = http.Get(ts.URL + "/ready") + if err != nil { + t.Fatal(err) + } + actual, err = respBuilder(resp.Body) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("unexpected response. expected %v, actual %v", expected, actual) + } +} + func TestNoCrossOver(t *testing.T) { c, ts := buildCheckWithServer() defer ts.Close() From c42a23254508c987f06ed11012f0539540a83e5e Mon Sep 17 00:00:00 2001 From: Brett Buddin Date: Fri, 27 Dec 2019 12:04:12 -0500 Subject: [PATCH 2/2] chore(kit/check): Add new readiness override to CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90185bf728..792aa0e88e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ 1. [16340](https://github.com/influxdata/influxdb/pull/16340): Add last run status to tasks 1. [16341](https://github.com/influxdata/influxdb/pull/16341): Extend pkger apply functionality with ability to provide secrets outside of pkg 1. [16345](https://github.com/influxdata/influxdb/pull/16345): Add hide headers flag to influx cli task find cmd +1. [16336](https://github.com/influxdata/influxdb/pull/16336): Manual Overrides for Readiness Endpoint ### Bug Fixes