feat(http): allow for disabling pprof (#20827)

And translate pprof-enabled config during upgrade
pull/20847/head^2
Daniel Moran 2021-03-04 09:28:03 -05:00 committed by GitHub
parent 265c1f311e
commit 25738db42c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 373 additions and 31 deletions

View File

@ -49,9 +49,8 @@ Generate profiles with the following commands for bugs related to performance, l
```sh
# Commands should be run when the bug is actively happening.
# Note: This command will run for at least 30 seconds.
curl -o profiles.tar.gz "http://localhost:8086/debug/pprof/all?cpu=true"
curl -o vars.txt "http://localhost:8086/debug/vars"
# Note: This command will run for ~30 seconds.
curl -o profiles.tar.gz "http://localhost:8086/debug/pprof/all?cpu=30s"
iostat -xd 1 30 > iostat.txt
# Attach the `profiles.tar.gz`, `vars.txt`, and `iostat.txt` output files.
# Attach the `profiles.tar.gz` and `iostat.txt` output files.
```

View File

@ -1,5 +1,13 @@
## unreleased
### Breaking Changes
#### /debug/vars removed
Prior to this release, the `influxd` server would always expose profiling information over `/debug/vars`.
This endpoint was unauthenticated, and not used by InfluxDB systems to report diagnostics. For security and clarity,
the endpoint has been removed. Use the `/metrics` endpoint to collect system statistics.
### Features
1. [19811](https://github.com/influxdata/influxdb/pull/19811): Add Geo graph type to be able to store in Dashboard cells.
@ -7,6 +15,9 @@
1. [20307](https://github.com/influxdata/influxdb/pull/20307): Add `influx task retry-failed` command to rerun failed runs.
1. [20759](https://github.com/influxdata/influxdb/pull/20759): Add additional properties for Mosaic Graph.
1. [20763](https://github.com/influxdata/influxdb/pull/20763): Add `--compression` option to `influx write` to support GZIP inputs.
1. [20827](https://github.com/influxdata/influxdb/pull/20827): Add `--pprof-disabled` option to `influxd` to disable exposing profiling information over HTTP.
1. [20827](https://github.com/influxdata/influxdb/pull/20827): Add `/debug/pprof/all` HTTP endpoint to gather all profiles at once.
1. [20827](https://github.com/influxdata/influxdb/pull/20827): Upgrade `http.pprof-enabled` config in `influxd upgrade`.
### Bug Fixes
@ -20,6 +31,7 @@
1. [20798](https://github.com/influxdata/influxdb/pull/20798): Deprecate misleading `retentionPeriodHrs` key in onboarding API.
1. [20819](https://github.com/influxdata/influxdb/pull/20819): Fix Single Stat graphs with thresholds crashing on negative values.
1. [20809](https://github.com/influxdata/influxdb/pull/20809): Fix InfluxDB port in Flux function UI examples. Thanks @sunjincheng121!
1. [20827](https://github.com/influxdata/influxdb/pull/20827): Remove unauthenticated, unsupported `/debug/vars` HTTP endpoint.
## v2.0.4 [2021-02-08]

View File

@ -8,8 +8,6 @@ import (
"strconv"
"strings"
_ "net/http/pprof"
"github.com/NYTimes/gziphandler"
"github.com/bouk/httprouter"
jhttprouter "github.com/influxdata/httprouter"
@ -33,7 +31,6 @@ type MuxOpts struct {
ProviderFuncs []func(func(oauth2.Provider, oauth2.Mux))
StatusFeedURL string // JSON Feed URL for the client Status page News Feed
CustomLinks map[string]string // Any custom external links for client's User menu
PprofEnabled bool // Mount pprof routes for profiling
}
// NewMux attaches all the route handlers; handler returned servers chronograf.
@ -131,11 +128,6 @@ func NewMux(opts MuxOpts, service Service) http.Handler {
)
}
if opts.PprofEnabled {
// add profiling routes
router.GET("/debug/pprof/:thing", http.DefaultServeMux.ServeHTTP)
}
/* Documentation */
router.GET("/swagger.json", Spec())
router.GET("/docs", Redoc("/swagger.json"))

View File

@ -15,6 +15,7 @@ import (
"github.com/influxdata/influxdb/v2/kit/signals"
influxlogger "github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/nats"
"github.com/influxdata/influxdb/v2/pprof"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/v1/coordinator"
"github.com/influxdata/influxdb/v2/vault"
@ -74,6 +75,9 @@ func setCmdDescriptions(cmd *cobra.Command) {
func cmdRunE(ctx context.Context, o *InfluxdOpts) func() error {
return func() error {
// Set this as early as possible, since it affects global profiling rates.
pprof.SetGlobalProfiling(!o.ProfilingDisabled)
fluxinit.FluxInit()
l := NewLauncher()
@ -129,6 +133,8 @@ type InfluxdOpts struct {
SessionLength int // in minutes
SessionRenewDisabled bool
ProfilingDisabled bool
NatsPort int
NatsMaxPayloadBytes int
@ -173,6 +179,8 @@ func newOpts(viper *viper.Viper) *InfluxdOpts {
SessionLength: 60, // 60 minutes
SessionRenewDisabled: false,
ProfilingDisabled: false,
StoreType: BoltStore,
SecretStore: BoltStore,
@ -494,5 +502,13 @@ func (o *InfluxdOpts) bindCliOpts() []cli.Opt {
Desc: "The maximum number of bytes allowed in a NATS message payload.",
Default: o.NatsMaxPayloadBytes,
},
// Pprof config
{
DestP: &o.ProfilingDisabled,
Flag: "pprof-disabled",
Desc: "Don't expose debugging information over HTTP at /debug/pprof",
Default: o.ProfilingDisabled,
},
}
}

View File

@ -8,7 +8,6 @@ import (
"io"
"net"
nethttp "net/http"
_ "net/http/pprof" // needed to add pprof to our binary.
"os"
"path/filepath"
"strings"
@ -921,6 +920,7 @@ func (m *Launcher) run(ctx context.Context, opts *InfluxdOpts) (err error) {
m.reg,
http.WithLog(httpLogger),
http.WithAPIHandler(platformHandler),
http.WithPprofEnabled(!opts.ProfilingDisabled),
)
if opts.LogLevel == zap.DebugLevel {

View File

@ -3,7 +3,6 @@ package main
import (
"context"
"fmt"
_ "net/http/pprof"
"os"
"time"

View File

@ -43,6 +43,7 @@ var configMapRules = map[string]string{
"http.bind-address": "http-bind-address",
"http.https-certificate": "tls-cert",
"http.https-private-key": "tls-key",
"http.pprof-enabled": "pprof-disabled",
}
// configValueTransforms is a map from 2.x config keys to transformation functions
@ -59,6 +60,14 @@ var configValueTransforms = map[string]func(interface{}) interface{}{
}
return ret
},
// Flip the boolean (1.x tracked 'enabled', 2.x tracks 'disabled').
"pprof-disabled": func(v interface{}) interface{} {
ret := v
if b, ok := v.(bool); ok {
ret = !b
}
return ret
},
}
func loadV1Config(configFile string) (*configV1, *map[string]interface{}, error) {

View File

@ -181,6 +181,7 @@ bind-address = "127.0.0.1:8088"
bind-address = ":8086"
https-certificate = "/etc/ssl/influxdb.pem"
https-private-key = "/etc/ssl/influxdb-key.pem"
pprof-enabled = false
[logging]
level = "debug"
@ -410,6 +411,7 @@ storage-shard-precreator-check-interval = "5m"
storage-wal-fsync-delay = "100s"
tls-cert = "/etc/ssl/influxdb.pem"
tls-key = "/etc/ssl/influxdb-key.pem"
pprof-disabled = true
`
var testConfigV2default = `reporting-disabled = false
@ -438,6 +440,7 @@ storage-validate-keys = false
storage-wal-fsync-delay = "0s"
tls-cert = "/etc/ssl/influxdb.pem"
tls-key = ""
pprof-disabled = false
`
var testConfigV2obsoleteArrays = `reporting-disabled = true

View File

@ -38,7 +38,7 @@ func NewAuthenticationHandler(log *zap.Logger, h platform.HTTPErrorHandler) *Aut
return &AuthenticationHandler{
log: log,
HTTPErrorHandler: h,
Handler: http.DefaultServeMux,
Handler: http.NotFoundHandler(),
TokenParser: jsonweb.NewTokenParser(jsonweb.EmptyKeyStore),
noAuthRouter: httprouter.New(),
}

View File

@ -4,11 +4,11 @@ import (
"context"
"encoding/json"
"net/http"
_ "net/http/pprof" // used for debug pprof at the default path.
"github.com/go-chi/chi"
"github.com/influxdata/influxdb/v2/kit/prom"
kithttp "github.com/influxdata/influxdb/v2/kit/transport/http"
"github.com/influxdata/influxdb/v2/pprof"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
)
@ -41,10 +41,10 @@ type (
handlerOpts struct {
log *zap.Logger
apiHandler http.Handler
debugHandler http.Handler
healthHandler http.Handler
metricsHandler http.Handler
readyHandler http.Handler
pprofEnabled bool
}
HandlerOptFn func(opts *handlerOpts)
@ -62,9 +62,9 @@ func WithAPIHandler(h http.Handler) HandlerOptFn {
}
}
func WithDebugHandler(h http.Handler) HandlerOptFn {
func WithPprofEnabled(enabled bool) HandlerOptFn {
return func(opts *handlerOpts) {
opts.debugHandler = h
opts.pprofEnabled = enabled
}
}
@ -92,10 +92,10 @@ func WithReadyHandler(h http.Handler) HandlerOptFn {
func NewHandlerFromRegistry(name string, reg *prom.Registry, opts ...HandlerOptFn) *Handler {
opt := handlerOpts{
log: zap.NewNop(),
debugHandler: http.DefaultServeMux,
healthHandler: http.HandlerFunc(HealthHandler),
metricsHandler: reg.HTTPHandler(),
readyHandler: ReadyHandler(),
pprofEnabled: false,
}
for _, o := range opts {
o(&opt)
@ -113,12 +113,10 @@ func NewHandlerFromRegistry(name string, reg *prom.Registry, opts ...HandlerOptF
r.Use(
kithttp.Metrics(name, h.requests, h.requestDur),
)
{
r.Mount(MetricsPath, opt.metricsHandler)
r.Mount(ReadyPath, opt.readyHandler)
r.Mount(HealthPath, opt.healthHandler)
r.Mount(DebugPath, opt.debugHandler)
}
r.Mount(MetricsPath, opt.metricsHandler)
r.Mount(ReadyPath, opt.readyHandler)
r.Mount(HealthPath, opt.healthHandler)
r.Mount(DebugPath, pprof.NewHTTPHandler(opt.pprofEnabled))
})
// gather metrics and traces for everything else
@ -127,9 +125,7 @@ func NewHandlerFromRegistry(name string, reg *prom.Registry, opts ...HandlerOptF
kithttp.Trace(name),
kithttp.Metrics(name, h.requests, h.requestDur),
)
{
r.Mount("/", opt.apiHandler)
}
r.Mount("/", opt.apiHandler)
})
h.r = r
@ -182,7 +178,7 @@ func logEncodingError(log *zap.Logger, r *http.Request, err error) {
// If we encounter an error while encoding the response to an http request
// the best thing we can do is log that error, as we may have already written
// the headers for the http request in question.
log.Info("Error encoding response",
log.Error("Error encoding response",
zap.String("path", r.URL.Path),
zap.String("method", r.Method),
zap.Error(err))

View File

@ -3,7 +3,6 @@ package http
import (
"net/http"
"net/http/httptest"
_ "net/http/pprof"
"testing"
"github.com/influxdata/influxdb/v2/kit/prom"

188
pprof/http_server.go Normal file
View File

@ -0,0 +1,188 @@
package pprof
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
httppprof "net/http/pprof"
"strconv"
"time"
"github.com/go-chi/chi"
"github.com/influxdata/influxdb/v2"
ihttp "github.com/influxdata/influxdb/v2/kit/transport/http"
)
// Handler is the HTTP handler returned by NewHTTPHandler. It embeds the
// chi.Router on which the "/pprof" routes are registered, so it can be mounted
// anywhere a chi.Router is accepted.
type Handler struct {
chi.Router
}
// NewHTTPHandler builds the handler that owns every route under "/pprof".
//
// When profilingEnabled is true, the net/http/pprof endpoints (cmdline,
// profile, symbol, trace) are registered together with "/all", which is served
// by archiveProfilesHandler; any other path under "/pprof" is handed to
// httppprof.Index. When profilingEnabled is false no routes are registered at
// all and only a not-found handler (profilingDisabledHandler) is installed.
func NewHTTPHandler(profilingEnabled bool) *Handler {
	router := chi.NewRouter()

	router.Route("/pprof", func(sub chi.Router) {
		if profilingEnabled {
			sub.Get("/cmdline", httppprof.Cmdline)
			sub.Get("/profile", httppprof.Profile)
			sub.Get("/symbol", httppprof.Symbol)
			sub.Get("/trace", httppprof.Trace)
			sub.Get("/all", archiveProfilesHandler)
			// Catch-all: anything not matched above goes to the pprof index.
			sub.Mount("/", http.HandlerFunc(httppprof.Index))
			return
		}

		// Profiling is off: register nothing and let the not-found hook
		// report that profiling is disabled.
		sub.NotFound(profilingDisabledHandler)
	})

	return &Handler{Router: router}
}
// errResponse writes an error to w as a JSON object with "code" and "message"
// fields. The code is also placed in the ihttp.PlatformErrorCodeHeader
// response header, and the HTTP status is whatever
// ihttp.ErrorCodeToStatusCode returns for that code.
func errResponse(ctx context.Context, w http.ResponseWriter, code string, message string) {
	type errorBody struct {
		Code    string `json:"code"`
		Message string `json:"message"`
	}

	header := w.Header()
	header.Set(ihttp.PlatformErrorCodeHeader, code)
	header.Set("Content-Type", "application/json; charset=utf-8")
	w.WriteHeader(ihttp.ErrorCodeToStatusCode(ctx, code))

	// Both errors are deliberately ignored: the status line has already been
	// sent, so there is nothing further to report to the client.
	payload, _ := json.Marshal(errorBody{Code: code, Message: message})
	_, _ = w.Write(payload)
}
// profilingDisabledHandler answers any request with an influxdb.EForbidden
// error response stating that profiling is disabled.
func profilingDisabledHandler(w http.ResponseWriter, r *http.Request) {
	const message = "profiling disabled"
	errResponse(r.Context(), w, influxdb.EForbidden, message)
}
// archiveProfilesHandler gathers a set of runtime profiles via
// collectAllProfiles and copies the resulting archive to the response.
//
// Recognised form/query parameters:
//
//   - trace=<duration>: also capture an execution trace for the given Go
//     duration (as parsed by time.ParseDuration). Must not be negative and must
//     not exceed 45s.
//   - cpu=<duration>: also capture a CPU profile for the given Go duration.
//     Must not be negative.
//   - cpu=<anything>&seconds=<n>: legacy spelling of the above. When "seconds"
//     is present it is used and the value of "cpu" is ignored.
//
// When a parameter is supplied more than once, the LAST value wins. Invalid
// values produce an influxdb.EInvalid error response; failures while parsing
// the form or collecting profiles produce influxdb.EInternal.
func archiveProfilesHandler(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	// We parse the form here so that we can use the http.Request.Form map.
	//
	// Otherwise we'd have to use r.FormValue() which makes it impossible to
	// distinguish between a form value that exists and has no value and one that
	// does not exist at all.
	if err := r.ParseForm(); err != nil {
		errResponse(ctx, w, influxdb.EInternal, err.Error())
		return
	}

	// In the following two blocks, we check if the request should include cpu
	// profiles and a trace log.
	//
	// Since the submitted form can contain multiple versions of a variable like:
	//
	//   http://localhost:8086?cpu=1s&cpu=30s&trace=3s&cpu=5s
	//
	// the question arises: which value should we use? We choose to use the LAST
	// value supplied.
	//
	// This is an edge case but if for some reason, for example, a url is
	// programmatically built and multiple values are supplied, this will do what
	// is expected.
	var traceDuration, cpuDuration time.Duration

	// last() returns either the last item from a slice of strings or an empty
	// string if the supplied slice is empty or nil.
	last := func(s []string) string {
		if len(s) == 0 {
			return ""
		}
		return s[len(s)-1]
	}

	// If trace exists as a form value, record the decoded duration.
	//
	// Requests for a trace should look like:
	//
	//   ?trace=10s
	//
	if vals, exists := r.Form["trace"]; exists {
		// Parse the duration encoded in the last "trace" value supplied.
		val := last(vals)
		duration, err := time.ParseDuration(val)

		// If we can't parse the duration or if the user supplies a negative
		// number, return an appropriate error status and message.
		//
		// In this case it is an invalid-request error since the problem is in
		// the supplied form data. The parse error is checked first so that
		// duration is only inspected once it is known to be meaningful.
		if err != nil {
			errResponse(ctx, w, influxdb.EInvalid, fmt.Sprintf("could not parse supplied duration for trace %q", val))
			return
		}
		if duration < 0 {
			errResponse(ctx, w, influxdb.EInvalid, "negative trace durations not allowed")
			return
		}

		// Trace files can get big, so reject anything longer than 45s.
		if duration > 45*time.Second {
			errResponse(ctx, w, influxdb.EInvalid, "cannot trace for longer than 45s")
			return
		}

		traceDuration = duration
	}

	// Capturing CPU profiles is a little trickier. The preferred way to send
	// the cpu profile duration is via the supplied "cpu" variable's value.
	//
	// The duration should be encoded as a Go duration that can be parsed by
	// time.ParseDuration().
	//
	// In the past users were encouraged to assign any value to cpu and provide
	// the duration in a separate "seconds" value.
	//
	// The code below handles both -- first it attempts to use the old method
	// which would look like:
	//
	//   ?cpu=foobar&seconds=10
	//
	// Then it attempts to ascertain the duration provided with:
	//
	//   ?cpu=10s
	//
	// This preserves backwards compatibility with any tools that have been
	// written to gather profiles.
	if vals, exists := r.Form["cpu"]; exists {
		val := last(vals)

		// getDuration is a small function literal that encapsulates the logic
		// for getting the duration from either the "seconds" form value or from
		// the value assigned to "cpu".
		getDuration := func() (time.Duration, error) {
			if seconds, exists := r.Form["seconds"]; exists {
				s, err := strconv.ParseInt(last(seconds), 10, 64)
				if err != nil {
					return 0, err
				}
				return time.Second * time.Duration(s), nil
			}
			// See if the value of cpu is a duration like: cpu=10s
			return time.ParseDuration(val)
		}

		duration, err := getDuration()
		if err != nil {
			errResponse(ctx, w, influxdb.EInvalid, fmt.Sprintf("could not parse supplied duration for cpu profile %q", val))
			return
		}

		// Mirror the trace validation: a negative duration is a client error,
		// not a request to silently skip the CPU profile.
		if duration < 0 {
			errResponse(ctx, w, influxdb.EInvalid, "negative cpu durations not allowed")
			return
		}

		cpuDuration = duration
	}

	tarstream, err := collectAllProfiles(ctx, traceDuration, cpuDuration)
	if err != nil {
		errResponse(ctx, w, influxdb.EInternal, err.Error())
		return
	}

	// A copy failure means the client went away mid-response; there is nobody
	// left to report it to, so the error is intentionally dropped.
	_, _ = io.Copy(w, tarstream)
}

129
pprof/pprof.go Normal file
View File

@ -0,0 +1,129 @@
package pprof
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io"
"path"
"runtime"
"runtime/pprof"
"runtime/trace"
"time"
)
// SetGlobalProfiling switches the process-wide memory, block and mutex
// profiling samplers on or off.
//
// When enabled, the rates are the ones used in 1.x: a memory profile rate of
// 4096, a block profile rate of one second (expressed in nanoseconds) and a
// mutex profile fraction of 1. When disabled, all three are set to 0.
func SetGlobalProfiling(enabled bool) {
	// Start from "everything off" and only raise the rates when asked to.
	memRate, blockRate, mutexFraction := 0, 0, 0
	if enabled {
		memRate = 4096
		blockRate = int(1 * time.Second)
		mutexFraction = 1
	}

	runtime.MemProfileRate = memRate
	runtime.SetBlockProfileRate(blockRate)
	runtime.SetMutexProfileFraction(mutexFraction)
}
// collectAllProfiles builds an in-memory tar archive containing, in order:
//   - (optionally) a CPU profile, collected first when cpuDuration > 0
//   - goroutine profile
//   - blocking profile
//   - mutex profile
//   - heap profile
//   - allocations profile
//   - threadcreate profile
//   - (optionally) an execution trace, collected last when traceDuration > 0
//
// Every entry is stored as "profiles/<name>.pb.gz" with mode 0600. Named
// profiles are written with debug=0. The tar archive itself is NOT compressed
// by this function.
//
// CPU and trace collection block for the requested duration, or until ctx is
// done, whichever comes first. The first error encountered aborts collection
// and is returned with a nil reader.
func collectAllProfiles(ctx context.Context, traceDuration time.Duration, cpuDuration time.Duration) (io.Reader, error) {
	// prof describes a profile by name and, for the timed collectors, how long
	// to collect for.
	type prof struct {
		Name     string        // name of profile
		Duration time.Duration // duration of profile if applicable. currently only used by cpu and trace
	}

	var profiles = []prof{
		{Name: "goroutine"},
		{Name: "block"},
		{Name: "mutex"},
		{Name: "heap"},
		{Name: "allocs"},
		{Name: "threadcreate"},
	}
	if traceDuration > 0 {
		profiles = append(profiles, prof{"trace", traceDuration})
	}
	if cpuDuration > 0 {
		// We want to gather CPU profiles first, if enabled.
		profiles = append([]prof{{"cpu", cpuDuration}}, profiles...)
	}

	tarball := &bytes.Buffer{}
	buf := &bytes.Buffer{} // Temporary buffer for each profile/query result.

	tw := tar.NewWriter(tarball)

	// Collect and write out profiles.
	for _, profile := range profiles {
		switch profile.Name {
		case "cpu":
			if err := pprof.StartCPUProfile(buf); err != nil {
				return nil, err
			}
			sleep(ctx, profile.Duration)
			pprof.StopCPUProfile()

		case "trace":
			if err := trace.Start(buf); err != nil {
				return nil, err
			}
			sleep(ctx, profile.Duration)
			trace.Stop()

		default:
			// Named p (not prof) so the local type above is not shadowed.
			p := pprof.Lookup(profile.Name)
			if p == nil {
				return nil, fmt.Errorf("unable to find profile %q", profile.Name)
			}
			if err := p.WriteTo(buf, 0); err != nil {
				return nil, err
			}
		}

		// Write the profile file's header. The size must be known up front,
		// which is why each profile is staged in buf first.
		if err := tw.WriteHeader(&tar.Header{
			Name: path.Join("profiles", profile.Name+".pb.gz"),
			Mode: 0600,
			Size: int64(buf.Len()),
		}); err != nil {
			return nil, err
		}

		// Write the profile file's data.
		if _, err := tw.Write(buf.Bytes()); err != nil {
			return nil, err
		}

		// Reset the buffer for the next profile.
		buf.Reset()
	}

	// Close the tar writer so the archive trailer is flushed into tarball.
	if err := tw.Close(); err != nil {
		return nil, err
	}

	return tarball, nil
}
// sleep blocks until d has elapsed or ctx is done, whichever happens first.
//
// Adapted from net/http/pprof/pprof.go.
func sleep(ctx context.Context, d time.Duration) {
	// An explicit timer (rather than time.After) lets us release it as soon as
	// we return, instead of leaving it pending for the rest of d when ctx is
	// cancelled early.
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-timer.C:
	case <-ctx.Done():
	}
}