refactor: move the logger package from influxdb to platform

We are moving the necessary code for 2.0 from the influxdb 1.X
repository to the platform 2.0 repository. The logger is an unnecessary
dependency on the old influxdb that is making life more complicated.
pull/10616/head
Jonathan A. Sternberg 2018-10-10 14:41:46 -05:00
parent 18cdd698e2
commit e7e17d6972
No known key found for this signature in database
GPG Key ID: 4A0C1200CB8B9D2E
21 changed files with 489 additions and 16 deletions

View File

@ -15,13 +15,13 @@ import (
"github.com/influxdata/flux/control"
"github.com/influxdata/flux/execute"
influxlogger "github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform"
"github.com/influxdata/platform/bolt"
"github.com/influxdata/platform/chronograf/server"
"github.com/influxdata/platform/gather"
"github.com/influxdata/platform/http"
"github.com/influxdata/platform/kit/prom"
influxlogger "github.com/influxdata/platform/logger"
"github.com/influxdata/platform/nats"
"github.com/influxdata/platform/query"
_ "github.com/influxdata/platform/query/builtin"

View File

@ -8,8 +8,8 @@ import (
"time"
"github.com/google/go-cmp/cmp"
influxlogger "github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform"
influxlogger "github.com/influxdata/platform/logger"
"github.com/influxdata/platform/mock"
)

3
go.mod
View File

@ -48,11 +48,12 @@ require (
github.com/influxdata/roaring v0.4.12 // indirect
github.com/influxdata/usage-client v0.0.0-20160829180054-6d3895376368
github.com/jessevdk/go-flags v1.4.0
github.com/jsternberg/zap-logfmt v1.2.0 // indirect
github.com/jsternberg/zap-logfmt v1.2.0
github.com/jtolds/gls v4.2.1+incompatible // indirect
github.com/julienschmidt/httprouter v0.0.0-20180715161854-348b672cd90d
github.com/jwilder/encoding v0.0.0-20170811194829-b4e1701a28ef
github.com/kevinburke/go-bindata v3.11.0+incompatible
github.com/mattn/go-isatty v0.0.4
github.com/mattn/go-zglob v0.0.0-20180803001819-2ea3427bfa53 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1
github.com/mitchellh/go-homedir v1.0.0 // indirect

View File

@ -10,7 +10,7 @@ import (
"syscall"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"go.uber.org/zap"
)

18
logger/config.go Normal file
View File

@ -0,0 +1,18 @@
package logger
import (
"go.uber.org/zap/zapcore"
)
// Config represents the configuration used to construct a zap logger
// via (*Config).New. Fields are populated from TOML settings.
type Config struct {
	// Format selects the log encoder: "json", "logfmt", "console", or
	// ""/"auto" (auto picks console on a terminal, logfmt otherwise).
	Format string `toml:"format"`
	// Level is the minimum zapcore level emitted by the logger.
	Level zapcore.Level `toml:"level"`
	// SuppressLogo presumably disables printing a startup logo; it is
	// consumed outside this package — verify at the caller.
	SuppressLogo bool `toml:"suppress-logo"`
}

// NewConfig returns a new instance of Config with defaults.
func NewConfig() Config {
	return Config{
		Format: "auto",
	}
}

24
logger/context.go Normal file
View File

@ -0,0 +1,24 @@
package logger
import (
"context"
"go.uber.org/zap"
)
// key is an unexported context-key type so values stored by this package
// cannot collide with keys defined elsewhere.
type key int

const (
	// loggerKey is the context key under which a *zap.Logger is stored.
	loggerKey key = iota
)

// NewContextWithLogger returns a new context with log added.
func NewContextWithLogger(ctx context.Context, log *zap.Logger) context.Context {
	return context.WithValue(ctx, loggerKey, log)
}
// LoggerFromContext returns the *zap.Logger previously attached to ctx by
// NewContextWithLogger, or nil if ctx carries no logger.
func LoggerFromContext(ctx context.Context) *zap.Logger {
	if log, ok := ctx.Value(loggerKey).(*zap.Logger); ok {
		return log
	}
	return nil
}

111
logger/fields.go Normal file
View File

@ -0,0 +1,111 @@
package logger
import (
"time"
"github.com/influxdata/platform/pkg/snowflake"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
const (
	// TraceIDKey is the logging context key used for identifying unique traces.
	TraceIDKey = "trace_id"

	// OperationNameKey is the logging context key used for identifying name of an operation.
	OperationNameKey = "op_name"

	// OperationEventKey is the logging context key used for identifying a notable
	// event during the course of an operation.
	OperationEventKey = "op_event"

	// OperationElapsedKey is the logging context key used for identifying time elapsed to finish an operation.
	OperationElapsedKey = "op_elapsed"

	// DBInstanceKey is the logging context key used for identifying name of the relevant database.
	DBInstanceKey = "db_instance"

	// DBRetentionKey is the logging context key used for identifying name of the relevant retention policy.
	DBRetentionKey = "db_rp"

	// DBShardGroupKey is the logging context key used for identifying relevant shard group.
	DBShardGroupKey = "db_shard_group"

	// DBShardIDKey is the logging context key used for identifying name of the relevant shard number.
	DBShardIDKey = "db_shard_id"
)

// Values recorded under OperationEventKey by OperationEventStart and
// OperationEventEnd to mark the boundaries of an operation.
const (
	eventStart = "start"
	eventEnd   = "end"
)

var (
	// gen produces the unique snowflake identifiers used for trace and
	// log-instance IDs.
	gen = snowflake.New(0)
)

// nextID returns the next unique snowflake identifier as a string.
func nextID() string {
	return gen.NextString()
}
// TraceID returns a field for tracking the trace identifier.
func TraceID(id string) zapcore.Field {
	return zap.String(TraceIDKey, id)
}

// OperationName returns a field for tracking the name of an operation.
func OperationName(name string) zapcore.Field {
	return zap.String(OperationNameKey, name)
}

// OperationElapsed returns a field for tracking the duration of an operation.
func OperationElapsed(d time.Duration) zapcore.Field {
	return zap.Duration(OperationElapsedKey, d)
}

// OperationEventStart returns a field for tracking the start of an operation.
func OperationEventStart() zapcore.Field {
	return zap.String(OperationEventKey, eventStart)
}

// OperationEventEnd returns a field for tracking the end of an operation.
func OperationEventEnd() zapcore.Field {
	return zap.String(OperationEventKey, eventEnd)
}
// Database returns a field for tracking the name of a database.
func Database(name string) zapcore.Field {
	return zap.String(DBInstanceKey, name)
}

// RetentionPolicy returns a field for tracking the name of a retention policy.
func RetentionPolicy(name string) zapcore.Field {
	return zap.String(DBRetentionKey, name)
}

// ShardGroup returns a field for tracking the shard group identifier.
func ShardGroup(id uint64) zapcore.Field {
	return zap.Uint64(DBShardGroupKey, id)
}

// Shard returns a field for tracking the shard identifier.
func Shard(id uint64) zapcore.Field {
	return zap.Uint64(DBShardIDKey, id)
}
// NewOperation derives a logger from the existing log that carries a fresh
// trace id and the operation name, then logs a standardized message marking
// the start of the operation. The returned function should be called when the
// operation concludes; it logs the corresponding end message together with
// the elapsed time.
func NewOperation(log *zap.Logger, msg, name string, fields ...zapcore.Field) (*zap.Logger, func()) {
	started := time.Now()
	// append with an empty variadic list is a no-op, so no length check is needed.
	opFields := append([]zapcore.Field{TraceID(nextID()), OperationName(name)}, fields...)
	opLog := log.With(opFields...)
	opLog.Info(msg+" (start)", OperationEventStart())
	done := func() {
		opLog.Info(msg+" (end)", OperationEventEnd(), OperationElapsed(time.Since(started)))
	}
	return opLog, done
}

127
logger/logger.go Normal file
View File

@ -0,0 +1,127 @@
package logger
import (
"fmt"
"io"
"time"
"github.com/jsternberg/zap-logfmt"
isatty "github.com/mattn/go-isatty"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
const TimeFormat = "2006-01-02T15:04:05.000000Z07:00"
// New creates a logger that writes to w using the default configuration
// (format "auto"). Constructing from the default config cannot fail, so the
// error is deliberately discarded.
func New(w io.Writer) *zap.Logger {
	cfg := NewConfig()
	logger, _ := cfg.New(w)
	return logger
}
// New constructs a *zap.Logger writing to defaultOutput, using the format
// and level from c. A unique log_id field is attached to every logger so
// that output from separate instances can be distinguished.
//
// An explicit "console" format is always rejected; the console encoder is
// reachable only through auto-detection ("" or "auto") when the output is a
// terminal. NOTE(review): the error text "unknown logging format" is
// misleading for this case — the format is known but disallowed; consider a
// clearer message.
func (c *Config) New(defaultOutput io.Writer) (*zap.Logger, error) {
	w := defaultOutput
	format := c.Format
	if format == "console" {
		// Disallow the console logger if the output is not a terminal.
		// (The check is unconditional: explicitly configured "console" is
		// always refused; IsTerminal is consulted only in the auto path below.)
		return nil, fmt.Errorf("unknown logging format: %s", format)
	}
	// If the format is empty or auto, then set the format depending
	// on whether or not a terminal is present.
	if format == "" || format == "auto" {
		if IsTerminal(w) {
			format = "console"
		} else {
			format = "logfmt"
		}
	}
	encoder, err := newEncoder(format)
	if err != nil {
		return nil, err
	}
	return zap.New(zapcore.NewCore(
		encoder,
		zapcore.Lock(zapcore.AddSync(w)),
		c.Level,
	), zap.Fields(zap.String("log_id", nextID()))), nil
}
// newEncoder maps a format name to its zapcore encoder. The supported
// formats are "json", "console", and "logfmt"; any other name is an error.
func newEncoder(format string) (zapcore.Encoder, error) {
	cfg := newEncoderConfig()
	switch format {
	case "logfmt":
		return zaplogfmt.NewEncoder(cfg), nil
	case "json":
		return zapcore.NewJSONEncoder(cfg), nil
	case "console":
		return zapcore.NewConsoleEncoder(cfg), nil
	}
	return nil, fmt.Errorf("unknown logging format: %s", format)
}
// newEncoderConfig returns zap's production encoder configuration adapted to
// this package's conventions: UTC timestamps in TimeFormat, durations
// rendered as milliseconds with three decimal places, and the level keyed
// as "lvl".
func newEncoderConfig() zapcore.EncoderConfig {
	cfg := zap.NewProductionEncoderConfig()
	cfg.LevelKey = "lvl"
	cfg.EncodeTime = func(ts time.Time, enc zapcore.PrimitiveArrayEncoder) {
		enc.AppendString(ts.UTC().Format(TimeFormat))
	}
	cfg.EncodeDuration = func(d time.Duration, enc zapcore.PrimitiveArrayEncoder) {
		ms := float64(d) / float64(time.Millisecond)
		enc.AppendString(fmt.Sprintf("%.3fms", ms))
	}
	return cfg
}
// IsTerminal reports whether w exposes a file descriptor that refers to an
// interactive terminal session.
func IsTerminal(w io.Writer) bool {
	type fdWriter interface {
		Fd() uintptr
	}
	f, ok := w.(fdWriter)
	return ok && isatty.IsTerminal(f.Fd())
}
// Calendar durations used by DurationLiteral when selecting the largest
// unit that divides a value evenly.
const (
	year = 365 * 24 * time.Hour
	week = 7 * 24 * time.Hour
	day  = 24 * time.Hour
)

// DurationLiteral returns a string field rendering a static duration with a
// single number and the largest unit suffix (y, w, d, h, m, s, or ms) that
// does not truncate the value. A zero duration renders as "0s"; anything not
// evenly divisible by one second falls back to (truncated) milliseconds.
func DurationLiteral(key string, val time.Duration) zapcore.Field {
	if val == 0 {
		return zap.String(key, "0s")
	}
	units := []struct {
		d      time.Duration
		suffix string
	}{
		{year, "y"},
		{week, "w"},
		{day, "d"},
		{time.Hour, "h"},
		{time.Minute, "m"},
		{time.Second, "s"},
	}
	for _, u := range units {
		if val%u.d == 0 {
			return zap.String(key, fmt.Sprintf("%d%s", int(val/u.d), u.suffix))
		}
	}
	return zap.String(key, fmt.Sprintf("%d%s", int(val/time.Millisecond), "ms"))
}

192
logger/style_guide.md Normal file
View File

@ -0,0 +1,192 @@
# Logging Style Guide
The intention of logging is to give insight to the administrator of how
the server is running and also notify the administrator of any problems
or potential problems with the system.
At the moment, log level filtering is the only option to configure
logging in InfluxDB. Adding a logging message and choosing its level
should be done according to the guidelines in this document for
operational clarity. The available log levels are:
* Error
* Warn
* Info
* Debug
InfluxDB uses structured logging. Structured logging is when you log
messages and attach context to those messages with more easily read data
regarding the state of the system. A structured log message is composed
of:
* Time
* Level
* Message
* (Optionally) Additional context
## Guidelines
**Log messages** should be simple statements or phrases that begin with
a capital letter, but have no punctuation at the end. The message should be a
constant so that every time it is logged it is easily identified and can
be filtered without resorting to regular expressions.
Any **dynamic content** should be expressed by context. The key should
be a constant and the value is the dynamic content.
Do not log messages in tight loops or other high performance locations.
It will likely create a performance problem.
## Naming Conventions
If the log encoding format uses keys for the time, message, or level,
the key names should be `ts` for time, `msg` for the message, and
`lvl` for the level.
If the log encoding format does not use keys for the time, message, or
level and instead outputs them in some other method, this guideline can
be ignored. The output formats logfmt and json both use keys when
encoding these values.
### Context Key Names
The key for the dynamic content in the context should be formatted in
`snake_case`. The key should be completely lower case.
## Levels
As a reminder, levels are usually the only way to configure what is
logged. There are four available logging levels.
* Error
* Warn
* Info
* Debug
It is important to get the right logging level to ensure the log
messages are useful for end users to act on.
In general, when considering which log level to use, you should use
**info**. If you are considering using another level, read the below
expanded descriptions to determine which level your message belongs in.
### Error
The **error** level is intended to communicate that there is a serious
problem with the server. **An error should be emitted only when an
on-call engineer can take some action to remedy the situation _and_ the
system cannot continue operating properly without remedying the
situation.**
An example of what may qualify as an error level message is the creation
of the internal storage for the monitor service. For that system to
function at all, a database must be created. If no database is created,
the service itself cannot function. The error has a clear actionable
solution. Figure out why the database isn't being created and create it.
An example of what does not qualify as an error is failing to parse a
query or a socket closing prematurely. Both of these usually indicate
some kind of user error rather than system error. Both are ephemeral
errors and they would not be clearly actionable to an administrator who
was paged at 3 AM. Both of these are examples of logging messages that
should be emitted at the info level with an error key rather than being
logged at the error level.
Logged errors **must not propagate**. Propagating the error risks
logging it in multiple locations and confusing users when the same error
is reported multiple times. In general, if you are returning an error,
never log at any level. By returning the error, you are telling the
parent function to handle the error. Logging a message at any level is
handling the error.
This logging message should be used very rarely and any messages that
use this logging level should not repeat frequently. Assume that
anything that is logged with error will page someone in the middle of
the night.
### Warn
The **warn** level is intended to communicate that there is likely to be
a serious problem with the server if it is not addressed. **A warning
should be emitted only when a support engineer can take some action to
remedy the situation _and_ the system may not continue operating
properly in the near future without remedying the situation.**
An example of what may qualify as a warning is the `max-values-per-tag`
setting. If the server starts to approach the maximum number of values,
the server may stop being able to function properly when it reaches the
maximum number.
An example of what does not qualify as a warning is the
`log-queries-after` setting. While the message is "warning" that a query
was running for a long period of time, it is not clearly actionable and
does not indicate that the server will fail in the near future. This
should be logged at the info level instead.
This logging message should be used very rarely and any messages that
use this logging level should not repeat frequently. Assume that
anything that is logged with warn will page someone in the middle of the
night and potentially ignored until normal working hours.
### Info
The **info** level should be used for almost anything. If you are not
sure which logging level to use, use info. Temporary or user errors
should be logged at the info level and any informational messages for
administrators should be logged at this level. Info level messages
should be safe for an administrator to discard if they really want to,
but most people will run the system at the info level.
### Debug
The **debug** level exists to log messages that are useful only for
debugging a bad running instance.
This level should be rarely used if ever. If you intend to use this
level, please have a rationale ready. Most messages that could be
considered debug either shouldn't exist or should be logged at the info
level. Debug messages will be suppressed by default.
## Value Formatting
Formatting for strings, integers, and other standard values are usually
determined by the log format itself and those will be kept ambiguous.
The following specific formatting choices are for data types that could
be output in multiple ways.
### Time
Time values should be encoded using RFC3339 with microsecond precision.
The size of the string should be normalized to the same number of digits
every time to ensure that it is easier to read the time as a column.
### Duration
Duration values that denote a period of time should be output in
milliseconds with microsecond precision. The microseconds should be in
decimal form with three decimal places. Durations that denote a static
period of time should be output with a single number and a suffix with
the largest possible unit that doesn't cause the value to be a decimal.
There are two types of durations.
* Tracks a (usually small) period of time and is meant for timing how
long something takes. The content is dynamic and may be graphed.
* Duration literal where the content is dynamic, is unlikely to be
graphed, and usually comes from some type of configuration.
If the content is dynamic, the duration should be printed as a number of
milliseconds with a decimal indicating the number of microseconds. Any
duration lower than microseconds should be truncated. The decimal section
should always print exactly 3 digits after the decimal point.
If the content is static, the duration should be printed with a single
number and a suffix indicating the unit in years (`y`), weeks (`w`),
days (`d`), hours (`h`), minutes (`m`), seconds (`s`), or
milliseconds (`ms`). The suffix should be the greatest unit that can be
used without truncating the value. As an example, if the duration is
60 minutes, then `1h` should be used. If the duration is 61 minutes,
then `61m` should be used.
For anything lower than milliseconds that is static, the duration should
be truncated. A value of zero should be shown as `0s`.

View File

@ -7,7 +7,7 @@ import (
"sort"
"strings"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/storage/reads/datatypes"
"github.com/influxdata/platform/tsdb/cursors"

View File

@ -8,9 +8,9 @@ import (
"sync/atomic"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxql"
"github.com/influxdata/platform"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/tsdb"
"github.com/prometheus/client_golang/prometheus"

View File

@ -9,7 +9,7 @@ import (
"github.com/influxdata/flux"
"github.com/influxdata/flux/lang"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/query"
"github.com/influxdata/platform/task/backend"
"go.uber.org/zap"

View File

@ -10,9 +10,9 @@ import (
"sync"
"testing"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/query"
"github.com/influxdata/influxql"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/pkg/slices"
"github.com/influxdata/platform/tsdb"

View File

@ -7,7 +7,7 @@ import (
"os"
"testing"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/tsdb"
)

View File

@ -9,7 +9,7 @@ import (
"path/filepath"
"sync"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/pkg/rhh"
"go.uber.org/zap"

View File

@ -7,7 +7,7 @@ import (
"strings"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
)

View File

@ -14,8 +14,8 @@ import (
"time"
"unsafe"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxql"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/pkg/bytesutil"
"github.com/influxdata/platform/pkg/estimator"
"github.com/influxdata/platform/tsdb"

View File

@ -20,12 +20,12 @@ import (
"sync/atomic"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/pkg/metrics"
intar "github.com/influxdata/influxdb/pkg/tar"
"github.com/influxdata/influxdb/pkg/tracing"
"github.com/influxdata/influxdb/query"
"github.com/influxdata/influxql"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/pkg/bytesutil"
"github.com/influxdata/platform/pkg/estimator"

View File

@ -20,9 +20,9 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/query"
"github.com/influxdata/influxql"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/models"
"github.com/influxdata/platform/pkg/deep"
"github.com/influxdata/platform/tsdb"

View File

@ -12,7 +12,7 @@ import (
"testing"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/platform/logger"
"github.com/influxdata/platform/tsdb/tsm1"
)

View File

@ -6,9 +6,9 @@ import (
"testing"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/query"
"github.com/influxdata/influxql"
"github.com/influxdata/platform/logger"
)
func BenchmarkIntegerIterator_Next(b *testing.B) {