Merge pull request #6764 from influxdata/nc-holt-winters-nans

Check for NaNs from Holt-Winters and do not return them
pull/6769/head
Nathaniel Cook 2016-06-03 11:17:11 -06:00
commit 128b07e352
5 changed files with 158 additions and 165 deletions

View File

@ -322,14 +322,6 @@ func (r *IntegerMovingAverageReducer) Emit() []FloatPoint {
// 1. Using the series the initial values are calculated using a SSE.
// 2. The series is forecasted into the future using the iterative relations.
type FloatHoltWintersReducer struct {
// Smoothing parameters
alpha,
beta,
gamma float64
// Dampening parameter
phi float64
// Season period
m int
seasonal bool
@ -355,11 +347,19 @@ type FloatHoltWintersReducer struct {
}
const (
defaultAlpha = 0.5
defaultBeta = 0.5
defaultGamma = 0.5
defaultPhi = 0.5
defaultEpsilon = 1.0e-4
// Arbitrary weight for initializing some intial guesses.
// This should be in the range [0,1]
hwWeight = 0.5
// Epsilon value for the minimization process
hwDefaultEpsilon = 1.0e-4
// Define a grid of initial guesses for the parameters: alpha, beta, gamma, and phi.
// Keep in mind that this grid is N^4 so we should keep N small
// The starting lower guess
hwGuessLower = 0.3
// The upper bound on the grid
hwGuessUpper = 1.0
// The step between guesses
hwGuessStep = 0.4
)
// NewFloatHoltWintersReducer creates a new FloatHoltWintersReducer.
@ -369,10 +369,6 @@ func NewFloatHoltWintersReducer(h, m int, includeFitData bool, interval time.Dur
seasonal = false
}
return &FloatHoltWintersReducer{
alpha: defaultAlpha,
beta: defaultBeta,
gamma: defaultGamma,
phi: defaultPhi,
h: h,
m: m,
seasonal: seasonal,
@ -380,7 +376,7 @@ func NewFloatHoltWintersReducer(h, m int, includeFitData bool, interval time.Dur
interval: int64(interval),
halfInterval: int64(interval) / 2,
optim: neldermead.New(),
epsilon: defaultEpsilon,
epsilon: hwDefaultEpsilon,
}
}
@ -442,15 +438,9 @@ func (r *FloatHoltWintersReducer) Emit() []FloatPoint {
r.y = append(r.y, p.Value)
}
// Smoothing parameters
alpha, beta, gamma := r.alpha, r.beta, r.gamma
// Seasonality
m := r.m
// Dampening paramter
phi := r.phi
// Starting guesses
// NOTE: Since these values are guesses
// in the cases where we were missing data,
@ -464,7 +454,7 @@ func (r *FloatHoltWintersReducer) Emit() []FloatPoint {
}
}
} else {
l_0 += alpha * r.y[0]
l_0 += hwWeight * r.y[0]
}
b_0 := 0.0
@ -476,7 +466,7 @@ func (r *FloatHoltWintersReducer) Emit() []FloatPoint {
}
} else {
if !math.IsNaN(r.y[1]) {
b_0 = beta * (r.y[1] - r.y[0])
b_0 = hwWeight * (r.y[1] - r.y[0])
}
}
@ -493,10 +483,6 @@ func (r *FloatHoltWintersReducer) Emit() []FloatPoint {
}
parameters := make([]float64, 6+len(s))
parameters[0] = alpha
parameters[1] = beta
parameters[2] = gamma
parameters[3] = phi
parameters[4] = l_0
parameters[5] = b_0
o := len(parameters) - len(s)
@ -505,28 +491,51 @@ func (r *FloatHoltWintersReducer) Emit() []FloatPoint {
}
// Determine best fit for the various parameters
_, params := r.optim.Optimize(r.sse, parameters, r.epsilon, 1, r.constrain)
minSSE := math.Inf(1)
var bestParams []float64
for alpha := hwGuessLower; alpha < hwGuessUpper; alpha += hwGuessStep {
for beta := hwGuessLower; beta < hwGuessUpper; beta += hwGuessStep {
for gamma := hwGuessLower; gamma < hwGuessUpper; gamma += hwGuessStep {
for phi := hwGuessLower; phi < hwGuessUpper; phi += hwGuessStep {
parameters[0] = alpha
parameters[1] = beta
parameters[2] = gamma
parameters[3] = phi
sse, params := r.optim.Optimize(r.sse, parameters, r.epsilon, 1)
if sse < minSSE || bestParams == nil {
minSSE = sse
bestParams = params
}
}
}
}
}
// Forecast
forecasted := r.forecast(r.h, params)
forecasted := r.forecast(r.h, bestParams)
var points []FloatPoint
if r.includeFitData {
points = make([]FloatPoint, len(forecasted))
start := r.points[0].Time
points = make([]FloatPoint, 0, len(forecasted))
for i, v := range forecasted {
t := start + r.interval*(int64(i))
points[i] = FloatPoint{
Value: v,
Time: t,
if !math.IsNaN(v) {
t := start + r.interval*(int64(i))
points = append(points, FloatPoint{
Value: v,
Time: t,
})
}
}
} else {
points = make([]FloatPoint, r.h)
forecasted := r.forecast(r.h, params)
stop := r.points[len(r.points)-1].Time
points = make([]FloatPoint, 0, r.h)
for i, v := range forecasted[len(r.y):] {
t := stop + r.interval*(int64(i)+1)
points[i] = FloatPoint{
Value: v,
Time: t,
if !math.IsNaN(v) {
t := stop + r.interval*(int64(i)+1)
points = append(points, FloatPoint{
Value: v,
Time: t,
})
}
}
}
@ -546,6 +555,9 @@ func (r *FloatHoltWintersReducer) next(alpha, beta, gamma, phi, phi_h, y_t, l_tp
// Forecast the data h points into the future.
func (r *FloatHoltWintersReducer) forecast(h int, params []float64) []float64 {
// Constrain parameters
r.constrain(params)
y_t := r.y[0]
phi := params[3]
@ -575,7 +587,7 @@ func (r *FloatHoltWintersReducer) forecast(h int, params []float64) []float64 {
stm, stmh := 1.0, 1.0
for t := 1; t < l+h; t++ {
if r.seasonal {
hm = (t - 1) % m
hm = t % m
stm = seasonals[(t-m+so)%m]
stmh = seasonals[(t-m+hm+so)%m]
}
@ -594,8 +606,8 @@ func (r *FloatHoltWintersReducer) forecast(h int, params []float64) []float64 {
phi_h += math.Pow(phi, float64(t))
if r.seasonal {
so++
seasonals[(t+so)%m] = s_t
so++
}
forecasted[t] = y_t
@ -611,8 +623,13 @@ func (r *FloatHoltWintersReducer) sse(params []float64) float64 {
// Skip missing values since we cannot use them to compute an error.
if !math.IsNaN(r.y[i]) {
// Compute error
diff := forecasted[i] - r.y[i]
sse += diff * diff
if math.IsNaN(forecasted[i]) {
// Penalize forecasted NaNs
return math.Inf(1)
} else {
diff := forecasted[i] - r.y[i]
sse += diff * diff
}
}
}
return sse

View File

@ -72,16 +72,16 @@ func TestHoltWinters_AusTourists(t *testing.T) {
points := hw.Emit()
forecasted := []influxql.FloatPoint{
{Time: 49, Value: 57.01368875810684},
{Time: 50, Value: 40.190037686564295},
{Time: 51, Value: 54.90600903429195},
{Time: 52, Value: 52.61130714223962},
{Time: 53, Value: 59.85400578890833},
{Time: 54, Value: 42.21766711269367},
{Time: 55, Value: 57.65856066704675},
{Time: 56, Value: 55.26293832246274},
{Time: 57, Value: 62.83676840257498},
{Time: 58, Value: 44.34255373999999},
{Time: 49, Value: 51.85064132137853},
{Time: 50, Value: 43.26055282315273},
{Time: 51, Value: 41.827258044814464},
{Time: 52, Value: 54.3990354591749},
{Time: 53, Value: 54.62334472770803},
{Time: 54, Value: 45.57155693625209},
{Time: 55, Value: 44.06051240252263},
{Time: 56, Value: 57.30029870759433},
{Time: 57, Value: 57.53591513519172},
{Time: 58, Value: 47.999008139396096},
}
if exp, got := len(forecasted), len(points); exp != got {
@ -151,16 +151,16 @@ func TestHoltWinters_AusTourists_Missing(t *testing.T) {
points := hw.Emit()
forecasted := []influxql.FloatPoint{
{Time: 49, Value: 54.39825435294697},
{Time: 50, Value: 41.93726334513928},
{Time: 51, Value: 54.909838091213345},
{Time: 52, Value: 57.1641355392107},
{Time: 53, Value: 57.164128921488114},
{Time: 54, Value: 44.06955989656805},
{Time: 55, Value: 57.701724090970124},
{Time: 56, Value: 60.07064109901599},
{Time: 57, Value: 60.0706341448159},
{Time: 58, Value: 46.310272882941966},
{Time: 49, Value: 54.84533610387743},
{Time: 50, Value: 41.19329421863249},
{Time: 51, Value: 45.71673175112451},
{Time: 52, Value: 56.05759298805955},
{Time: 53, Value: 59.32337460282217},
{Time: 54, Value: 44.75280096850461},
{Time: 55, Value: 49.98865098113751},
{Time: 56, Value: 61.86084934967605},
{Time: 57, Value: 65.95805633454883},
{Time: 58, Value: 50.1502170480547},
}
if exp, got := len(forecasted), len(points); exp != got {
@ -207,34 +207,34 @@ func TestHoltWinters_USPopulation(t *testing.T) {
forecasted := []influxql.FloatPoint{
{Time: 1, Value: 3.93},
{Time: 2, Value: 4.666229883011407},
{Time: 3, Value: 7.582703219729101},
{Time: 4, Value: 11.38750857910578},
{Time: 5, Value: 16.061458362866013},
{Time: 6, Value: 21.60339677828295},
{Time: 7, Value: 28.028797969563485},
{Time: 8, Value: 35.36898516917276},
{Time: 9, Value: 43.67088460608491},
{Time: 10, Value: 52.99725850596854},
{Time: 11, Value: 63.42738609386119},
{Time: 12, Value: 75.05818203585376},
{Time: 13, Value: 88.00575972021298},
{Time: 14, Value: 102.40746333932526},
{Time: 15, Value: 118.42440883475055},
{Time: 16, Value: 136.24459021744897},
{Time: 17, Value: 156.08662531118478},
{Time: 18, Value: 178.20423430441988},
{Time: 19, Value: 202.89156636951475},
{Time: 20, Value: 230.48951480987003},
{Time: 21, Value: 261.3931906116184},
{Time: 22, Value: 296.0607589238543},
{Time: 23, Value: 335.0238840597938},
{Time: 24, Value: 378.900077509081},
{Time: 25, Value: 428.40730185977736},
{Time: 26, Value: 484.38125346495343},
{Time: 27, Value: 547.7958305836579},
{Time: 28, Value: 619.7873945146928},
{Time: 29, Value: 701.6835524758332},
{Time: 2, Value: 4.957405463559748},
{Time: 3, Value: 7.012210102535647},
{Time: 4, Value: 10.099589257439924},
{Time: 5, Value: 14.229926188104242},
{Time: 6, Value: 19.418878968703797},
{Time: 7, Value: 25.68749172281409},
{Time: 8, Value: 33.062351305731305},
{Time: 9, Value: 41.575791076125206},
{Time: 10, Value: 51.26614395589263},
{Time: 11, Value: 62.178047564264595},
{Time: 12, Value: 74.36280483872488},
{Time: 13, Value: 87.87880423073163},
{Time: 14, Value: 102.79200429905801},
{Time: 15, Value: 119.17648832929542},
{Time: 16, Value: 137.11509549747296},
{Time: 17, Value: 156.70013608313175},
{Time: 18, Value: 178.03419933863566},
{Time: 19, Value: 201.23106385518594},
{Time: 20, Value: 226.4167216525905},
{Time: 21, Value: 253.73052878285205},
{Time: 22, Value: 283.32649700397553},
{Time: 23, Value: 315.37474308085984},
{Time: 24, Value: 350.06311454009256},
{Time: 25, Value: 387.59901328556873},
{Time: 26, Value: 428.21144141893404},
{Time: 27, Value: 472.1532969569147},
{Time: 28, Value: 519.7039509590035},
{Time: 29, Value: 571.1721419458248},
}
if exp, got := len(forecasted), len(points); exp != got {
@ -277,34 +277,34 @@ func TestHoltWinters_USPopulation_Missing(t *testing.T) {
forecasted := []influxql.FloatPoint{
{Time: 1, Value: 3.93},
{Time: 2, Value: -0.48020223172549237},
{Time: 3, Value: 4.802781783666482},
{Time: 4, Value: 9.877595464844614},
{Time: 5, Value: 15.247582981982251},
{Time: 6, Value: 21.11408248318643},
{Time: 7, Value: 27.613033499005994},
{Time: 8, Value: 34.85894117561601},
{Time: 9, Value: 42.96187408308528},
{Time: 10, Value: 52.03593886054132},
{Time: 11, Value: 62.20424563556273},
{Time: 12, Value: 73.60229860599725},
{Time: 13, Value: 86.38069796377347},
{Time: 14, Value: 100.70760028319269},
{Time: 15, Value: 116.77117933639244},
{Time: 16, Value: 134.78222852768778},
{Time: 17, Value: 154.97699617972333},
{Time: 18, Value: 177.6203209232622},
{Time: 19, Value: 203.00912417351864},
{Time: 20, Value: 231.47631387044993},
{Time: 21, Value: 263.39515509958034},
{Time: 22, Value: 299.18416724280326},
{Time: 23, Value: 339.31261310834543},
{Time: 24, Value: 384.3066526673811},
{Time: 25, Value: 434.756242430451},
{Time: 26, Value: 491.3228711104303},
{Time: 27, Value: 554.748233097815},
{Time: 28, Value: 625.8639535249647},
{Time: 29, Value: 705.6024924601211},
{Time: 2, Value: 4.8931364428135105},
{Time: 3, Value: 6.962653629047061},
{Time: 4, Value: 10.056207765903274},
{Time: 5, Value: 14.18435088129532},
{Time: 6, Value: 19.362939306110846},
{Time: 7, Value: 25.613247940326584},
{Time: 8, Value: 32.96213087008264},
{Time: 9, Value: 41.442230043017204},
{Time: 10, Value: 51.09223428526052},
{Time: 11, Value: 61.95719155158485},
{Time: 12, Value: 74.08887794968567},
{Time: 13, Value: 87.54622778052787},
{Time: 14, Value: 102.39582960014131},
{Time: 15, Value: 118.7124941463221},
{Time: 16, Value: 136.57990089987464},
{Time: 17, Value: 156.09133107941278},
{Time: 18, Value: 177.35049601833734},
{Time: 19, Value: 200.472471161683},
{Time: 20, Value: 225.58474737097785},
{Time: 21, Value: 252.82841286206823},
{Time: 22, Value: 282.35948095261017},
{Time: 23, Value: 314.3503808953992},
{Time: 24, Value: 348.99163145856954},
{Time: 25, Value: 386.49371962730555},
{Time: 26, Value: 427.08920989407727},
{Time: 27, Value: 471.0351131332573},
{Time: 28, Value: 518.615548088049},
{Time: 29, Value: 570.1447331101863},
}
if exp, got := len(forecasted), len(points); exp != got {
@ -322,7 +322,7 @@ func TestHoltWinters_USPopulation_Missing(t *testing.T) {
func TestHoltWinters_RoundTime(t *testing.T) {
maxTime := time.Unix(0, influxql.MaxTime).Round(time.Second).UnixNano()
data := []influxql.FloatPoint{
{Time: maxTime - int64(5*time.Second+50*time.Millisecond), Value: 1},
{Time: maxTime - int64(5*time.Second), Value: 1},
{Time: maxTime - int64(4*time.Second+103*time.Millisecond), Value: 10},
{Time: maxTime - int64(3*time.Second+223*time.Millisecond), Value: 2},
{Time: maxTime - int64(2*time.Second+481*time.Millisecond), Value: 11},
@ -335,11 +335,11 @@ func TestHoltWinters_RoundTime(t *testing.T) {
forecasted := []influxql.FloatPoint{
{Time: maxTime - int64(5*time.Second), Value: 1},
{Time: maxTime - int64(4*time.Second), Value: 10.499068390422073},
{Time: maxTime - int64(3*time.Second), Value: 2.002458220927272},
{Time: maxTime - int64(2*time.Second), Value: 10.499826428426315},
{Time: maxTime - int64(1*time.Second), Value: 2.898110014107811},
{Time: maxTime - int64(0*time.Second), Value: 10.499786614238138},
{Time: maxTime - int64(4*time.Second), Value: 10.006729104838234},
{Time: maxTime - int64(3*time.Second), Value: 1.998341814469269},
{Time: maxTime - int64(2*time.Second), Value: 10.997858830631172},
{Time: maxTime - int64(1*time.Second), Value: 4.085860238030013},
{Time: maxTime - int64(0*time.Second), Value: 11.35713604403339},
}
if exp, got := len(forecasted), len(points); exp != got {
@ -368,8 +368,8 @@ func TestHoltWinters_MaxTime(t *testing.T) {
forecasted := []influxql.FloatPoint{
{Time: influxql.MaxTime - 1, Value: 1},
{Time: influxql.MaxTime, Value: 2.0058478778784132},
{Time: influxql.MaxTime + 1, Value: 3.9399400964478106},
{Time: influxql.MaxTime, Value: 2.001516944066403},
{Time: influxql.MaxTime + 1, Value: 2.5365248972488343},
}
if exp, got := len(forecasted), len(points); exp != got {

View File

@ -38,7 +38,6 @@ func (o *Optimizer) Optimize(
start []float64,
epsilon,
scale float64,
constrain func([]float64),
) (float64, []float64) {
n := len(start)
@ -83,10 +82,6 @@ func (o *Optimizer) Optimize(
}
}
if constrain != nil {
constrain(v[n])
}
// find the initial function values
for j := 0; j <= n; j++ {
f[j] = objfunc(v[j])
@ -129,9 +124,6 @@ func (o *Optimizer) Optimize(
for i := 0; i <= n-1; i++ {
vr[i] = vm[i] + o.Alpha*(vm[i]-v[vg][i])
}
if constrain != nil {
constrain(vr)
}
// value of function at reflection point
fr := objfunc(vr)
@ -148,9 +140,6 @@ func (o *Optimizer) Optimize(
for i := 0; i <= n-1; i++ {
ve[i] = vm[i] + o.Gamma*(vr[i]-vm[i])
}
if constrain != nil {
constrain(ve)
}
// value of function at expansion point
fe := objfunc(ve)
@ -186,10 +175,6 @@ func (o *Optimizer) Optimize(
}
}
if constrain != nil {
constrain(vc)
}
// value of function at contraction point
fc := objfunc(vc)
@ -210,17 +195,7 @@ func (o *Optimizer) Optimize(
}
}
}
if constrain != nil {
constrain(v[vg])
}
f[vg] = objfunc(v[vg])
if constrain != nil {
constrain(v[vh])
}
f[vh] = objfunc(v[vh])
}
}

View File

@ -24,6 +24,12 @@ func almostEqual(a, b, e float64) bool {
}
func Test_Optimize(t *testing.T) {
constraints := func(x []float64) {
for i := range x {
x[i] = round(x[i], 5)
}
}
// 100*(b-a^2)^2 + (1-a)^2
//
// Obvious global minimum at (a,b) = (1,1)
@ -31,22 +37,17 @@ func Test_Optimize(t *testing.T) {
// Useful visualization:
// https://www.wolframalpha.com/input/?i=minimize(100*(b-a%5E2)%5E2+%2B+(1-a)%5E2)
f := func(x []float64) float64 {
constraints(x)
// a = x[0]
// b = x[1]
return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0]) + (1.0-x[0])*(1.0-x[0])
}
constraints := func(x []float64) {
for i := range x {
x[i] = round(x[i], 5)
}
}
start := []float64{-1.2, 1.0}
opt := neldermead.New()
epsilon := 1e-5
min, parameters := opt.Optimize(f, start, epsilon, 1, constraints)
min, parameters := opt.Optimize(f, start, epsilon, 1)
if !almostEqual(min, 0, epsilon) {
t.Errorf("unexpected min: got %f exp 0", min)

View File

@ -2302,8 +2302,8 @@ func TestSelect_HoltWinters_GroupBy_Agg(t *testing.T) {
} else if a, err := Iterators(itrs).ReadAll(); err != nil {
t.Fatalf("unexpected error: %s", err)
} else if !deep.Equal(a, [][]influxql.Point{
{&influxql.FloatPoint{Name: "cpu", Time: 20 * Second, Value: 11.136685559138241}},
{&influxql.FloatPoint{Name: "cpu", Time: 22 * Second, Value: 7.507280682335967}},
{&influxql.FloatPoint{Name: "cpu", Time: 20 * Second, Value: 11.960623419918432}},
{&influxql.FloatPoint{Name: "cpu", Time: 22 * Second, Value: 7.953140268154609}},
}) {
t.Fatalf("unexpected points: %s", spew.Sdump(a))
}