Remove the interval setting from NextInterval to make remote mappers work.
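
The interval now travels with the mapper instead of with each call: CreateMapReduceJobs pulls the group by interval out of the statement and hands it to remote mappers through a new Interval field, which StartLocalMapper copies onto the LocalMapper on the remote node. The mapper HTTP handler also writes a final MapResponse with Completed set, so a RemoteMapper can tell the end of the stream apart from an empty read.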

pull/2202/head
Paul Dix 2015-04-05 11:59:16 -04:00 committed by Philip O'Toole
parent 6e8ea9ae91
commit d41b85a715
5 changed files with 61 additions and 27 deletions


@@ -753,7 +753,7 @@ func (h *Handler) serveRunMapper(w http.ResponseWriter, r *http.Request) {
 	// write results to the client until the next interval is empty
 	for {
 		fmt.Println("start interval")
-		v, err := lm.NextInterval(m.TMax)
+		v, err := lm.NextInterval()
 		if err != nil {
 			mapError(w, err)
 			return
@@ -763,7 +763,7 @@ func (h *Handler) serveRunMapper(w http.ResponseWriter, r *http.Request) {
 		// see if we're done
 		if v == nil {
 			fmt.Println("DONE")
-			return
+			break
 		}

 		// marshal and write out
@@ -782,6 +782,14 @@ func (h *Handler) serveRunMapper(w http.ResponseWriter, r *http.Request) {
 		w.Write(b)
 		w.(http.Flusher).Flush()
 	}
+
+	d, err := json.Marshal(&influxdb.MapResponse{Completed: true})
+	if err != nil {
+		mapError(w, err)
+	} else {
+		w.Write(d)
+		w.(http.Flusher).Flush()
+	}
 }

 type dataNodeJSON struct {
@@ -809,6 +817,7 @@ func isFieldNotFoundError(err error) bool {
 // mapError writes an error result after trying to start a mapper
 func mapError(w http.ResponseWriter, err error) {
+	fmt.Println("mapError: ", err.Error())
 	b, _ := json.Marshal(&influxdb.MapResponse{Err: err.Error()})
 	w.Write(b)
 }
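
With the Completed terminator in place, the body a RemoteMapper reads is a sequence of JSON-encoded MapResponse frames: zero or more data frames, then either an error frame or the terminator. A hypothetical exchange (payloads invented for illustration; Data is base64 because Go marshals []byte that way):

	{"Data":"eyJjb3VudCI6MTB9"}
	{"Data":"eyJjb3VudCI6N30="}
	{"Completed":true}

An error at any point replaces the rest of the stream with a single {"Err":"..."} frame.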


@@ -234,7 +234,7 @@ func (m *MapReduceJob) processRawQuery(out chan *Row, filterEmptyResults bool) {
 				continue
 			}
-			res, err := mm.NextInterval(m.TMax)
+			res, err := mm.NextInterval()
 			if err != nil {
 				out <- &Row{Err: err}
 				return
@@ -614,19 +614,11 @@ func (m *MapReduceJob) processAggregate(c *Call, reduceFunc ReduceFunc, resultVa
 		}
 	}

-	// the first interval in a query with a group by may be smaller than the others. This happens when they have a
-	// where time > clause that is in the middle of the bucket that the group by time creates
-	firstInterval := (m.TMin/m.interval*m.interval + m.interval) - m.TMin
-
 	// populate the result values for each interval of time
 	for i, _ := range resultValues {
 		// collect the results from each mapper
 		for j, mm := range m.Mappers {
-			interval := m.interval
-			if i == 0 {
-				interval = firstInterval
-			}
-			res, err := mm.NextInterval(interval)
+			res, err := mm.NextInterval()
 			if err != nil {
 				return err
 			}
@@ -661,10 +653,8 @@ type Mapper interface {
 	// NextInterval will get the time ordered next interval of the given interval size from the mapper. This is a
 	// forward only operation from the start time passed into Begin. Will return nil when there is no more data to be read.
-	// We pass the interval in here so that it can be varied over the period of the query. This is useful for queries that
-	// must respect natural time boundaries like months or queries that span daylight savings time borders. Note that if
-	// a limit is set on the mapper, the interval passed here should represent the MaxTime in a nano epoch.
-	NextInterval(interval int64) (interface{}, error)
+	// Interval periods can be different based on time boundaries (months, daylight savings, etc) of the query.
+	NextInterval() (interface{}, error)
 }

 type TagSet struct {
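
Every caller now drives a Mapper the same way: call NextInterval() until it yields nil. A minimal sketch of that loop (the drain helper and emit callback are hypothetical, not part of this commit):

	// drain pulls every remaining interval out of a Mapper and hands each
	// result to emit. It stops on the first error or when the mapper is done.
	func drain(m Mapper, emit func(interface{})) error {
		for {
			v, err := m.NextInterval()
			if err != nil {
				return err
			}
			if v == nil {
				return nil // no more data
			}
			emit(v)
		}
	}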


@@ -21,6 +21,7 @@ type RemoteMapper struct {
 	resp      *http.Response
 	results   chan interface{}
 	unmarshal influxql.UnmarshalFunc
+	complete  bool

 	Call     string
 	Database string
@@ -34,11 +35,13 @@ type RemoteMapper struct {
 	SelectFields []*Field
 	SelectTags   []string
 	Limit        int
+	Interval     int64
 }

 type MapResponse struct {
-	Err string
+	Err string `json:",omitempty"`
 	Data []byte
+	Completed bool `json:",omitempty"`
 }

 // Open is a no op, real work is done starting with Begin
@@ -80,10 +83,15 @@ func (m *RemoteMapper) Begin(c *influxql.Call, startingTime int64, limit int) er
 	return nil
 }

-func (m *RemoteMapper) NextInterval(interval int64) (interface{}, error) {
+func (m *RemoteMapper) NextInterval() (interface{}, error) {
+	if m.complete {
+		return nil, nil
+	}
+
 	chunk := make([]byte, MAX_MAP_RESPONSE_SIZE, MAX_MAP_RESPONSE_SIZE)
 	n, err := m.resp.Body.Read(chunk)
 	if err != nil {
+		warn("NextInterval err:", n, err.Error())
 		return nil, err
 	}
 	if n == 0 {
@@ -98,6 +106,10 @@ func (m *RemoteMapper) NextInterval(interval int64) (interface{}, error) {
 	if mr.Err != "" {
 		return nil, errors.New(mr.Err)
 	}
+	if mr.Completed {
+		m.complete = true
+		return nil, nil
+	}
 	v, err := m.unmarshal(mr.Data)
 	if err != nil {
 		return nil, err
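
One caveat worth noting: the single resp.Body.Read assumes every MapResponse the handler flushes arrives as exactly one complete JSON object in one read, which HTTP chunking does not strictly guarantee. A json.Decoder over the body stream would tolerate split or coalesced frames; a rough sketch of that alternative (readResponses is hypothetical, not what this commit does):

	// readResponses decodes MapResponse frames until an error frame,
	// the Completed terminator, or EOF.
	func readResponses(body io.Reader, onData func([]byte) error) error {
		dec := json.NewDecoder(body)
		for {
			var mr MapResponse
			if err := dec.Decode(&mr); err == io.EOF {
				return nil
			} else if err != nil {
				return err
			}
			if mr.Err != "" {
				return errors.New(mr.Err)
			}
			if mr.Completed {
				return nil
			}
			if err := onData(mr.Data); err != nil {
				return err
			}
		}
	}

(Assumes "encoding/json", "errors", and "io" are imported, plus the MapResponse type above.)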


@@ -3133,6 +3133,7 @@ func (s *Server) StartLocalMapper(rm *RemoteMapper) (*LocalMapper, error) {
 		whereFields:  rm.WhereFields,
 		selectFields: rm.SelectFields,
 		selectTags:   rm.SelectTags,
+		interval:     rm.Interval,
 	}

 	return lm, nil

tx.go

@@ -114,6 +114,14 @@ func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []stri
 		return nil, nil
 	}

+	// get the group by interval, if there is one
+	var interval int64
+	if d, err := stmt.GroupByInterval(); err != nil {
+		return nil, err
+	} else {
+		interval = d.Nanoseconds()
+	}
+
 	// get the sorted unique tag sets for this query.
 	tagSets := m.tagSets(stmt, tagKeys)
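
stmt.GroupByInterval() returns a time.Duration, so the conversion to the int64 nanoseconds the mappers use is a one-liner. A quick check of the arithmetic for a hypothetical GROUP BY time(10m) query:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		d := 10 * time.Minute // what GroupByInterval() yields for GROUP BY time(10m)
		fmt.Println(d.Nanoseconds()) // 600000000000
	}

A statement with no GROUP BY time(...) leaves interval at its zero value, which is what later disables the bucketing branch in LocalMapper.NextInterval.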
@@ -159,7 +167,8 @@ func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []stri
 			WhereFields:  whereFields,
 			SelectFields: selectFields,
 			SelectTags:   selectTags,
-			Limit:        stmt.Limit,
+			Limit:        stmt.Limit + stmt.Offset,
+			Interval:     interval,
 		}
 		mapper.(*RemoteMapper).SetFilters(t.Filters)
 	} else {
@@ -172,6 +181,8 @@ func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []stri
 			whereFields:  whereFields,
 			selectFields: selectFields,
 			selectTags:   selectTags,
+			tmax:         tmax.UnixNano(),
+			interval:     interval,
 			// multiple mappers may need to be merged together to get the results
 			// for a raw query. So each mapper will have to read at least the
 			// limit plus the offset in data points to ensure we've hit our mark
@@ -253,6 +264,7 @@ type LocalMapper struct {
 	selectFields     []*Field // field names that occur in the select clause
 	selectTags       []string // tag keys that occur in the select clause
 	isRaw            bool     // if the query is a non-aggregate query
+	interval         int64    // the group by interval of the query, if any
 	limit            uint64   // used for raw queries for LIMIT
 	perIntervalLimit int      // used for raw queries to determine how far into a chunk we are
 	chunkSize        int      // used for raw queries to determine how much data to read before flushing to client
@@ -297,8 +309,8 @@ func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64, chunkSize int)
 	l.mapFunc = mapFunc
 	l.keyBuffer = make([]int64, len(l.cursors))
 	l.valueBuffer = make([][]byte, len(l.cursors))
-	l.tmin = startingTime
 	l.chunkSize = chunkSize
+	l.tmin = startingTime

 	// determine if this is a raw data query with a single field, multiple fields, or an aggregate
 	var fieldName string
@@ -355,18 +367,28 @@ func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64, chunkSize int)
 // NextInterval will get the time ordered next interval of the given interval size from the mapper. This is a
 // forward only operation from the start time passed into Begin. Will return nil when there is no more data to be read.
-// If this is a raw query, interval should be the max time to hit in the query
-func (l *LocalMapper) NextInterval(interval int64) (interface{}, error) {
+func (l *LocalMapper) NextInterval() (interface{}, error) {
 	if l.cursorsEmpty || l.tmin > l.job.TMax {
 		return nil, nil
 	}

+	// after we call to the mapper, this will be the tmin for the next interval.
+	nextMin := l.tmin + l.interval
+
 	// Set the upper bound of the interval.
 	if l.isRaw {
-		l.tmax = interval
 		l.perIntervalLimit = l.chunkSize
-	} else if interval > 0 {
-		// Make sure the bottom of the interval lands on a natural boundary.
-		l.tmax = l.tmin + interval - 1
+	} else if l.interval > 0 {
+		// Set tmax to ensure that the interval lands on the boundary of the interval
+		if l.tmin%l.interval != 0 {
+			// the first interval in a query with a group by may be smaller than the others. This happens when they have a
+			// where time > clause that is in the middle of the bucket that the group by time creates. That will be the
+			// case on the first interval when the tmin % the interval isn't equal to zero
+			nextMin = l.tmin/l.interval*l.interval + l.interval
+			l.tmax = nextMin - 1
+		} else {
+			l.tmax = l.tmin + l.interval - 1
+		}
 	}

 	// Execute the map function. This local mapper acts as the iterator
@@ -383,7 +405,7 @@ func (l *LocalMapper) NextInterval(interval int64) (interface{}, error) {
 	// Move the interval forward if it's not a raw query. For raw queries we use the limit to advance intervals.
 	if !l.isRaw {
-		l.tmin += interval
+		l.tmin = nextMin
 	}

 	return val, nil
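
To make the boundary math concrete with toy numbers (not real nanosecond timestamps): with interval = 100 and a WHERE time > clause that puts tmin mid-bucket at 130, the first interval is clipped to [130, 199] because 130/100*100 + 100 = 200; every following interval is a full bucket. A small check:

	package main

	import "fmt"

	func main() {
		const interval = int64(100)
		tmin := int64(130)
		for i := 0; i < 3; i++ {
			nextMin := tmin + interval
			tmax := tmin + interval - 1
			if tmin%interval != 0 {
				// first bucket is clipped to the group by boundary
				nextMin = tmin/interval*interval + interval
				tmax = nextMin - 1
			}
			fmt.Printf("[%d, %d]\n", tmin, tmax) // [130, 199], then [200, 299], [300, 399]
			tmin = nextMin
		}
	}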