select derivative(mean(value)) from cpu group by time(5m) select mean(value) from cpu group by time(5m) select top(10, value) from cpu group by host where time > now() - 1h this query uses this type of cycle -------REMOTE HOST ------------- -----HOST THAT GOT QUERY --- map -> reduce -> combine -> map -> reduce -> combine -> user select mean(value) from cpu group by time(5m), host where time > now() -4h map -> reduce -> combine -> user map -> reduce -> map -> reduce -> combine -> user map -> reduce -> combine -> map -> reduce -> combine -> user select value from ( select mean(value) AS value FROM cpu GROUP BY time(5m) ) [ { name: cpu, tags: { host: servera, }, columns: [time, mean], values : [ [23423423, 88.8] ] }, { name: cpu, tags: { host: serverb, } } ] ================================================================================ // list series -> /* [ { "name": "cpu", "columns": ["id", "region", "host"], "values": [ 1, "uswest", "servera", 2, "uswest", "serverb" ] }, { "" } ] list series where region = 'uswest' list tags where name = 'cpu' list tagKeys where name = 'cpu' list series where name = 'cpu' and region = 'uswest' select distinct(region) from cpu list names list tagKeys list tagValues where tagKey = 'region' and time > now() -1h select a.value, b.value from a join b where a.user_id == 100 select a.value from a where a.user_id == 100 select b.value from b 3 1 2 select sum(a.value) + (sum(b.value) / min(b.value)) from a join b group by region select sum(a.value) from a group by time(5m) select sum(b.value) from b group by time(5m) execute sum MR on series [23, 65, 88, 99, 101, 232] map -> 1 tick per 5m reduce -> combines ticks per 5m interval -> outputs planner -> take reduce output per 5m interval from the two reducers and combine with the join function, which is + [1,/,2,+,3] for v := s[0].Next(); v != nil; v = s[0].Next() { var result interface{} for i := 1; i < len(s); i += 2 { // it's an operator if i % 2 == 1 { } } } select count(distinct(host)) from 
cpu where time > now() - 5m type mapper interface { Map(iterator) } type floatCountMapper struct {} func(m *floatCountMapper) Map(i Iterator) { itr := i.(*floatIterator) } type Iterator interface { itr() } type iterator struct { cursor *bolt.Cursor timeBucket time.Time name string seriesID uint32 tags map[string]string fieldID uint8 where *WhereClause } func (i *intIterator) itr() {} func (i *intIterator) Next() (k int64, v float64) { // loop through bolt cursor applying where clause and yield next point // if cursor is at end or time is out of range, yield nil } */ field: ipaddress select top(10, count, ipaddress) from hits group by time(5m), host map -> 10 records, , reducer -> take in all map outputs for each 5m bucket combine them, sort, take out the top 10 output -> 10 records, count, ipaddresses, time ========== select top(10, count, host) from hits group by time(5m) select host, value from cpu where time > now() - 1h select last(value) from cpu group by time(auto), host fill(previous) where time > now() - 1h select sum(value) from cpu group by host where time > now() - 1h select sum(value) from cpu where time > now() - 1h select * from a; [ { "name": "cpu", "tags": { "host": "servera" }, "fields": [ "time", "count", "ipaddress" ] "values": [ [t, v, ""], [t, v, ""], ] }, { "name": "cpu", "tags": { "host": "serverb" }, "values": [ [t, v], [t + 1, v], ] }, ] [t, v, "servera"] [t, v, "serverb"] [t+1, v, "servera"] [t+1, v, "serverb"] ====== a INNER JOIN b - planner always has "group by" select count(errors.value) / count(requests.value) as error_rate from errors join requests as "mysuperseries" group by time(5m) fill(previous) where time > now() - 4h select mean(value) as cpu_mean from cpu group by time(5m) where host = 'servera' select count(value) from errors group by time(5m) fill(previous) where.. select count(value) from requests group by time(5m) fill(previ... 
{ "name": "errors.requests", "tags": {}, "fields": ["time", "errors.count", "requests.count"], "values": [ [t, n, m] ] } a MERGE b a - t b - t a - t + 1 b - t + 1 b - t + 2 a - t + 3 select value from cpu select mean(value) from cpu group by time(5m) select first(value) from cpu ===== 1. Group by time 2. Group by 3. Raw ====== SELECT sum(value) FROM myseries host=servera host=serverb {"host":"servera", "value":100} {"host":"serverb", "value":"hello!"} series = series = seriesID seriesID -> name name has_many seriesIDs name has_many fields field -> (type, id) -> (type, id) -> fieldValues field type topMapper struct { count int } func newTopMapper(count int) { } func (t *topCountMapper) Map(i Iterator) { topValues := make(map[string]int) for p := i.Next(); p != nil; p = i.Next() { topValues[p.String()] += 1 } for k, v := range topValues { t.job.Emit(k, v) } } type topCountReducer struct { count int } func (r *topCountReducer) Reduce(i Iterator) { realzTop10 := make(map[string]int) for v := i.Next(); v != nil; v = i.Next() { top10 := v.(map[string]int) for k, n := range top10 { realzTop10[k] += n } } realyrealTop10 := make(map[string]int) // do sorty magic on realzTop10 and set realyreal r.job.Emit(realyrealTop10) } type Transformer interface { Transform(interface{}) Series } type ReduceOutput struct { values [][]interface{} fieldIDs [] } // for topCountReducer ReduceOutput would look like // values = [t, c, "some string"] // fieldIDs = [0, 0, 3] SELECT val1, val2 FROM abc select mean(value) from cpu where region='uswest' group by time(5m), host 2000 series 200 series to each machine ================================================================================ type Mapper interface { Map(Iterator) } type countMapper struct {} // Iterator is the entire series if not an aggregate query // or iterator is the entire time bucket if an aggregate query func (m *sumMapper) Map(i Iterator) { var sum int for p := i.Next(); p != nil; p = i.Next() { sum += p.Float() } 
m.Emitter.Emit(k, sum) } type Point interface { String(name) Int(name) } type cursorIterator struct { Cursor *bolt.Cursor FieldID uint8 Value []byte } func (i cursorIterator) Next() Point { _, i.Value = i.Cursor.Next() return byteSlicePoint(i.Value) } type byteSlicePoint []byte func (p byteSlicePoint) String() string { // unmarshal from byte slice. } /* { "name": "foo", "fields": { "value": 23.2, "user_id": 23 }, "tags": { } } */ CNT ID0 VALUEVALUEVALUEVALUEVALUEVALUEVALUEVALU 0001 0000 0000 0000 0000 0000 0000 0000 0000 0000 CNT ID0 ID1 ID2 FLOATFLOA STRINGSTR STRINGSTR 0002 0001 0002 0003 0000 0000 0000 0000 0000 0000 // SELECT count() FROM cpu GROUP BY host // SELECT mean(value) from cpu where region = 'uswest' // SELECT derivative(value) from redis_key_count GROUP BY time(5m) // SELECT host, mean(value) // FROM cpu // GROUP BY host // HAVING top(20, mean) // WHERE time > now() - 1h // AND region = 'uswest' // SELECT ipaddress, count(ipaddress) // FROM hits // GROUP BY ipaddress // HAVING top(10, count) // WHERE time > now() - 1h series := meta.DistinctTagValues("cpu", "host") type Series struct { name string fields map[uint8]string } type SeriesData struct { ID tags map[string]string } mrJobs := make([]*MRJob, 0, len(series)) for _, s := range series { j := NewMRJob(s) mrJobs = append(mrJobs, j) j.Execute() } for _, j := range mrJobs { // pull in results // construct series object with same tags as series } ================================================================================ type Mapper interface { Map(Iterator) } type countMapper struct {} // Iterator is the entire series if not an aggregate query // or iterator is the entire time bucket if an aggregate query func (m *sumMapper) Map(i Iterator) { var sum int for p := i.Next(); p != nil; p = i.Next() { sum += p.Float() } m.Emitter.Emit(k, sum) } type Point interface { String(name) Int(name) } type cursorIterator struct { Cursor *bolt.Cursor FieldID uint8 Value []byte } func (i cursorIterator) Next() 
Point { _, i.Value = i.Cursor.Next() return byteSlicePoint(i.Value) } type byteSlicePoint []byte func (p byteSlicePoint) String() string { // unmarshal from byte slice. } /* { "name": "foo", "fields": { "value": 23.2, "user_id": 23 }, "tags": { } } */ CNT ID0 VALUEVALUEVALUEVALUEVALUEVALUEVALUEVALU 0001 0000 0000 0000 0000 0000 0000 0000 0000 0000 CNT ID0 ID1 ID2 FLOATFLOA STRINGSTR STRINGSTR 0002 0001 0002 0003 0000 0000 0000 0000 0000 0000 // SELECT count() FROM cpu GROUP BY host // SELECT mean(value) from cpu where region = 'uswest' // SELECT derivative(value) from redis_key_count GROUP BY time(5m) // SELECT host, mean(value) // FROM cpu // GROUP BY host // HAVING top(20, mean) // WHERE time > now() - 1h // AND region = 'uswest' // SELECT ipaddress, count(ipaddress) // FROM hits // GROUP BY ipaddress // HAVING top(10, count) // WHERE time > now() - 1h series := meta.DistinctTagValues("cpu", "host") mrJobs := make([]*MRJob, 0, len(series)) for _, s := range series { j := NewMRJob(s) mrJobs = append(mrJobs, j) j.Execute() } for _, j := range mrJobs { // pull in results // construct series object with same tags as series } ================================================================================ type Iterator interface { Next() (interface{}, bool) } type iteratorCounter struct { iterator Iterator } func (iteratorCounter) Next() { } SELECT max(a.value), min(a.value), max(b.value) FROM a, b WHERE = '' grouper { []Iterator } SELECT max(a.value) FROM a WHERE = '' --> 1 value SELECT min(a.value) FROM a WHERE = '' --> 1 value SELECT max(b.value) FROM b --> 1 value SELECT max(a.value) FROM a GROUP BY time WHERE = '' --> key,value timeGrouper { []Iterator } type maxMapper struct { } IntervalIterator { } maxMapper.Map(Iterator) - GROUP BY time - GROUP BY time, - GROUP BY COUNT(field) MIN(field) MAX(field) MEAN(field) MODE(field) MEDIAN(field) COUNT(DISTINCT field) PERCENTILE(field, N) HISTOGRAM(field [, bucketSize]) DERIVATIVE(field) SUM(field) STDDEV(field) 
FIRST(field) LAST(field) DIFFERENCE(field) TOP(field, N) BOTTOM(field, N) <----- multivalue ================================================================================