Merge pull request #10089 from influxdata/jmw-radix-sort

inmem: use radix sort for series ids
pull/10063/head
Jeff Wendling 2018-07-17 17:45:41 -06:00 committed by GitHub
commit f5ed934646
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 1 deletions

92
pkg/radix/sort.go Normal file
View File

@ -0,0 +1,92 @@
// Portions of this file from github.com/shawnsmithdev/zermelo under the MIT license.
//
// The MIT License (MIT)
//
// Copyright (c) 2014 Shawn Smith
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
package radix
import (
"sort"
)
const (
// minSize is the slice length at which SortUint64s switches from the
// comparison sort in package sort to the radix sort in doSort.
minSize = 256
// radix is the number of key bits doSort consumes per pass; its mask and
// bucket tables are sized for 8 bits.
radix uint = 8
// bitSize is the width in bits of the keys being sorted; doSort stops once
// its bit offset reaches this value.
bitSize uint = 64
)
// SortUint64s sorts x in place into ascending order.
//
// Slices with fewer than two elements are left untouched. Slices shorter than
// minSize are handed to sort.Slice; anything longer goes through the radix
// sort implemented by doSort.
func SortUint64s(x []uint64) {
	switch n := len(x); {
	case n < 2:
		// Zero or one element: already in order.
	case n < minSize:
		sort.Slice(x, func(a, b int) bool { return x[a] < x[b] })
	default:
		doSort(x)
	}
}
// doSort sorts x in place into ascending order using a least-significant-byte
// radix sort.
//
// Each pass looks at one byte of every key (starting with the lowest),
// counts how many keys fall into each of the 256 buckets, and then copies the
// keys into the other buffer grouped by that byte. Because every pass also
// checks whether its input is already in order, the function returns as soon
// as the data is sorted rather than always running all bitSize/radix passes.
//
// The scratch buffer (same length as x) is allocated only once a
// redistribution pass is actually required, so an already-sorted input is
// handled without any allocation.
func doSort(x []uint64) {
	// Each pass processes a byte offset, copying back and forth between slices.
	from := x
	var to []uint64 // scratch buffer, allocated lazily below

	var key uint8
	var offset [256]int // Keep track of where groups start

	for keyOffset := uint(0); keyOffset < bitSize; keyOffset += radix {
		keyMask := uint64(0xFF << keyOffset) // Current 'digit' to look at
		var counts [256]int                  // Number of elements for each kind of byte
		sorted := true                       // Check for already sorted
		prev := uint64(0)                    // if elem is always >= prev it is already sorted

		for _, elem := range from {
			key = uint8((elem & keyMask) >> keyOffset) // fetch the byte at current 'digit'
			counts[key]++                              // count of elems to put in this digit's bucket
			if sorted {                                // Detect sorted
				sorted = elem >= prev
				prev = elem
			}
		}

		if sorted { // Short-circuit sorted
			// On odd passes the sorted data lives in the scratch buffer and
			// 'to' refers to x, so it has to be copied back to the caller.
			if (keyOffset/radix)%2 == 1 {
				copy(to, from)
			}
			return
		}

		// Only reachable with a nil 'to' on the first pass: afterwards the
		// swap below always leaves 'to' pointing at x or the scratch buffer.
		if to == nil {
			to = make([]uint64, len(x))
		}

		// Find target bucket offsets
		offset[0] = 0
		for i := 1; i < len(offset); i++ {
			offset[i] = offset[i-1] + counts[i-1]
		}

		// Rebucket while copying to other buffer
		for _, elem := range from {
			key = uint8((elem & keyMask) >> keyOffset) // Get the digit
			to[offset[key]] = elem                     // Copy the element to the digit's bucket
			offset[key]++                              // One less space, move the offset
		}

		// On next pass copy data the other way
		to, from = from, to
	}
}

27
pkg/radix/sort_test.go Normal file
View File

@ -0,0 +1,27 @@
package radix
import (
"math/rand"
"testing"
)
// benchmarkSort measures SortUint64s on a slice of size pseudo-random values.
//
// The unsorted input is generated once from rand.Int63 before the timer is
// reset; every iteration then restores the working slice from that input so
// each call to SortUint64s sees the same unsorted data. Allocation reporting
// is enabled for the benchmark.
func benchmarkSort(b *testing.B, size int) {
	// Reference input, built outside the timed region.
	input := make([]uint64, size)
	for idx := range input {
		input[idx] = uint64(rand.Int63())
	}

	// Working copy that gets sorted on every iteration.
	work := make([]uint64, size)

	b.ResetTimer()
	b.ReportAllocs()

	for iter := 0; iter < b.N; iter++ {
		copy(work, input)
		SortUint64s(work)
	}
}
// BenchmarkSort_64 benchmarks sorting 64 elements.
func BenchmarkSort_64(b *testing.B) {
	benchmarkSort(b, 64)
}

// BenchmarkSort_128 benchmarks sorting 128 elements.
func BenchmarkSort_128(b *testing.B) {
	benchmarkSort(b, 128)
}

// BenchmarkSort_256 benchmarks sorting 256 elements.
func BenchmarkSort_256(b *testing.B) {
	benchmarkSort(b, 256)
}

// BenchmarkSort_12K benchmarks sorting 12*1024 elements.
func BenchmarkSort_12K(b *testing.B) {
	benchmarkSort(b, 12*1024)
}

View File

@ -10,6 +10,7 @@ import (
"github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bytesutil" "github.com/influxdata/influxdb/pkg/bytesutil"
"github.com/influxdata/influxdb/pkg/radix"
"github.com/influxdata/influxdb/query" "github.com/influxdata/influxdb/query"
"github.com/influxdata/influxdb/tsdb" "github.com/influxdata/influxdb/tsdb"
"github.com/influxdata/influxql" "github.com/influxdata/influxql"
@ -1181,7 +1182,7 @@ func (e *tagKeyValueEntry) ids() seriesIDs {
for id := range e.m { for id := range e.m {
a = append(a, id) a = append(a, id)
} }
sort.Sort(a) radix.SortUint64s(a)
e.a = a e.a = a
return e.a return e.a