Add utility functions for merging k collections of sorted slices

pull/9150/head
Edd Robinson 2017-09-29 15:31:23 +01:00 committed by Ben Johnson
parent 6d87ff7fa2
commit 0dd97cc84a
No known key found for this signature in database
GPG Key ID: 81741CD251883081
4 changed files with 625 additions and 0 deletions

398
pkg/slices/merge.gen.go Normal file
View File

@ -0,0 +1,398 @@
// Generated by tmpl
// https://github.com/benbjohnson/tmpl
//
// DO NOT EDIT!
// Source: merge.gen.go.tmpl
package slices
import "bytes"
// MergeSortedFloats performs a k-way merge over the float64 slices in n,
// each of which is expected to already be in ascending order.
//
// The merged values are returned in ascending order with duplicates removed.
// Calling it with no slices returns nil. It panics if a value is encountered
// that is neither greater than nor equal to the last value emitted, which
// happens when an input is not in ascending order (and, for floats, when a
// NaN is compared, since every comparison involving NaN is false).
func MergeSortedFloats(n ...[]float64) []float64 {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Pair the lone slice with a nil slice so it takes the general path,
		// which strips duplicates from it.
		return MergeSortedFloats(n[0], nil)
	}

	// Size the output for the longest input. That is usually an
	// underestimate, but append grows the slice as needed.
	longest := 0
	for _, s := range n {
		if len(s) > longest {
			longest = len(s)
		}
	}
	merged := make([]float64, 0, longest)
	cursors := make([]int, len(n)) // cursors[i] is the next unread position in n[i].

	for {
		// Look at the head of every slice that still has values and pick the
		// slice holding the smallest one.
		pick := -1
		for i, s := range n {
			if cursors[i] >= len(s) {
				continue // Slice i is fully consumed.
			}
			if pick < 0 {
				pick = i // First live slice seen becomes the initial candidate.
				continue
			}
			head, best := s[cursors[i]], n[pick][cursors[pick]]
			switch {
			case head < best:
				pick = i
			case head == best:
				// Same value as the candidate: discard this copy.
				cursors[i]++
			}
		}
		if pick < 0 {
			// Every slice is drained.
			return merged
		}

		next := n[pick][cursors[pick]]
		cursors[pick]++

		if len(merged) > 0 {
			last := merged[len(merged)-1]
			if last == next {
				continue // Already emitted; drop the duplicate.
			}
			if !(last < next) {
				panic("value being merged out of order.")
			}
		}
		merged = append(merged, next)
	}
}
// MergeSortedInts performs a k-way merge over the int64 slices in n, each of
// which is expected to already be in ascending order.
//
// The merged values are returned in ascending order with duplicates removed.
// Calling it with no slices returns nil. It panics if a value smaller than
// the last value emitted is encountered, which happens when an input is not
// in ascending order.
func MergeSortedInts(n ...[]int64) []int64 {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Pair the lone slice with a nil slice so it takes the general path,
		// which strips duplicates from it.
		return MergeSortedInts(n[0], nil)
	}

	// Size the output for the longest input. That is usually an
	// underestimate, but append grows the slice as needed.
	longest := 0
	for _, s := range n {
		if len(s) > longest {
			longest = len(s)
		}
	}
	merged := make([]int64, 0, longest)
	cursors := make([]int, len(n)) // cursors[i] is the next unread position in n[i].

	for {
		// Look at the head of every slice that still has values and pick the
		// slice holding the smallest one.
		pick := -1
		for i, s := range n {
			if cursors[i] >= len(s) {
				continue // Slice i is fully consumed.
			}
			if pick < 0 {
				pick = i // First live slice seen becomes the initial candidate.
				continue
			}
			head, best := s[cursors[i]], n[pick][cursors[pick]]
			switch {
			case head < best:
				pick = i
			case head == best:
				// Same value as the candidate: discard this copy.
				cursors[i]++
			}
		}
		if pick < 0 {
			// Every slice is drained.
			return merged
		}

		next := n[pick][cursors[pick]]
		cursors[pick]++

		if len(merged) > 0 {
			last := merged[len(merged)-1]
			if last == next {
				continue // Already emitted; drop the duplicate.
			}
			if !(last < next) {
				panic("value being merged out of order.")
			}
		}
		merged = append(merged, next)
	}
}
// MergeSortedUInts performs a k-way merge over the uint64 slices in n, each
// of which is expected to already be in ascending order.
//
// The merged values are returned in ascending order with duplicates removed.
// Calling it with no slices returns nil. It panics if a value smaller than
// the last value emitted is encountered, which happens when an input is not
// in ascending order.
func MergeSortedUInts(n ...[]uint64) []uint64 {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Pair the lone slice with a nil slice so it takes the general path,
		// which strips duplicates from it.
		return MergeSortedUInts(n[0], nil)
	}

	// Size the output for the longest input. That is usually an
	// underestimate, but append grows the slice as needed.
	longest := 0
	for _, s := range n {
		if len(s) > longest {
			longest = len(s)
		}
	}
	merged := make([]uint64, 0, longest)
	cursors := make([]int, len(n)) // cursors[i] is the next unread position in n[i].

	for {
		// Look at the head of every slice that still has values and pick the
		// slice holding the smallest one.
		pick := -1
		for i, s := range n {
			if cursors[i] >= len(s) {
				continue // Slice i is fully consumed.
			}
			if pick < 0 {
				pick = i // First live slice seen becomes the initial candidate.
				continue
			}
			head, best := s[cursors[i]], n[pick][cursors[pick]]
			switch {
			case head < best:
				pick = i
			case head == best:
				// Same value as the candidate: discard this copy.
				cursors[i]++
			}
		}
		if pick < 0 {
			// Every slice is drained.
			return merged
		}

		next := n[pick][cursors[pick]]
		cursors[pick]++

		if len(merged) > 0 {
			last := merged[len(merged)-1]
			if last == next {
				continue // Already emitted; drop the duplicate.
			}
			if !(last < next) {
				panic("value being merged out of order.")
			}
		}
		merged = append(merged, next)
	}
}
// MergeSortedStrings performs a k-way merge over the string slices in n, each
// of which is expected to already be in ascending order.
//
// The merged values are returned in ascending order with duplicates removed.
// Calling it with no slices returns nil. It panics if a value smaller than
// the last value emitted is encountered, which happens when an input is not
// in ascending order.
func MergeSortedStrings(n ...[]string) []string {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Pair the lone slice with a nil slice so it takes the general path,
		// which strips duplicates from it.
		return MergeSortedStrings(n[0], nil)
	}

	// Size the output for the longest input. That is usually an
	// underestimate, but append grows the slice as needed.
	longest := 0
	for _, s := range n {
		if len(s) > longest {
			longest = len(s)
		}
	}
	merged := make([]string, 0, longest)
	cursors := make([]int, len(n)) // cursors[i] is the next unread position in n[i].

	for {
		// Look at the head of every slice that still has values and pick the
		// slice holding the smallest one.
		pick := -1
		for i, s := range n {
			if cursors[i] >= len(s) {
				continue // Slice i is fully consumed.
			}
			if pick < 0 {
				pick = i // First live slice seen becomes the initial candidate.
				continue
			}
			head, best := s[cursors[i]], n[pick][cursors[pick]]
			switch {
			case head < best:
				pick = i
			case head == best:
				// Same value as the candidate: discard this copy.
				cursors[i]++
			}
		}
		if pick < 0 {
			// Every slice is drained.
			return merged
		}

		next := n[pick][cursors[pick]]
		cursors[pick]++

		if len(merged) > 0 {
			last := merged[len(merged)-1]
			if last == next {
				continue // Already emitted; drop the duplicate.
			}
			if !(last < next) {
				panic("value being merged out of order.")
			}
		}
		merged = append(merged, next)
	}
}
// MergeSortedBytes performs a k-way merge over the collections of byte
// slices in n, each of which is expected to already be in ascending order as
// defined by bytes.Compare.
//
// The merged values are returned in ascending order with duplicates removed.
// The returned elements are the same []byte values held by the inputs; they
// are not copied. Calling it with no collections returns nil. It panics if a
// value that compares lower than the last value emitted is encountered,
// which happens when an input is not in ascending order.
func MergeSortedBytes(n ...[][]byte) [][]byte {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Pair the lone collection with a nil one so it takes the general
		// path, which strips duplicates from it.
		return MergeSortedBytes(n[0], nil)
	}

	// Size the output for the longest input. That is usually an
	// underestimate, but append grows the slice as needed.
	longest := 0
	for _, s := range n {
		if len(s) > longest {
			longest = len(s)
		}
	}
	merged := make([][]byte, 0, longest)
	cursors := make([]int, len(n)) // cursors[i] is the next unread position in n[i].

	for {
		// Look at the head of every collection that still has values and
		// pick the one holding the smallest value.
		pick := -1
		for i, s := range n {
			if cursors[i] >= len(s) {
				continue // Collection i is fully consumed.
			}
			if pick < 0 {
				pick = i // First live collection seen becomes the initial candidate.
				continue
			}
			switch bytes.Compare(s[cursors[i]], n[pick][cursors[pick]]) {
			case -1:
				pick = i
			case 0:
				// Same value as the candidate: discard this copy.
				cursors[i]++
			}
		}
		if pick < 0 {
			// Every collection is drained.
			return merged
		}

		next := n[pick][cursors[pick]]
		cursors[pick]++

		if len(merged) > 0 {
			switch bytes.Compare(merged[len(merged)-1], next) {
			case -1:
				// Strictly greater than the last emitted value: append below.
			case 0:
				continue // Already emitted; drop the duplicate.
			default:
				panic("value being merged out of order.")
			}
		}
		merged = append(merged, next)
	}
}

View File

@ -0,0 +1,104 @@
package slices
import "bytes"
{{with $types := .}}{{range $k := $types}}
// MergeSorted{{$k.Name}} uses a k-way merge to merge n collections of sorted
// {{$k.Type}} values.
//
// The resulting slice is returned in ascending order, with any duplicate values
// removed. Calling it with no collections returns nil. It panics if a value is
// found that is neither greater than nor equal to the last value appended,
// which happens when an input is not in ascending order.
func MergeSorted{{$k.Name}}(n ...[]{{$k.Type}}) []{{$k.Type}} {
var result []{{$k.Type}}
if len(n) == 0 {
return nil
} else if len(n) == 1 {
// Special case. Merge single slice with a nil slice, to remove any
// duplicates from the single slice.
return MergeSorted{{$k.Name}}(n[0], nil)
}
var maxSize int
for _, a := range n {
if len(a) > maxSize {
maxSize = len(a)
}
}
result = make([]{{$k.Type}}, 0, maxSize) // This will likely be too small but it's a start.
idxs := make([]int, len(n)) // idxs[i] is the next unprocessed position in n[i].
var j int // Index of the slice we currently think holds the minimum.
{{if eq $k.Name "Bytes" }}
var cmp int // Result of comparing most recent value.
{{end}}
for {
j = -1
// Find the smallest head value across all slices.
for i := 0; i < len(n); i++ {
if idxs[i] >= len(n[i]) {
continue // We have completely drained all values in this slice.
} else if j == -1 {
// We haven't picked the minimum value yet. Pick this one.
j = i
continue
}
// Is this value lower than the current candidate?
{{if eq $k.Name "Bytes" }}
cmp = bytes.Compare(n[i][idxs[i]], n[j][idxs[j]])
if cmp == -1 {
j = i
} else if cmp == 0 {
// Duplicate value. Throw it away.
idxs[i]++
}
{{else}}
if n[i][idxs[i]] < n[j][idxs[j]] {
j = i
} else if n[i][idxs[i]] == n[j][idxs[j]] {
// Duplicate value. Throw it away.
idxs[i]++
}
{{end}}
}
// We could have drained all of the values and be done...
if j == -1 {
break
}
// First value, so just append it and move on.
if len(result) == 0 {
result = append(result, n[j][idxs[j]])
idxs[j]++
continue
}
// Append the minimum value to results if it's not a duplicate of
// the existing one.
{{if eq $k.Name "Bytes" }}
cmp = bytes.Compare(result[len(result)-1], n[j][idxs[j]])
if cmp == -1 {
result = append(result, n[j][idxs[j]])
} else if cmp == 0 {
// Duplicate so drop it.
} else {
// The next value is lower than the last one appended.
panic("value being merged out of order.")
}
{{else}}
if result[len(result)-1] < n[j][idxs[j]] {
result = append(result, n[j][idxs[j]])
} else if result[len(result)-1] == n[j][idxs[j]] {
// Duplicate so drop it.
} else {
// The next value is neither greater than nor equal to the last one appended.
panic("value being merged out of order.")
}
{{end}}
idxs[j]++
}
return result
}
{{end}}{{end}}

101
pkg/slices/merge_test.go Normal file
View File

@ -0,0 +1,101 @@
package slices_test
import (
"fmt"
"reflect"
"testing"
"github.com/influxdata/influxdb/pkg/slices"
)
// TestMergeSortedBytes runs slices.MergeSortedBytes over a table of inputs
// (no inputs, nil collections, duplicates within and across collections) and
// compares each result with the expected merged output.
func TestMergeSortedBytes(t *testing.T) {
	type example struct {
		Inputs [][][]byte
		Out    [][]byte
	}

	examples := []example{
		{Inputs: [][][]byte{}},
		{Inputs: [][][]byte{toBytes(0)}, Out: toBytes(0)},
		{Inputs: [][][]byte{toBytes(2), nil, toBytes(2)}, Out: toBytes(2)},
		{Inputs: [][][]byte{toBytes(9), toBytes(1, 16, 16), toBytes(5, 10)}, Out: toBytes(1, 5, 9, 10, 16)},
		{Inputs: [][][]byte{toBytes(20), toBytes(16), toBytes(10)}, Out: toBytes(10, 16, 20)},
		{Inputs: [][][]byte{toBytes(2, 2, 2, 2, 2, 2, 2, 2)}, Out: toBytes(2)},
		{Inputs: [][][]byte{toBytes(2, 2, 2, 2, 2, 2, 2, 2), nil, nil, nil}, Out: toBytes(2)},
		{Inputs: [][][]byte{toBytes(1, 2, 3, 4, 5), toBytes(1, 2, 3, 4, 5), toBytes(1, 2, 3, 4, 5)}, Out: toBytes(1, 2, 3, 4, 5)},
	}

	for idx, ex := range examples {
		ex := ex // Give the subtest closure its own copy.
		t.Run(fmt.Sprintf("Example %d", idx+1), func(t *testing.T) {
			got := slices.MergeSortedBytes(ex.Inputs...)
			if !reflect.DeepEqual(got, ex.Out) {
				t.Fatalf("got %v, expected %v", got, ex.Out)
			}
		})
	}
}
// toBytes turns each int in a into a one-byte slice holding byte(v), so
// values outside 0-255 wrap. It returns nil when called with no arguments.
func toBytes(a ...int) [][]byte {
	if len(a) == 0 {
		return nil
	}
	out := make([][]byte, len(a))
	for i, v := range a {
		out[i] = []byte{byte(v)}
	}
	return out
}
// TestMergeSortedInts runs slices.MergeSortedInts over a table of inputs (no
// inputs, nil slices, duplicates within and across slices) and compares each
// result with the expected merged output.
func TestMergeSortedInts(t *testing.T) {
	type example struct {
		Inputs [][]int64
		Out    []int64
	}

	examples := []example{
		{Inputs: [][]int64{}},
		{Inputs: [][]int64{{0}}, Out: []int64{0}},
		{Inputs: [][]int64{{2}, nil, {2}}, Out: []int64{2}},
		{Inputs: [][]int64{{9}, {1, 16, 16}, {5, 10}}, Out: []int64{1, 5, 9, 10, 16}},
		{Inputs: [][]int64{{20}, {16}, {10}}, Out: []int64{10, 16, 20}},
		{Inputs: [][]int64{{2, 2, 2, 2, 2, 2, 2, 2}}, Out: []int64{2}},
		{Inputs: [][]int64{{2, 2, 2, 2, 2, 2, 2, 2}, nil, nil, nil}, Out: []int64{2}},
		{Inputs: [][]int64{{1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}}, Out: []int64{1, 2, 3, 4, 5}},
	}

	for idx, ex := range examples {
		ex := ex // Give the subtest closure its own copy.
		t.Run(fmt.Sprintf("Example %d", idx+1), func(t *testing.T) {
			got := slices.MergeSortedInts(ex.Inputs...)
			if !reflect.DeepEqual(got, ex.Out) {
				t.Fatalf("got %v, expected %v", got, ex.Out)
			}
		})
	}
}

22
pkg/slices/tmpldata Normal file
View File

@ -0,0 +1,22 @@
[
{
"Name":"Floats",
"Type":"float64"
},
{
"Name":"Ints",
"Type":"int64"
},
{
"Name":"UInts",
"Type":"uint64"
},
{
"Name":"Strings",
"Type":"string"
},
{
"Name":"Bytes",
"Type":"[]byte"
}
]