Add utility functions for merging k collections of sorted slices
parent
6d87ff7fa2
commit
0dd97cc84a
|
@ -0,0 +1,398 @@
|
|||
// Generated by tmpl
|
||||
// https://github.com/benbjohnson/tmpl
|
||||
//
|
||||
// DO NOT EDIT!
|
||||
// Source: merge.gen.go.tmpl
|
||||
|
||||
package slices
|
||||
|
||||
import "bytes"
|
||||
|
||||
// MergeSortedFloats uses a k-way merge to merge n collections of sorted
// float64 values.
//
// The resulting slice is returned in ascending order, with any duplicate values
// removed. It returns nil when called with no arguments. Every input must
// already be sorted in ascending order; MergeSortedFloats panics when it meets
// a value lower than the one it emitted last. NaN is not supported: it compares
// as neither lower nor equal, so it typically ends in the same panic.
func MergeSortedFloats(n ...[]float64) []float64 {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Special case. Merge the single slice with a nil slice, to remove any
		// duplicates from the single slice.
		return MergeSortedFloats(n[0], nil)
	}

	// Start with room for the longest input. This will likely be too small
	// but it's a start; append grows the slice as needed.
	var maxSize int
	for _, a := range n {
		if len(a) > maxSize {
			maxSize = len(a)
		}
	}
	result := make([]float64, 0, maxSize)

	// idxs[i] is the position of the next unread value in n[i].
	idxs := make([]int, len(n))

	for {
		// j is the input currently holding the lowest unread value, or -1
		// while no candidate has been seen.
		j := -1

		for i := range n {
			if idxs[i] >= len(n[i]) {
				continue // We have completely drained all values in this slice.
			}
			if j == -1 {
				// We haven't picked the minimum value yet. Pick this one.
				j = i
				continue
			}

			// Is this value lower than the candidate?
			if cur, low := n[i][idxs[i]], n[j][idxs[j]]; cur < low {
				j = i
			} else if cur == low {
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
		}

		// Every input is drained, so the merge is complete.
		if j == -1 {
			return result
		}

		v := n[j][idxs[j]]
		idxs[j]++

		// The first value has nothing to be compared against.
		if len(result) == 0 {
			result = append(result, v)
			continue
		}

		// Append the minimum value to the result unless it duplicates the
		// value appended last.
		switch last := result[len(result)-1]; {
		case last < v:
			result = append(result, v)
		case last == v:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
	}
}
|
||||
|
||||
// MergeSortedInts uses a k-way merge to merge n collections of sorted int64
// values.
//
// The resulting slice is returned in ascending order, with any duplicate values
// removed. It returns nil when called with no arguments. Every input must
// already be sorted in ascending order; MergeSortedInts panics when it meets a
// value lower than the one it emitted last.
func MergeSortedInts(n ...[]int64) []int64 {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Special case. Merge the single slice with a nil slice, to remove any
		// duplicates from the single slice.
		return MergeSortedInts(n[0], nil)
	}

	// Start with room for the longest input. This will likely be too small
	// but it's a start; append grows the slice as needed.
	var maxSize int
	for _, a := range n {
		if len(a) > maxSize {
			maxSize = len(a)
		}
	}
	result := make([]int64, 0, maxSize)

	// idxs[i] is the position of the next unread value in n[i].
	idxs := make([]int, len(n))

	for {
		// j is the input currently holding the lowest unread value, or -1
		// while no candidate has been seen.
		j := -1

		for i := range n {
			if idxs[i] >= len(n[i]) {
				continue // We have completely drained all values in this slice.
			}
			if j == -1 {
				// We haven't picked the minimum value yet. Pick this one.
				j = i
				continue
			}

			// Is this value lower than the candidate?
			if cur, low := n[i][idxs[i]], n[j][idxs[j]]; cur < low {
				j = i
			} else if cur == low {
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
		}

		// Every input is drained, so the merge is complete.
		if j == -1 {
			return result
		}

		v := n[j][idxs[j]]
		idxs[j]++

		// The first value has nothing to be compared against.
		if len(result) == 0 {
			result = append(result, v)
			continue
		}

		// Append the minimum value to the result unless it duplicates the
		// value appended last.
		switch last := result[len(result)-1]; {
		case last < v:
			result = append(result, v)
		case last == v:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
	}
}
|
||||
|
||||
// MergeSortedUInts uses a k-way merge to merge n collections of sorted uint64
// values.
//
// The resulting slice is returned in ascending order, with any duplicate values
// removed. It returns nil when called with no arguments. Every input must
// already be sorted in ascending order; MergeSortedUInts panics when it meets a
// value lower than the one it emitted last.
func MergeSortedUInts(n ...[]uint64) []uint64 {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Special case. Merge the single slice with a nil slice, to remove any
		// duplicates from the single slice.
		return MergeSortedUInts(n[0], nil)
	}

	// Start with room for the longest input. This will likely be too small
	// but it's a start; append grows the slice as needed.
	var maxSize int
	for _, a := range n {
		if len(a) > maxSize {
			maxSize = len(a)
		}
	}
	result := make([]uint64, 0, maxSize)

	// idxs[i] is the position of the next unread value in n[i].
	idxs := make([]int, len(n))

	for {
		// j is the input currently holding the lowest unread value, or -1
		// while no candidate has been seen.
		j := -1

		for i := range n {
			if idxs[i] >= len(n[i]) {
				continue // We have completely drained all values in this slice.
			}
			if j == -1 {
				// We haven't picked the minimum value yet. Pick this one.
				j = i
				continue
			}

			// Is this value lower than the candidate?
			if cur, low := n[i][idxs[i]], n[j][idxs[j]]; cur < low {
				j = i
			} else if cur == low {
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
		}

		// Every input is drained, so the merge is complete.
		if j == -1 {
			return result
		}

		v := n[j][idxs[j]]
		idxs[j]++

		// The first value has nothing to be compared against.
		if len(result) == 0 {
			result = append(result, v)
			continue
		}

		// Append the minimum value to the result unless it duplicates the
		// value appended last.
		switch last := result[len(result)-1]; {
		case last < v:
			result = append(result, v)
		case last == v:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
	}
}
|
||||
|
||||
// MergeSortedStrings uses a k-way merge to merge n collections of sorted
// string values.
//
// The resulting slice is returned in ascending order, with any duplicate values
// removed. It returns nil when called with no arguments. Every input must
// already be sorted in ascending order; MergeSortedStrings panics when it meets
// a value lower than the one it emitted last.
func MergeSortedStrings(n ...[]string) []string {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Special case. Merge the single slice with a nil slice, to remove any
		// duplicates from the single slice.
		return MergeSortedStrings(n[0], nil)
	}

	// Start with room for the longest input. This will likely be too small
	// but it's a start; append grows the slice as needed.
	var maxSize int
	for _, a := range n {
		if len(a) > maxSize {
			maxSize = len(a)
		}
	}
	result := make([]string, 0, maxSize)

	// idxs[i] is the position of the next unread value in n[i].
	idxs := make([]int, len(n))

	for {
		// j is the input currently holding the lowest unread value, or -1
		// while no candidate has been seen.
		j := -1

		for i := range n {
			if idxs[i] >= len(n[i]) {
				continue // We have completely drained all values in this slice.
			}
			if j == -1 {
				// We haven't picked the minimum value yet. Pick this one.
				j = i
				continue
			}

			// Is this value lower than the candidate?
			if cur, low := n[i][idxs[i]], n[j][idxs[j]]; cur < low {
				j = i
			} else if cur == low {
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
		}

		// Every input is drained, so the merge is complete.
		if j == -1 {
			return result
		}

		v := n[j][idxs[j]]
		idxs[j]++

		// The first value has nothing to be compared against.
		if len(result) == 0 {
			result = append(result, v)
			continue
		}

		// Append the minimum value to the result unless it duplicates the
		// value appended last.
		switch last := result[len(result)-1]; {
		case last < v:
			result = append(result, v)
		case last == v:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
	}
}
|
||||
|
||||
// MergeSortedBytes uses a k-way merge to merge n collections of sorted byte
// slices.
//
// The resulting slice is returned in ascending order (as ordered by
// bytes.Compare), with any duplicate values removed. It returns nil when called
// with no arguments. The returned elements are the input byte slices
// themselves; their contents are not copied. Every input must already be sorted
// in ascending order; MergeSortedBytes panics when it meets a value lower than
// the one it emitted last.
func MergeSortedBytes(n ...[][]byte) [][]byte {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Special case. Merge the single slice with a nil slice, to remove any
		// duplicates from the single slice.
		return MergeSortedBytes(n[0], nil)
	}

	// Start with room for the longest input. This will likely be too small
	// but it's a start; append grows the slice as needed.
	var maxSize int
	for _, a := range n {
		if len(a) > maxSize {
			maxSize = len(a)
		}
	}
	result := make([][]byte, 0, maxSize)

	// idxs[i] is the position of the next unread value in n[i].
	idxs := make([]int, len(n))

	for {
		// j is the input currently holding the lowest unread value, or -1
		// while no candidate has been seen.
		j := -1

		for i := range n {
			if idxs[i] >= len(n[i]) {
				continue // We have completely drained all values in this slice.
			}
			if j == -1 {
				// We haven't picked the minimum value yet. Pick this one.
				j = i
				continue
			}

			// Is this value lower than the candidate?
			switch cmp := bytes.Compare(n[i][idxs[i]], n[j][idxs[j]]); {
			case cmp < 0:
				j = i
			case cmp == 0:
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
		}

		// Every input is drained, so the merge is complete.
		if j == -1 {
			return result
		}

		v := n[j][idxs[j]]
		idxs[j]++

		// The first value has nothing to be compared against.
		if len(result) == 0 {
			result = append(result, v)
			continue
		}

		// Append the minimum value to the result unless it duplicates the
		// value appended last.
		switch cmp := bytes.Compare(result[len(result)-1], v); {
		case cmp < 0:
			result = append(result, v)
		case cmp == 0:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
	}
}
|
|
@ -0,0 +1,104 @@
|
|||
package slices
|
||||
|
||||
import "bytes"
|
||||
|
||||
{{with $types := .}}{{range $k := $types}}

// MergeSorted{{$k.Name}} uses a k-way merge to merge n collections of sorted
// {{$k.Type}} values.
//
// The resulting slice is returned in ascending order, with any duplicate values
// removed. It returns nil when called with no arguments. Every input must
// already be sorted in ascending order; MergeSorted{{$k.Name}} panics when it
// meets a value lower than the one it emitted last.
func MergeSorted{{$k.Name}}(n ...[]{{$k.Type}}) []{{$k.Type}} {
	switch len(n) {
	case 0:
		return nil
	case 1:
		// Special case. Merge the single slice with a nil slice, to remove any
		// duplicates from the single slice.
		return MergeSorted{{$k.Name}}(n[0], nil)
	}

	// Start with room for the longest input. This will likely be too small
	// but it's a start; append grows the slice as needed.
	var maxSize int
	for _, a := range n {
		if len(a) > maxSize {
			maxSize = len(a)
		}
	}
	result := make([]{{$k.Type}}, 0, maxSize)

	// idxs[i] is the position of the next unread value in n[i].
	idxs := make([]int, len(n))

	for {
		// j is the input currently holding the lowest unread value, or -1
		// while no candidate has been seen.
		j := -1

		for i := range n {
			if idxs[i] >= len(n[i]) {
				continue // We have completely drained all values in this slice.
			}
			if j == -1 {
				// We haven't picked the minimum value yet. Pick this one.
				j = i
				continue
			}

			// Is this value lower than the candidate?
			{{if eq $k.Name "Bytes" }}
			switch cmp := bytes.Compare(n[i][idxs[i]], n[j][idxs[j]]); {
			case cmp < 0:
				j = i
			case cmp == 0:
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
			{{else}}
			if cur, low := n[i][idxs[i]], n[j][idxs[j]]; cur < low {
				j = i
			} else if cur == low {
				// Duplicate of the candidate, which is still unread in its
				// own slice. Throw this copy away.
				idxs[i]++
			}
			{{end}}
		}

		// Every input is drained, so the merge is complete.
		if j == -1 {
			return result
		}

		v := n[j][idxs[j]]
		idxs[j]++

		// The first value has nothing to be compared against.
		if len(result) == 0 {
			result = append(result, v)
			continue
		}

		// Append the minimum value to the result unless it duplicates the
		// value appended last.
		{{if eq $k.Name "Bytes" }}
		switch cmp := bytes.Compare(result[len(result)-1], v); {
		case cmp < 0:
			result = append(result, v)
		case cmp == 0:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
		{{else}}
		switch last := result[len(result)-1]; {
		case last < v:
			result = append(result, v)
		case last == v:
			// Duplicate so drop it.
		default:
			panic("value being merged out of order.")
		}
		{{end}}
	}
}

{{end}}{{end}}
|
|
@ -0,0 +1,101 @@
|
|||
package slices_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/influxdb/pkg/slices"
|
||||
)
|
||||
|
||||
func TestMergeSortedBytes(t *testing.T) {
|
||||
cases := []struct {
|
||||
Inputs [][][]byte
|
||||
Out [][]byte
|
||||
}{
|
||||
{Inputs: [][][]byte{}},
|
||||
{Inputs: [][][]byte{toBytes(0)}, Out: toBytes(0)},
|
||||
{
|
||||
Inputs: [][][]byte{toBytes(2), [][]byte(nil), toBytes(2)},
|
||||
Out: toBytes(2),
|
||||
},
|
||||
{
|
||||
Inputs: [][][]byte{toBytes(9), toBytes(1, 16, 16), toBytes(5, 10)},
|
||||
Out: toBytes(1, 5, 9, 10, 16),
|
||||
},
|
||||
{
|
||||
Inputs: [][][]byte{toBytes(20), toBytes(16), toBytes(10)},
|
||||
Out: toBytes(10, 16, 20),
|
||||
},
|
||||
{
|
||||
Inputs: [][][]byte{toBytes(2, 2, 2, 2, 2, 2, 2, 2)},
|
||||
Out: toBytes(2),
|
||||
},
|
||||
{
|
||||
Inputs: [][][]byte{toBytes(2, 2, 2, 2, 2, 2, 2, 2), [][]byte(nil), [][]byte(nil), [][]byte(nil)},
|
||||
Out: toBytes(2),
|
||||
},
|
||||
{
|
||||
Inputs: [][][]byte{toBytes(1, 2, 3, 4, 5), toBytes(1, 2, 3, 4, 5), toBytes(1, 2, 3, 4, 5)},
|
||||
Out: toBytes(1, 2, 3, 4, 5),
|
||||
},
|
||||
}
|
||||
|
||||
for i, c := range cases {
|
||||
t.Run(fmt.Sprintf("Example %d", i+1), func(t *testing.T) {
|
||||
if got, exp := slices.MergeSortedBytes(c.Inputs...), c.Out; !reflect.DeepEqual(got, exp) {
|
||||
t.Fatalf("got %v, expected %v", got, exp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// toBytes converts each value of a to a byte (keeping only its low 8 bits) and
// wraps it in its own one-element byte slice, preserving order. It returns nil
// when called with no arguments.
func toBytes(a ...int) [][]byte {
	if len(a) == 0 {
		return nil
	}

	// The final length is known, so allocate the result once.
	result := make([][]byte, 0, len(a))
	for _, v := range a {
		result = append(result, []byte{byte(v)})
	}
	return result
}
|
||||
|
||||
func TestMergeSortedInts(t *testing.T) {
|
||||
cases := []struct {
|
||||
Inputs [][]int64
|
||||
Out []int64
|
||||
}{
|
||||
{Inputs: [][]int64{}},
|
||||
{Inputs: [][]int64{[]int64{0}}, Out: []int64{0}},
|
||||
{
|
||||
Inputs: [][]int64{[]int64{2}, []int64(nil), []int64{2}},
|
||||
Out: []int64{2},
|
||||
},
|
||||
{
|
||||
Inputs: [][]int64{[]int64{9}, []int64{1, 16, 16}, []int64{5, 10}},
|
||||
Out: []int64{1, 5, 9, 10, 16},
|
||||
},
|
||||
{
|
||||
Inputs: [][]int64{[]int64{20}, []int64{16}, []int64{10}},
|
||||
Out: []int64{10, 16, 20},
|
||||
},
|
||||
{
|
||||
Inputs: [][]int64{[]int64{2, 2, 2, 2, 2, 2, 2, 2}},
|
||||
Out: []int64{2},
|
||||
},
|
||||
{
|
||||
Inputs: [][]int64{[]int64{2, 2, 2, 2, 2, 2, 2, 2}, []int64(nil), []int64(nil), []int64(nil)},
|
||||
Out: []int64{2},
|
||||
},
|
||||
{
|
||||
Inputs: [][]int64{[]int64{1, 2, 3, 4, 5}, []int64{1, 2, 3, 4, 5}, []int64{1, 2, 3, 4, 5}},
|
||||
Out: []int64{1, 2, 3, 4, 5},
|
||||
},
|
||||
}
|
||||
|
||||
for i, c := range cases {
|
||||
t.Run(fmt.Sprintf("Example %d", i+1), func(t *testing.T) {
|
||||
if got, exp := slices.MergeSortedInts(c.Inputs...), c.Out; !reflect.DeepEqual(got, exp) {
|
||||
t.Fatalf("got %v, expected %v", got, exp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
[
|
||||
{
|
||||
"Name":"Floats",
|
||||
"Type":"float64"
|
||||
},
|
||||
{
|
||||
"Name":"Ints",
|
||||
"Type":"int64"
|
||||
},
|
||||
{
|
||||
"Name":"UInts",
|
||||
"Type":"uint64"
|
||||
},
|
||||
{
|
||||
"Name":"Strings",
|
||||
"Type":"string"
|
||||
},
|
||||
{
|
||||
"Name":"Bytes",
|
||||
"Type":"[]byte"
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue