<type>Values.Exclude function uses binary search and copy builtin

```
± benchcmp old.txt new.txt
benchmark                                            old ns/op     new ns/op     delta
BenchmarkIntegerValues_ExcludeNone_1000-8            1285          7.34          -99.43%
BenchmarkIntegerValues_ExcludeMiddleHalf_1000-8      1258          148           -88.24%
BenchmarkIntegerValues_ExcludeFirst_1000-8           1268          7.51          -99.41%
BenchmarkIntegerValues_ExcludeLast_1000-8            1125          27.7          -97.54%
BenchmarkIntegerValues_ExcludeNone_10000-8           12665         7.31          -99.94%
BenchmarkIntegerValues_ExcludeMiddleHalf_10000-8     12039         976           -91.89%
BenchmarkIntegerValues_ExcludeFirst_10000-8          12663         7.29          -99.94%
BenchmarkIntegerValues_ExcludeLast_10000-8           10990         34.9          -99.68%
```

(cherry picked from commit d7a3c23)
pull/8851/head
Stuart Carnie 2017-09-01 10:57:14 -07:00
parent 57b0e8efb0
commit ca40c1ad3c
3 changed files with 536 additions and 63 deletions

View File

@ -88,16 +88,65 @@ func (a Values) Deduplicate() Values {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a Values) Exclude(min, max int64) Values {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a) - rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search returns the index of the first element of a whose UnixNano() is
// not less than v, or len(a) when every element is smaller. That index is
// where v would be inserted; the caller must still compare a[i].UnixNano()
// with v (after checking i < len(a)) to learn whether v is present.
// The result is only meaningful when a is ordered by ascending UnixNano().
func (a Values) search(v int64) int {
	// The candidate window is a[first : first+width]; everything before it
	// is < v and everything from its end onwards is >= v.
	first, width := 0, len(a)
	for width > 0 {
		half := width >> 1
		probe := first + half
		if a[probe].UnixNano() < v {
			// probe and everything to its left is too small.
			first = probe + 1
			width -= half + 1
		} else {
			// a[probe] >= v: the answer is probe or lies to its left.
			width = half
		}
	}
	return first
}
// FindRange returns the positions where min and max would be
// inserted into the array. It returns (-1, -1) when no element can
// fall inside [min, max]: a is empty, min > max,
// a[0].UnixNano() > max, or a[len-1].UnixNano() < min.
//
// The empty and min > max guards matter to callers: without them an empty
// slice panics on a[0], and an inverted range yields a left index greater
// than the right one.
func (a Values) FindRange(min, max int64) (int, int) {
	if len(a) == 0 || min > max {
		return -1, -1
	}

	minVal := a[0].UnixNano()
	maxVal := a[len(a)-1].UnixNano()
	if maxVal < min || minVal > max {
		return -1, -1
	}

	return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.
@ -238,16 +287,65 @@ func (a FloatValues) Deduplicate() FloatValues {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a FloatValues) Exclude(min, max int64) FloatValues {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a) - rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search returns the index of the first element of a whose UnixNano() is
// not less than v, or len(a) when every element is smaller. That index is
// where v would be inserted; the caller must still compare a[i].UnixNano()
// with v (after checking i < len(a)) to learn whether v is present.
// The result is only meaningful when a is ordered by ascending UnixNano().
func (a FloatValues) search(v int64) int {
	// The candidate window is a[first : first+width]; everything before it
	// is < v and everything from its end onwards is >= v.
	first, width := 0, len(a)
	for width > 0 {
		half := width >> 1
		probe := first + half
		if a[probe].UnixNano() < v {
			// probe and everything to its left is too small.
			first = probe + 1
			width -= half + 1
		} else {
			// a[probe] >= v: the answer is probe or lies to its left.
			width = half
		}
	}
	return first
}
// FindRange returns the positions where min and max would be
// inserted into the array. It returns (-1, -1) when no element can
// fall inside [min, max]: a is empty, min > max,
// a[0].UnixNano() > max, or a[len-1].UnixNano() < min.
//
// The empty and min > max guards matter to callers: without them an empty
// slice panics on a[0], and an inverted range yields a left index greater
// than the right one.
func (a FloatValues) FindRange(min, max int64) (int, int) {
	if len(a) == 0 || min > max {
		return -1, -1
	}

	minVal := a[0].UnixNano()
	maxVal := a[len(a)-1].UnixNano()
	if maxVal < min || minVal > max {
		return -1, -1
	}

	return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.
@ -432,16 +530,65 @@ func (a IntegerValues) Deduplicate() IntegerValues {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a IntegerValues) Exclude(min, max int64) IntegerValues {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a) - rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search returns the index of the first element of a whose UnixNano() is
// not less than v, or len(a) when every element is smaller. That index is
// where v would be inserted; the caller must still compare a[i].UnixNano()
// with v (after checking i < len(a)) to learn whether v is present.
// The result is only meaningful when a is ordered by ascending UnixNano().
func (a IntegerValues) search(v int64) int {
	// The candidate window is a[first : first+width]; everything before it
	// is < v and everything from its end onwards is >= v.
	first, width := 0, len(a)
	for width > 0 {
		half := width >> 1
		probe := first + half
		if a[probe].UnixNano() < v {
			// probe and everything to its left is too small.
			first = probe + 1
			width -= half + 1
		} else {
			// a[probe] >= v: the answer is probe or lies to its left.
			width = half
		}
	}
	return first
}
// FindRange returns the positions where min and max would be
// inserted into the array. It returns (-1, -1) when no element can
// fall inside [min, max]: a is empty, min > max,
// a[0].UnixNano() > max, or a[len-1].UnixNano() < min.
//
// The empty and min > max guards matter to callers: without them an empty
// slice panics on a[0], and an inverted range yields a left index greater
// than the right one.
func (a IntegerValues) FindRange(min, max int64) (int, int) {
	if len(a) == 0 || min > max {
		return -1, -1
	}

	minVal := a[0].UnixNano()
	maxVal := a[len(a)-1].UnixNano()
	if maxVal < min || minVal > max {
		return -1, -1
	}

	return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.
@ -626,16 +773,65 @@ func (a UnsignedValues) Deduplicate() UnsignedValues {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a UnsignedValues) Exclude(min, max int64) UnsignedValues {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a) - rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search returns the index of the first element of a whose UnixNano() is
// not less than v, or len(a) when every element is smaller. That index is
// where v would be inserted; the caller must still compare a[i].UnixNano()
// with v (after checking i < len(a)) to learn whether v is present.
// The result is only meaningful when a is ordered by ascending UnixNano().
func (a UnsignedValues) search(v int64) int {
	// The candidate window is a[first : first+width]; everything before it
	// is < v and everything from its end onwards is >= v.
	first, width := 0, len(a)
	for width > 0 {
		half := width >> 1
		probe := first + half
		if a[probe].UnixNano() < v {
			// probe and everything to its left is too small.
			first = probe + 1
			width -= half + 1
		} else {
			// a[probe] >= v: the answer is probe or lies to its left.
			width = half
		}
	}
	return first
}
// FindRange returns the positions where min and max would be
// inserted into the array. It returns (-1, -1) when no element can
// fall inside [min, max]: a is empty, min > max,
// a[0].UnixNano() > max, or a[len-1].UnixNano() < min.
//
// The empty and min > max guards matter to callers: without them an empty
// slice panics on a[0], and an inverted range yields a left index greater
// than the right one.
func (a UnsignedValues) FindRange(min, max int64) (int, int) {
	if len(a) == 0 || min > max {
		return -1, -1
	}

	minVal := a[0].UnixNano()
	maxVal := a[len(a)-1].UnixNano()
	if maxVal < min || minVal > max {
		return -1, -1
	}

	return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.
@ -820,16 +1016,65 @@ func (a StringValues) Deduplicate() StringValues {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a StringValues) Exclude(min, max int64) StringValues {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a) - rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search returns the index of the first element of a whose UnixNano() is
// not less than v, or len(a) when every element is smaller. That index is
// where v would be inserted; the caller must still compare a[i].UnixNano()
// with v (after checking i < len(a)) to learn whether v is present.
// The result is only meaningful when a is ordered by ascending UnixNano().
func (a StringValues) search(v int64) int {
	// The candidate window is a[first : first+width]; everything before it
	// is < v and everything from its end onwards is >= v.
	first, width := 0, len(a)
	for width > 0 {
		half := width >> 1
		probe := first + half
		if a[probe].UnixNano() < v {
			// probe and everything to its left is too small.
			first = probe + 1
			width -= half + 1
		} else {
			// a[probe] >= v: the answer is probe or lies to its left.
			width = half
		}
	}
	return first
}
// FindRange returns the positions where min and max would be
// inserted into the array. It returns (-1, -1) when no element can
// fall inside [min, max]: a is empty, min > max,
// a[0].UnixNano() > max, or a[len-1].UnixNano() < min.
//
// The empty and min > max guards matter to callers: without them an empty
// slice panics on a[0], and an inverted range yields a left index greater
// than the right one.
func (a StringValues) FindRange(min, max int64) (int, int) {
	if len(a) == 0 || min > max {
		return -1, -1
	}

	minVal := a[0].UnixNano()
	maxVal := a[len(a)-1].UnixNano()
	if maxVal < min || minVal > max {
		return -1, -1
	}

	return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.
@ -1014,16 +1259,65 @@ func (a BooleanValues) Deduplicate() BooleanValues {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a BooleanValues) Exclude(min, max int64) BooleanValues {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a) - rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search returns the index of the first element of a whose UnixNano() is
// not less than v, or len(a) when every element is smaller. That index is
// where v would be inserted; the caller must still compare a[i].UnixNano()
// with v (after checking i < len(a)) to learn whether v is present.
// The result is only meaningful when a is ordered by ascending UnixNano().
func (a BooleanValues) search(v int64) int {
	// The candidate window is a[first : first+width]; everything before it
	// is < v and everything from its end onwards is >= v.
	first, width := 0, len(a)
	for width > 0 {
		half := width >> 1
		probe := first + half
		if a[probe].UnixNano() < v {
			// probe and everything to its left is too small.
			first = probe + 1
			width -= half + 1
		} else {
			// a[probe] >= v: the answer is probe or lies to its left.
			width = half
		}
	}
	return first
}
// FindRange returns the positions where min and max would be
// inserted into the array. It returns (-1, -1) when no element can
// fall inside [min, max]: a is empty, min > max,
// a[0].UnixNano() > max, or a[len-1].UnixNano() < min.
//
// The empty and min > max guards matter to callers: without them an empty
// slice panics on a[0], and an inverted range yields a left index greater
// than the right one.
func (a BooleanValues) FindRange(min, max int64) (int, int) {
	if len(a) == 0 || min > max {
		return -1, -1
	}

	minVal := a[0].UnixNano()
	maxVal := a[len(a)-1].UnixNano()
	if maxVal < min || minVal > max {
		return -1, -1
	}

	return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.

View File

@ -85,16 +85,65 @@ func (a {{.Name}}Values) Deduplicate() {{.Name}}Values {
// Exclude returns the subset of values not in [min, max]. Both bounds are
// inclusive and are compared against each element's UnixNano().
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// removed linear scan (the `for j` loop, `a[i] = a[j]`, `return a[:i]`) is
// interleaved with the added FindRange/copy implementation, so the text does
// not compile as shown. Both variants write into a's own backing array and
// return a sub-slice of a.
func (a {{.Name}}Values) Exclude(min, max int64) {{.Name}}Values {
var i int
for j := 0; j < len(a); j++ {
if a[j].UnixNano() >= min && a[j].UnixNano() <= max {
continue
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return a
}
a[i] = a[j]
i++
// a[rmin].UnixNano() ≥ min
// a[rmax].UnixNano() ≥ max
if rmax < len(a) {
if a[rmax].UnixNano() == max {
rmax++
}
return a[:i]
rest := len(a)-rmax
if rest > 0 {
b := a[:rmin+rest]
// Slide the elements that follow the excluded range down to rmin.
copy(b[rmin:], a[rmax:])
return b
}
}
return a[:rmin]
}
// search performs a binary search for UnixNano() v in a
// and returns the position, i, where v would be inserted.
// An additional check of a[i].UnixNano() == v is necessary
// to determine if the value v exists.
func (a {{.Name}}Values) search(v int64) int {
// Define: f(x) → a[x].UnixNano() < v
// Define: f(-1) == true, f(n) == false
// Invariant: f(lo-1) == true, f(hi) == false
lo := 0
hi := len(a)
for lo < hi {
mid := int(uint(lo+hi) >> 1)
if a[mid].UnixNano() < v {
lo = mid + 1 // preserves f(lo-1) == true
} else {
hi = mid // preserves f(hi) == false
}
}
// lo == hi
return lo
}
// FindRange returns the positions where min and max would be
// inserted into the array. If a[0].UnixNano() > max or
// a[len-1].UnixNano() < min then FindRange returns (-1, -1)
// indicating the array is outside the [min, max].
func (a {{.Name}}Values) FindRange(min, max int64) (int, int) {
minVal := a[0].UnixNano()
maxVal := a[len(a)-1].UnixNano()
if maxVal < min || minVal > max {
return -1, -1
}
return a.search(min), a.search(max)
}
// Include returns the subset values between min and max inclusive.

View File

@ -0,0 +1,130 @@
package tsm1
import (
"fmt"
"testing"
"github.com/google/go-cmp/cmp"
)
// makeIntegerValues builds count values with evenly spaced timestamps.
// The first timestamp is min and each following one advances by
// (max-min)/count (integer division), so the last timestamp is
// min + (count-1)*step. Only the unixnano field of each element is set.
// A non-positive count yields an empty slice instead of dividing by zero.
func makeIntegerValues(count int, min, max int64) IntegerValues {
	if count <= 0 {
		return IntegerValues{}
	}

	vals := make(IntegerValues, count)
	inc := (max - min) / int64(count)
	ts := min
	for i := range vals {
		vals[i].unixnano = ts
		ts += inc
	}
	return vals
}
// makeIntegerValuesFromSlice returns one value per entry of t, in the same
// order, with the entry stored in the value's unixnano field. All other
// fields are left at their zero value.
func makeIntegerValuesFromSlice(t []int64) IntegerValues {
	out := make(IntegerValues, len(t))
	for idx := range t {
		out[idx].unixnano = t[idx]
	}
	return out
}
// TestIntegerValues_FindRange checks the (left, right) pair FindRange
// reports for ranges that overlap, touch, and miss a fixed set of
// timestamps.
func TestIntegerValues_FindRange(t *testing.T) {
	type rangeCase struct {
		min, max    int64
		left, right int
	}

	input := makeIntegerValuesFromSlice([]int64{10, 11, 13, 15, 17, 20, 21})

	tests := []rangeCase{
		{min: 12, max: 20, left: 2, right: 5},
		{min: 22, max: 40, left: -1, right: -1}, // entirely above the data
		{min: 1, max: 9, left: -1, right: -1},   // entirely below the data
		{min: 1, max: 10, left: 0, right: 0},
		{min: 1, max: 11, left: 0, right: 1},
		{min: 15, max: 15, left: 3, right: 3},
	}

	for i := range tests {
		tc := tests[i]
		name := fmt.Sprintf("%d→%d", tc.min, tc.max)
		t.Run(name, func(t *testing.T) {
			gotLeft, gotRight := input.FindRange(tc.min, tc.max)
			if gotLeft != tc.left {
				t.Errorf("left: got %d, exp %d", gotLeft, tc.left)
			}
			if gotRight != tc.right {
				t.Errorf("right: got %d, exp %d", gotRight, tc.right)
			}
		})
	}
}
// TestIntegerValues_Exclude runs Exclude over the timestamps
// 10, 12, 14, 16, 18 and compares the surviving timestamps with the
// expectation for each [min, max] range.
func TestIntegerValues_Exclude(t *testing.T) {
	type excludeCase struct {
		n        string
		min, max int64
		exp      []int64
	}

	tests := []excludeCase{
		{n: "excl none-lo", min: 0, max: 9, exp: []int64{10, 12, 14, 16, 18}},
		{n: "excl none-hi", min: 19, max: 30, exp: []int64{10, 12, 14, 16, 18}},
		{n: "excl first", min: 0, max: 10, exp: []int64{12, 14, 16, 18}},
		{n: "excl last", min: 18, max: 20, exp: []int64{10, 12, 14, 16}},
		{n: "excl all but first and last", min: 12, max: 16, exp: []int64{10, 18}},
		{n: "excl none in middle", min: 13, max: 13, exp: []int64{10, 12, 14, 16, 18}},
		{n: "excl middle", min: 14, max: 14, exp: []int64{10, 12, 16, 18}},
	}

	for i := range tests {
		tc := tests[i]
		t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) {
			// Build a fresh input per case because Exclude reuses the
			// receiver's backing array.
			remaining := makeIntegerValues(5, 10, 20).Exclude(tc.min, tc.max)

			var got []int64
			for j := range remaining {
				got = append(got, remaining[j].unixnano)
			}

			opt := cmp.AllowUnexported(IntegerValue{})
			if cmp.Equal(tc.exp, got, opt) {
				return
			}
			t.Error(cmp.Diff(tc.exp, got, opt))
		})
	}
}
// benchExclude times vals.Exclude(min, max) for b.N iterations.
//
// Exclude compacts its receiver in place, so calling it repeatedly on the
// same slice would leave every iteration after the first working on
// already-shifted (and possibly unsorted) data. To keep each iteration
// identical, the input is copied into a scratch slice before every call and
// vals itself is never modified. The copy is part of the timed region; it
// adds the same fixed cost to every implementation being compared.
func benchExclude(b *testing.B, vals IntegerValues, min, max int64) {
	scratch := make(IntegerValues, len(vals))

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		copy(scratch, vals)
		scratch.Exclude(min, max)
	}
}
func BenchmarkIntegerValues_ExcludeNone_1000(b *testing.B) {
benchExclude(b, makeIntegerValues(1000, 1000, 2000), 0, 500)
}
func BenchmarkIntegerValues_ExcludeMiddleHalf_1000(b *testing.B) {
benchExclude(b, makeIntegerValues(1000, 1000, 2000), 1250, 1750)
}
func BenchmarkIntegerValues_ExcludeFirst_1000(b *testing.B) {
benchExclude(b, makeIntegerValues(1000, 1000, 2000), 0, 1000)
}
func BenchmarkIntegerValues_ExcludeLast_1000(b *testing.B) {
benchExclude(b, makeIntegerValues(1000, 1000, 2000), 1999, 2000)
}
func BenchmarkIntegerValues_ExcludeNone_10000(b *testing.B) {
benchExclude(b, makeIntegerValues(10000, 10000, 20000), 00, 5000)
}
func BenchmarkIntegerValues_ExcludeMiddleHalf_10000(b *testing.B) {
benchExclude(b, makeIntegerValues(10000, 10000, 20000), 12500, 17500)
}
func BenchmarkIntegerValues_ExcludeFirst_10000(b *testing.B) {
benchExclude(b, makeIntegerValues(10000, 10000, 20000), 0, 10000)
}
func BenchmarkIntegerValues_ExcludeLast_10000(b *testing.B) {
benchExclude(b, makeIntegerValues(10000, 10000, 20000), 19999, 20000)
}