Merge pull request #5031 from influxdb/jw-mintime

Dedupe points at query time if there are overlapping blocks

commit d32aeb2535
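The problem being fixed: until a compaction merges the files, an overwritten point can live in blocks from more than one TSM file, so a query that reads those blocks has to reconcile them and return the newest write. As a rough standalone illustration of that behavior (the point and dedupe names below are invented for the sketch and are not tsm1 types), merging the overlapping blocks oldest file first and letting the last value win per timestamp gives the result a query should see:

package main

import (
	"fmt"
	"sort"
)

// point is a stand-in for a decoded TSM value: a timestamp paired with a value.
type point struct {
	unixNano int64
	val      float64
}

// dedupe stably sorts by time and keeps the last occurrence of each timestamp,
// so the value from the most recently written file wins.
func dedupe(points []point) []point {
	sort.SliceStable(points, func(i, j int) bool { return points[i].unixNano < points[j].unixNano })
	out := points[:0]
	for _, p := range points {
		if n := len(out); n > 0 && out[n-1].unixNano == p.unixNano {
			out[n-1] = p // later duplicate overwrites the earlier one
			continue
		}
		out = append(out, p)
	}
	return out
}

func main() {
	older := []point{{unixNano: 0, val: 1.0}, {unixNano: 2, val: 3.0}} // first TSM file
	newer := []point{{unixNano: 0, val: 2.0}, {unixNano: 2, val: 4.0}} // later file overwrote both points
	merged := append(append([]point{}, older...), newer...)
	fmt.Println(dedupe(merged)) // [{0 2} {2 4}]
}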
@@ -208,6 +208,15 @@ func (e *IndexEntry) Contains(t time.Time) bool {
 		(e.MaxTime.Equal(t) || e.MaxTime.After(t))
 }
 
+func (e *IndexEntry) OverlapsTimeRange(min, max time.Time) bool {
+	return (e.MinTime.Equal(max) || e.MinTime.Before(max)) &&
+		(e.MaxTime.Equal(min) || e.MaxTime.After(min))
+}
+
+func (e *IndexEntry) String() string {
+	return fmt.Sprintf("min=%s max=%s ofs=%d siz=%d", e.MinTime.UTC(), e.MaxTime.UTC(), e.Offset, e.Size)
+}
+
 func NewDirectIndex() TSMIndex {
 	return &directIndex{
 		blocks: map[string]*indexEntries{},
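The new IndexEntry.OverlapsTimeRange treats both endpoints as inclusive: two ranges overlap whenever each one starts no later than the other ends, so blocks that merely touch at a boundary timestamp also count as overlapping. A minimal standalone version of the same comparison (plain time.Time arguments instead of an IndexEntry; the overlaps name is invented here):

package main

import (
	"fmt"
	"time"
)

// overlaps reports whether [aMin, aMax] and [bMin, bMax] intersect, treating
// both endpoints as inclusive, the same comparison OverlapsTimeRange performs.
func overlaps(aMin, aMax, bMin, bMax time.Time) bool {
	return (aMin.Equal(bMax) || aMin.Before(bMax)) &&
		(aMax.Equal(bMin) || aMax.After(bMin))
}

func main() {
	t0, t1, t2 := time.Unix(0, 0), time.Unix(1, 0), time.Unix(2, 0)
	fmt.Println(overlaps(t0, t1, t1, t2)) // true: the ranges touch at t1
	fmt.Println(overlaps(t0, t0, t2, t2)) // false: disjoint
}

Using inclusive bounds errs on the side of treating adjacent blocks as overlapping, which is the safe direction when deciding whether deduplication is needed.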
@@ -443,14 +443,31 @@ func ParseTSMFileName(name string) (int, int, error) {
 }
 
 type KeyCursor struct {
 	key string
 	fs *FileStore
-	seeks []*location
-	current *location
-	buf []Value
+	// seeks is all the file locations that we need to return during iteration.
+	seeks []*location
+
+	// current is the set of blocks possibly containing the next set of points.
+	// Normally this is just one entry, but there may be multiple if points have
+	// been overwritten.
+	current []*location
+	buf []Value
+
+	// pos is the index within seeks. Based on ascending, it will increment or
+	// decrement through the size of seeks slice.
 	pos int
 	ascending bool
-	ready bool
+
+	// ready indicates that we know the files and blocks to seek to for the key.
+	ready bool
+
+	// duplicates is a hint that there are overlapping blocks for this key in
+	// multiple files (e.g. points have been overwritten but not fully compacted).
+	// If this is true, we need to scan the duplicate blocks and dedup the points
+	// at query time until they are compacted.
+	duplicates bool
 }
 
 type location struct {
@@ -462,8 +479,20 @@ func (c *KeyCursor) init(t time.Time, ascending bool) {
 	if c.ready {
 		return
 	}
 	c.ascending = ascending
 	c.seeks = c.fs.locations(c.key, t, ascending)
+
+	if len(c.seeks) > 0 {
+		for i := 1; i < len(c.seeks); i++ {
+			prev := c.seeks[i-1]
+			cur := c.seeks[i]
+
+			if prev.entry.MaxTime.Equal(cur.entry.MinTime) || prev.entry.MaxTime.After(cur.entry.MinTime) {
+				c.duplicates = true
+				break
+			}
+		}
+	}
 	c.buf = make([]Value, 1000)
 	c.ready = true
 }
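init only sets the duplicates hint when two adjacent entries in the seeks slice touch or overlap; the adjacent-pair check assumes the slice is ordered by minimum time, so any overlap must show up between neighbors, and the common non-overlapping case stays on the cheap single-block path in SeekTo and Next. A small self-contained sketch of that scan (using a made-up span type in place of *location):

package main

import (
	"fmt"
	"time"
)

// span is a stand-in for the time range covered by one index entry.
type span struct{ min, max time.Time }

// hasDuplicates mirrors the adjacent-pair scan in init: with spans sorted by
// min time, overlapping entries must appear as neighbors.
func hasDuplicates(spans []span) bool {
	for i := 1; i < len(spans); i++ {
		prev, cur := spans[i-1], spans[i]
		if prev.max.Equal(cur.min) || prev.max.After(cur.min) {
			return true
		}
	}
	return false
}

func main() {
	a := span{time.Unix(0, 0), time.Unix(10, 0)}
	b := span{time.Unix(5, 0), time.Unix(20, 0)} // starts before a ends
	c := span{time.Unix(30, 0), time.Unix(40, 0)}
	fmt.Println(hasDuplicates([]span{a, b, c})) // true
	fmt.Println(hasDuplicates([]span{a, c}))    // false
}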
@@ -478,42 +507,127 @@ func (c *KeyCursor) SeekTo(t time.Time, ascending bool) ([]Value, error) {
 	if ascending {
 		for i, e := range c.seeks {
 			if t.Before(e.entry.MinTime) || e.entry.Contains(t) {
-				c.current = e
-				c.pos = i
-				break
+				// Record the position of the first block matching our seek time
+				if len(c.current) == 0 {
+					c.pos = i
+				}
+
+				c.current = append(c.current, e)
+
+				// If we don't have duplicates, break. Otherwise, keep looking for additional blocks containing
+				// this point.
+				if !c.duplicates {
+					break
+				}
 			}
 		}
 	} else {
 		for i := len(c.seeks) - 1; i >= 0; i-- {
 			e := c.seeks[i]
 			if t.After(e.entry.MaxTime) || e.entry.Contains(t) {
-				c.current = e
-				c.pos = i
-				break
+				// Record the position of the first block matching our seek time
+				if len(c.current) == 0 {
+					c.pos = i
+				}
+
+				c.current = append(c.current, e)
+
+				// If we don't have duplicates, break. Otherwise, keep looking for additional blocks containing
+				// this point.
+				if !c.duplicates {
+					break
+				}
 			}
 		}
 	}
 
-	if c.current == nil {
-		return nil, nil
-	}
-	return c.current.r.ReadAt(c.current.entry, c.buf[:0])
+	return c.readAt()
+}
+
+func (c *KeyCursor) readAt() ([]Value, error) {
+	// No matching blocks to decode
+	if len(c.current) == 0 {
+		return nil, nil
+	}
+
+	// First block is the oldest block containing the points we're searching for.
+	first := c.current[0]
+	values, err := first.r.ReadAt(first.entry, c.buf[:0])
+
+	// Only one block with this key and time range so return it
+	if len(c.current) == 1 {
+		return values, err
+	}
+
+	// Otherwise, search the remaining blocks that overlap and append their values so we can
+	// dedup them.
+	for i := 1; i < len(c.current); i++ {
+		cur := c.current[i]
+		if c.ascending && cur.entry.OverlapsTimeRange(first.entry.MinTime, first.entry.MaxTime) {
+			c.pos++
+			v, err := cur.r.ReadAt(cur.entry, nil)
+			if err != nil {
+				return nil, err
+			}
+			values = append(values, v...)
+
+		} else if !c.ascending && cur.entry.OverlapsTimeRange(first.entry.MinTime, first.entry.MaxTime) {
+			c.pos--
+
+			v, err := cur.r.ReadAt(cur.entry, nil)
+			if err != nil {
+				return nil, err
+			}
+			values = append(v, values...)
+		}
+	}
+
+	return Values(values).Deduplicate(), err
 }
 
 func (c *KeyCursor) Next(ascending bool) ([]Value, error) {
+	c.current = c.current[:0]
+
 	if ascending {
 		c.pos++
 		if c.pos >= len(c.seeks) {
 			return nil, nil
 		}
-		c.current = c.seeks[c.pos]
-		return c.current.r.ReadAt(c.current.entry, c.buf[:0])
+		// Append the first matching block
+		c.current = []*location{c.seeks[c.pos]}
+
+		// If we have overlapping blocks, append all their values so we can dedup
+		if c.duplicates {
+			first := c.seeks[c.pos]
+			for i := c.pos; i < len(c.seeks); i++ {
+				if c.seeks[i].entry.MinTime.Before(first.entry.MaxTime) || c.seeks[i].entry.MinTime.Equal(first.entry.MaxTime) {
+					c.current = append(c.current, c.seeks[i])
+				}
+			}
+		}
+
+		return c.readAt()
+
 	} else {
 		c.pos--
 		if c.pos < 0 {
 			return nil, nil
 		}
-		c.current = c.seeks[c.pos]
-		return c.current.r.ReadAt(c.current.entry, c.buf[:0])
+		// Append the first matching block
+		c.current = []*location{c.seeks[c.pos]}
+
+		// If we have overlapping blocks, append all their values so we can dedup
+		if c.duplicates {
+			first := c.seeks[c.pos]
+			for i := c.pos; i >= 0; i-- {
+				if c.seeks[i].entry.MaxTime.After(first.entry.MinTime) || c.seeks[i].entry.MaxTime.Equal(first.entry.MinTime) {
+					c.current = append(c.current, c.seeks[i])
+				}
+			}
+		}
+
+		return c.readAt()
 	}
 }
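The new readAt is where the merge actually happens: it decodes the first matching block (the oldest one for ascending reads), pulls in values from any other blocks whose index entries overlap it, and hands the combined slice to Values(values).Deduplicate(). The ordering differs by direction, ascending appends newer blocks after the first one while descending prepends older blocks in front, but both end up with values ordered oldest write first. The sketch below is standalone, with invented val and dedup names, and assumes (as the tests below suggest) that deduplication keeps the most recently written value for a duplicated timestamp:

package main

import (
	"fmt"
	"sort"
)

// val is a stand-in for a decoded point: timestamp plus value.
type val struct {
	t int64
	v float64
}

// dedup stably sorts by time and keeps the last value for each timestamp,
// the property the cursor relies on from the real Deduplicate here.
func dedup(vals []val) []val {
	sort.SliceStable(vals, func(i, j int) bool { return vals[i].t < vals[j].t })
	out := vals[:0]
	for _, x := range vals {
		if n := len(out); n > 0 && out[n-1].t == x.t {
			out[n-1] = x
			continue
		}
		out = append(out, x)
	}
	return out
}

func main() {
	older := []val{{t: 2, v: 3.0}} // block from the earlier TSM file
	newer := []val{{t: 2, v: 4.0}} // overlapping block from a later file

	// Ascending readAt starts with the oldest block's values and appends the
	// newer block's values after them.
	asc := append(append([]val{}, older...), newer...)

	// Descending readAt starts with the newest block's values and prepends each
	// older block's values in front of them (the append(v, values...) pattern).
	desc := append([]val{}, newer...)
	desc = append(append([]val{}, older...), desc...)

	// Either way the slice runs oldest write to newest write, so a last-wins
	// dedup returns the most recently written point.
	fmt.Println(dedup(asc), dedup(desc)) // [{2 4}] [{2 4}]
}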
@@ -83,6 +83,59 @@ func TestFileStore_SeekToAsc_FromStart(t *testing.T) {
 	}
 }
 
+func TestFileStore_SeekToAsc_Duplicate(t *testing.T) {
+	fs := tsm1.NewFileStore("")
+
+	// Setup 4 files
+	data := []keyValues{
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(0, 0), 1.0)}},
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(0, 0), 2.0)}},
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(2, 0), 3.0)}},
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(2, 0), 4.0)}},
+	}
+
+	files, err := newFiles(data...)
+	if err != nil {
+		t.Fatalf("unexpected error creating files: %v", err)
+	}
+
+	fs.Add(files...)
+
+	c := fs.KeyCursor("cpu")
+	// Search for an entry that exists in the second file
+	values, err := c.SeekTo(time.Unix(0, 0), true)
+	if err != nil {
+		t.Fatalf("unexpected error reading values: %v", err)
+	}
+
+	exp := data[1]
+	if got, exp := len(values), len(exp.values); got != exp {
+		t.Fatalf("value length mismatch: got %v, exp %v", got, exp)
+	}
+
+	for i, v := range exp.values {
+		if got, exp := values[i].Value(), v.Value(); got != exp {
+			t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp)
+		}
+	}
+
+	// Check that calling Next will dedupe points
+	values, err = c.Next(true)
+	if err != nil {
+		t.Fatalf("unexpected error reading values: %v", err)
+	}
+	exp = data[3]
+	if got, exp := len(values), len(exp.values); got != exp {
+		t.Fatalf("value length mismatch: got %v, exp %v", got, exp)
+	}
+
+	for i, v := range exp.values {
+		if got, exp := values[i].Value(), v.Value(); got != exp {
+			t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp)
+		}
+	}
+}
+
 func TestFileStore_SeekToAsc_BeforeStart(t *testing.T) {
 	fs := tsm1.NewFileStore("")
@@ -226,6 +279,57 @@ func TestFileStore_SeekToDesc_FromStart(t *testing.T) {
 	}
 }
 
+func TestFileStore_SeekToDesc_Duplicate(t *testing.T) {
+	fs := tsm1.NewFileStore("")
+
+	// Setup 4 files
+	data := []keyValues{
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(0, 0), 4.0)}},
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(0, 0), 1.0)}},
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(2, 0), 2.0)}},
+		keyValues{"cpu", []tsm1.Value{tsm1.NewValue(time.Unix(2, 0), 3.0)}},
+	}
+
+	files, err := newFiles(data...)
+	if err != nil {
+		t.Fatalf("unexpected error creating files: %v", err)
+	}
+
+	fs.Add(files...)
+
+	// Search for an entry that exists in the second file
+	c := fs.KeyCursor("cpu")
+	values, err := c.SeekTo(time.Unix(2, 0), false)
+	if err != nil {
+		t.Fatalf("unexpected error reading values: %v", err)
+	}
+	exp := data[3]
+	if got, exp := len(values), len(exp.values); got != exp {
+		t.Fatalf("value length mismatch: got %v, exp %v", got, exp)
+	}
+
+	for i, v := range exp.values {
+		if got, exp := values[i].Value(), v.Value(); got != exp {
+			t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp)
+		}
+	}
+
+	// Check that calling Next will dedupe points
+	values, err = c.Next(false)
+	if err != nil {
+		t.Fatalf("unexpected error reading values: %v", err)
+	}
+	exp = data[1]
+	if got, exp := len(values), len(exp.values); got != exp {
+		t.Fatalf("value length mismatch: got %v, exp %v", got, exp)
+	}
+
+	for i, v := range exp.values {
+		if got, exp := values[i].Value(), v.Value(); got != exp {
+			t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp)
+		}
+	}
+}
+
 func TestFileStore_SeekToDesc_AfterEnd(t *testing.T) {
 	fs := tsm1.NewFileStore("")