influxdb/tsdb/engine/tsi1/term_list.go

118 lines
2.7 KiB
Go

package tsi1
import (
"encoding/binary"
"sort"
"github.com/influxdata/influxdb/models"
)
// TermList represents a list of terms sorted by frequency.
// It keeps the terms as an ordered slice of elements plus a map that
// locates each term's element within that slice.
type TermList struct {
m map[string]int // maps a term to the index of its element in a
a []termListElem // term elements in sorted order
}
// NewTermList builds a TermList from a map of term to frequency.
// A nil or empty map yields an empty TermList.
func NewTermList(m map[string]int) *TermList {
	n := len(m)
	if n == 0 {
		return &TermList{}
	}

	// Gather every term with its frequency, then put the elements in order
	// using the termListElems ordering.
	elems := make([]termListElem, 0, n)
	for term, freq := range m {
		elems = append(elems, termListElem{term: term, freq: freq})
	}
	sort.Sort(termListElems(elems))

	// Remember the position each term ended up at after sorting.
	index := make(map[string]int, n)
	for pos := range elems {
		index[elems[pos].term] = pos
	}

	return &TermList{m: index, a: elems}
}
// Len reports how many terms the list holds.
func (l *TermList) Len() int {
	return len(l.a)
}
// Offset looks up the term given as a byte slice and returns the offset
// stored on its element. It returns zero when the term is not in the list.
func (l *TermList) Offset(v []byte) uint32 {
	if idx, found := l.m[string(v)]; found {
		return l.a[idx].offset
	}
	return 0
}
// OffsetString looks up the term given as a string and returns the offset
// stored on its element. It returns zero when the term is not in the list.
func (l *TermList) OffsetString(v string) uint32 {
	if idx, found := l.m[v]; found {
		return l.a[idx].offset
	}
	return 0
}
// AppendEncodedSeries dictionary encodes a series and appends it to dst,
// returning the extended buffer. The output is a run of uvarints: the name's
// offset, the number of tags, then for each tag its key offset followed by
// its value offset. It panics when the name or any tag key or value resolves
// to offset zero, which is what a term missing from the list reports.
func (l *TermList) AppendEncodedSeries(dst []byte, name string, tags models.Tags) []byte {
	var scratch [binary.MaxVarintLen32]byte

	// Series name.
	nameOffset := l.OffsetString(name)
	if nameOffset == 0 {
		panic("name not in term list: " + name)
	}
	size := binary.PutUvarint(scratch[:], uint64(nameOffset))
	dst = append(dst, scratch[:size]...)

	// Number of tags that follow.
	size = binary.PutUvarint(scratch[:], uint64(len(tags)))
	dst = append(dst, scratch[:size]...)

	// Each tag is written as its key offset and then its value offset.
	for _, tag := range tags {
		keyOffset := l.Offset(tag.Key)
		if keyOffset == 0 {
			panic("tag key not in term list: " + string(tag.Key))
		}
		size = binary.PutUvarint(scratch[:], uint64(keyOffset))
		dst = append(dst, scratch[:size]...)

		valueOffset := l.Offset(tag.Value)
		if valueOffset == 0 {
			panic("tag value not in term list: " + string(tag.Value))
		}
		size = binary.PutUvarint(scratch[:], uint64(valueOffset))
		dst = append(dst, scratch[:size]...)
	}

	return dst
}
// termListElem represents an element in a term list.
type termListElem struct {
	term   string // term value
	freq   int    // term frequency
	offset uint32 // position in file
}

// termListElems represents a list of elements sorted by descending frequency.
// Elements with equal frequency are ordered by ascending term. The type
// provides Len, Swap and Less so it can be passed to sort.Sort.
type termListElems []termListElem

// Len returns the number of elements.
func (a termListElems) Len() int { return len(a) }

// Swap exchanges the elements at positions i and j.
func (a termListElems) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the element at i must sort before the element at j:
// the higher frequency comes first, and ties are broken by the
// lexicographically smaller term.
func (a termListElems) Less(i, j int) bool {
	if a[i].freq != a[j].freq {
		// Compare against element j. Comparing element i with itself is
		// always false, which left elements of differing frequency unordered.
		return a[i].freq > a[j].freq
	}
	return a[i].term < a[j].term
}