Add string/int radix tree

This is a fork of https://github.com/armon/go-radix that changes
a few things from the original:

* Does not allow updates to nodes
* Typed for int values only
* Is concurrent using a big lock on Tree
pull/9748/head
Jason Wilder 2018-04-17 21:15:09 -06:00
parent ef505542ad
commit 2c8efb2609
2 changed files with 978 additions and 0 deletions

575
pkg/radix/tree.go Normal file
View File

@ -0,0 +1,575 @@
package radix
// This is a fork of https://github.com/armon/go-radix that removes the
// ability to update nodes as well as uses fixed int value type.
import (
"sort"
"strings"
"sync"
)
// WalkFn is used when walking the tree. Takes a
// key and value, returning if iteration should
// be terminated.
type WalkFn func(s string, v int) bool
// leafNode is used to represent a value
type leafNode struct {
key string
val int
}
// edge is used to represent an edge node
type edge struct {
label byte
node *node
}
type node struct {
// leaf is used to store possible leaf
leaf *leafNode
// prefix is the common prefix we ignore
prefix string
// Edges should be stored in-order for iteration.
// We avoid a fully materialized slice to save memory,
// since in most cases we expect to be sparse
edges edges
}
func (n *node) isLeaf() bool {
return n.leaf != nil
}
func (n *node) addEdge(e edge) {
n.edges = append(n.edges, e)
n.edges.Sort()
}
func (n *node) replaceEdge(e edge) {
num := len(n.edges)
idx := sort.Search(num, func(i int) bool {
return n.edges[i].label >= e.label
})
if idx < num && n.edges[idx].label == e.label {
n.edges[idx].node = e.node
return
}
panic("replacing missing edge")
}
func (n *node) getEdge(label byte) *node {
num := len(n.edges)
idx := sort.Search(num, func(i int) bool {
return n.edges[i].label >= label
})
if idx < num && n.edges[idx].label == label {
return n.edges[idx].node
}
return nil
}
func (n *node) delEdge(label byte) {
num := len(n.edges)
idx := sort.Search(num, func(i int) bool {
return n.edges[i].label >= label
})
if idx < num && n.edges[idx].label == label {
copy(n.edges[idx:], n.edges[idx+1:])
n.edges[len(n.edges)-1] = edge{}
n.edges = n.edges[:len(n.edges)-1]
}
}
type edges []edge
func (e edges) Len() int {
return len(e)
}
func (e edges) Less(i, j int) bool {
return e[i].label < e[j].label
}
func (e edges) Swap(i, j int) {
e[i], e[j] = e[j], e[i]
}
func (e edges) Sort() {
sort.Sort(e)
}
// Tree implements a radix tree. This can be treated as a
// Dictionary abstract data type. The main advantage over
// a standard hash map is prefix-based lookups and
// ordered iteration. The tree is safe for concurrent access.
type Tree struct {
mu sync.RWMutex
root *node
size int
}
// New returns an empty Tree
func New() *Tree {
return NewFromMap(nil)
}
// NewFromMap returns a new tree containing the keys
// from an existing map
func NewFromMap(m map[string]int) *Tree {
t := &Tree{root: &node{}}
for k, v := range m {
t.Insert(k, v)
}
return t
}
// Len is used to return the number of elements in the tree
func (t *Tree) Len() int {
return t.size
}
// longestPrefix finds the length of the shared prefix
// of two strings
func longestPrefix(k1, k2 string) int {
max := len(k1)
if l := len(k2); l < max {
max = l
}
var i int
for i = 0; i < max; i++ {
if k1[i] != k2[i] {
break
}
}
return i
}
// Insert is used to add a newentry or update
// an existing entry. Returns if inserted.
func (t *Tree) Insert(s string, v int) (int, bool) {
t.mu.Lock()
defer t.mu.Unlock()
var parent *node
n := t.root
search := s
for {
// Handle key exhaution
if len(search) == 0 {
if n.isLeaf() {
old := n.leaf.val
return old, false
}
n.leaf = &leafNode{
key: s,
val: v,
}
t.size++
return v, true
}
// Look for the edge
parent = n
n = n.getEdge(search[0])
// No edge, create one
if n == nil {
e := edge{
label: search[0],
node: &node{
leaf: &leafNode{
key: s,
val: v,
},
prefix: search,
},
}
parent.addEdge(e)
t.size++
return v, true
}
// Determine longest prefix of the search key on match
commonPrefix := longestPrefix(search, n.prefix)
if commonPrefix == len(n.prefix) {
search = search[commonPrefix:]
continue
}
// Split the node
t.size++
child := &node{
prefix: search[:commonPrefix],
}
parent.replaceEdge(edge{
label: search[0],
node: child,
})
// Restore the existing node
child.addEdge(edge{
label: n.prefix[commonPrefix],
node: n,
})
n.prefix = n.prefix[commonPrefix:]
// Create a new leaf node
leaf := &leafNode{
key: s,
val: v,
}
// If the new key is a subset, add to to this node
search = search[commonPrefix:]
if len(search) == 0 {
child.leaf = leaf
return v, true
}
// Create a new edge for the node
child.addEdge(edge{
label: search[0],
node: &node{
leaf: leaf,
prefix: search,
},
})
return v, true
}
}
// Delete is used to delete a key, returning the previous
// value and if it was deleted
func (t *Tree) Delete(s string) (int, bool) {
t.mu.Lock()
defer t.mu.Unlock()
var parent *node
var label byte
n := t.root
search := s
for {
// Check for key exhaution
if len(search) == 0 {
if !n.isLeaf() {
break
}
goto DELETE
}
// Look for an edge
parent = n
label = search[0]
n = n.getEdge(label)
if n == nil {
break
}
// Consume the search prefix
if strings.HasPrefix(search, n.prefix) {
search = search[len(n.prefix):]
} else {
break
}
}
return 0, false
DELETE:
// Delete the leaf
leaf := n.leaf
n.leaf = nil
t.size--
// Check if we should delete this node from the parent
if parent != nil && len(n.edges) == 0 {
parent.delEdge(label)
}
// Check if we should merge this node
if n != t.root && len(n.edges) == 1 {
n.mergeChild()
}
// Check if we should merge the parent's other child
if parent != nil && parent != t.root && len(parent.edges) == 1 && !parent.isLeaf() {
parent.mergeChild()
}
return leaf.val, true
}
// DeletePrefix is used to delete the subtree under a prefix
// Returns how many nodes were deleted
// Use this to delete large subtrees efficiently
func (t *Tree) DeletePrefix(s string) int {
t.mu.Lock()
defer t.mu.Unlock()
return t.deletePrefix(nil, t.root, s)
}
// delete does a recursive deletion
func (t *Tree) deletePrefix(parent, n *node, prefix string) int {
// Check for key exhaustion
if len(prefix) == 0 {
// Remove the leaf node
subTreeSize := 0
//recursively walk from all edges of the node to be deleted
recursiveWalk(n, func(s string, v int) bool {
subTreeSize++
return false
})
if n.isLeaf() {
n.leaf = nil
}
n.edges = nil // deletes the entire subtree
// Check if we should merge the parent's other child
if parent != nil && parent != t.root && len(parent.edges) == 1 && !parent.isLeaf() {
parent.mergeChild()
}
t.size -= subTreeSize
return subTreeSize
}
// Look for an edge
label := prefix[0]
child := n.getEdge(label)
if child == nil || (!strings.HasPrefix(child.prefix, prefix) && !strings.HasPrefix(prefix, child.prefix)) {
return 0
}
// Consume the search prefix
if len(child.prefix) > len(prefix) {
prefix = prefix[len(prefix):]
} else {
prefix = prefix[len(child.prefix):]
}
return t.deletePrefix(n, child, prefix)
}
func (n *node) mergeChild() {
e := n.edges[0]
child := e.node
n.prefix = n.prefix + child.prefix
n.leaf = child.leaf
n.edges = child.edges
}
// Get is used to lookup a specific key, returning
// the value and if it was found
func (t *Tree) Get(s string) (int, bool) {
t.mu.RLock()
defer t.mu.RUnlock()
n := t.root
search := s
for {
// Check for key exhaution
if len(search) == 0 {
if n.isLeaf() {
return n.leaf.val, true
}
break
}
// Look for an edge
n = n.getEdge(search[0])
if n == nil {
break
}
// Consume the search prefix
if strings.HasPrefix(search, n.prefix) {
search = search[len(n.prefix):]
} else {
break
}
}
return 0, false
}
// LongestPrefix is like Get, but instead of an
// exact match, it will return the longest prefix match.
func (t *Tree) LongestPrefix(s string) (string, int, bool) {
t.mu.RLock()
defer t.mu.RUnlock()
var last *leafNode
n := t.root
search := s
for {
// Look for a leaf node
if n.isLeaf() {
last = n.leaf
}
// Check for key exhaution
if len(search) == 0 {
break
}
// Look for an edge
n = n.getEdge(search[0])
if n == nil {
break
}
// Consume the search prefix
if strings.HasPrefix(search, n.prefix) {
search = search[len(n.prefix):]
} else {
break
}
}
if last != nil {
return last.key, last.val, true
}
return "", 0, false
}
// Minimum is used to return the minimum value in the tree
func (t *Tree) Minimum() (string, int, bool) {
t.mu.RLock()
defer t.mu.RUnlock()
n := t.root
for {
if n.isLeaf() {
return n.leaf.key, n.leaf.val, true
}
if len(n.edges) > 0 {
n = n.edges[0].node
} else {
break
}
}
return "", 0, false
}
// Maximum is used to return the maximum value in the tree
func (t *Tree) Maximum() (string, int, bool) {
t.mu.RLock()
defer t.mu.RUnlock()
n := t.root
for {
if num := len(n.edges); num > 0 {
n = n.edges[num-1].node
continue
}
if n.isLeaf() {
return n.leaf.key, n.leaf.val, true
}
break
}
return "", 0, false
}
// Walk is used to walk the tree
func (t *Tree) Walk(fn WalkFn) {
recursiveWalk(t.root, fn)
}
// WalkPrefix is used to walk the tree under a prefix
func (t *Tree) WalkPrefix(prefix string, fn WalkFn) {
t.mu.RLock()
defer t.mu.RUnlock()
n := t.root
search := prefix
for {
// Check for key exhaution
if len(search) == 0 {
recursiveWalk(n, fn)
return
}
// Look for an edge
n = n.getEdge(search[0])
if n == nil {
break
}
// Consume the search prefix
if strings.HasPrefix(search, n.prefix) {
search = search[len(n.prefix):]
} else if strings.HasPrefix(n.prefix, search) {
// Child may be under our search prefix
recursiveWalk(n, fn)
return
} else {
break
}
}
}
// WalkPath is used to walk the tree, but only visiting nodes
// from the root down to a given leaf. Where WalkPrefix walks
// all the entries *under* the given prefix, this walks the
// entries *above* the given prefix.
func (t *Tree) WalkPath(path string, fn WalkFn) {
t.mu.RLock()
defer t.mu.RUnlock()
n := t.root
search := path
for {
// Visit the leaf values if any
if n.leaf != nil && fn(n.leaf.key, n.leaf.val) {
return
}
// Check for key exhaution
if len(search) == 0 {
return
}
// Look for an edge
n = n.getEdge(search[0])
if n == nil {
return
}
// Consume the search prefix
if strings.HasPrefix(search, n.prefix) {
search = search[len(n.prefix):]
} else {
break
}
}
}
// recursiveWalk is used to do a pre-order walk of a node
// recursively. Returns true if the walk should be aborted
func recursiveWalk(n *node, fn WalkFn) bool {
// Visit the leaf values if any
if n.leaf != nil && fn(n.leaf.key, n.leaf.val) {
return true
}
// Recurse on the children
for _, e := range n.edges {
if recursiveWalk(e.node, fn) {
return true
}
}
return false
}
// ToMap is used to walk the tree and convert it into a map
func (t *Tree) ToMap() map[string]int {
out := make(map[string]int, t.size)
t.Walk(func(k string, v int) bool {
out[k] = v
return false
})
return out
}

403
pkg/radix/tree_test.go Normal file
View File

@ -0,0 +1,403 @@
package radix
import (
crand "crypto/rand"
"fmt"
"reflect"
"sort"
"testing"
)
func TestRadix(t *testing.T) {
var min, max string
inp := make(map[string]int)
for i := 0; i < 1000; i++ {
gen := generateUUID()
inp[gen] = i
if gen < min || i == 0 {
min = gen
}
if gen > max || i == 0 {
max = gen
}
}
r := NewFromMap(inp)
if r.Len() != len(inp) {
t.Fatalf("bad length: %v %v", r.Len(), len(inp))
}
r.Walk(func(k string, v int) bool {
return false
})
for k, v := range inp {
out, ok := r.Get(k)
if !ok {
t.Fatalf("missing key: %v", k)
}
if out != v {
t.Fatalf("value mis-match: %v %v", out, v)
}
}
// Check min and max
outMin, _, _ := r.Minimum()
if outMin != min {
t.Fatalf("bad minimum: %v %v", outMin, min)
}
outMax, _, _ := r.Maximum()
if outMax != max {
t.Fatalf("bad maximum: %v %v", outMax, max)
}
for k, v := range inp {
out, ok := r.Delete(k)
if !ok {
t.Fatalf("missing key: %v", k)
}
if out != v {
t.Fatalf("value mis-match: %v %v", out, v)
}
}
if r.Len() != 0 {
t.Fatalf("bad length: %v", r.Len())
}
}
func TestRoot(t *testing.T) {
r := New()
_, ok := r.Delete("")
if ok {
t.Fatalf("bad")
}
_, ok = r.Insert("", 1)
if !ok {
t.Fatalf("bad")
}
val, ok := r.Get("")
if !ok || val != 1 {
t.Fatalf("bad: %v", val)
}
val, ok = r.Delete("")
if !ok || val != 1 {
t.Fatalf("bad: %v", val)
}
}
func TestDelete(t *testing.T) {
r := New()
s := []string{"", "A", "AB"}
for _, ss := range s {
r.Insert(ss, 1)
}
for _, ss := range s {
_, ok := r.Delete(ss)
if !ok {
t.Fatalf("bad %q", ss)
}
}
}
func TestDeletePrefix(t *testing.T) {
type exp struct {
inp []string
prefix string
out []string
numDeleted int
}
cases := []exp{
{[]string{"", "A", "AB", "ABC", "R", "S"}, "A", []string{"", "R", "S"}, 3},
{[]string{"", "A", "AB", "ABC", "R", "S"}, "ABC", []string{"", "A", "AB", "R", "S"}, 1},
{[]string{"", "A", "AB", "ABC", "R", "S"}, "", []string{}, 6},
{[]string{"", "A", "AB", "ABC", "R", "S"}, "S", []string{"", "A", "AB", "ABC", "R"}, 1},
{[]string{"", "A", "AB", "ABC", "R", "S"}, "SS", []string{"", "A", "AB", "ABC", "R", "S"}, 0},
}
for _, test := range cases {
r := New()
for _, ss := range test.inp {
r.Insert(ss, 1)
}
deleted := r.DeletePrefix(test.prefix)
if deleted != test.numDeleted {
t.Fatalf("Bad delete, expected %v to be deleted but got %v", test.numDeleted, deleted)
}
out := []string{}
fn := func(s string, v int) bool {
out = append(out, s)
return false
}
r.Walk(fn)
if !reflect.DeepEqual(out, test.out) {
t.Fatalf("mis-match: %v %v", out, test.out)
}
}
}
func TestLongestPrefix(t *testing.T) {
r := New()
keys := []string{
"",
"foo",
"foobar",
"foobarbaz",
"foobarbazzip",
"foozip",
}
for _, k := range keys {
r.Insert(k, 1)
}
if r.Len() != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(), len(keys))
}
type exp struct {
inp string
out string
}
cases := []exp{
{"a", ""},
{"abc", ""},
{"fo", ""},
{"foo", "foo"},
{"foob", "foo"},
{"foobar", "foobar"},
{"foobarba", "foobar"},
{"foobarbaz", "foobarbaz"},
{"foobarbazzi", "foobarbaz"},
{"foobarbazzip", "foobarbazzip"},
{"foozi", "foo"},
{"foozip", "foozip"},
{"foozipzap", "foozip"},
}
for _, test := range cases {
m, _, ok := r.LongestPrefix(test.inp)
if !ok {
t.Fatalf("no match: %v", test)
}
if m != test.out {
t.Fatalf("mis-match: %v %v", m, test)
}
}
}
func TestWalkPrefix(t *testing.T) {
r := New()
keys := []string{
"foobar",
"foo/bar/baz",
"foo/baz/bar",
"foo/zip/zap",
"zipzap",
}
for _, k := range keys {
r.Insert(k, 1)
}
if r.Len() != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(), len(keys))
}
type exp struct {
inp string
out []string
}
cases := []exp{
{
"f",
[]string{"foobar", "foo/bar/baz", "foo/baz/bar", "foo/zip/zap"},
},
{
"foo",
[]string{"foobar", "foo/bar/baz", "foo/baz/bar", "foo/zip/zap"},
},
{
"foob",
[]string{"foobar"},
},
{
"foo/",
[]string{"foo/bar/baz", "foo/baz/bar", "foo/zip/zap"},
},
{
"foo/b",
[]string{"foo/bar/baz", "foo/baz/bar"},
},
{
"foo/ba",
[]string{"foo/bar/baz", "foo/baz/bar"},
},
{
"foo/bar",
[]string{"foo/bar/baz"},
},
{
"foo/bar/baz",
[]string{"foo/bar/baz"},
},
{
"foo/bar/bazoo",
[]string{},
},
{
"z",
[]string{"zipzap"},
},
}
for _, test := range cases {
out := []string{}
fn := func(s string, v int) bool {
out = append(out, s)
return false
}
r.WalkPrefix(test.inp, fn)
sort.Strings(out)
sort.Strings(test.out)
if !reflect.DeepEqual(out, test.out) {
t.Fatalf("mis-match: %v %v", out, test.out)
}
}
}
func TestWalkPath(t *testing.T) {
r := New()
keys := []string{
"foo",
"foo/bar",
"foo/bar/baz",
"foo/baz/bar",
"foo/zip/zap",
"zipzap",
}
for _, k := range keys {
r.Insert(k, 1)
}
if r.Len() != len(keys) {
t.Fatalf("bad len: %v %v", r.Len(), len(keys))
}
type exp struct {
inp string
out []string
}
cases := []exp{
{
"f",
[]string{},
},
{
"foo",
[]string{"foo"},
},
{
"foo/",
[]string{"foo"},
},
{
"foo/ba",
[]string{"foo"},
},
{
"foo/bar",
[]string{"foo", "foo/bar"},
},
{
"foo/bar/baz",
[]string{"foo", "foo/bar", "foo/bar/baz"},
},
{
"foo/bar/bazoo",
[]string{"foo", "foo/bar", "foo/bar/baz"},
},
{
"z",
[]string{},
},
}
for _, test := range cases {
out := []string{}
fn := func(s string, v int) bool {
out = append(out, s)
return false
}
r.WalkPath(test.inp, fn)
sort.Strings(out)
sort.Strings(test.out)
if !reflect.DeepEqual(out, test.out) {
t.Fatalf("mis-match: %v %v", out, test.out)
}
}
}
func TestInsert_Duplicate(t *testing.T) {
r := New()
vv, ok := r.Insert("cpu", 1)
if vv != 1 {
t.Fatalf("value mismatch: got %v, exp %v", vv, 1)
}
if !ok {
t.Fatalf("value mismatch: got %v, exp %v", ok, true)
}
// Insert a dup with a different type should fail
vv, ok = r.Insert("cpu", 2)
if vv != 1 {
t.Fatalf("value mismatch: got %v, exp %v", vv, 1)
}
if ok {
t.Fatalf("value mismatch: got %v, exp %v", ok, false)
}
}
// generateUUID is used to generate a random UUID
func generateUUID() string {
buf := make([]byte, 16)
if _, err := crand.Read(buf); err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
buf[0:4],
buf[4:6],
buf[6:8],
buf[8:10],
buf[10:16])
}
func BenchmarkTree_Insert(b *testing.B) {
t := New()
keys := make([]string, 0, 10000)
for i := 0; i < cap(keys); i++ {
k := fmt.Sprintf("cpu,host=%d", i)
if v, ok := t.Insert(k, 1); v != 1 || !ok {
b.Fatalf("insert failed: %v != 1 || !%v", v, ok)
}
keys = append(keys, k)
}
b.ResetTimer()
for j := 0; j < b.N; j++ {
for _, key := range keys {
if v, ok := t.Insert(key, 1); v != 1 || ok {
b.Fatalf("insert failed: %v != 1 || !%v", v, ok)
}
}
}
}