diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index ad7b779bf7..c661a10abe 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -167,7 +167,6 @@ type genericScheduler struct { pvcLister corelisters.PersistentVolumeClaimLister pdbLister algorithm.PDBLister disablePreemption bool - lastIndex int percentageOfNodesToScore int32 enableNonPreempting bool } @@ -462,8 +461,8 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v if len(g.predicates) == 0 { filtered = nodes } else { - allNodes := g.cache.NodeTree().AllNodes() - numNodesToFind := g.numFeasibleNodesToFind(int32(len(allNodes))) + allNodes := int32(g.cache.NodeTree().NumNodes()) + numNodesToFind := g.numFeasibleNodesToFind(allNodes) // Create filtered list with enough space to avoid growing it // and allow assigning. @@ -479,12 +478,8 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v // We can use the same metadata producer for all nodes. meta := g.predicateMetaProducer(pod, g.nodeInfoSnapshot.NodeInfoMap) - processedNodes := int32(0) checkNode := func(i int) { - // We check the nodes starting from where we left off in the previous scheduling cycle, - // this is to make sure all nodes have the same chance of being examined across pods. - atomic.AddInt32(&processedNodes, 1) - nodeName := allNodes[(g.lastIndex+i)%len(allNodes)] + nodeName := g.cache.NodeTree().Next() fits, failedPredicates, err := podFitsOnNode( pod, meta, @@ -516,8 +511,7 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v // Stops searching for more nodes once the configured number of feasible nodes // are found. - workqueue.ParallelizeUntil(ctx, 16, len(allNodes), checkNode) - g.lastIndex = (g.lastIndex + int(processedNodes)) % len(allNodes) + workqueue.ParallelizeUntil(ctx, 16, int(allNodes), checkNode) filtered = filtered[:filteredLen] if len(errs) > 0 { diff --git a/pkg/scheduler/internal/cache/cache_test.go b/pkg/scheduler/internal/cache/cache_test.go index 5001ad6677..95b9de7bc3 100644 --- a/pkg/scheduler/internal/cache/cache_test.go +++ b/pkg/scheduler/internal/cache/cache_test.go @@ -1066,7 +1066,7 @@ func TestNodeOperators(t *testing.T) { if !found { t.Errorf("Failed to find node %v in internalcache.", node.Name) } - if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.next() != node.Name { + if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.Next() != node.Name { t.Errorf("cache.nodeTree is not updated correctly after adding node: %v", node.Name) } @@ -1109,7 +1109,7 @@ func TestNodeOperators(t *testing.T) { t.Errorf("Failed to update node in schedulernodeinfo:\n got: %+v \nexpected: %+v", got, expected) } // Check nodeTree after update - if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.next() != node.Name { + if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.Next() != node.Name { t.Errorf("unexpected cache.nodeTree after updating node: %v", node.Name) } @@ -1120,7 +1120,7 @@ func TestNodeOperators(t *testing.T) { } // Check nodeTree after remove. The node should be removed from the nodeTree even if there are // still pods on it. - if cache.nodeTree.NumNodes() != 0 || cache.nodeTree.next() != "" { + if cache.nodeTree.NumNodes() != 0 || cache.nodeTree.Next() != "" { t.Errorf("unexpected cache.nodeTree after removing node: %v", node.Name) } } diff --git a/pkg/scheduler/internal/cache/node_tree.go b/pkg/scheduler/internal/cache/node_tree.go index ff77059716..1c7ef2c6eb 100644 --- a/pkg/scheduler/internal/cache/node_tree.go +++ b/pkg/scheduler/internal/cache/node_tree.go @@ -32,7 +32,6 @@ type NodeTree struct { tree map[string]*nodeArray // a map from zone (region-zone) to an array of nodes in the zone. zones []string // a list of all the zones in the tree (keys) zoneIndex int - allNodes []string numNodes int mu sync.RWMutex } @@ -93,7 +92,6 @@ func (nt *NodeTree) addNode(n *v1.Node) { } klog.V(5).Infof("Added node %v in group %v to NodeTree", n.Name, zone) nt.numNodes++ - nt.recomputeAllNodes() } // RemoveNode removes a node from the NodeTree. @@ -114,7 +112,6 @@ func (nt *NodeTree) removeNode(n *v1.Node) error { } klog.V(5).Infof("Removed node %v in group %v from NodeTree", n.Name, zone) nt.numNodes-- - nt.recomputeAllNodes() return nil } } @@ -162,7 +159,9 @@ func (nt *NodeTree) resetExhausted() { // Next returns the name of the next node. NodeTree iterates over zones and in each zone iterates // over nodes in a round robin fashion. -func (nt *NodeTree) next() string { +func (nt *NodeTree) Next() string { + nt.mu.Lock() + defer nt.mu.Unlock() if len(nt.zones) == 0 { return "" } @@ -187,22 +186,6 @@ func (nt *NodeTree) next() string { } } -func (nt *NodeTree) recomputeAllNodes() { - nt.allNodes = make([]string, 0, nt.numNodes) - nt.resetExhausted() - for i := 0; i < nt.numNodes; i++ { - nt.allNodes = append(nt.allNodes, nt.next()) - } -} - -// AllNodes returns the list of nodes as they would be iterated by -// Next() method. -func (nt *NodeTree) AllNodes() []string { - nt.mu.RLock() - defer nt.mu.RUnlock() - return nt.allNodes -} - // NumNodes returns the number of nodes. func (nt *NodeTree) NumNodes() int { nt.mu.RLock() diff --git a/pkg/scheduler/internal/cache/node_tree_test.go b/pkg/scheduler/internal/cache/node_tree_test.go index 8a92423eea..e8cb35ba78 100644 --- a/pkg/scheduler/internal/cache/node_tree_test.go +++ b/pkg/scheduler/internal/cache/node_tree_test.go @@ -140,28 +140,28 @@ func TestNodeTree_AddNode(t *testing.T) { { name: "single node no labels", nodesToAdd: allNodes[:1], - expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 1}}, + expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 0}}, }, { name: "mix of nodes with and without proper labels", nodesToAdd: allNodes[:4], expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 1}, - "region-1:\x00:": {[]string{"node-1"}, 1}, - ":\x00:zone-2": {[]string{"node-2"}, 1}, - "region-1:\x00:zone-2": {[]string{"node-3"}, 1}, + "": {[]string{"node-0"}, 0}, + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3"}, 0}, }, }, { name: "mix of nodes with and without proper labels and some zones with multiple nodes", nodesToAdd: allNodes[:7], expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 1}, - "region-1:\x00:": {[]string{"node-1"}, 1}, - ":\x00:zone-2": {[]string{"node-2"}, 1}, - "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 2}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, + "": {[]string{"node-0"}, 0}, + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, }, }, } @@ -190,11 +190,11 @@ func TestNodeTree_RemoveNode(t *testing.T) { existingNodes: allNodes[:7], nodesToRemove: allNodes[:1], expectedTree: map[string]*nodeArray{ - "region-1:\x00:": {[]string{"node-1"}, 1}, - ":\x00:zone-2": {[]string{"node-2"}, 1}, - "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 2}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, }, }, { @@ -202,10 +202,10 @@ func TestNodeTree_RemoveNode(t *testing.T) { existingNodes: allNodes[:7], nodesToRemove: allNodes[1:4], expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 1}, - "region-1:\x00:zone-2": {[]string{"node-4"}, 1}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, + "": {[]string{"node-0"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-4"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, }, }, { @@ -257,11 +257,11 @@ func TestNodeTree_UpdateNode(t *testing.T) { }, }, expectedTree: map[string]*nodeArray{ - "region-1:\x00:": {[]string{"node-1"}, 1}, - ":\x00:zone-2": {[]string{"node-2"}, 1}, - "region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 3}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, }, }, { @@ -277,7 +277,7 @@ func TestNodeTree_UpdateNode(t *testing.T) { }, }, expectedTree: map[string]*nodeArray{ - "region-1:\x00:zone-2": {[]string{"node-0"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-0"}, 0}, }, }, { @@ -293,8 +293,8 @@ func TestNodeTree_UpdateNode(t *testing.T) { }, }, expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 1}, - "region-1:\x00:zone-2": {[]string{"node-new"}, 1}, + "": {[]string{"node-0"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-new"}, 0}, }, }, } @@ -322,7 +322,7 @@ func TestNodeTree_Next(t *testing.T) { tests := []struct { name string nodesToAdd []*v1.Node - numRuns int // number of times to run next() + numRuns int // number of times to run Next() expectedOutput []string }{ { @@ -357,7 +357,7 @@ func TestNodeTree_Next(t *testing.T) { var output []string for i := 0; i < test.numRuns; i++ { - output = append(output, nt.next()) + output = append(output, nt.Next()) } if !reflect.DeepEqual(output, test.expectedOutput) { t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output) @@ -399,15 +399,15 @@ func TestNodeTreeMultiOperations(t *testing.T) { name: "add more nodes to an exhausted zone", nodesToAdd: append(allNodes[4:9], allNodes[3]), nodesToRemove: nil, - operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "next", "next", "next"}, - expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-8", "node-4", "node-5"}, + operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "add", "next", "next", "next"}, + expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-3", "node-8", "node-4"}, }, { name: "remove zone and add new to ensure exhausted is reset correctly", nodesToAdd: append(allNodes[3:5], allNodes[6:8]...), nodesToRemove: allNodes[3:5], operations: []string{"add", "add", "next", "next", "remove", "add", "add", "next", "next", "remove", "next", "next"}, - expectedOutput: []string{"node-3", "node-4", "node-4", "node-6", "node-6", "node-7"}, + expectedOutput: []string{"node-3", "node-4", "node-6", "node-7", "node-6", "node-7"}, }, } @@ -434,7 +434,7 @@ func TestNodeTreeMultiOperations(t *testing.T) { removeIndex++ } case "next": - output = append(output, nt.next()) + output = append(output, nt.Next()) default: t.Errorf("unknow operation: %v", op) }