diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization.go b/pkg/framework/plugins/nodeutilization/lownodeutilization.go index 5748e7377..cd3c296c4 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization.go @@ -103,18 +103,13 @@ func NewLowNodeUtilization( } // this plugins supports different ways of collecting usage data. each - // different way provides its own "usageClient". here we make sure we - // have the correct one or an error is triggered. XXX MetricsServer is - // deprecated, removed once dropped. + // different way provides its own "usageClient". the metrics-based client + // is reset at every extension point so we always get the latest prometheus + // client (whose SA token can be rotated after plugin creation). + // XXX MetricsServer is deprecated, removed once dropped. var usageClient usageClient = newRequestedUsageClient( extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), ) - if metrics != nil { - usageClient, err = usageClientForMetrics(args, handle, extendedResourceNames) - if err != nil { - return nil, err - } - } return &LowNodeUtilization{ logger: logger, @@ -134,12 +129,33 @@ func (l *LowNodeUtilization) Name() string { return LowNodeUtilizationPluginName } +// resetUsageClient refreshes the usageClient field from the current handle +// state. It must be called at the start of every extension point so that a +// rotated prometheus SA token is picked up without restarting the process. +func (l *LowNodeUtilization) resetUsageClient() error { + if l.args.MetricsUtilization == nil { + return nil + } + client, err := usageClientForMetrics(l.args, l.handle, l.extendedResourceNames) + if err != nil { + return err + } + l.usageClient = client + return nil +} + // Balance holds the main logic of the plugin. It evicts pods from over // utilized nodes to under utilized nodes. The goal here is to evenly // distribute pods across nodes. 
func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status { logger := klog.FromContext(klog.NewContext(ctx, l.logger)).WithValues("ExtensionPoint", frameworktypes.BalanceExtensionPoint) + if err := l.resetUsageClient(); err != nil { + return &frameworktypes.Status{ + Err: fmt.Errorf("error initializing usage client: %v", err), + } + } + if err := l.usageClient.sync(ctx, nodes); err != nil { return &frameworktypes.Status{ Err: fmt.Errorf("error getting node usage: %v", err), diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go index 3c890d678..bce47ebd2 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go @@ -1661,3 +1661,83 @@ func TestLowNodeUtilizationWithPrometheusMetrics(t *testing.T) { t.Run(tc.name, testFnc(false, tc.expectedPodsEvicted)) } } + +// TestLowNodeUtilizationPrometheusClientNilAtCreation ensures the plugin +// tolerates a nil prometheus client at creation time (e.g. in-cluster SA token +// not yet reconciled) and correctly picks up the client exposed by the handle +// at Balance call time. 
+func TestLowNodeUtilizationPrometheusClientNilAtCreation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + n1NodeName := "n1" + n2NodeName := "n2" + n3NodeName := "n3" + + nodes := []*v1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil), + } + pods := []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef), + } + + var objs []runtime.Object + for _, node := range nodes { + objs = append(objs, node) + } + for _, pod := range pods { + objs = append(objs, pod) + } + fakeClient := fake.NewSimpleClientset(objs...) + + handle, podEvictor, err := frameworktesting.InitFrameworkHandle( + ctx, fakeClient, nil, defaultevictor.DefaultEvictorArgs{NodeFit: true}, nil, + ) + if err != nil { + t.Fatalf("Unable to initialize a framework handle: %v", err) + } + + // PrometheusClientImpl is intentionally nil here — the in-cluster SA token + // has not been reconciled yet at plugin-creation time. + args := &LowNodeUtilizationArgs{ + Thresholds: api.ResourceThresholds{MetricResource: 30}, + TargetThresholds: api.ResourceThresholds{MetricResource: 50}, + MetricsUtilization: &MetricsUtilization{ + Source: api.PrometheusMetrics, + Prometheus: &Prometheus{ + Query: "instance:node_cpu:rate:sum", + }, + }, + } + + plugin, err := NewLowNodeUtilization(ctx, args, handle) + if err != nil { + t.Fatalf("plugin creation must succeed even when prometheus client is nil: %v", err) + } + + // Simulate the SA token becoming available before the first descheduling cycle. 
+ handle.PrometheusClientImpl = &fakePromClient{ + result: model.Vector{ + sample("instance:node_cpu:rate:sum", n1NodeName, 0.57), // over target + sample("instance:node_cpu:rate:sum", n2NodeName, 0.42), // between thresholds (appropriately utilized) + sample("instance:node_cpu:rate:sum", n3NodeName, 0.20), // under threshold + }, + dataType: model.ValVector, + } + + status := plugin.(frameworktypes.BalancePlugin).Balance(ctx, nodes) + if status != nil { + t.Fatalf("Balance failed: %v", status.Err) + } + + if podEvictor.TotalEvicted() == 0 { + t.Error("expected at least one pod to be evicted") + } +}