From bc0f0354c69c916ab06241cb89eb4ec44631659f Mon Sep 17 00:00:00 2001
From: Simone Tiraboschi
Date: Wed, 8 Apr 2026 13:23:13 +0200
Subject: [PATCH] evictions: fix missing observability for background
 evictions

Background evictions were completely invisible in metrics: the
ignore=true path caused EvictPod to return before incrementing any
counter, leaving operators with no signal that a background eviction
had been triggered or completed.

Add a "background" result label emitted at eviction request time and a
"success" label emitted from the informer's DeleteFunc once the pod is
actually gone. Together the two values give a complete picture: a
"background" sample may lack a matching "success" if the descheduler
restarts before the pod is deleted, while a "success" sample confirms
the eviction completed within the same descheduler lifecycle.

Signed-off-by: Simone Tiraboschi
---
 pkg/descheduler/evictions/evictions.go      |  26 ++++-
 pkg/descheduler/evictions/evictions_test.go | 110 ++++++++++++++++++++
 2 files changed, 133 insertions(+), 3 deletions(-)
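An illustration of how the new label value can be consumed, assuming a
standard Prometheus scrape of the descheduler's /metrics endpoint (a
sketch only, using the metric and label names exercised by the new
test):

    # per-strategy rate of evictions handed off to background processing
    sum by (strategy) (rate(descheduler_pods_evicted_total{result="background"}[1h]))

Note that result="success" also counts ordinary foreground evictions,
so the two values complement each other rather than forming a strict
request/completion pair.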
diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go
index d0c2c36bb..779ef595c 100644
--- a/pkg/descheduler/evictions/evictions.go
+++ b/pkg/descheduler/evictions/evictions.go
@@ -57,6 +57,7 @@ var (
 
 type evictionRequestItem struct {
 	podName, podNamespace, podNodeName string
+	strategyName, profileName          string
 	evictionAssumed                    bool
 	assumedTimestamp                   metav1.Time
 }
@@ -149,17 +150,20 @@ func (erc *evictionRequestsCache) addPod(pod *v1.Pod) {
 	erc.requestsTotal++
 }
 
-func (erc *evictionRequestsCache) assumePod(pod *v1.Pod) {
+func (erc *evictionRequestsCache) assumePod(pod *v1.Pod, profileName, strategyName string) {
 	erc.mu.Lock()
 	defer erc.mu.Unlock()
 	uid := getPodKey(pod)
 	if _, exists := erc.requests[uid]; exists {
+		// Pod already assumed by a previous strategy/profile; first one wins.
 		return
 	}
 	erc.requests[uid] = evictionRequestItem{
 		podNamespace:     pod.Namespace,
 		podName:          pod.Name,
 		podNodeName:      pod.Spec.NodeName,
+		strategyName:     strategyName,
+		profileName:      profileName,
 		evictionAssumed:  true,
 		assumedTimestamp: metav1.NewTime(time.Now()),
 	}
@@ -191,6 +195,14 @@ func (erc *evictionRequestsCache) deletePod(pod *v1.Pod) {
 	}
 }
 
+func (erc *evictionRequestsCache) getPod(pod *v1.Pod) (evictionRequestItem, bool) {
+	erc.mu.RLock()
+	defer erc.mu.RUnlock()
+	uid := getPodKey(pod)
+	item, exists := erc.requests[uid]
+	return item, exists
+}
+
 func (erc *evictionRequestsCache) hasPod(pod *v1.Pod) bool {
 	erc.mu.RLock()
 	defer erc.mu.RUnlock()
@@ -347,8 +359,12 @@ func NewPodEvictor(
 				klog.ErrorS(nil, "Cannot convert to *v1.Pod", "obj", t)
 				return
 			}
-			if erCache.hasPod(pod) {
+			if item, exists := erCache.getPod(pod); exists {
 				klog.V(3).InfoS("Pod with eviction in background deleted/evicted. Removing pod from the cache.", "pod", klog.KObj(pod))
+				if item.evictionAssumed && podEvictor.metricsEnabled {
+					metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": item.strategyName, "namespace": item.podNamespace, "node": item.podNodeName, "profile": item.profileName}).Inc()
+					metrics.PodsEvictedTotal.With(map[string]string{"result": "success", "strategy": item.strategyName, "namespace": item.podNamespace, "node": item.podNodeName, "profile": item.profileName}).Inc()
+				}
 			}
 			erCache.deletePod(pod)
 		},
@@ -608,7 +624,11 @@ func (pe *PodEvictor) evictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
 			return true, nil
 		}
 		klog.V(3).InfoS("Eviction in background assumed", "pod", klog.KObj(pod))
-		pe.erCache.assumePod(pod)
+		pe.erCache.assumePod(pod, opts.ProfileName, opts.StrategyName)
+		if pe.metricsEnabled {
+			metrics.PodsEvicted.With(map[string]string{"result": "background", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
+			metrics.PodsEvictedTotal.With(map[string]string{"result": "background", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
+		}
 		return true, nil
 	}
 }
diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go
index 2e133e388..e99d2d0cb 100644
--- a/pkg/descheduler/evictions/evictions_test.go
+++ b/pkg/descheduler/evictions/evictions_test.go
@@ -39,6 +39,9 @@ import (
 	"k8s.io/klog/v2"
 	utilptr "k8s.io/utils/ptr"
 
+	metricstest "k8s.io/component-base/metrics/testutil"
+
+	deschedulermetrics "sigs.k8s.io/descheduler/metrics"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
 	"sigs.k8s.io/descheduler/pkg/features"
 	"sigs.k8s.io/descheduler/pkg/utils"
@@ -463,6 +466,113 @@ func TestEvictionRequestsCacheCleanup(t *testing.T) {
 	}
 }
 
+func TestEvictionInBackgroundMetrics(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), wait.ForeverTestTimeout)
+	defer cancel()
+
+	deschedulermetrics.Register()
+	deschedulermetrics.PodsEvicted.Reset()
+	deschedulermetrics.PodsEvictedTotal.Reset()
+
+	node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
+
+	ownerRef1 := test.GetReplicaSetOwnerRefList()
+	updatePod := func(pod *v1.Pod) {
+		pod.Namespace = "dev"
+		pod.ObjectMeta.OwnerReferences = ownerRef1
+	}
+	updatePodWithEvictionAnnotation := func(pod *v1.Pod) {
+		updatePod(pod)
+		pod.Annotations = map[string]string{
+			EvictionRequestAnnotationKey: "",
+		}
+	}
+
+	// p1, p2: will be evicted in background
+	// p3, p4: will be evicted normally
+	p1 := test.BuildTestPod("p1", 100, 0, node1.Name, updatePodWithEvictionAnnotation)
+	p2 := test.BuildTestPod("p2", 100, 0, node1.Name, updatePodWithEvictionAnnotation)
+	p3 := test.BuildTestPod("p3", 100, 0, node1.Name, updatePod)
+	p4 := test.BuildTestPod("p4", 100, 0, node1.Name, updatePod)
+
+	client := fakeclientset.NewSimpleClientset(node1, p1, p2, p3, p4)
+	sharedInformerFactory := informers.NewSharedInformerFactory(client, 0)
+	_, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, client)
+
+	podEvictor, err := NewPodEvictor(
+		ctx,
+		client,
+		eventRecorder,
+		sharedInformerFactory.Core().V1().Pods().Informer(),
+		initFeatureGates(),
+		NewOptions().WithMetricsEnabled(true),
+	)
+	if err != nil {
+		t.Fatalf("Unexpected error when creating a pod evictor: %v", err)
+	}
+
+	client.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
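+			// The fake API server rejects evictions of p1/p2 with 429
+			// TooManyRequests ("Eviction triggered evacuation"), which the pod
+			// evictor treats as an eviction accepted for background processing;
+			// evictions of any other pod succeed immediately.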
+			createAct, matched := action.(core.CreateActionImpl)
+			if !matched {
+				return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
+			}
+			if eviction, matched := createAct.Object.(*policy.Eviction); matched {
+				if eviction.GetName() == "p1" || eviction.GetName() == "p2" {
+					return true, nil, &apierrors.StatusError{
+						ErrStatus: metav1.Status{
+							Reason:  metav1.StatusReasonTooManyRequests,
+							Message: "Eviction triggered evacuation",
+						},
+					}
+				}
+				return true, nil, nil
+			}
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	evictOpts := EvictOptions{StrategyName: "TestStrategy", ProfileName: "TestProfile"}
+
+	// Cycle 1: p1/p2 go to background, p3/p4 are evicted normally.
+	podEvictor.EvictPod(ctx, p1, evictOpts)
+	podEvictor.EvictPod(ctx, p2, evictOpts)
+	podEvictor.EvictPod(ctx, p3, evictOpts)
+	podEvictor.EvictPod(ctx, p4, evictOpts)
+
+	// After cycle 1: 2 "background" (p1, p2), 2 "success" (p3, p4).
+	metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "background"}, 2)
+	metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "success"}, 2)
+
+	// Cycle 2: p1/p2 are still running (background eviction in progress).
+	// Re-attempting them must not emit any additional metrics.
+	podEvictor.EvictPod(ctx, p1, evictOpts)
+	podEvictor.EvictPod(ctx, p2, evictOpts)
+
+	// Counts must be unchanged after cycle 2.
+	metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "background"}, 2)
+	metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "success"}, 2)
+
+	// The background evictions complete: p1 and p2 are deleted.
+	// The informer's DeleteFunc fires and emits a "success" metric for each assumed pod.
+	client.CoreV1().Pods(p1.Namespace).Delete(ctx, p1.Name, metav1.DeleteOptions{})
+	client.CoreV1().Pods(p2.Namespace).Delete(ctx, p2.Name, metav1.DeleteOptions{})
+
+	// Poll until DeleteFunc has fired for both pods (informer is async).
+	if err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) {
+		return !podEvictor.erCache.hasPod(p1) && !podEvictor.erCache.hasPod(p2), nil
+	}); err != nil {
+		t.Fatalf("Timed out waiting for background evictions to complete: %v", err)
+	}
+
+	// After deletion: "success" grows by 2 (p1, p2 completed), "background" stays at 2.
+	metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "success"}, 4)
+	metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "background"}, 2)
+}
+
 func assertEqualEvents(t *testing.T, expected []string, actual <-chan string) {
 	t.Logf("Assert for events: %v", expected)
 	c := time.After(wait.ForeverTestTimeout)