evictions: fix assumePod silently dropping success metric on informer race

When KubeVirt sets EvictionInProgressAnnotationKey before returning
TooManyRequests, the informer's UpdateFunc can call addPod
(evictionAssumed=false) before evictPod's assumePod call arrives.
assumePod found the entry already present and returned early, leaving
evictionAssumed=false. DeleteFunc then skipped the "success" metric.

Fix: if the existing entry has evictionAssumed=false (added by addPod),
upgrade it in place without double-counting the pod in the counters.

Adds TestEvictionInBackgroundMetrics_InformerRace to reproduce the race
deterministically.

Signed-off-by: Simone Tiraboschi <stirabos@redhat.com>
This commit is contained in:
Simone Tiraboschi
2026-05-14 17:21:47 +02:00
parent c00d88f9ed
commit a7ac072c7d
2 changed files with 127 additions and 2 deletions

View File

@@ -154,8 +154,23 @@ func (erc *evictionRequestsCache) assumePod(pod *v1.Pod, profileName, strategyNa
erc.mu.Lock()
defer erc.mu.Unlock()
uid := getPodKey(pod)
if _, exists := erc.requests[uid]; exists {
// Pod already assumed by a previous strategy/profile; first one wins.
if item, exists := erc.requests[uid]; exists {
if item.evictionAssumed {
// Pod already assumed by a previous strategy/profile; first one wins.
return
}
// The informer's UpdateFunc called addPod (evictionAssumed=false) before
// the TooManyRequests response arrived. Upgrade the entry in place without
// bumping the counters a second time.
erc.requests[uid] = evictionRequestItem{
podNamespace: item.podNamespace,
podName: item.podName,
podNodeName: item.podNodeName,
strategyName: strategyName,
profileName: profileName,
evictionAssumed: true,
assumedTimestamp: metav1.NewTime(time.Now()),
}
return
}
erc.requests[uid] = evictionRequestItem{

View File

@@ -573,6 +573,116 @@ func TestEvictionInBackgroundMetrics(t *testing.T) {
metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "background"}, 2)
}
// TestEvictionInBackgroundMetrics_InformerRace reproduces the race where the
// informer's UpdateFunc calls addPod (evictionAssumed=false) before evictPod's
// assumePod call, causing the "success" metric to be silently dropped when the
// pod is later deleted.
//
// The race: evictPod sends the eviction request; the KubeVirt handler sets
// EvictionInProgressAnnotationKey before returning TooManyRequests, so the
// informer's UpdateFunc fires and calls addPod (evictionAssumed=false) while
// evictPod is still waiting for the HTTP response. When the response arrives,
// assumePod finds the entry already present and returns early, leaving
// evictionAssumed=false. DeleteFunc then skips the "success" metric.
func TestEvictionInBackgroundMetrics_InformerRace(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), wait.ForeverTestTimeout)
defer cancel()
deschedulermetrics.Register()
deschedulermetrics.PodsEvicted.Reset()
deschedulermetrics.PodsEvictedTotal.Reset()
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
ownerRef1 := test.GetReplicaSetOwnerRefList()
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, func(pod *v1.Pod) {
pod.Namespace = "dev"
pod.ObjectMeta.OwnerReferences = ownerRef1
pod.Annotations = map[string]string{
EvictionRequestAnnotationKey: "",
}
})
client := fakeclientset.NewSimpleClientset(node1, p1)
sharedInformerFactory := informers.NewSharedInformerFactory(client, 0)
_, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, client)
podEvictor, err := NewPodEvictor(
ctx,
client,
eventRecorder,
sharedInformerFactory.Core().V1().Pods().Informer(),
initFeatureGates(),
NewOptions().WithMetricsEnabled(true),
)
if err != nil {
t.Fatalf("Unexpected error when creating a pod evictor: %v", err)
}
// calling erCache.addPod from a goroutine.
triggerAddPod := make(chan struct{})
addPodDone := make(chan struct{})
go func() {
<-triggerAddPod
// Simulate what UpdateFunc does when KubeVirt sets
// EvictionInProgressAnnotationKey before the TooManyRequests response
// arrives: addPod inserts the entry with evictionAssumed=false.
podEvictor.erCache.addPod(p1)
close(addPodDone)
}()
client.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "eviction" {
return false, nil, nil
}
createAct, ok := action.(core.CreateActionImpl)
if !ok {
return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
}
eviction, ok := createAct.Object.(*policy.Eviction)
if !ok || eviction.GetName() != "p1" {
return false, nil, nil
}
// Trigger addPod and wait for it to complete before returning
// TooManyRequests, ensuring assumePod will find the entry already present.
close(triggerAddPod)
<-addPodDone
return true, nil, &apierrors.StatusError{
ErrStatus: metav1.Status{
Reason: metav1.StatusReasonTooManyRequests,
Message: "Eviction triggered evacuation",
},
}
})
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
evictOpts := EvictOptions{StrategyName: "TestStrategy", ProfileName: "TestProfile"}
podEvictor.EvictPod(ctx, p1, evictOpts)
// The "background" metric must have been emitted.
metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "background"}, 1)
// Simulate the background eviction completing: the pod is deleted.
client.CoreV1().Pods(p1.Namespace).Delete(ctx, p1.Name, metav1.DeleteOptions{})
// Wait until DeleteFunc has fired and removed the pod from the cache.
if err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) {
return !podEvictor.erCache.hasPod(p1), nil
}); err != nil {
t.Fatalf("Timed out waiting for background eviction to complete: %v", err)
}
// "success" must also be 1: the background eviction completed.
// Without the fix this will be 0 because assumePod returned early (entry
// already present from addPod) leaving evictionAssumed=false.
metricstest.AssertVectorCount(t, "descheduler_pods_evicted_total", map[string]string{"result": "success"}, 1)
}
func assertEqualEvents(t *testing.T, expected []string, actual <-chan string) {
t.Logf("Assert for events: %v", expected)
c := time.After(wait.ForeverTestTimeout)