Extend PodLifeTime with condition, exit code, owner kind, and transition time filters

Amir Alavi
2026-03-06 12:17:07 -05:00
parent 905e762603
commit a845ed3b36
11 changed files with 1353 additions and 238 deletions
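For orientation before the diff: the e2e tests added below drive the new filters by constructing PodLifeTimeArgs directly. The following is a minimal sketch, not part of the commit, using only field names that appear in those tests (States, Conditions with PodConditionFilter, OwnerKinds, Namespaces). The exit-code and transition-time filters named in the commit title are exercised elsewhere in the changeset and are omitted here; "example-ns" is a placeholder.

package e2e

import (
	v1 "k8s.io/api/core/v1"

	deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/podlifetime"
)

// exampleFilterArgs sketches the extended arguments: evict succeeded pods
// whose PodCompleted condition is True, but never pods owned by a Job.
func exampleFilterArgs() *podlifetime.PodLifeTimeArgs {
	return &podlifetime.PodLifeTimeArgs{
		States: []string{string(v1.PodSucceeded)},
		Conditions: []podlifetime.PodConditionFilter{
			{Reason: "PodCompleted", Status: "True"},
		},
		OwnerKinds: &podlifetime.OwnerKinds{Exclude: []string{"Job"}},
		Namespaces: &deschedulerapi.Namespaces{Include: []string{"example-ns"}}, // placeholder
	}
}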

View File

@@ -0,0 +1,271 @@
package e2e

import (
	"context"
	"strings"
	"testing"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
	utilptr "k8s.io/utils/ptr"

	deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/podlifetime"
)

func TestPodLifeTime_FailedPods(t *testing.T) {
	ctx := context.Background()
	clientSet, _, nodeLister, _ := initializeClient(ctx, t)

	t.Log("Creating testing namespace")
	testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ReplaceAll(strings.ToLower(t.Name()), "_", "-")}}
	if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Unable to create ns %v", testNamespace.Name)
	}
	defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})

	tests := []struct {
		name                    string
		expectedEvictedPodCount int
		args                    *podlifetime.PodLifeTimeArgs
	}{
		{
			name:                    "test-transition-failed-pods-default",
			expectedEvictedPodCount: 1,
			args: &podlifetime.PodLifeTimeArgs{
				States: []string{string(v1.PodFailed)},
			},
		},
		{
			name:                    "test-transition-failed-pods-exclude-job",
			expectedEvictedPodCount: 0,
			args: &podlifetime.PodLifeTimeArgs{
				States:     []string{string(v1.PodFailed)},
				OwnerKinds: &podlifetime.OwnerKinds{Exclude: []string{"Job"}},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			job := initTransitionTestJob(tc.name, testNamespace.Name)
			t.Logf("Creating job %s in %s namespace", job.Name, testNamespace.Name)
			jobClient := clientSet.BatchV1().Jobs(testNamespace.Name)
			if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil {
				t.Fatalf("Error creating Job %s: %v", tc.name, err)
			}
			deletePropagationPolicy := metav1.DeletePropagationForeground
			defer func() {
				jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy})
				waitForPodsToDisappear(ctx, t, clientSet, job.Labels, testNamespace.Name)
			}()
			waitForTransitionJobPodPhase(ctx, t, clientSet, job, v1.PodFailed)

			preRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
			tc.args.Namespaces = &deschedulerapi.Namespaces{
				Include: []string{testNamespace.Name},
			}
			runPodLifetimePlugin(ctx, t, clientSet, nodeLister, tc.args,
				defaultevictor.DefaultEvictorArgs{EvictLocalStoragePods: true},
				nil,
			)

			var meetsExpectations bool
			var actualEvictedPodCount int
			if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) {
				currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
				actualEvictedPod := preRunNames.Difference(currentRunNames)
				actualEvictedPodCount = actualEvictedPod.Len()
				t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount)
				if actualEvictedPodCount != tc.expectedEvictedPodCount {
					t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount)
					return false, nil
				}
				meetsExpectations = true
				return true, nil
			}); err != nil {
				t.Errorf("Error waiting for expected eviction count: %v", err)
			}
			if !meetsExpectations {
				t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedPodCount, tc.expectedEvictedPodCount)
			} else {
				t.Logf("Total of %d Pods were evicted for %s", actualEvictedPodCount, tc.name)
			}
		})
	}
}

func TestPodLifeTime_SucceededPods(t *testing.T) {
	ctx := context.Background()
	clientSet, _, nodeLister, _ := initializeClient(ctx, t)

	t.Log("Creating testing namespace")
	testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ReplaceAll(strings.ToLower(t.Name()), "_", "-")}}
	if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Unable to create ns %v", testNamespace.Name)
	}
	defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})

	tests := []struct {
		name                    string
		expectedEvictedPodCount int
		args                    *podlifetime.PodLifeTimeArgs
	}{
		{
			name:                    "test-transition-succeeded-pods",
			expectedEvictedPodCount: 1,
			args: &podlifetime.PodLifeTimeArgs{
				States: []string{string(v1.PodSucceeded)},
			},
		},
		{
			name:                    "test-transition-succeeded-condition",
			expectedEvictedPodCount: 1,
			args: &podlifetime.PodLifeTimeArgs{
				States: []string{string(v1.PodSucceeded)},
				Conditions: []podlifetime.PodConditionFilter{
					{Reason: "PodCompleted", Status: "True"},
				},
			},
		},
		{
			name:                    "test-transition-succeeded-condition-unmatched",
			expectedEvictedPodCount: 0,
			args: &podlifetime.PodLifeTimeArgs{
				States: []string{string(v1.PodSucceeded)},
				Conditions: []podlifetime.PodConditionFilter{
					{Reason: "ReasonDoesNotMatch"},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			job := initTransitionSucceededJob(tc.name, testNamespace.Name)
			t.Logf("Creating job %s in %s namespace", job.Name, testNamespace.Name)
			jobClient := clientSet.BatchV1().Jobs(testNamespace.Name)
			if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil {
				t.Fatalf("Error creating Job %s: %v", tc.name, err)
			}
			deletePropagationPolicy := metav1.DeletePropagationForeground
			defer func() {
				jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy})
				waitForPodsToDisappear(ctx, t, clientSet, job.Labels, testNamespace.Name)
			}()
			waitForTransitionJobPodPhase(ctx, t, clientSet, job, v1.PodSucceeded)

			preRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
			tc.args.Namespaces = &deschedulerapi.Namespaces{
				Include: []string{testNamespace.Name},
			}
			runPodLifetimePlugin(ctx, t, clientSet, nodeLister, tc.args,
				defaultevictor.DefaultEvictorArgs{EvictLocalStoragePods: true},
				nil,
			)

			var meetsExpectations bool
			var actualEvictedPodCount int
			if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) {
				currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
				actualEvictedPod := preRunNames.Difference(currentRunNames)
				actualEvictedPodCount = actualEvictedPod.Len()
				t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount)
				if actualEvictedPodCount != tc.expectedEvictedPodCount {
					t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount)
					return false, nil
				}
				meetsExpectations = true
				return true, nil
			}); err != nil {
				t.Errorf("Error waiting for expected eviction count: %v", err)
			}
			if !meetsExpectations {
				t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedPodCount, tc.expectedEvictedPodCount)
			} else {
				t.Logf("Total of %d Pods were evicted for %s", actualEvictedPodCount, tc.name)
			}
		})
	}
}

func initTransitionTestJob(name, namespace string) *batchv1.Job {
	podSpec := makePodSpec("", nil)
	podSpec.Containers[0].Command = []string{"/bin/false"}
	podSpec.RestartPolicy = v1.RestartPolicyNever
	labelsSet := labels.Set{"test": name, "name": name}
	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Labels:    labelsSet,
			Name:      name,
			Namespace: namespace,
		},
		Spec: batchv1.JobSpec{
			Template: v1.PodTemplateSpec{
				Spec:       podSpec,
				ObjectMeta: metav1.ObjectMeta{Labels: labelsSet},
			},
			BackoffLimit: utilptr.To[int32](0),
		},
	}
}

func initTransitionSucceededJob(name, namespace string) *batchv1.Job {
	podSpec := makePodSpec("", nil)
	podSpec.Containers[0].Image = "registry.k8s.io/e2e-test-images/agnhost:2.43"
	podSpec.Containers[0].Command = []string{"/bin/sh", "-c", "exit 0"}
	podSpec.RestartPolicy = v1.RestartPolicyNever
	labelsSet := labels.Set{"test": name, "name": name}
	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Labels:    labelsSet,
			Name:      name,
			Namespace: namespace,
		},
		Spec: batchv1.JobSpec{
			Template: v1.PodTemplateSpec{
				Spec:       podSpec,
				ObjectMeta: metav1.ObjectMeta{Labels: labelsSet},
			},
			BackoffLimit: utilptr.To[int32](0),
		},
	}
}

func waitForTransitionJobPodPhase(ctx context.Context, t *testing.T, clientSet clientset.Interface, job *batchv1.Job, phase v1.PodPhase) {
	podClient := clientSet.CoreV1().Pods(job.Namespace)
	if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 30*time.Second, true, func(ctx context.Context) (bool, error) {
		t.Log(labels.FormatLabels(job.Labels))
		if podList, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: labels.FormatLabels(job.Labels)}); err != nil {
			return false, err
		} else {
			if len(podList.Items) == 0 {
				t.Logf("Job controller has not created Pod for job %s yet", job.Name)
				return false, nil
			}
			for _, pod := range podList.Items {
				if pod.Status.Phase != phase {
					t.Logf("Pod %v not in %s phase yet, is %v instead", pod.Name, phase, pod.Status.Phase)
					return false, nil
				}
			}
			t.Logf("Job %v Pod is in %s phase now", job.Name, phase)
			return true, nil
		}
	}); err != nil {
		t.Fatalf("Error waiting for pods in %s phase: %v", phase, err)
	}
}

View File
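The hunks below refactor the shared runPodLifetimePlugin helper: its seven positional filter and evictor parameters collapse into three explicit arguments (plugin args, default-evictor args, eviction options), and priority-threshold resolution moves out to the call sites. A rough sketch of the new call shape, assuming the e2e package's own unexported helpers (initializeClient, runPodLifetimePlugin) and the usual descheduler import paths; this is illustration, not public API:

package e2e

import (
	"context"
	"testing"

	utilptr "k8s.io/utils/ptr"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/podlifetime"
)

// exampleRunPlugin passes each knob explicitly: lifetime filters via
// PodLifeTimeArgs, eviction gates via DefaultEvictorArgs, and per-namespace
// limits via evictions.Options (nil falls back to defaults inside the helper).
func exampleRunPlugin(t *testing.T) {
	ctx := context.Background()
	clientSet, _, nodeLister, _ := initializeClient(ctx, t)

	var maxPodsToEvictPerNamespace uint = 1
	runPodLifetimePlugin(ctx, t, clientSet, nodeLister,
		&podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)},
		defaultevictor.DefaultEvictorArgs{EvictLocalStoragePods: true},
		evictions.NewOptions().WithMaxPodsToEvictPerNamespace(&maxPodsToEvictPerNamespace),
	)
}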

@@ -421,14 +421,12 @@ func runPodLifetimePlugin(
 	t *testing.T,
 	clientset clientset.Interface,
 	nodeLister listersv1.NodeLister,
-	namespaces *deschedulerapi.Namespaces,
-	priorityClass string,
-	priority *int32,
-	evictCritical bool,
-	evictDaemonSet bool,
-	maxPodsToEvictPerNamespace *uint,
-	labelSelector *metav1.LabelSelector,
+	args *podlifetime.PodLifeTimeArgs,
+	evictorArgs defaultevictor.DefaultEvictorArgs,
+	evictionOpts *evictions.Options,
 ) {
+	t.Helper()
 	evictionPolicyGroupVersion, err := eutils.SupportEviction(clientset)
 	if err != nil || len(evictionPolicyGroupVersion) == 0 {
 		t.Fatalf("%v", err)
@@ -439,42 +437,18 @@ func runPodLifetimePlugin(
 		t.Fatalf("%v", err)
 	}
-	var thresholdPriority int32
-	if priority != nil {
-		thresholdPriority = *priority
+	if evictionOpts == nil {
+		evictionOpts = evictions.NewOptions().WithPolicyGroupVersion(evictionPolicyGroupVersion)
 	} else {
-		thresholdPriority, err = utils.GetPriorityFromPriorityClass(ctx, clientset, priorityClass)
-		if err != nil {
-			t.Fatalf("Failed to get threshold priority from plugin arg params")
-		}
+		evictionOpts = evictionOpts.WithPolicyGroupVersion(evictionPolicyGroupVersion)
 	}
-	handle, _, err := frameworktesting.InitFrameworkHandle(
-		ctx,
-		clientset,
-		evictions.NewOptions().
-			WithPolicyGroupVersion(evictionPolicyGroupVersion).
-			WithMaxPodsToEvictPerNamespace(maxPodsToEvictPerNamespace),
-		defaultevictor.DefaultEvictorArgs{
-			EvictSystemCriticalPods: evictCritical,
-			EvictDaemonSetPods:      evictDaemonSet,
-			PriorityThreshold: &deschedulerapi.PriorityThreshold{
-				Value: &thresholdPriority,
-			},
-		},
-		nil,
-	)
+	handle, _, err := frameworktesting.InitFrameworkHandle(ctx, clientset, evictionOpts, evictorArgs, nil)
 	if err != nil {
 		t.Fatalf("Unable to initialize a framework handle: %v", err)
 	}
-	maxPodLifeTimeSeconds := uint(1)
-	plugin, err := podlifetime.New(ctx, &podlifetime.PodLifeTimeArgs{
-		MaxPodLifeTimeSeconds: &maxPodLifeTimeSeconds,
-		LabelSelector:         labelSelector,
-		Namespaces:            namespaces,
-	}, handle)
+	plugin, err := podlifetime.New(ctx, args, handle)
 	if err != nil {
 		t.Fatalf("Unable to initialize the plugin: %v", err)
 	}
@@ -707,9 +681,14 @@ func TestNamespaceConstraintsInclude(t *testing.T) {
 	t.Logf("Existing pods: %v", initialPodNames)
 	t.Logf("run the plugin to delete pods from %v namespace", rc.Namespace)
-	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, &deschedulerapi.Namespaces{
-		Include: []string{rc.Namespace},
-	}, "", nil, false, false, nil, nil)
+	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer,
+		&podlifetime.PodLifeTimeArgs{
+			MaxPodLifeTimeSeconds: utilptr.To[uint](1),
+			Namespaces:            &deschedulerapi.Namespaces{Include: []string{rc.Namespace}},
+		},
+		defaultevictor.DefaultEvictorArgs{},
+		nil,
+	)
 	// All pods are supposed to be deleted, wait until all the old pods are deleted
 	if err := wait.PollUntilContextTimeout(ctx, time.Second, 20*time.Second, true, func(ctx context.Context) (bool, error) {
@@ -777,9 +756,14 @@ func TestNamespaceConstraintsExclude(t *testing.T) {
 	t.Logf("Existing pods: %v", initialPodNames)
 	t.Logf("run the plugin to delete pods from namespaces except the %v namespace", rc.Namespace)
-	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, &deschedulerapi.Namespaces{
-		Exclude: []string{rc.Namespace},
-	}, "", nil, false, false, nil, nil)
+	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer,
+		&podlifetime.PodLifeTimeArgs{
+			MaxPodLifeTimeSeconds: utilptr.To[uint](1),
+			Namespaces:            &deschedulerapi.Namespaces{Exclude: []string{rc.Namespace}},
+		},
+		defaultevictor.DefaultEvictorArgs{},
+		nil,
+	)
 	t.Logf("Waiting 10s")
 	time.Sleep(10 * time.Second)
@@ -890,11 +874,24 @@ func testEvictSystemCritical(t *testing.T, isPriorityClass bool) {
 	sort.Strings(initialPodNames)
 	t.Logf("Existing pods: %v", initialPodNames)
+	var thresholdPriority int32
 	if isPriorityClass {
-		runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, true, false, nil, nil)
+		resolved, err := utils.GetPriorityFromPriorityClass(ctx, clientSet, highPriorityClass.Name)
+		if err != nil {
+			t.Fatalf("Failed to get priority from priority class: %v", err)
+		}
+		thresholdPriority = resolved
 	} else {
-		runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, true, false, nil, nil)
+		thresholdPriority = highPriority
 	}
+	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer,
+		&podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)},
+		defaultevictor.DefaultEvictorArgs{
+			EvictSystemCriticalPods: true,
+			PriorityThreshold:       &deschedulerapi.PriorityThreshold{Value: &thresholdPriority},
+		},
+		nil,
+	)
 	// All pods are supposed to be deleted, wait until all pods in the test namespace are terminating
 	t.Logf("All pods in the test namespace, no matter their priority (including system-node-critical and system-cluster-critical), will be deleted")
@@ -961,7 +958,11 @@ func testEvictDaemonSetPod(t *testing.T, isDaemonSet bool) {
 	sort.Strings(initialPodNames)
 	t.Logf("Existing pods: %v", initialPodNames)
-	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", nil, false, isDaemonSet, nil, nil)
+	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer,
+		&podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)},
+		defaultevictor.DefaultEvictorArgs{EvictDaemonSetPods: isDaemonSet},
+		nil,
+	)
 	// All pods are supposed to be deleted, wait until all pods in the test namespace are terminating
 	t.Logf("All daemonset pods in the test namespace, will be deleted")
@@ -1074,13 +1075,25 @@ func testPriority(t *testing.T, isPriorityClass bool) {
 	sort.Strings(expectEvictPodNames)
 	t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames)
+	var thresholdPriority int32
 	if isPriorityClass {
 		t.Logf("run the plugin to delete pods with priority lower than priority class %s", highPriorityClass.Name)
-		runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, false, false, nil, nil)
+		resolved, err := utils.GetPriorityFromPriorityClass(ctx, clientSet, highPriorityClass.Name)
+		if err != nil {
+			t.Fatalf("Failed to get priority from priority class: %v", err)
+		}
+		thresholdPriority = resolved
 	} else {
 		t.Logf("run the plugin to delete pods with priority lower than %d", highPriority)
-		runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, false, false, nil, nil)
+		thresholdPriority = highPriority
 	}
+	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer,
+		&podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)},
+		defaultevictor.DefaultEvictorArgs{
+			PriorityThreshold: &deschedulerapi.PriorityThreshold{Value: &thresholdPriority},
+		},
+		nil,
+	)
 	t.Logf("Waiting 10s")
 	time.Sleep(10 * time.Second)
@@ -1182,7 +1195,14 @@ func TestPodLabelSelector(t *testing.T) {
 	t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames)
 	t.Logf("run the plugin to delete pods with label test:podlifetime-evict")
-	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", nil, false, false, nil, &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}})
+	runPodLifetimePlugin(ctx, t, clientSet, nodeInformer,
+		&podlifetime.PodLifeTimeArgs{
+			MaxPodLifeTimeSeconds: utilptr.To[uint](1),
+			LabelSelector:         &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}},
+		},
+		defaultevictor.DefaultEvictorArgs{},
+		nil,
+	)
 	t.Logf("Waiting 10s")
 	time.Sleep(10 * time.Second)
@@ -1281,7 +1301,11 @@ func TestEvictAnnotation(t *testing.T) {
 	t.Logf("Existing pods: %v", initialPodNames)
 	t.Log("Running PodLifetime plugin")
-	runPodLifetimePlugin(ctx, t, clientSet, nodeLister, nil, "", nil, false, false, nil, nil)
+	runPodLifetimePlugin(ctx, t, clientSet, nodeLister,
+		&podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)},
+		defaultevictor.DefaultEvictorArgs{},
+		nil,
+	)
 	if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, func(ctx context.Context) (bool, error) {
 		podList, err = clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()})
@@ -1346,7 +1370,11 @@ func TestPodLifeTimeOldestEvicted(t *testing.T) {
 	t.Log("Running PodLifetime plugin with maxPodsToEvictPerNamespace=1 to ensure only the oldest pod is evicted")
 	var maxPodsToEvictPerNamespace uint = 1
-	runPodLifetimePlugin(ctx, t, clientSet, nodeLister, nil, "", nil, false, false, &maxPodsToEvictPerNamespace, nil)
+	runPodLifetimePlugin(ctx, t, clientSet, nodeLister,
+		&podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)},
+		defaultevictor.DefaultEvictorArgs{},
+		evictions.NewOptions().WithMaxPodsToEvictPerNamespace(&maxPodsToEvictPerNamespace),
+	)
 	t.Log("Finished PodLifetime plugin")
 	t.Logf("Wait for terminating pod to disappear")