diff --git a/README.md b/README.md index 1a29901d4..f66e03c71 100644 --- a/README.md +++ b/README.md @@ -297,7 +297,7 @@ Balance Plugins: These plugins process all pods, or groups of pods, and determin | [RemovePodsViolatingNodeTaints](#removepodsviolatingnodetaints) |Deschedule|Evicts pods violating node taints| | [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint) |Balance|Evicts pods violating TopologySpreadConstraints| | [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts) |Deschedule|Evicts pods having too many restarts| -| [PodLifeTime](#podlifetime) |Deschedule|Evicts pods that have exceeded a specified age limit| +| [PodLifeTime](#podlifetime) |Deschedule|Evicts pods based on age, status transitions, conditions, states, exit codes, and owner kinds| | [RemoveFailedPods](#removefailedpods) |Deschedule|Evicts pods with certain failed reasons and exit codes| @@ -785,30 +785,52 @@ profiles: ### PodLifeTime -This strategy evicts pods that are older than `maxPodLifeTimeSeconds`. +This strategy evicts pods based on their age, status transitions, conditions, states, exit codes, and owner kinds. It supports both simple age-based eviction and fine-grained cleanup of pods matching specific transition criteria. -You can also specify `states` parameter to **only** evict pods matching the following conditions: -> The primary purpose for using states like `Succeeded` and `Failed` is releasing resources so that new pods can be rescheduled. -> I.e., the main motivation is not for cleaning pods, rather to release resources. 
- - [Pod Phase](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase) status of: `Running`, `Pending`, `Succeeded`, `Failed`, `Unknown` - - [Pod Reason](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-conditions) reasons of: `NodeAffinity`, `NodeLost`, `Shutdown`, `UnexpectedAdmissionError` - - [Container State Waiting](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-waiting) condition of: `PodInitializing`, `ContainerCreating`, `ImagePullBackOff`, `CrashLoopBackOff`, `CreateContainerConfigError`, `ErrImagePull`, `ImagePullBackOff`, `CreateContainerError`, `InvalidImageName` +All non-empty filter categories are ANDed (a pod must satisfy every specified filter). Within each category, items are ORed (matching any one entry satisfies that filter). For `conditions`, the category is satisfied if **any** of the listed condition filters matches — each filter is evaluated independently against the pod's `status.conditions[]` entries — and the pod must still satisfy every other configured category. Pods are processed from oldest to newest based on their creation time. -If a value for `states` or `podStatusPhases` is not specified, -Pods in any state (even `Running`) are considered for eviction. +See the [plugin README](pkg/framework/plugins/podlifetime/README.md) for detailed documentation and advanced use cases. 
**Parameters:** -| Name | Type | Notes | -|--------------------------------|---------------------------------------------------|--------------------------| -| `maxPodLifeTimeSeconds` | int | | -| `states` | list(string) | Only supported in v0.25+ | -| `includingInitContainers` | bool | Only supported in v0.31+ | -| `includingEphemeralContainers` | bool | Only supported in v0.31+ | -| `namespaces` | (see [namespace filtering](#namespace-filtering)) | | -| `labelSelector` | (see [label filtering](#label-filtering)) | | +| Name | Type | Notes | +|------|------|-------| +| `conditions` | list(object) | Each with optional `type`, `status`, `reason`, `minTimeSinceLastTransitionSeconds` fields | +| `exitCodes` | list(int32) | Container terminated exit codes | +| `includingEphemeralContainers` | bool | Extend state filtering to ephemeral containers | +| `includingInitContainers` | bool | Extend state/exitCode filtering to init containers | +| `labelSelector` | (see [label filtering](#label-filtering)) | | +| `maxPodLifeTimeSeconds` | uint | Pods older than this many seconds are evicted | +| `namespaces` | (see [namespace filtering](#namespace-filtering)) | | +| `ownerKinds` | object | `include` or `exclude` list of owner reference kinds | +| `states` | list(string) | Pod phases, pod status reasons, container waiting/terminated reasons | -**Example:** +**Example (transition-based eviction):** + +```yaml +apiVersion: "descheduler/v1alpha2" +kind: "DeschedulerPolicy" +profiles: + - name: ProfileName + pluginConfig: + - name: "PodLifeTime" + args: + states: + - "Succeeded" + conditions: + - reason: "PodCompleted" + status: "True" + minTimeSinceLastTransitionSeconds: 14400 + ownerKinds: + exclude: + - "Job" + plugins: + deschedule: + enabled: + - "PodLifeTime" +``` + +**Example (age-based eviction):** ```yaml apiVersion: "descheduler/v1alpha2" @@ -829,6 +851,7 @@ profiles: ``` ### RemoveFailedPods + This strategy evicts pods that are in failed status phase. 
You can provide optional parameters to filter by failed pods' and containters' `reasons`. and `exitCodes`. `exitCodes` apply to failed pods' containers with `terminated` state only. `reasons` and `exitCodes` can be expanded to include those of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`. You can specify an optional parameter `minPodLifetimeSeconds` to evict pods that are older than specified seconds. diff --git a/examples/pod-life-time-transition.yml b/examples/pod-life-time-transition.yml new file mode 100644 index 000000000..1403341f4 --- /dev/null +++ b/examples/pod-life-time-transition.yml @@ -0,0 +1,20 @@ +apiVersion: "descheduler/v1alpha2" +kind: "DeschedulerPolicy" +profiles: + - name: ProfileName + pluginConfig: + - name: "PodLifeTime" + args: + states: + - "Succeeded" + conditions: + - reason: "PodCompleted" + status: "True" + minTimeSinceLastTransitionSeconds: 14400 # 4 hours + namespaces: + include: + - "default" + plugins: + deschedule: + enabled: + - "PodLifeTime" diff --git a/pkg/framework/plugins/podlifetime/README.md b/pkg/framework/plugins/podlifetime/README.md index d53954e40..2c75ecc03 100644 --- a/pkg/framework/plugins/podlifetime/README.md +++ b/pkg/framework/plugins/podlifetime/README.md @@ -2,136 +2,116 @@ ## What It Does -The PodLifeTime plugin evicts pods that have been running for too long. You can configure a maximum age threshold, and the plugin evicts pods older than that threshold. The oldest pods are evicted first. +The PodLifeTime plugin evicts pods based on their age, status phase, condition transitions, container states, exit codes, and owner kinds. It can be used for simple age-based eviction or for fine-grained cleanup of pods matching specific transition criteria. ## How It Works -The plugin examines all pods across your nodes and selects those that exceed the configured age threshold. 
You can further narrow down which pods are considered by specifying: +The plugin builds a filter chain from the configured criteria. All non-empty filter categories are ANDed together (a pod must satisfy every specified filter to be evicted). Within each filter category, items are ORed (matching any one entry satisfies that filter). -- Which namespaces to include or exclude -- Which labels pods must have -- Which states pods must be in (e.g., Running, Pending, CrashLoopBackOff) - -Once pods are selected, they are sorted by age (oldest first) and evicted in that order. Eviction stops when limits are reached (per-node limits, total limits, or Pod Disruption Budget constraints). +Once pods are selected, they are sorted by their creation time with the oldest first, then evicted in order. Eviction stops when limits are reached (per-node limits, total limits, or Pod Disruption Budget constraints). ## Use Cases -- **Resource Leakage Mitigation**: Restart long-running pods that may have accumulated memory leaks, stale cache, or resource leaks +- **Evict completed/succeeded pods that have been idle too long**: ```yaml args: - maxPodLifeTimeSeconds: 604800 # 7 days - states: [Running] + states: [Succeeded] + conditions: + - reason: PodCompleted + status: "True" + minTimeSinceLastTransitionSeconds: 14400 # 4 hours ``` -- **Ephemeral Workload Cleanup**: Remove long-running batch jobs, test pods, or temporary workloads that have exceeded their expected lifetime +- **Evict failed pods**, excluding Job-owned pods: ```yaml args: - maxPodLifeTimeSeconds: 7200 # 2 hours - states: [Succeeded, Failed] - ``` - -- **Node Hygiene**: Remove forgotten or stuck pods that are consuming resources but not making progress - ```yaml - args: - maxPodLifeTimeSeconds: 3600 # 1 hour - states: [CrashLoopBackOff, ImagePullBackOff, ErrImagePull] + states: [Failed] + exitCodes: [1] + ownerKinds: + exclude: [Job] + maxPodLifeTimeSeconds: 3600 includingInitContainers: true ``` -- **Config/Secret Update 
Pickup**: Force pod restart to pick up updated ConfigMaps, Secrets, or environment variables - ```yaml - args: - maxPodLifeTimeSeconds: 86400 # 1 day - states: [Running] - labelSelector: - matchLabels: - config-refresh: enabled - ``` - -- **Security Rotation**: Periodically refresh pods to pick up new security tokens, certificates, or patched container images - ```yaml - args: - maxPodLifeTimeSeconds: 259200 # 3 days - states: [Running] - namespaces: - exclude: [kube-system] - ``` - -- **Dev/Test Environment Cleanup**: Automatically clean up old pods in development or staging namespaces - ```yaml - args: - maxPodLifeTimeSeconds: 86400 # 1 day - namespaces: - include: [dev, staging, test] - ``` - -- **Cluster Health Freshness**: Ensure pods periodically restart to maintain cluster health and verify workloads can recover from restarts +- **Resource Leakage Mitigation**: Restart long-running pods that may have accumulated memory leaks: ```yaml args: maxPodLifeTimeSeconds: 604800 # 7 days states: [Running] - namespaces: - exclude: [kube-system, production] ``` -- **Rebalancing Assistance**: Work alongside other descheduler strategies by removing old pods to allow better pod distribution +- **Clean up pods stuck in CrashLoopBackOff or ImagePullBackOff**: ```yaml args: - maxPodLifeTimeSeconds: 1209600 # 14 days - states: [Running] + states: [CrashLoopBackOff, ImagePullBackOff] ``` -- **Non-Critical Stateful Refresh**: Occasionally reset tolerable stateful workloads that can handle data loss or have external backup mechanisms +- **Evict only pods owned by specific kinds**: ```yaml args: - maxPodLifeTimeSeconds: 2592000 # 30 days - labelSelector: - matchLabels: - stateful-tier: cache + states: [Succeeded, Failed] + ownerKinds: + include: [Job] + maxPodLifeTimeSeconds: 600 ``` ## Configuration | Parameter | Description | Type | Required | Default | |-----------|-------------|------|----------|---------| -| `maxPodLifeTimeSeconds` | Pods older than this many seconds are evicted | `uint` | Yes | 
- | -| `namespaces` | Limit eviction to specific namespaces (or exclude specific namespaces) | `Namespaces` | No | `nil` | +| `maxPodLifeTimeSeconds` | Pods older than this many seconds are evicted | `uint` | No* | `nil` | +| `states` | Filter pods by phase, pod status reason, or container waiting/terminated reason. A pod matches if any of its state values appear in this list | `[]string` | No | `nil` | +| `conditions` | Only evict pods with matching status conditions (see PodConditionFilter) | `[]PodConditionFilter` | No | `nil` | +| `exitCodes` | Only evict pods with matching container terminated exit codes | `[]int32` | No | `nil` | +| `ownerKinds` | Include or exclude pods by owner reference kind | `OwnerKinds` | No | `nil` | +| `namespaces` | Limit eviction to specific namespaces (include or exclude) | `Namespaces` | No | `nil` | | `labelSelector` | Only evict pods matching these labels | `metav1.LabelSelector` | No | `nil` | -| `states` | Only evict pods in specific states (e.g., Running, CrashLoopBackOff) | `[]string` | No | `nil` | -| `includingInitContainers` | When checking states, also check init container states | `bool` | No | `false` | -| `includingEphemeralContainers` | When checking states, also check ephemeral container states | `bool` | No | `false` | +| `includingInitContainers` | Extend state/exitCode filtering to init containers | `bool` | No | `false` | +| `includingEphemeralContainers` | Extend state filtering to ephemeral containers | `bool` | No | `false` | -### Discovering states +*At least one filtering criterion must be specified (`maxPodLifeTimeSeconds`, `states`, `conditions`, or `exitCodes`). -Each pod is checked for the following locations to discover its relevant state: +### States -1. 
**Pod Phase** - The overall pod lifecycle phase: - - `Running` - Pod is running on a node - - `Pending` - Pod has been accepted but containers are not yet running - - `Succeeded` - All containers terminated successfully - - `Failed` - All containers terminated, at least one failed - - `Unknown` - Pod state cannot be determined +The `states` field matches pods using an OR across these categories: -2. **Pod Status Reason** - Why the pod is in its current state: - - `NodeAffinity` - Pod cannot be scheduled due to node affinity rules - - `NodeLost` - Node hosting the pod is lost - - `Shutdown` - Pod terminated due to node shutdown - - `UnexpectedAdmissionError` - Pod admission failed unexpectedly +| Category | Examples | +|----------|----------| +| Pod phase | `Running`, `Pending`, `Succeeded`, `Failed`, `Unknown` | +| Pod status reason | `NodeAffinity`, `NodeLost`, `Shutdown`, `UnexpectedAdmissionError` | +| Container waiting reason | `CrashLoopBackOff`, `ImagePullBackOff`, `ErrImagePull`, `CreateContainerConfigError`, `CreateContainerError`, `InvalidImageName`, `PodInitializing`, `ContainerCreating` | +| Container terminated reason | `OOMKilled`, `Error`, `Completed`, `DeadlineExceeded`, `Evicted`, `ContainerCannotRun`, `StartError` | -3. **Container Waiting Reason** - Why containers are waiting to start: - - `PodInitializing` - Pod is still initializing - - `ContainerCreating` - Container is being created - - `ImagePullBackOff` - Image pull is failing and backing off - - `CrashLoopBackOff` - Container is crashing repeatedly - - `CreateContainerConfigError` - Container configuration is invalid - - `ErrImagePull` - Image cannot be pulled - - `CreateContainerError` - Container creation failed - - `InvalidImageName` - Image name is invalid +When `includingInitContainers` is true, init container states are also checked. When `includingEphemeralContainers` is true, ephemeral container states are also checked. -By default, only regular containers are checked. 
Enable `includingInitContainers` or `includingEphemeralContainers` to also check those container types. +### PodConditionFilter + +Each condition filter matches against `pod.status.conditions[]` entries. Within a single filter, all specified field-level checks must match (AND). Unset fields are not checked. Across the list, condition filters are ORed — a pod satisfies the `conditions` criterion if **any** of the listed condition filters matches; all other configured criteria must still be satisfied for the pod to be evicted. + +| Field | Description | +|-------|-------------| +| `type` | Condition type (e.g., `Ready`, `Initialized`, `ContainersReady`) | +| `status` | Condition status (`True`, `False`, `Unknown`) | +| `reason` | Condition reason (e.g., `PodCompleted`) | +| `minTimeSinceLastTransitionSeconds` | Require the matching condition's `lastTransitionTime` to be at least this many seconds in the past | + +At least one of these fields must be set per filter entry. + +When `minTimeSinceLastTransitionSeconds` is set on a filter, a pod's condition must both match the type/status/reason fields AND have transitioned long enough ago. If the condition has no `lastTransitionTime`, it does not match. + +### OwnerKinds + +| Field | Description | +|-------|-------------| +| `include` | Only evict pods owned by these kinds | +| `exclude` | Do not evict pods owned by these kinds | + +At most one of `include`/`exclude` may be set. ## Example +### Age-based eviction with state filter + ```yaml apiVersion: descheduler/v1alpha2 kind: DeschedulerPolicy @@ -145,11 +125,36 @@ profiles: - name: PodLifeTime args: maxPodLifeTimeSeconds: 86400 # 1 day + states: + - Running namespaces: include: - default - states: - - Running ``` -This configuration evicts Running pods in the `default` namespace that are older than 1 day. 
+### Transition-based eviction for completed pods + +```yaml +apiVersion: descheduler/v1alpha2 +kind: DeschedulerPolicy +profiles: + - name: default + plugins: + deschedule: + enabled: + - name: PodLifeTime + pluginConfig: + - name: PodLifeTime + args: + states: + - Succeeded + conditions: + - reason: PodCompleted + status: "True" + minTimeSinceLastTransitionSeconds: 14400 + namespaces: + include: + - default +``` + +This configuration evicts Succeeded pods in the `default` namespace that have a `PodCompleted` condition with status `True` and whose last matching transition happened more than 4 hours ago. diff --git a/pkg/framework/plugins/podlifetime/pod_lifetime.go b/pkg/framework/plugins/podlifetime/pod_lifetime.go index e6dfcebde..793c04932 100644 --- a/pkg/framework/plugins/podlifetime/pod_lifetime.go +++ b/pkg/framework/plugins/podlifetime/pod_lifetime.go @@ -26,17 +26,16 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" - frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types" ) const PluginName = "PodLifeTime" var _ frameworktypes.DeschedulePlugin = &PodLifeTime{} -// PodLifeTime evicts pods on the node that violate the max pod lifetime threshold +// PodLifeTime evicts pods matching configurable lifetime and status transition criteria. type PodLifeTime struct { logger klog.Logger handle frameworktypes.Handle @@ -44,12 +43,13 @@ type PodLifeTime struct { podFilter podutil.FilterFunc } -// New builds plugin from its arguments while passing a handle +// New builds plugin from its arguments while passing a handle. 
func New(ctx context.Context, args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) { podLifeTimeArgs, ok := args.(*PodLifeTimeArgs) if !ok { return nil, fmt.Errorf("want args to be of type PodLifeTimeArgs, got %T", args) } + logger := klog.FromContext(ctx).WithValues("plugin", PluginName) var includedNamespaces, excludedNamespaces sets.Set[string] @@ -69,53 +69,80 @@ func New(ctx context.Context, args runtime.Object, handle frameworktypes.Handle) return nil, fmt.Errorf("error initializing pod filter function: %v", err) } - podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { - podAgeSeconds := int(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()) - return podAgeSeconds > int(*podLifeTimeArgs.MaxPodLifeTimeSeconds) - }) + if podLifeTimeArgs.MaxPodLifeTimeSeconds != nil { + podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { + podAge := metav1.Now().Sub(pod.GetCreationTimestamp().Local()) + if podAge < 0 { + return false + } + return uint(podAge.Seconds()) > *podLifeTimeArgs.MaxPodLifeTimeSeconds + }) + } if len(podLifeTimeArgs.States) > 0 { states := sets.New(podLifeTimeArgs.States...) 
+ includeInit := podLifeTimeArgs.IncludingInitContainers + includeEphemeral := podLifeTimeArgs.IncludingEphemeralContainers podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { - // Pod Status Phase if states.Has(string(pod.Status.Phase)) { return true } - - // Pod Status Reason if states.Has(pod.Status.Reason) { return true } - - // Init Container Status Reason - if podLifeTimeArgs.IncludingInitContainers { - for _, containerStatus := range pod.Status.InitContainerStatuses { - if containerStatus.State.Waiting != nil && states.Has(containerStatus.State.Waiting.Reason) { - return true - } - } + if podutil.HasMatchingContainerWaitingState(pod.Status.ContainerStatuses, states) || + podutil.HasMatchingContainerTerminatedState(pod.Status.ContainerStatuses, states) { + return true } - - // Ephemeral Container Status Reason - if podLifeTimeArgs.IncludingEphemeralContainers { - for _, containerStatus := range pod.Status.EphemeralContainerStatuses { - if containerStatus.State.Waiting != nil && states.Has(containerStatus.State.Waiting.Reason) { - return true - } - } + if includeInit && (podutil.HasMatchingContainerWaitingState(pod.Status.InitContainerStatuses, states) || + podutil.HasMatchingContainerTerminatedState(pod.Status.InitContainerStatuses, states)) { + return true } - - // Container Status Reason - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.State.Waiting != nil && states.Has(containerStatus.State.Waiting.Reason) { - return true - } + if includeEphemeral && (podutil.HasMatchingContainerWaitingState(pod.Status.EphemeralContainerStatuses, states) || + podutil.HasMatchingContainerTerminatedState(pod.Status.EphemeralContainerStatuses, states)) { + return true } - return false }) } + if podLifeTimeArgs.OwnerKinds != nil { + if len(podLifeTimeArgs.OwnerKinds.Include) > 0 { + includeKinds := sets.New(podLifeTimeArgs.OwnerKinds.Include...) 
+ podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { + for _, owner := range podutil.OwnerRef(pod) { + if includeKinds.Has(owner.Kind) { + return true + } + } + return false + }) + } else if len(podLifeTimeArgs.OwnerKinds.Exclude) > 0 { + excludeKinds := sets.New(podLifeTimeArgs.OwnerKinds.Exclude...) + podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { + for _, owner := range podutil.OwnerRef(pod) { + if excludeKinds.Has(owner.Kind) { + return false + } + } + return true + }) + } + } + + if len(podLifeTimeArgs.Conditions) > 0 { + podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { + return matchesAnyPodConditionFilter(pod, podLifeTimeArgs.Conditions) + }) + } + + if len(podLifeTimeArgs.ExitCodes) > 0 { + exitCodesSet := sets.New(podLifeTimeArgs.ExitCodes...) + podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { + return matchesAnyExitCode(pod, exitCodesSet, podLifeTimeArgs.IncludingInitContainers) + }) + } + return &PodLifeTime{ logger: logger, handle: handle, @@ -132,7 +159,6 @@ func (d *PodLifeTime) Name() string { // Deschedule extension point implementation for the plugin func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status { podsToEvict := make([]*v1.Pod, 0) - nodeMap := make(map[string]*v1.Node, len(nodes)) logger := klog.FromContext(klog.NewContext(ctx, d.logger)).WithValues("ExtensionPoint", frameworktypes.DescheduleExtensionPoint) for _, node := range nodes { logger.V(2).Info("Processing node", "node", klog.KObj(node)) @@ -143,8 +169,6 @@ func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *framewo Err: fmt.Errorf("error listing pods on a node: %v", err), } } - - nodeMap[node.Name] = node podsToEvict = append(podsToEvict, pods...) 
} @@ -170,3 +194,61 @@ loop: return nil } + +// matchesAnyPodConditionFilter returns true if the pod has at least one +// condition satisfying any of the given filters (OR across filters). +func matchesAnyPodConditionFilter(pod *v1.Pod, filters []PodConditionFilter) bool { + for _, f := range filters { + for _, cond := range pod.Status.Conditions { + if !matchesConditionFields(cond, f) { + continue + } + if f.MinTimeSinceLastTransitionSeconds != nil { + if cond.LastTransitionTime.IsZero() { + continue + } + idle := metav1.Now().Sub(cond.LastTransitionTime.Time) + if idle < 0 || uint(idle.Seconds()) < *f.MinTimeSinceLastTransitionSeconds { + continue + } + } + return true + } + } + return false +} + +// matchesConditionFields checks type, status, and reason fields of a single +// condition against a filter. Unset filter fields are not checked. +func matchesConditionFields(cond v1.PodCondition, filter PodConditionFilter) bool { + if filter.Type != "" && string(cond.Type) != filter.Type { + return false + } + if filter.Status != "" && string(cond.Status) != filter.Status { + return false + } + if filter.Reason != "" && cond.Reason != filter.Reason { + return false + } + // validation ensures that at least one of type, status, reason, or minTimeSinceLastTransitionSeconds is set + return true +} + +func matchesAnyExitCode(pod *v1.Pod, exitCodes sets.Set[int32], includeInit bool) bool { + if hasMatchingExitCode(pod.Status.ContainerStatuses, exitCodes) { + return true + } + if includeInit && hasMatchingExitCode(pod.Status.InitContainerStatuses, exitCodes) { + return true + } + return false +} + +func hasMatchingExitCode(statuses []v1.ContainerStatus, exitCodes sets.Set[int32]) bool { + for _, cs := range statuses { + if cs.State.Terminated != nil && exitCodes.Has(cs.State.Terminated.ExitCode) { + return true + } + } + return false +} diff --git a/pkg/framework/plugins/podlifetime/pod_lifetime_test.go b/pkg/framework/plugins/podlifetime/pod_lifetime_test.go index 
318f92541..7a45de677 100644 --- a/pkg/framework/plugins/podlifetime/pod_lifetime_test.go +++ b/pkg/framework/plugins/podlifetime/pod_lifetime_test.go @@ -28,6 +28,7 @@ import ( "k8s.io/client-go/kubernetes/fake" utilptr "k8s.io/utils/ptr" + "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" frameworktesting "sigs.k8s.io/descheduler/pkg/framework/testing" @@ -40,6 +41,8 @@ const nodeName1 = "n1" var ( olderPodCreationTime = metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)) newerPodCreationTime = metav1.NewTime(time.Now()) + oldTransitionTime = metav1.NewTime(time.Now().Add(-2 * time.Hour)) + newTransitionTime = metav1.NewTime(time.Now().Add(-1 * time.Minute)) ) func buildTestNode1() *v1.Node { @@ -73,20 +76,32 @@ func buildTestPodWithRSOwnerRefWithPendingPhaseForNode1(name string, creationTim }) } +func buildPod(name, nodeName string, apply func(*v1.Pod)) *v1.Pod { + pod := test.BuildTestPod(name, 1, 1, nodeName, func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + if apply != nil { + apply(p) + } + }) + return pod +} + type podLifeTimeTestCase struct { description string args *PodLifeTimeArgs pods []*v1.Pod nodes []*v1.Node - expectedEvictedPods []string // if specified, will assert specific pods were evicted + expectedEvictedPods []string expectedEvictedPodCount uint ignorePvcPods bool + nodeFit bool maxPodsToEvictPerNode *uint maxPodsToEvictPerNamespace *uint maxPodsToEvictTotal *uint } func runPodLifeTimeTest(t *testing.T, tc podLifeTimeTestCase) { + t.Helper() ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -106,7 +121,7 @@ func runPodLifeTimeTest(t *testing.T, tc podLifeTimeTestCase) { WithMaxPodsToEvictPerNode(tc.maxPodsToEvictPerNode). WithMaxPodsToEvictPerNamespace(tc.maxPodsToEvictPerNamespace). 
WithMaxPodsToEvictTotal(tc.maxPodsToEvictTotal), - defaultevictor.DefaultEvictorArgs{IgnorePvcPods: tc.ignorePvcPods}, + defaultevictor.DefaultEvictorArgs{IgnorePvcPods: tc.ignorePvcPods, NodeFit: tc.nodeFit}, nil, ) if err != nil { @@ -804,3 +819,549 @@ func TestPodLifeTime_PodPhaseStates(t *testing.T) { }) } } + +// Tests for new fields (Conditions, ExitCodes, OwnerKinds, MinTimeSinceLastTransitionSeconds) +// and extended States behavior (terminated reason matching) + +func TestStatesWithTerminatedReasons(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + testCases := []podLifeTimeTestCase{ + { + description: "evict pod with matching terminated reason via states", + args: &PodLifeTimeArgs{ + States: []string{"NodeAffinity"}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.Status.ContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}}}, + } + }), + }, + expectedEvictedPodCount: 1, + }, + { + description: "evict pod with matching pod status reason via states", + args: &PodLifeTimeArgs{ + States: []string{"Shutdown"}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.Status.Reason = "Shutdown" + }), + }, + expectedEvictedPodCount: 1, + }, + { + description: "states matches terminated reason on init container", + args: &PodLifeTimeArgs{ + States: []string{"CreateContainerConfigError"}, + IncludingInitContainers: true, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.InitContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"}}}, + } + }), + }, + expectedEvictedPodCount: 1, + }, + { + description: "states does not match terminated reason on init container 
without flag", + args: &PodLifeTimeArgs{ + States: []string{"CreateContainerConfigError"}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.InitContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"}}}, + } + }), + }, + expectedEvictedPodCount: 0, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestConditionFiltering(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + testCases := []podLifeTimeTestCase{ + { + description: "evict pod with matching condition reason", + args: &PodLifeTimeArgs{ + Conditions: []PodConditionFilter{ + {Reason: "PodCompleted", Status: "True"}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "PodCompleted", LastTransitionTime: oldTransitionTime}, + } + }), + buildPod("p2", "node1", func(p *v1.Pod) { + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "OtherReason", LastTransitionTime: oldTransitionTime}, + } + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"p1"}, + }, + { + description: "evict pod matching condition type only", + args: &PodLifeTimeArgs{ + Conditions: []PodConditionFilter{ + {Type: string(v1.PodReady)}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodReady, Status: v1.ConditionFalse, LastTransitionTime: oldTransitionTime}, + } + }), + }, + expectedEvictedPodCount: 1, + }, + { + description: "no matching conditions, 0 evictions", + args: &PodLifeTimeArgs{ + Conditions: []PodConditionFilter{ + {Reason: "PodCompleted"}, + }, + }, + 
nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodReady, Status: v1.ConditionTrue, Reason: "SomethingElse"}, + } + }), + }, + expectedEvictedPodCount: 0, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestTransitionTimeFiltering(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + var fiveMinutes uint = 300 + var fourHours uint = 14400 + + testCases := []podLifeTimeTestCase{ + { + description: "evict pod with old transition time", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodSucceeded)}, + Conditions: []PodConditionFilter{ + {Reason: "PodCompleted", Status: "True", MinTimeSinceLastTransitionSeconds: &fiveMinutes}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodSucceeded + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "PodCompleted", LastTransitionTime: oldTransitionTime}, + {Type: v1.PodReady, Status: v1.ConditionFalse, Reason: "PodCompleted", LastTransitionTime: oldTransitionTime}, + } + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"p1"}, + }, + { + description: "do not evict pod with recent transition time", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodSucceeded)}, + Conditions: []PodConditionFilter{ + {MinTimeSinceLastTransitionSeconds: &fourHours}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodSucceeded + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodReady, Status: v1.ConditionFalse, LastTransitionTime: newTransitionTime}, + } + }), + }, + expectedEvictedPodCount: 0, + }, + { + description: "transition time scoped to matching conditions only", + args: &PodLifeTimeArgs{ + Conditions: 
[]PodConditionFilter{ + {Reason: "PodCompleted", MinTimeSinceLastTransitionSeconds: &fiveMinutes}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodInitialized, Reason: "PodCompleted", LastTransitionTime: oldTransitionTime}, + {Type: v1.PodReady, Reason: "OtherReason", LastTransitionTime: newTransitionTime}, + } + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"p1"}, + }, + { + description: "no conditions on pod, transition time filter returns false", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodRunning)}, + Conditions: []PodConditionFilter{ + {MinTimeSinceLastTransitionSeconds: &fiveMinutes}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodRunning + }), + }, + expectedEvictedPodCount: 0, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestOwnerKindsFiltering(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + testCases := []podLifeTimeTestCase{ + { + description: "exclude Job owner kind: Job-owned pod not evicted", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + OwnerKinds: &OwnerKinds{Exclude: []string{"Job"}}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: "job1", APIVersion: "batch/v1"}} + }), + }, + expectedEvictedPodCount: 0, + }, + { + description: "exclude Job owner kind: non-Job pod still evicted", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + OwnerKinds: &OwnerKinds{Exclude: []string{"Job"}}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + 
p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: "job1", APIVersion: "batch/v1"}} + }), + buildPod("p2", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"p2"}, + }, + { + description: "include only Job owner kind", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + OwnerKinds: &OwnerKinds{Include: []string{"Job"}}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: "job1", APIVersion: "batch/v1"}} + }), + buildPod("p2", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"p1"}, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestStatesWithEphemeralContainers(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + testCases := []podLifeTimeTestCase{ + { + description: "states matches terminated reason on ephemeral container", + args: &PodLifeTimeArgs{ + States: []string{"OOMKilled"}, + IncludingEphemeralContainers: true, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.EphemeralContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "OOMKilled"}}}, + } + }), + }, + expectedEvictedPodCount: 1, + }, + { + description: "states does not match ephemeral container without flag", + args: &PodLifeTimeArgs{ + States: []string{"OOMKilled"}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.EphemeralContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "OOMKilled"}}}, + } + }), + 
}, + expectedEvictedPodCount: 0, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestExitCodesFiltering(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + testCases := []podLifeTimeTestCase{ + { + description: "evict pod with matching exit code", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + ExitCodes: []int32{1}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.Status.ContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{ExitCode: 1}}}, + } + }), + }, + expectedEvictedPodCount: 1, + }, + { + description: "exit code not matched, 0 evictions", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + ExitCodes: []int32{2}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("p1", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.Status.ContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{ExitCode: 1}}}, + } + }), + }, + expectedEvictedPodCount: 0, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestCombinedFilters(t *testing.T) { + node := test.BuildTestNode("node1", 2000, 3000, 10, nil) + var fiveMinutes uint = 300 + + testCases := []podLifeTimeTestCase{ + { + description: "user scenario: Succeeded + PodCompleted condition + transition time threshold", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodSucceeded)}, + Conditions: []PodConditionFilter{ + {Reason: "PodCompleted", Status: "True", MinTimeSinceLastTransitionSeconds: &fiveMinutes}, + }, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("stale-completed", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodSucceeded + p.Status.Conditions = 
[]v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "PodCompleted", LastTransitionTime: oldTransitionTime}, + {Type: v1.PodReady, Status: v1.ConditionFalse, Reason: "PodCompleted", LastTransitionTime: oldTransitionTime}, + } + }), + buildPod("recent-completed", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodSucceeded + p.Status.Conditions = []v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "PodCompleted", LastTransitionTime: newTransitionTime}, + } + }), + buildPod("running-pod", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodRunning + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"stale-completed"}, + }, + { + description: "failed pod removal compat: Failed + min age + exclude Job", + args: &PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + MaxPodLifeTimeSeconds: utilptr.To[uint](0), + OwnerKinds: &OwnerKinds{Exclude: []string{"Job"}}, + }, + nodes: []*v1.Node{node}, + pods: []*v1.Pod{ + buildPod("failed-rs", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.Status.ContainerStatuses = []v1.ContainerStatus{ + {State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Error"}}}, + } + }), + buildPod("failed-job", "node1", func(p *v1.Pod) { + p.Status.Phase = v1.PodFailed + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: "job1", APIVersion: "batch/v1"}} + }), + }, + expectedEvictedPodCount: 1, + expectedEvictedPods: []string{"failed-rs"}, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + runPodLifeTimeTest(t, tc) + }) + } +} + +func TestValidation(t *testing.T) { + testCases := []struct { + description string + args *PodLifeTimeArgs + expectError bool + }{ + { + description: "valid: states set", + args: &PodLifeTimeArgs{States: []string{"Running"}}, + expectError: false, + }, + { + description: "valid: conditions set", + args: &PodLifeTimeArgs{Conditions: 
[]PodConditionFilter{{Reason: "PodCompleted"}}}, + expectError: false, + }, + { + description: "valid: maxPodLifeTimeSeconds set", + args: &PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](600)}, + expectError: false, + }, + { + description: "valid: states with maxPodLifeTimeSeconds", + args: &PodLifeTimeArgs{States: []string{"Running"}, MaxPodLifeTimeSeconds: utilptr.To[uint](600)}, + expectError: false, + }, + { + description: "valid: states with conditions", + args: &PodLifeTimeArgs{States: []string{"Succeeded"}, Conditions: []PodConditionFilter{{Reason: "PodCompleted"}}}, + expectError: false, + }, + { + description: "valid: exitCodes only", + args: &PodLifeTimeArgs{ExitCodes: []int32{1}}, + expectError: false, + }, + { + description: "valid: condition with minTimeSinceLastTransitionSeconds", + args: &PodLifeTimeArgs{ + Conditions: []PodConditionFilter{{Reason: "PodCompleted", MinTimeSinceLastTransitionSeconds: utilptr.To[uint](300)}}, + }, + expectError: false, + }, + { + description: "valid: condition with only minTimeSinceLastTransitionSeconds", + args: &PodLifeTimeArgs{ + Conditions: []PodConditionFilter{{MinTimeSinceLastTransitionSeconds: utilptr.To[uint](300)}}, + }, + expectError: false, + }, + { + description: "invalid: no filter criteria", + args: &PodLifeTimeArgs{}, + expectError: true, + }, + { + description: "invalid: both include and exclude namespaces", + args: &PodLifeTimeArgs{ + States: []string{"Running"}, + Namespaces: &api.Namespaces{Include: []string{"a"}, Exclude: []string{"b"}}, + }, + expectError: true, + }, + { + description: "invalid: both include and exclude ownerKinds", + args: &PodLifeTimeArgs{ + States: []string{"Running"}, + OwnerKinds: &OwnerKinds{Include: []string{"Job"}, Exclude: []string{"ReplicaSet"}}, + }, + expectError: true, + }, + { + description: "invalid: bad state name", + args: &PodLifeTimeArgs{States: []string{"NotAState"}}, + expectError: true, + }, + { + description: "invalid: empty condition filter", + args: 
&PodLifeTimeArgs{ + Conditions: []PodConditionFilter{{}}, + }, + expectError: true, + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + err := ValidatePodLifeTimeArgs(tc.args) + if tc.expectError && err == nil { + t.Error("Expected validation error but got nil") + } + if !tc.expectError && err != nil { + t.Errorf("Expected no validation error but got: %v", err) + } + }) + } +} diff --git a/pkg/framework/plugins/podlifetime/types.go b/pkg/framework/plugins/podlifetime/types.go index 138e3620b..183a48be0 100644 --- a/pkg/framework/plugins/podlifetime/types.go +++ b/pkg/framework/plugins/podlifetime/types.go @@ -25,10 +25,50 @@ import ( type PodLifeTimeArgs struct { metav1.TypeMeta `json:",inline"` - Namespaces *api.Namespaces `json:"namespaces,omitempty"` - LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` - MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"` - States []string `json:"states,omitempty"` - IncludingInitContainers bool `json:"includingInitContainers,omitempty"` - IncludingEphemeralContainers bool `json:"includingEphemeralContainers,omitempty"` + Namespaces *api.Namespaces `json:"namespaces,omitempty"` + LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` + OwnerKinds *OwnerKinds `json:"ownerKinds,omitempty"` + + MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"` + + // States filters pods by phase, pod status reason, container waiting reason, + // or container terminated reason. A pod matches if any of its states appear + // in this list. + States []string `json:"states,omitempty"` + + // Conditions filters pods by status.conditions entries. A pod matches if + // any of its conditions satisfy at least one filter. Each filter can + // optionally require a minimum time since the condition last transitioned. + Conditions []PodConditionFilter `json:"conditions,omitempty"` + + // ExitCodes filters by container terminated exit codes. 
+ ExitCodes []int32 `json:"exitCodes,omitempty"` + + IncludingInitContainers bool `json:"includingInitContainers,omitempty"` + IncludingEphemeralContainers bool `json:"includingEphemeralContainers,omitempty"` +} + +// +k8s:deepcopy-gen=true + +// OwnerKinds allows filtering pods by owner reference kinds with include/exclude support. +// At most one of Include/Exclude may be set. +type OwnerKinds struct { + Include []string `json:"include,omitempty"` + Exclude []string `json:"exclude,omitempty"` +} + +// +k8s:deepcopy-gen=true + +// PodConditionFilter matches a pod condition by type, status, and/or reason. +// All specified fields must match (AND). Unset fields are not checked. +// When MinTimeSinceLastTransitionSeconds is set, the condition must also have +// a lastTransitionTime older than this many seconds. +type PodConditionFilter struct { + Type string `json:"type,omitempty"` + Status string `json:"status,omitempty"` + Reason string `json:"reason,omitempty"` + + // MinTimeSinceLastTransitionSeconds requires the matching condition's + // lastTransitionTime to be at least this many seconds in the past. 
+ MinTimeSinceLastTransitionSeconds *uint `json:"minTimeSinceLastTransitionSeconds,omitempty"` } diff --git a/pkg/framework/plugins/podlifetime/validation.go b/pkg/framework/plugins/podlifetime/validation.go index fbe5b1aec..08f26c169 100644 --- a/pkg/framework/plugins/podlifetime/validation.go +++ b/pkg/framework/plugins/podlifetime/validation.go @@ -27,57 +27,78 @@ import ( "k8s.io/apimachinery/pkg/util/sets" ) +var podLifeTimeAllowedStates = sets.New( + // Pod Status Phase + string(v1.PodRunning), + string(v1.PodPending), + string(v1.PodSucceeded), + string(v1.PodFailed), + string(v1.PodUnknown), + + // Pod Status Reasons + "NodeAffinity", + "NodeLost", + "Shutdown", + "UnexpectedAdmissionError", + + // Container State Waiting Reasons + "PodInitializing", + "ContainerCreating", + "ImagePullBackOff", + "CrashLoopBackOff", + "CreateContainerConfigError", + "ErrImagePull", + "CreateContainerError", + "InvalidImageName", + + // Container State Terminated Reasons + "OOMKilled", + "Error", + "Completed", + "DeadlineExceeded", + "Evicted", + "ContainerCannotRun", + "StartError", +) + // ValidatePodLifeTimeArgs validates PodLifeTime arguments func ValidatePodLifeTimeArgs(obj runtime.Object) error { args := obj.(*PodLifeTimeArgs) var allErrs []error - if args.MaxPodLifeTimeSeconds == nil { - allErrs = append(allErrs, fmt.Errorf("MaxPodLifeTimeSeconds not set")) - } - // At most one of include/exclude can be set if args.Namespaces != nil && len(args.Namespaces.Include) > 0 && len(args.Namespaces.Exclude) > 0 { allErrs = append(allErrs, fmt.Errorf("only one of Include/Exclude namespaces can be set")) } + if args.OwnerKinds != nil && len(args.OwnerKinds.Include) > 0 && len(args.OwnerKinds.Exclude) > 0 { + allErrs = append(allErrs, fmt.Errorf("only one of Include/Exclude ownerKinds can be set")) + } + if args.LabelSelector != nil { if _, err := metav1.LabelSelectorAsSelector(args.LabelSelector); err != nil { allErrs = append(allErrs, fmt.Errorf("failed to get label selectors 
from strategy's params: %+v", err)) } } - podLifeTimeAllowedStates := sets.New( - // Pod Status Phase - string(v1.PodRunning), - string(v1.PodPending), - string(v1.PodSucceeded), - string(v1.PodFailed), - string(v1.PodUnknown), - // Pod Status Reasons - "NodeAffinity", - "NodeLost", - "Shutdown", - "UnexpectedAdmissionError", - - // Container Status Reasons - // Container state reasons: https://github.com/kubernetes/kubernetes/blob/release-1.24/pkg/kubelet/kubelet_pods.go#L76-L79 - "PodInitializing", - "ContainerCreating", - - // containerStatuses[*].state.waiting.reason: ImagePullBackOff, etc. - "ImagePullBackOff", - "CrashLoopBackOff", - "CreateContainerConfigError", - "ErrImagePull", - "CreateContainerError", - "InvalidImageName", - ) - - if !podLifeTimeAllowedStates.HasAll(args.States...) { + if len(args.States) > 0 && !podLifeTimeAllowedStates.HasAll(args.States...) { allowed := podLifeTimeAllowedStates.UnsortedList() sort.Strings(allowed) allErrs = append(allErrs, fmt.Errorf("states must be one of %v", allowed)) } + for i, c := range args.Conditions { + if c.Type == "" && c.Status == "" && c.Reason == "" && c.MinTimeSinceLastTransitionSeconds == nil { + allErrs = append(allErrs, fmt.Errorf("conditions[%d]: at least one of type, status, reason, or minTimeSinceLastTransitionSeconds must be set", i)) + } + } + + hasFilter := args.MaxPodLifeTimeSeconds != nil || + len(args.States) > 0 || + len(args.Conditions) > 0 || + len(args.ExitCodes) > 0 + if !hasFilter { + allErrs = append(allErrs, fmt.Errorf("at least one filtering criterion must be specified (maxPodLifeTimeSeconds, states, conditions, or exitCodes)")) + } + return utilerrors.NewAggregate(allErrs) } diff --git a/pkg/framework/plugins/podlifetime/validation_test.go b/pkg/framework/plugins/podlifetime/validation_test.go index 5536432aa..c6583349c 100644 --- a/pkg/framework/plugins/podlifetime/validation_test.go +++ b/pkg/framework/plugins/podlifetime/validation_test.go @@ -55,7 +55,7 @@ func 
TestValidateRemovePodLifeTimeArgs(t *testing.T) { args: &PodLifeTimeArgs{ MaxPodLifeTimeSeconds: nil, }, - errInfo: fmt.Errorf("MaxPodLifeTimeSeconds not set"), + errInfo: fmt.Errorf("at least one filtering criterion must be specified (maxPodLifeTimeSeconds, states, conditions, or exitCodes)"), }, { description: "invalid pod state arg, expects errors", @@ -63,7 +63,7 @@ func TestValidateRemovePodLifeTimeArgs(t *testing.T) { MaxPodLifeTimeSeconds: func(i uint) *uint { return &i }(1), States: []string{string("InvalidState")}, }, - errInfo: fmt.Errorf("states must be one of [ContainerCreating CrashLoopBackOff CreateContainerConfigError CreateContainerError ErrImagePull Failed ImagePullBackOff InvalidImageName NodeAffinity NodeLost Pending PodInitializing Running Shutdown Succeeded UnexpectedAdmissionError Unknown]"), + errInfo: fmt.Errorf("states must be one of [Completed ContainerCannotRun ContainerCreating CrashLoopBackOff CreateContainerConfigError CreateContainerError DeadlineExceeded ErrImagePull Error Evicted Failed ImagePullBackOff InvalidImageName NodeAffinity NodeLost OOMKilled Pending PodInitializing Running Shutdown StartError Succeeded UnexpectedAdmissionError Unknown]"), }, { description: "nil MaxPodLifeTimeSeconds arg and invalid pod state arg, expects errors", @@ -71,7 +71,7 @@ func TestValidateRemovePodLifeTimeArgs(t *testing.T) { MaxPodLifeTimeSeconds: nil, States: []string{string("InvalidState")}, }, - errInfo: fmt.Errorf("[MaxPodLifeTimeSeconds not set, states must be one of [ContainerCreating CrashLoopBackOff CreateContainerConfigError CreateContainerError ErrImagePull Failed ImagePullBackOff InvalidImageName NodeAffinity NodeLost Pending PodInitializing Running Shutdown Succeeded UnexpectedAdmissionError Unknown]]"), + errInfo: fmt.Errorf("states must be one of [Completed ContainerCannotRun ContainerCreating CrashLoopBackOff CreateContainerConfigError CreateContainerError DeadlineExceeded ErrImagePull Error Evicted Failed ImagePullBackOff 
InvalidImageName NodeAffinity NodeLost OOMKilled Pending PodInitializing Running Shutdown StartError Succeeded UnexpectedAdmissionError Unknown]"), }, } diff --git a/pkg/framework/plugins/podlifetime/zz_generated.deepcopy.go b/pkg/framework/plugins/podlifetime/zz_generated.deepcopy.go index 4e1794da9..53bfb3e99 100644 --- a/pkg/framework/plugins/podlifetime/zz_generated.deepcopy.go +++ b/pkg/framework/plugins/podlifetime/zz_generated.deepcopy.go @@ -27,6 +27,53 @@ import ( api "sigs.k8s.io/descheduler/pkg/api" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OwnerKinds) DeepCopyInto(out *OwnerKinds) { + *out = *in + if in.Include != nil { + in, out := &in.Include, &out.Include + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Exclude != nil { + in, out := &in.Exclude, &out.Exclude + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OwnerKinds. +func (in *OwnerKinds) DeepCopy() *OwnerKinds { + if in == nil { + return nil + } + out := new(OwnerKinds) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PodConditionFilter) DeepCopyInto(out *PodConditionFilter) { + *out = *in + if in.MinTimeSinceLastTransitionSeconds != nil { + in, out := &in.MinTimeSinceLastTransitionSeconds, &out.MinTimeSinceLastTransitionSeconds + *out = new(uint) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodConditionFilter. +func (in *PodConditionFilter) DeepCopy() *PodConditionFilter { + if in == nil { + return nil + } + out := new(PodConditionFilter) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PodLifeTimeArgs) DeepCopyInto(out *PodLifeTimeArgs) { *out = *in @@ -41,6 +88,11 @@ func (in *PodLifeTimeArgs) DeepCopyInto(out *PodLifeTimeArgs) { *out = new(v1.LabelSelector) (*in).DeepCopyInto(*out) } + if in.OwnerKinds != nil { + in, out := &in.OwnerKinds, &out.OwnerKinds + *out = new(OwnerKinds) + (*in).DeepCopyInto(*out) + } if in.MaxPodLifeTimeSeconds != nil { in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds *out = new(uint) @@ -51,6 +103,18 @@ func (in *PodLifeTimeArgs) DeepCopyInto(out *PodLifeTimeArgs) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]PodConditionFilter, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.ExitCodes != nil { + in, out := &in.ExitCodes, &out.ExitCodes + *out = make([]int32, len(*in)) + copy(*out, *in) + } return } diff --git a/test/e2e/e2e_podlifetime_test.go b/test/e2e/e2e_podlifetime_test.go new file mode 100644 index 000000000..39dec5357 --- /dev/null +++ b/test/e2e/e2e_podlifetime_test.go @@ -0,0 +1,271 @@ +package e2e + +import ( + "context" + "strings" + "testing" + "time" + + batchv1 "k8s.io/api/batch/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + utilptr "k8s.io/utils/ptr" + + deschedulerapi "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + "sigs.k8s.io/descheduler/pkg/framework/plugins/podlifetime" +) + +func TestPodLifeTime_FailedPods(t *testing.T) { + ctx := context.Background() + + clientSet, _, nodeLister, _ := initializeClient(ctx, t) + + t.Log("Creating testing namespace") + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ReplaceAll(strings.ToLower(t.Name()), "_", "-")}} + if _, err := 
clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + + tests := []struct { + name string + expectedEvictedPodCount int + args *podlifetime.PodLifeTimeArgs + }{ + { + name: "test-transition-failed-pods-default", + expectedEvictedPodCount: 1, + args: &podlifetime.PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + }, + }, + { + name: "test-transition-failed-pods-exclude-job", + expectedEvictedPodCount: 0, + args: &podlifetime.PodLifeTimeArgs{ + States: []string{string(v1.PodFailed)}, + OwnerKinds: &podlifetime.OwnerKinds{Exclude: []string{"Job"}}, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + job := initTransitionTestJob(tc.name, testNamespace.Name) + t.Logf("Creating job %s in %s namespace", job.Name, testNamespace.Name) + jobClient := clientSet.BatchV1().Jobs(testNamespace.Name) + if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating Job %s: %v", tc.name, err) + } + deletePropagationPolicy := metav1.DeletePropagationForeground + defer func() { + jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + waitForPodsToDisappear(ctx, t, clientSet, job.Labels, testNamespace.Name) + }() + waitForTransitionJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) + + preRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...) 
+ + tc.args.Namespaces = &deschedulerapi.Namespaces{ + Include: []string{testNamespace.Name}, + } + runPodLifetimePlugin(ctx, t, clientSet, nodeLister, tc.args, + defaultevictor.DefaultEvictorArgs{EvictLocalStoragePods: true}, + nil, + ) + + var meetsExpectations bool + var actualEvictedPodCount int + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) { + currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...) + actualEvictedPod := preRunNames.Difference(currentRunNames) + actualEvictedPodCount = actualEvictedPod.Len() + t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount) + if actualEvictedPodCount != tc.expectedEvictedPodCount { + t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount) + return false, nil + } + meetsExpectations = true + return true, nil + }); err != nil { + t.Errorf("Error waiting for expected eviction count: %v", err) + } + + if !meetsExpectations { + t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedPodCount, tc.expectedEvictedPodCount) + } else { + t.Logf("Total of %d Pods were evicted for %s", actualEvictedPodCount, tc.name) + } + }) + } +} + +func TestPodLifeTime_SucceededPods(t *testing.T) { + ctx := context.Background() + + clientSet, _, nodeLister, _ := initializeClient(ctx, t) + + t.Log("Creating testing namespace") + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ReplaceAll(strings.ToLower(t.Name()), "_", "-")}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + + tests := []struct { + name 
string + expectedEvictedPodCount int + args *podlifetime.PodLifeTimeArgs + }{ + { + name: "test-transition-succeeded-pods", + expectedEvictedPodCount: 1, + args: &podlifetime.PodLifeTimeArgs{ + States: []string{string(v1.PodSucceeded)}, + }, + }, + { + name: "test-transition-succeeded-condition", + expectedEvictedPodCount: 1, + args: &podlifetime.PodLifeTimeArgs{ + States: []string{string(v1.PodSucceeded)}, + Conditions: []podlifetime.PodConditionFilter{ + {Reason: "PodCompleted", Status: "True"}, + }, + }, + }, + { + name: "test-transition-succeeded-condition-unmatched", + expectedEvictedPodCount: 0, + args: &podlifetime.PodLifeTimeArgs{ + States: []string{string(v1.PodSucceeded)}, + Conditions: []podlifetime.PodConditionFilter{ + {Reason: "ReasonDoesNotMatch"}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + job := initTransitionSucceededJob(tc.name, testNamespace.Name) + t.Logf("Creating job %s in %s namespace", job.Name, testNamespace.Name) + jobClient := clientSet.BatchV1().Jobs(testNamespace.Name) + if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating Job %s: %v", tc.name, err) + } + deletePropagationPolicy := metav1.DeletePropagationForeground + defer func() { + jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + waitForPodsToDisappear(ctx, t, clientSet, job.Labels, testNamespace.Name) + }() + waitForTransitionJobPodPhase(ctx, t, clientSet, job, v1.PodSucceeded) + + preRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...) 
+ + tc.args.Namespaces = &deschedulerapi.Namespaces{ + Include: []string{testNamespace.Name}, + } + runPodLifetimePlugin(ctx, t, clientSet, nodeLister, tc.args, + defaultevictor.DefaultEvictorArgs{EvictLocalStoragePods: true}, + nil, + ) + + var meetsExpectations bool + var actualEvictedPodCount int + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) { + currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...) + actualEvictedPod := preRunNames.Difference(currentRunNames) + actualEvictedPodCount = actualEvictedPod.Len() + t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount) + if actualEvictedPodCount != tc.expectedEvictedPodCount { + t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount) + return false, nil + } + meetsExpectations = true + return true, nil + }); err != nil { + t.Errorf("Error waiting for expected eviction count: %v", err) + } + + if !meetsExpectations { + t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedPodCount, tc.expectedEvictedPodCount) + } else { + t.Logf("Total of %d Pods were evicted for %s", actualEvictedPodCount, tc.name) + } + }) + } +} + +func initTransitionTestJob(name, namespace string) *batchv1.Job { + podSpec := makePodSpec("", nil) + podSpec.Containers[0].Command = []string{"/bin/false"} + podSpec.RestartPolicy = v1.RestartPolicyNever + labelsSet := labels.Set{"test": name, "name": name} + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelsSet, + Name: name, + Namespace: namespace, + }, + Spec: batchv1.JobSpec{ + Template: v1.PodTemplateSpec{ + Spec: podSpec, + ObjectMeta: metav1.ObjectMeta{Labels: labelsSet}, + }, + BackoffLimit: utilptr.To[int32](0), + }, + } +} + +func initTransitionSucceededJob(name, namespace string) 
*batchv1.Job { + podSpec := makePodSpec("", nil) + podSpec.Containers[0].Image = "registry.k8s.io/e2e-test-images/agnhost:2.43" + podSpec.Containers[0].Command = []string{"/bin/sh", "-c", "exit 0"} + podSpec.RestartPolicy = v1.RestartPolicyNever + labelsSet := labels.Set{"test": name, "name": name} + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelsSet, + Name: name, + Namespace: namespace, + }, + Spec: batchv1.JobSpec{ + Template: v1.PodTemplateSpec{ + Spec: podSpec, + ObjectMeta: metav1.ObjectMeta{Labels: labelsSet}, + }, + BackoffLimit: utilptr.To[int32](0), + }, + } +} + +func waitForTransitionJobPodPhase(ctx context.Context, t *testing.T, clientSet clientset.Interface, job *batchv1.Job, phase v1.PodPhase) { + podClient := clientSet.CoreV1().Pods(job.Namespace) + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 30*time.Second, true, func(ctx context.Context) (bool, error) { + t.Log(labels.FormatLabels(job.Labels)) + if podList, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: labels.FormatLabels(job.Labels)}); err != nil { + return false, err + } else { + if len(podList.Items) == 0 { + t.Logf("Job controller has not created Pod for job %s yet", job.Name) + return false, nil + } + for _, pod := range podList.Items { + if pod.Status.Phase != phase { + t.Logf("Pod %v not in %s phase yet, is %v instead", pod.Name, phase, pod.Status.Phase) + return false, nil + } + } + t.Logf("Job %v Pod is in %s phase now", job.Name, phase) + return true, nil + } + }); err != nil { + t.Fatalf("Error waiting for pods in %s phase: %v", phase, err) + } +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 6629b4739..09f0fbdf4 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -421,14 +421,12 @@ func runPodLifetimePlugin( t *testing.T, clientset clientset.Interface, nodeLister listersv1.NodeLister, - namespaces *deschedulerapi.Namespaces, - priorityClass string, - priority *int32, - evictCritical bool, - evictDaemonSet 
bool, - maxPodsToEvictPerNamespace *uint, - labelSelector *metav1.LabelSelector, + args *podlifetime.PodLifeTimeArgs, + evictorArgs defaultevictor.DefaultEvictorArgs, + evictionOpts *evictions.Options, ) { + t.Helper() + evictionPolicyGroupVersion, err := eutils.SupportEviction(clientset) if err != nil || len(evictionPolicyGroupVersion) == 0 { t.Fatalf("%v", err) @@ -439,42 +437,18 @@ func runPodLifetimePlugin( t.Fatalf("%v", err) } - var thresholdPriority int32 - if priority != nil { - thresholdPriority = *priority + if evictionOpts == nil { + evictionOpts = evictions.NewOptions().WithPolicyGroupVersion(evictionPolicyGroupVersion) } else { - thresholdPriority, err = utils.GetPriorityFromPriorityClass(ctx, clientset, priorityClass) - if err != nil { - t.Fatalf("Failed to get threshold priority from plugin arg params") - } + evictionOpts = evictionOpts.WithPolicyGroupVersion(evictionPolicyGroupVersion) } - handle, _, err := frameworktesting.InitFrameworkHandle( - ctx, - clientset, - evictions.NewOptions(). - WithPolicyGroupVersion(evictionPolicyGroupVersion). 
- WithMaxPodsToEvictPerNamespace(maxPodsToEvictPerNamespace), - defaultevictor.DefaultEvictorArgs{ - EvictSystemCriticalPods: evictCritical, - EvictDaemonSetPods: evictDaemonSet, - PriorityThreshold: &deschedulerapi.PriorityThreshold{ - Value: &thresholdPriority, - }, - }, - nil, - ) + handle, _, err := frameworktesting.InitFrameworkHandle(ctx, clientset, evictionOpts, evictorArgs, nil) if err != nil { t.Fatalf("Unable to initialize a framework handle: %v", err) } - maxPodLifeTimeSeconds := uint(1) - - plugin, err := podlifetime.New(ctx, &podlifetime.PodLifeTimeArgs{ - MaxPodLifeTimeSeconds: &maxPodLifeTimeSeconds, - LabelSelector: labelSelector, - Namespaces: namespaces, - }, handle) + plugin, err := podlifetime.New(ctx, args, handle) if err != nil { t.Fatalf("Unable to initialize the plugin: %v", err) } @@ -707,9 +681,14 @@ func TestNamespaceConstraintsInclude(t *testing.T) { t.Logf("Existing pods: %v", initialPodNames) t.Logf("run the plugin to delete pods from %v namespace", rc.Namespace) - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, &deschedulerapi.Namespaces{ - Include: []string{rc.Namespace}, - }, "", nil, false, false, nil, nil) + runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, + &podlifetime.PodLifeTimeArgs{ + MaxPodLifeTimeSeconds: utilptr.To[uint](1), + Namespaces: &deschedulerapi.Namespaces{Include: []string{rc.Namespace}}, + }, + defaultevictor.DefaultEvictorArgs{}, + nil, + ) // All pods are supposed to be deleted, wait until all the old pods are deleted if err := wait.PollUntilContextTimeout(ctx, time.Second, 20*time.Second, true, func(ctx context.Context) (bool, error) { @@ -777,9 +756,14 @@ func TestNamespaceConstraintsExclude(t *testing.T) { t.Logf("Existing pods: %v", initialPodNames) t.Logf("run the plugin to delete pods from namespaces except the %v namespace", rc.Namespace) - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, &deschedulerapi.Namespaces{ - Exclude: []string{rc.Namespace}, - }, "", nil, false, false, nil, nil) 
+ runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, + &podlifetime.PodLifeTimeArgs{ + MaxPodLifeTimeSeconds: utilptr.To[uint](1), + Namespaces: &deschedulerapi.Namespaces{Exclude: []string{rc.Namespace}}, + }, + defaultevictor.DefaultEvictorArgs{}, + nil, + ) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -890,11 +874,24 @@ func testEvictSystemCritical(t *testing.T, isPriorityClass bool) { sort.Strings(initialPodNames) t.Logf("Existing pods: %v", initialPodNames) + var thresholdPriority int32 if isPriorityClass { - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, true, false, nil, nil) + resolved, err := utils.GetPriorityFromPriorityClass(ctx, clientSet, highPriorityClass.Name) + if err != nil { + t.Fatalf("Failed to get priority from priority class: %v", err) + } + thresholdPriority = resolved } else { - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, true, false, nil, nil) + thresholdPriority = highPriority } + runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, + &podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)}, + defaultevictor.DefaultEvictorArgs{ + EvictSystemCriticalPods: true, + PriorityThreshold: &deschedulerapi.PriorityThreshold{Value: &thresholdPriority}, + }, + nil, + ) // All pods are supposed to be deleted, wait until all pods in the test namespace are terminating t.Logf("All pods in the test namespace, no matter their priority (including system-node-critical and system-cluster-critical), will be deleted") @@ -961,7 +958,11 @@ func testEvictDaemonSetPod(t *testing.T, isDaemonSet bool) { sort.Strings(initialPodNames) t.Logf("Existing pods: %v", initialPodNames) - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", nil, false, isDaemonSet, nil, nil) + runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, + &podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)}, + defaultevictor.DefaultEvictorArgs{EvictDaemonSetPods: 
isDaemonSet}, + nil, + ) // All pods are supposed to be deleted, wait until all pods in the test namespace are terminating t.Logf("All daemonset pods in the test namespace, will be deleted") @@ -1074,13 +1075,25 @@ func testPriority(t *testing.T, isPriorityClass bool) { sort.Strings(expectEvictPodNames) t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames) + var thresholdPriority int32 if isPriorityClass { t.Logf("run the plugin to delete pods with priority lower than priority class %s", highPriorityClass.Name) - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, false, false, nil, nil) + resolved, err := utils.GetPriorityFromPriorityClass(ctx, clientSet, highPriorityClass.Name) + if err != nil { + t.Fatalf("Failed to get priority from priority class: %v", err) + } + thresholdPriority = resolved } else { t.Logf("run the plugin to delete pods with priority lower than %d", highPriority) - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, false, false, nil, nil) + thresholdPriority = highPriority } + runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, + &podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)}, + defaultevictor.DefaultEvictorArgs{ + PriorityThreshold: &deschedulerapi.PriorityThreshold{Value: &thresholdPriority}, + }, + nil, + ) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -1182,7 +1195,14 @@ func TestPodLabelSelector(t *testing.T) { t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames) t.Logf("run the plugin to delete pods with label test:podlifetime-evict") - runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, nil, "", nil, false, false, nil, &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}}) + runPodLifetimePlugin(ctx, t, clientSet, nodeInformer, + &podlifetime.PodLifeTimeArgs{ + 
MaxPodLifeTimeSeconds: utilptr.To[uint](1), + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}}, + }, + defaultevictor.DefaultEvictorArgs{}, + nil, + ) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -1281,7 +1301,11 @@ func TestEvictAnnotation(t *testing.T) { t.Logf("Existing pods: %v", initialPodNames) t.Log("Running PodLifetime plugin") - runPodLifetimePlugin(ctx, t, clientSet, nodeLister, nil, "", nil, false, false, nil, nil) + runPodLifetimePlugin(ctx, t, clientSet, nodeLister, + &podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)}, + defaultevictor.DefaultEvictorArgs{}, + nil, + ) if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, func(ctx context.Context) (bool, error) { podList, err = clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()}) @@ -1346,7 +1370,11 @@ func TestPodLifeTimeOldestEvicted(t *testing.T) { t.Log("Running PodLifetime plugin with maxPodsToEvictPerNamespace=1 to ensure only the oldest pod is evicted") var maxPodsToEvictPerNamespace uint = 1 - runPodLifetimePlugin(ctx, t, clientSet, nodeLister, nil, "", nil, false, false, &maxPodsToEvictPerNamespace, nil) + runPodLifetimePlugin(ctx, t, clientSet, nodeLister, + &podlifetime.PodLifeTimeArgs{MaxPodLifeTimeSeconds: utilptr.To[uint](1)}, + defaultevictor.DefaultEvictorArgs{}, + evictions.NewOptions().WithMaxPodsToEvictPerNamespace(&maxPodsToEvictPerNamespace), + ) t.Log("Finished PodLifetime plugin") t.Logf("Wait for terminating pod to disappear")