fix: ensure immediate eviction after grace period expires (#1330)
Some checks failed
Scorecard supply-chain security / Scorecard analysis (push) Failing after 1m3s
Post / images (amd64, addon-manager) (push) Failing after 7m31s
Post / coverage (push) Failing after 9m30s
Post / images (amd64, registration-operator) (push) Failing after 57s
Post / images (amd64, work) (push) Failing after 52s
Post / images (arm64, addon-manager) (push) Failing after 50s
Post / images (arm64, placement) (push) Failing after 52s
Post / images (arm64, registration) (push) Failing after 50s
Post / images (arm64, registration-operator) (push) Failing after 52s
Post / images (arm64, work) (push) Failing after 49s
Post / images (amd64, registration) (push) Failing after 7m6s
Post / images (amd64, placement) (push) Failing after 27m47s
Post / image manifest (addon-manager) (push) Has been cancelled
Post / image manifest (placement) (push) Has been cancelled
Post / image manifest (registration) (push) Has been cancelled
Post / image manifest (registration-operator) (push) Has been cancelled
Post / image manifest (work) (push) Has been cancelled
Post / trigger clusteradm e2e (push) Has been cancelled
Close stale issues and PRs / stale (push) Successful in 3s

Fixed a bug where AppliedManifestWorks were not evicted immediately
after the appliedmanifestwork-eviction-grace-period expired.

Root cause: The controller used an exponential backoff rate limiter
to schedule requeue delays, which caused:
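1. Exponentially increasing requeue delays during the grace period (1min -> 2min -> 4min ..., capped at the grace period)
2. Requeues that could fire well after the grace period had expired, because the backoff delay was unrelated to the time actually remaining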

Solution: Replace the rate limiter with a direct time calculation. The
controller now computes the exact time remaining until eviction and
schedules the next sync for that moment via Queue().AddAfter:
  evictionTime := evictionStartTime.Add(m.evictionGracePeriod)
  remainingTime := evictionTime.Sub(now)

Changes:
- Removed rateLimiter field and workqueue import
- Calculate exact remaining time instead of using exponential backoff
- Added V(4) logging to show scheduled eviction time and remaining time
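- Updated unit test expectations: expectedQueueLen is now 0 for a work still inside its grace period, because AddAfter puts the item in the delayed queue rather than the main queue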

Impact: AppliedManifestWorks are now evicted immediately when the
grace period expires, instead of being delayed by minutes due to
exponential backoff.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Signed-off-by: zhujian <jiazhu@redhat.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Jian Zhu
2026-01-16 17:48:50 +08:00
committed by GitHub
parent 4dc99cd621
commit c69a2586e5
2 changed files with 8 additions and 9 deletions

View File

@@ -10,7 +10,6 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
workv1client "open-cluster-management.io/api/client/work/clientset/versioned/typed/work/v1"
@@ -41,7 +40,6 @@ type unmanagedAppliedWorkController struct {
hubHash string
agentID string
evictionGracePeriod time.Duration
rateLimiter workqueue.RateLimiter
}
// NewUnManagedAppliedWorkController returns a controller to evict the unmanaged appliedmanifestworks.
@@ -71,7 +69,6 @@ func NewUnManagedAppliedWorkController(
hubHash: hubHash,
agentID: agentID,
evictionGracePeriod: evictionGracePeriod,
rateLimiter: workqueue.NewItemExponentialFailureRateLimiter(1*time.Minute, evictionGracePeriod),
}
return factory.New().
@@ -141,8 +138,13 @@ func (m *unmanagedAppliedWorkController) evictAppliedManifestWork(ctx context.Co
return m.patchEvictionStartTime(ctx, appliedManifestWork, &metav1.Time{Time: now})
}
if now.Before(evictionStartTime.Add(m.evictionGracePeriod)) {
controllerContext.Queue().AddAfter(appliedManifestWork.Name, m.rateLimiter.When(appliedManifestWork.Name))
evictionTime := evictionStartTime.Add(m.evictionGracePeriod)
if now.Before(evictionTime) {
// Calculate the exact remaining time until eviction
remainingTime := evictionTime.Sub(now)
controllerContext.Queue().AddAfter(appliedManifestWork.Name, remainingTime)
logger.V(4).Info("AppliedManifestWork scheduled for eviction",
"evictionTime", evictionTime, "remainingTime", remainingTime)
return nil
}
@@ -160,7 +162,6 @@ func (m *unmanagedAppliedWorkController) stopToEvictAppliedManifestWork(
return nil
}
m.rateLimiter.Forget(appliedManifestWork.Name)
return m.patchEvictionStartTime(ctx, appliedManifestWork, nil)
}

View File

@@ -8,7 +8,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
clienttesting "k8s.io/client-go/testing"
"k8s.io/client-go/util/workqueue"
fakeworkclient "open-cluster-management.io/api/client/work/clientset/versioned/fake"
workinformers "open-cluster-management.io/api/client/work/informers/externalversions"
@@ -204,7 +203,7 @@ func TestSyncUnamanagedAppliedWork(t *testing.T) {
},
},
},
expectedQueueLen: 1,
expectedQueueLen: 0, // Item is added to delayed queue via AddAfter, not the main queue
validateAppliedManifestWorkActions: testingcommon.AssertNoActions,
},
{
@@ -283,7 +282,6 @@ func TestSyncUnamanagedAppliedWork(t *testing.T) {
hubHash: c.hubHash,
agentID: c.agentID,
evictionGracePeriod: c.evictionGracePeriod,
rateLimiter: workqueue.NewItemExponentialFailureRateLimiter(0, c.evictionGracePeriod),
}
controllerContext := testingcommon.NewFakeSyncContext(t, c.appliedManifestWorkName)