diff --git a/pkg/registration/hub/lease/controller.go b/pkg/registration/hub/lease/controller.go
index 010347725..1384293d4 100644
--- a/pkg/registration/hub/lease/controller.go
+++ b/pkg/registration/hub/lease/controller.go
@@ -124,15 +124,21 @@ func (c *leaseController) sync(ctx context.Context, syncCtx factory.SyncContext)
 	}
 
 	now := time.Now()
-	if !now.Before(observedLease.Spec.RenewTime.Add(gracePeriod)) {
+	leaseExpired := !now.Before(observedLease.Spec.RenewTime.Add(gracePeriod))
+
+	if leaseExpired {
 		// the lease is not updated constantly, change the cluster available condition to unknown
 		if err := c.updateClusterStatus(ctx, cluster); err != nil {
 			return err
 		}
+		// Requeue after grace period. Recovery will be detected immediately via lease watch.
+		syncCtx.Queue().AddAfter(clusterName, gracePeriod)
+	} else {
+		// Lease is fresh, requeue exactly when it will expire to detect expiration immediately
+		timeUntilExpiry := observedLease.Spec.RenewTime.Add(gracePeriod).Sub(now)
+		syncCtx.Queue().AddAfter(clusterName, timeUntilExpiry)
 	}
 
-	// always requeue this cluster to check its lease constantly
-	syncCtx.Queue().AddAfter(clusterName, gracePeriod)
 	return nil
 }
 
diff --git a/pkg/registration/hub/lease/controller_test.go b/pkg/registration/hub/lease/controller_test.go
index 443d84853..3f932cfd3 100644
--- a/pkg/registration/hub/lease/controller_test.go
+++ b/pkg/registration/hub/lease/controller_test.go
@@ -7,11 +7,14 @@ import (
 	"testing"
 	"time"
 
+	"github.com/openshift/library-go/pkg/operator/events"
+	"github.com/openshift/library-go/pkg/operator/events/eventstesting"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	kubeinformers "k8s.io/client-go/informers"
 	kubefake "k8s.io/client-go/kubernetes/fake"
 	clienttesting "k8s.io/client-go/testing"
+	"k8s.io/client-go/util/workqueue"
 
 	clusterfake "open-cluster-management.io/api/client/cluster/clientset/versioned/fake"
 	clusterscheme "open-cluster-management.io/api/client/cluster/clientset/versioned/scheme"
@@ -202,3 +205,128 @@ func newDeletingManagedCluster() *clusterv1.ManagedCluster {
 	cluster.DeletionTimestamp = &now
 	return cluster
 }
+
+// spyQueue wraps a real queue and captures AddAfter calls
+type spyQueue struct {
+	workqueue.RateLimitingInterface
+	addAfterDelay time.Duration
+	addAfterKey   interface{}
+}
+
+func (s *spyQueue) AddAfter(item interface{}, duration time.Duration) {
+	s.addAfterDelay = duration
+	s.addAfterKey = item
+	s.RateLimitingInterface.AddAfter(item, duration)
+}
+
+// testSyncContext is a custom sync context for testing requeue timing
+type testSyncContext struct {
+	queueKey string
+	recorder events.Recorder
+	queue    *spyQueue
+}
+
+func (t *testSyncContext) Queue() workqueue.RateLimitingInterface { return t.queue }
+func (t *testSyncContext) QueueKey() string                       { return t.queueKey }
+func (t *testSyncContext) Recorder() events.Recorder              { return t.recorder }
+
+func newManagedClusterWithLeaseDuration(seconds int32) *clusterv1.ManagedCluster {
+	cluster := testinghelpers.NewAvailableManagedCluster()
+	cluster.Spec.LeaseDurationSeconds = seconds
+	return cluster
+}
+
+func TestRequeueTime(t *testing.T) {
+	// Using 60 second lease duration (grace period = 5 * 60 = 300 seconds)
+	cases := []struct {
+		name               string
+		cluster            runtime.Object
+		clusterLease       runtime.Object
+		expectedRequeueMin time.Duration
+		expectedRequeueMax time.Duration
+	}{
+		{
+			name:    "requeue for expired lease - recovery detected via watch, use grace period",
+			cluster: newManagedClusterWithLeaseDuration(60),
+			// Lease renewed 5 minutes ago; with the 5 * 60 = 300 second grace period it has just expired
+			clusterLease:       testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(-5*time.Minute)),
+			expectedRequeueMin: 295 * time.Second, // Should be ~300s (grace period)
+			expectedRequeueMax: 305 * time.Second, // Allow some tolerance
+		},
+		{
+			name:    "requeue for fresh lease - should use time until expiry for immediate detection",
+			cluster: newManagedClusterWithLeaseDuration(60),
+			// Lease renewed 4 minutes ago, will expire in 1 minute (grace period is 5 minutes)
+			clusterLease:       testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(-4*time.Minute)),
+			expectedRequeueMin: 55 * time.Second, // Should be ~60s (1 minute)
+			expectedRequeueMax: 65 * time.Second, // Allow some tolerance
+		},
+		{
+			name:    "requeue for recently renewed lease - should check just before expiry",
+			cluster: newManagedClusterWithLeaseDuration(60),
+			// Lease renewed 30 seconds ago, will expire in 4.5 minutes
+			clusterLease:       testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(-30*time.Second)),
+			expectedRequeueMin: 265 * time.Second, // Should be ~270s (4.5 minutes)
+			expectedRequeueMax: 275 * time.Second, // Allow some tolerance
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			clusterClient := clusterfake.NewSimpleClientset(c.cluster)
+			clusterInformerFactory := clusterinformers.NewSharedInformerFactory(clusterClient, time.Minute*10)
+			clusterStore := clusterInformerFactory.Cluster().V1().ManagedClusters().Informer().GetStore()
+			if err := clusterStore.Add(c.cluster); err != nil {
+				t.Fatal(err)
+			}
+
+			hubClient := kubefake.NewSimpleClientset(c.clusterLease)
+			leaseInformerFactory := kubeinformers.NewSharedInformerFactory(hubClient, time.Minute*10)
+			leaseStore := leaseInformerFactory.Coordination().V1().Leases().Informer().GetStore()
+			if err := leaseStore.Add(c.clusterLease); err != nil {
+				t.Fatal(err)
+			}
+
+			ctx := context.TODO()
+			mcEventRecorder, err := helpers.NewEventRecorder(ctx, clusterscheme.Scheme, hubClient.EventsV1(), "test")
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			// Create a custom sync context with a spy queue to capture AddAfter calls
+			spyQ := &spyQueue{
+				RateLimitingInterface: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
+			}
+			syncCtx := &testSyncContext{
+				queueKey: testinghelpers.TestManagedClusterName,
+				recorder: eventstesting.NewTestingEventRecorder(t),
+				queue:    spyQ,
+			}
+
+			ctrl := &leaseController{
+				kubeClient: hubClient,
+				patcher: patcher.NewPatcher[
+					*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus](
+					clusterClient.ClusterV1().ManagedClusters()),
+				clusterLister:   clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(),
+				leaseLister:     leaseInformerFactory.Coordination().V1().Leases().Lister(),
+				mcEventRecorder: mcEventRecorder,
+			}
+
+			syncErr := ctrl.sync(context.TODO(), syncCtx)
+			if syncErr != nil {
+				t.Errorf("unexpected err: %v", syncErr)
+			}
+
+			// Verify the requeue delay captured by the spy
+			actualDelay := spyQ.addAfterDelay
+			if actualDelay < c.expectedRequeueMin || actualDelay > c.expectedRequeueMax {
+				t.Errorf("Unexpected requeue delay: got %v, expected between %v and %v",
+					actualDelay, c.expectedRequeueMin, c.expectedRequeueMax)
+			} else {
+				t.Logf("Requeue delay %v is within expected range [%v, %v]",
+					actualDelay, c.expectedRequeueMin, c.expectedRequeueMax)
+			}
+		})
+	}
+}