Add startTime initialization and wait 10s in hubTimeoutController (#1191)

Signed-off-by: xuezhaojun <zxue@redhat.com>
2026-02-14 18:09:57 +00:00 · 2025-09-23 15:26:48 +08:00
parent 2f04992d6c
commit 010f5efe6d
1 changed files with 22 additions and 0 deletions
--- a/pkg/registration/spoke/registration/hub_timeout_controller.go
+++ b/pkg/registration/spoke/registration/hub_timeout_controller.go
@@ -21,6 +21,8 @@ type hubTimeoutController struct {
 	timeoutSeconds     int32
 	lastLeaseRenewTime time.Time
 	handleTimeout      func(ctx context.Context) error
 	startTime time.Time
 }
 func NewHubTimeoutController(
@@ -35,6 +37,7 @@ func NewHubTimeoutController(
 		timeoutSeconds: timeoutSeconds,
 		handleTimeout:  handleTimeout,
 		leaseClient:    leaseClient,
 		startTime:      time.Now(),
 	}
 	return factory.New().WithSync(c.sync).ResyncEvery(time.Minute).
 		ToController("HubTimeoutController", recorder)
@@ -57,6 +60,25 @@ func (c *hubTimeoutController) sync(ctx context.Context, syncCtx factory.SyncCon
 		c.lastLeaseRenewTime = lease.Spec.RenewTime.Time
 	}
 	// If fewer than 10s have elapsed since `startTime`, skip the timeout check.
 	// This handles cases where old leases remain due to incomplete cleanup.
 	//
 	// Example scenario:
 	// 1. ManagedCluster-A is connected to Hub1 with an active lease
 	// 2. Hub1 unexpectedly fails (power outage) - no cleanup opportunity
 	// 3. ManagedCluster-A detects timeout and switches to Hub2
 	// 4. Hub1 comes back online with the old stale lease still present
 	// 5. ManagedCluster-A migrates back to Hub1 (which has the expired lease)
 	// 6. With this grace period, the lease controller gets time to update the
 	//    lease before timeout checks begin, preventing false timeouts. Without
 	//    it, the timeout controller runs immediately, detects the stale lease
 	//    as expired, and triggers an unwanted timeout.
 	//
 	// This also applies to migration scenarios where cleanup is incomplete.
 	if time.Since(c.startTime) < time.Second*10 {
 		return nil
 	}
 	if isTimeout(time.Now(), c.lastLeaseRenewTime, c.timeoutSeconds) {
 		logger.Info("Lease timeout", "cluster", c.clusterName, "lease", leaseName)
 		err := c.handleTimeout(ctx)