From 72e2ded8cca32cd47c5ada0ef75fed9e0f5d8a02 Mon Sep 17 00:00:00 2001 From: Jian Zhu Date: Wed, 18 Mar 2026 09:34:12 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20flaky=20e2e=20test:=20addo?= =?UTF-8?q?n=20CSR=20cleanup=20race=20condition=20(#1449)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem The addon management e2e test "Template type addon should be configured by addon deployment config for proxy" fails intermittently with: "Stop creating csr since there are too many csr created already on hub" ## Root Cause Race condition between CSR deletion in AfterEach and API/cache consistency: 1. AfterEach deletes CSRs via the API 2. The cleanup returns success as soon as the Delete() calls succeed 3. The next test's BeforeEach starts before the deletions have fully propagated 4. The CSR controller's indexer cache still lists the deleted CSRs 5. haltAddonCSRCreation() checks the cache, sees >=10 CSRs, and halts creation ## Solution Modified the AfterEach CSR cleanup to verify that the deletions have completed: - After deleting CSRs, return an error to force Eventually() to retry - Only return success when List() confirms that 0 CSRs remain - Tolerate NotFound errors from Delete() so that a retry does not fail on CSRs that are already gone - Ensures API consistency before the next test starts - Added logging to show the CSR cleanup count for debugging Waiting until the API reports zero CSRs gives the controller's cache time to sync and prevents accumulation of phantom CSRs across ordered test runs.
Signed-off-by: Jian Zhu Signed-off-by: zhujian --- test/e2e/addonmanagement_test.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/e2e/addonmanagement_test.go b/test/e2e/addonmanagement_test.go index cab7975df..99c75e015 100644 --- a/test/e2e/addonmanagement_test.go +++ b/test/e2e/addonmanagement_test.go @@ -212,12 +212,17 @@ var _ = ginkgo.Describe("Addon management", ginkgo.Ordered, ginkgo.Label("addon- return err } - for _, csr := range csrs.Items { - err = hub.KubeClient.CertificatesV1().CertificateSigningRequests().Delete(context.TODO(), - csr.Name, metav1.DeleteOptions{}) - if err != nil { - return err + if len(csrs.Items) > 0 { + ginkgo.By(fmt.Sprintf("Deleting %d CSRs for addon %s/%s", len(csrs.Items), universalClusterName, addOnName)) + for _, csr := range csrs.Items { + err = hub.KubeClient.CertificatesV1().CertificateSigningRequests().Delete(context.TODO(), + csr.Name, metav1.DeleteOptions{}) + if err != nil && !errors.IsNotFound(err) { + return err + } } + // Return error to retry - ensures CSRs are fully deleted from API before proceeding + return fmt.Errorf("waiting for %d CSRs to be fully deleted", len(csrs.Items)) } return nil