ensure immediate requeue for transient errors when work spec is changed (#1335)

Signed-off-by: Zhiwei Yin <zyin@redhat.com>
Zhiwei Yin authored 2026-01-19 15:57:39 +08:00, committed by GitHub
commit 9a1e925112 (parent d83c822129)
2 changed files with 138 additions and 0 deletions


@@ -263,6 +263,14 @@ func onUpdateFunc(queue workqueue.TypedRateLimitingInterface[string]) func(oldOb
		}
		if !apiequality.Semantic.DeepEqual(newWork.Spec, oldWork.Spec) ||
			!apiequality.Semantic.DeepEqual(newWork.Labels, oldWork.Labels) {
			// Reset the rate limiter so the work is processed immediately when its spec or labels change.
			// Without this reset, a work that was previously failing and being rate-limited
			// (exponential backoff) would keep waiting out the backoff delay before the new
			// spec change is processed. Calling Forget() clears the rate limiter's failure
			// count and backoff state for this key, so an updated work that had been failing
			// is reconciled immediately instead of after a long rate-limited retry.
			queue.Forget(newWork.GetName())
			queue.Add(newWork.GetName())
		}
	}
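
For context, here is a minimal standalone sketch (not part of this change) of how the typed workqueue's default per-item rate limiter behaves, and why Forget() short-circuits the accumulated backoff. The key name and failure count are illustrative:

package main

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// The default per-item rate limiter backs off exponentially per key,
	// starting at 5ms and doubling on each failure (capped at 1000s).
	limiter := workqueue.DefaultTypedControllerRateLimiter[string]()
	key := "example-manifestwork" // illustrative key, not a real work name

	// Simulate 12 consecutive reconcile failures for this key; each When()
	// call returns the current delay and also bumps the failure count.
	for i := 0; i < 12; i++ {
		_ = limiter.When(key)
	}
	fmt.Println("delay after 12 failures:", limiter.When(key)) // ~20s (5ms * 2^12)

	// Forget() clears the failure count and backoff state for the key, so the
	// next retry starts from the base delay again. This is what lets a spec
	// change bypass the backoff that earlier failures accumulated.
	limiter.Forget(key)
	fmt.Println("delay after Forget():", limiter.When(key)) // back to the 5ms base
}

In the handler above, queue.Forget() followed by queue.Add() applies the same reset to the rate-limiting queue, so the changed spec is picked up without waiting out the old backoff.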


@@ -1280,4 +1280,134 @@ var _ = ginkgo.Describe("ManifestWork", func() {
			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
		})
	})

	ginkgo.Context("Status update timing for invalid manifests", func() {
		ginkgo.BeforeEach(func() {
			// Create two RoleBindings with valid roleRef
			rb1, _ := util.NewRoleBinding(clusterName, "rb1", "default-sa", "default-role")
			rb2, _ := util.NewRoleBinding(clusterName, "rb2", "default-sa", "default-role")
			manifests = []workapiv1.Manifest{
				util.ToManifest(rb1),
				util.ToManifest(rb2),
			}
		})

		ginkgo.It("should update conditions correctly when RoleRef changes", func() {
			ginkgo.By("verify initial conditions are True")
			util.AssertWorkCondition(work.Namespace, work.Name, hubWorkClient, workapiv1.WorkApplied, metav1.ConditionTrue,
				[]metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionTrue}, eventuallyTimeout, eventuallyInterval)
			util.AssertWorkCondition(work.Namespace, work.Name, hubWorkClient, workapiv1.WorkAvailable, metav1.ConditionTrue,
				[]metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionTrue}, eventuallyTimeout, eventuallyInterval)

			// Verify observedGeneration matches generation
			util.AssertWorkGeneration(work.Namespace, work.Name, hubWorkClient, workapiv1.WorkApplied, eventuallyTimeout, eventuallyInterval)
			util.AssertWorkGeneration(work.Namespace, work.Name, hubWorkClient, workapiv1.WorkAvailable, eventuallyTimeout, eventuallyInterval)

			ginkgo.By("change RoleRef of the first rolebinding to a non-existent role")
			updatedWork, err := hubWorkClient.WorkV1().ManifestWorks(clusterName).Get(context.Background(), work.Name, metav1.GetOptions{})
			gomega.Expect(err).ToNot(gomega.HaveOccurred())

			// Update first rolebinding to reference a non-existent role
			rb1Invalid, _ := util.NewRoleBinding(clusterName, "rb1", "default-sa", "changed-role-1")
			newWork := updatedWork.DeepCopy()
			newWork.Spec.Workload.Manifests[0] = util.ToManifest(rb1Invalid)
			pathBytes, err := util.NewWorkPatch(updatedWork, newWork)
			gomega.Expect(err).ToNot(gomega.HaveOccurred())
			_, err = hubWorkClient.WorkV1().ManifestWorks(clusterName).Patch(
				context.Background(), updatedWork.Name, types.MergePatchType, pathBytes, metav1.PatchOptions{})
			gomega.Expect(err).ToNot(gomega.HaveOccurred())

			ginkgo.By("verify Applied condition is False, Available condition is True, and ObservedGeneration matches")
			gomega.Eventually(func() error {
				work, err = hubWorkClient.WorkV1().ManifestWorks(clusterName).Get(context.Background(), work.Name, metav1.GetOptions{})
				if err != nil {
					return err
				}
				// Check Applied condition is False
				appliedCond := meta.FindStatusCondition(work.Status.Conditions, workapiv1.WorkApplied)
				if appliedCond == nil {
					return fmt.Errorf("applied condition not found")
				}
				if appliedCond.Status != metav1.ConditionFalse {
					return fmt.Errorf("applied condition status is %s, expected False", appliedCond.Status)
				}
				if appliedCond.ObservedGeneration != work.Generation {
					return fmt.Errorf("applied observedGeneration %d does not match generation %d",
						appliedCond.ObservedGeneration, work.Generation)
				}
				// Check Available condition is True
				availableCond := meta.FindStatusCondition(work.Status.Conditions, workapiv1.WorkAvailable)
				if availableCond == nil {
					return fmt.Errorf("available condition not found")
				}
				if availableCond.Status != metav1.ConditionTrue {
					return fmt.Errorf("available condition status is %s, expected True", availableCond.Status)
				}
				if availableCond.ObservedGeneration != work.Generation {
					return fmt.Errorf("available observedGeneration %d does not match generation %d",
						availableCond.ObservedGeneration, work.Generation)
				}
				return nil
			}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())

			ginkgo.By("change RoleRef of the second rolebinding to a non-existent role")
			updatedWork, err = hubWorkClient.WorkV1().ManifestWorks(clusterName).Get(context.Background(), work.Name, metav1.GetOptions{})
			gomega.Expect(err).ToNot(gomega.HaveOccurred())

			// Update second rolebinding to reference a non-existent role
			rb2Invalid, _ := util.NewRoleBinding(clusterName, "rb2", "default-sa", "changed-role-2")
			newWork = updatedWork.DeepCopy()
			newWork.Spec.Workload.Manifests[1] = util.ToManifest(rb2Invalid)
			pathBytes, err = util.NewWorkPatch(updatedWork, newWork)
			gomega.Expect(err).ToNot(gomega.HaveOccurred())
			_, err = hubWorkClient.WorkV1().ManifestWorks(clusterName).Patch(
				context.Background(), updatedWork.Name, types.MergePatchType, pathBytes, metav1.PatchOptions{})
			gomega.Expect(err).ToNot(gomega.HaveOccurred())

			ginkgo.By("verify Applied condition is still False, Available condition is True, and ObservedGeneration matches")
			gomega.Eventually(func() error {
				work, err = hubWorkClient.WorkV1().ManifestWorks(clusterName).Get(context.Background(), work.Name, metav1.GetOptions{})
				if err != nil {
					return err
				}
				// Check Applied condition is False
				appliedCond := meta.FindStatusCondition(work.Status.Conditions, workapiv1.WorkApplied)
				if appliedCond == nil {
					return fmt.Errorf("applied condition not found")
				}
				if appliedCond.Status != metav1.ConditionFalse {
					return fmt.Errorf("applied condition status is %s, expected False", appliedCond.Status)
				}
				if appliedCond.ObservedGeneration != work.Generation {
					return fmt.Errorf("applied observedGeneration %d does not match generation %d",
						appliedCond.ObservedGeneration, work.Generation)
				}
				// Check Available condition is True
				availableCond := meta.FindStatusCondition(work.Status.Conditions, workapiv1.WorkAvailable)
				if availableCond == nil {
					return fmt.Errorf("available condition not found")
				}
				if availableCond.Status != metav1.ConditionTrue {
					return fmt.Errorf("available condition status is %s, expected True", availableCond.Status)
				}
				if availableCond.ObservedGeneration != work.Generation {
					return fmt.Errorf("available observedGeneration %d does not match generation %d",
						availableCond.ObservedGeneration, work.Generation)
				}
				return nil
			}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
		})
	})
})