diff --git a/apis/core.oam.dev/common/types.go b/apis/core.oam.dev/common/types.go index a998cee9c..e136ced31 100644 --- a/apis/core.oam.dev/common/types.go +++ b/apis/core.oam.dev/common/types.go @@ -174,12 +174,15 @@ type ApplicationComponentStatus struct { Cluster string `json:"cluster,omitempty"` Env string `json:"env,omitempty"` // WorkloadDefinition is the definition of a WorkloadDefinition, such as deployments/apps.v1 - WorkloadDefinition WorkloadGVK `json:"workloadDefinition,omitempty"` - Healthy bool `json:"healthy"` - Details map[string]string `json:"details,omitempty"` - Message string `json:"message,omitempty"` - Traits []ApplicationTraitStatus `json:"traits,omitempty"` - Scopes []corev1.ObjectReference `json:"scopes,omitempty"` + WorkloadDefinition WorkloadGVK `json:"workloadDefinition,omitempty"` + Healthy bool `json:"healthy"` + // WorkloadHealthy indicates the workload health without considering trait health. + // +optional + WorkloadHealthy bool `json:"workloadHealthy,omitempty"` + Details map[string]string `json:"details,omitempty"` + Message string `json:"message,omitempty"` + Traits []ApplicationTraitStatus `json:"traits,omitempty"` + Scopes []corev1.ObjectReference `json:"scopes,omitempty"` } // Equal check if two ApplicationComponentStatus are equal @@ -192,6 +195,7 @@ func (in ApplicationComponentStatus) Equal(r ApplicationComponentStatus) bool { type ApplicationTraitStatus struct { Type string `json:"type"` Healthy bool `json:"healthy"` + Pending bool `json:"pending,omitempty"` Details map[string]string `json:"details,omitempty"` Message string `json:"message,omitempty"` } diff --git a/charts/vela-core/crds/core.oam.dev_applicationrevisions.yaml b/charts/vela-core/crds/core.oam.dev_applicationrevisions.yaml index 4ab406158..138119d45 100644 --- a/charts/vela-core/crds/core.oam.dev_applicationrevisions.yaml +++ b/charts/vela-core/crds/core.oam.dev_applicationrevisions.yaml @@ -632,6 +632,8 @@ spec: type: boolean message: type: string + pending: + type: boolean type: type: string required: @@ -651,6 +653,10 @@ spec: - apiVersion - kind type: object + workloadHealthy: + description: WorkloadHealthy indicates the workload + health without considering trait health. + type: boolean required: - healthy - name diff --git a/charts/vela-core/crds/core.oam.dev_applications.yaml b/charts/vela-core/crds/core.oam.dev_applications.yaml index 5e0e06990..7b826fa44 100644 --- a/charts/vela-core/crds/core.oam.dev_applications.yaml +++ b/charts/vela-core/crds/core.oam.dev_applications.yaml @@ -580,6 +580,8 @@ spec: type: boolean message: type: string + pending: + type: boolean type: type: string required: @@ -599,6 +601,10 @@ spec: - apiVersion - kind type: object + workloadHealthy: + description: WorkloadHealthy indicates the workload health without + considering trait health. + type: boolean required: - healthy - name diff --git a/pkg/controller/core.oam.dev/v1beta1/application/application_controller.go b/pkg/controller/core.oam.dev/v1beta1/application/application_controller.go index a31916ec1..b6c1658ee 100644 --- a/pkg/controller/core.oam.dev/v1beta1/application/application_controller.go +++ b/pkg/controller/core.oam.dev/v1beta1/application/application_controller.go @@ -568,6 +568,9 @@ func isHealthy(services []common.ApplicationComponentStatus) bool { return false } for _, tr := range service.Traits { + if tr.Pending { + continue + } if !tr.Healthy { return false } diff --git a/pkg/controller/core.oam.dev/v1beta1/application/apply.go b/pkg/controller/core.oam.dev/v1beta1/application/apply.go index 59adb7495..4d0be2c95 100644 --- a/pkg/controller/core.oam.dev/v1beta1/application/apply.go +++ b/pkg/controller/core.oam.dev/v1beta1/application/apply.go @@ -18,12 +18,15 @@ package application import ( "context" + "maps" + "slices" "sync" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + utilfeature "k8s.io/apiserver/pkg/util/feature" "sigs.k8s.io/controller-runtime/pkg/client" monitorContext "github.com/kubevela/pkg/monitor/context" @@ -37,6 +40,7 @@ import ( "github.com/oam-dev/kubevela/apis/types" "github.com/oam-dev/kubevela/pkg/appfile" velaprocess "github.com/oam-dev/kubevela/pkg/cue/process" + "github.com/oam-dev/kubevela/pkg/features" "github.com/oam-dev/kubevela/pkg/monitor/metrics" "github.com/oam-dev/kubevela/pkg/multicluster" "github.com/oam-dev/kubevela/pkg/oam" @@ -333,11 +337,32 @@ func (h *AppHandler) collectHealthStatus(ctx context.Context, comp *appfile.Comp if err != nil { return nil, nil, nil, false, err } + status.WorkloadHealthy = isHealth } - var traitStatusList []common.ApplicationTraitStatus + multiStagingEnabled := utilfeature.DefaultMutableFeatureGate.Enabled(features.MultiStageComponentApply) + type traitKey struct { + Type string + Index int + } + traitStatusByKey := make(map[traitKey]common.ApplicationTraitStatus, len(status.Traits)) + traitIndexByType := make(map[string]int) + for _, ts := range status.Traits { + key := traitKey{Type: ts.Type, Index: traitIndexByType[ts.Type]} + traitIndexByType[ts.Type]++ + if _, exists := traitStatusByKey[key]; exists { + continue + } + traitStatusByKey[key] = ts + } + addTraitStatus := func(key traitKey, ts common.ApplicationTraitStatus) { + traitStatusByKey[key] = ts + } + traitIndexByType = make(map[string]int) collectNext: for _, tr := range comp.Traits { + key := traitKey{Type: tr.Name, Index: traitIndexByType[tr.Name]} + traitIndexByType[tr.Name]++ for _, filter := range traitFilters { // If filtered out by one of the filters if filter(*tr) { @@ -355,17 +380,56 @@ collectNext: if status.Message == "" && traitStatus.Message != "" { status.Message = traitStatus.Message } - traitStatusList = append(traitStatusList, traitStatus) - - var oldStatus []common.ApplicationTraitStatus - for _, _trait := range status.Traits { - if _trait.Type != tr.Name { - oldStatus = append(oldStatus, _trait) - } - } - status.Traits = oldStatus + addTraitStatus(key, traitStatus) } - status.Traits = append(status.Traits, traitStatusList...) + if multiStagingEnabled && !status.WorkloadHealthy { + for _, component := range h.currentAppRev.Spec.Application.Spec.Components { + if component.Name != comp.Name { + continue + } + traitIndexByType = make(map[string]int) + for _, trait := range component.Traits { + key := traitKey{Type: trait.Type, Index: traitIndexByType[trait.Type]} + traitIndexByType[trait.Type]++ + if _, ok := traitStatusByKey[key]; ok { + continue + } + traitStage, err := getTraitDispatchStage(h.Client, trait.Type, h.currentAppRev, h.app.Annotations) + isPostDispatch := err == nil && traitStage == PostDispatch + if isPostDispatch { + addTraitStatus( + key, + common.ApplicationTraitStatus{ + Type: trait.Type, + Healthy: false, + Pending: true, + Message: "\u23f3 Waiting for component to be healthy", + }, + ) + } + } + break + } + } + traitHealthy := true + for _, ts := range traitStatusByKey { + if ts.Pending { + continue + } + if !ts.Healthy { + traitHealthy = false + break + } + } + if !skipWorkload { + status.Healthy = status.WorkloadHealthy && traitHealthy + } else if !traitHealthy { + status.Healthy = false + if status.Message == "" { + status.Message = "traits are not healthy" + } + } + status.Traits = slices.Collect(maps.Values(traitStatusByKey)) h.addServiceStatus(true, status) return &status, output, outputs, isHealth, nil } @@ -451,7 +515,11 @@ func extractOutputs(templateContext map[string]interface{}) []*unstructured.Unst // This is called after the workflow succeeds and component health is confirmed. func (h *AppHandler) applyPostDispatchTraits(ctx monitorContext.Context, appParser *appfile.Parser, af *appfile.Appfile) error { for _, svc := range h.services { - if !svc.Healthy { + workloadHealthy := svc.WorkloadHealthy + if !workloadHealthy && svc.Healthy { + workloadHealthy = true + } + if !workloadHealthy { continue } @@ -555,6 +623,24 @@ func (h *AppHandler) applyPostDispatchTraits(ctx monitorContext.Context, appPars if err := h.Dispatch(dispatchCtx, h.Client, svc.Cluster, common.WorkflowResourceCreator, readyTraits...); err != nil { return errors.WithMessagef(err, "failed to dispatch PostDispatch traits for component %s", comp.Name) } + // Restore all traits and collect health status to update the application status. + // + // Why this is necessary: + // When the workflow is in "executing" state (e.g., one component is unhealthy), + // the reconcile loop returns early after applyPostDispatchTraits() and does NOT + // call evalStatus(). This means collectHealthStatus() would never be called for + // the healthy component's traits. + // + // During the initial workflow apply, prepareWorkloadAndManifests() filters out + // PostDispatch traits when serviceHealthy=false, so the status only contains + // non-PostDispatch traits (like "scaler"). Without this explicit call here, + // PostDispatch traits would be dispatched to the cluster but never reflected + // in the application status. + // + healthCtx := multicluster.ContextWithClusterName(ctx.GetContext(), svc.Cluster) + if _, _, _, _, err := h.collectHealthStatus(healthCtx, wl, svc.Namespace, false); err != nil { + ctx.Error(err, "failed to refresh PostDispatch trait status", "component", comp.Name) + } } return nil } diff --git a/pkg/controller/core.oam.dev/v1beta1/application/generator.go b/pkg/controller/core.oam.dev/v1beta1/application/generator.go index fa0be3549..1da05c6b8 100644 --- a/pkg/controller/core.oam.dev/v1beta1/application/generator.go +++ b/pkg/controller/core.oam.dev/v1beta1/application/generator.go @@ -392,7 +392,10 @@ func (h *AppHandler) prepareWorkloadAndManifests(ctx context.Context, needPostDispatchOutputs := componentOutputsConsumed(comp, af.Components) for _, svc := range h.services { if svc.Name == comp.Name { - serviceHealthy = svc.Healthy + serviceHealthy = svc.WorkloadHealthy + if !serviceHealthy && svc.Healthy { + serviceHealthy = true + } break } } diff --git a/test/e2e-test/postdispatch_trait_test.go b/test/e2e-test/postdispatch_trait_test.go index 5c38cf24d..5725982e8 100644 --- a/test/e2e-test/postdispatch_trait_test.go +++ b/test/e2e-test/postdispatch_trait_test.go @@ -379,7 +379,7 @@ isHealth: *_isHealth | bool Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21","port":80,"cpu":"100m","memory":"128Mi"}`)}, Traits: []common.ApplicationTrait{ {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":3}`)}}, - {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment","image":"nginx:alpine"}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment","image":"nginx:1.21"}`)}}, {Type: cmTraitName}, }, }, @@ -415,6 +415,7 @@ isHealth: *_isHealth | bool g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: app.Name}, checkApp)).Should(Succeed()) g.Expect(checkApp.Status.Services).ShouldNot(BeEmpty()) svc := checkApp.Status.Services[0] + g.Expect(svc.Healthy).Should(BeFalse()) traitFound := false for _, traitStatus := range svc.Traits { @@ -572,7 +573,7 @@ isHealth: *_isHealth | bool By("Creating application that uses PostDispatch traits") Expect(k8sClient.Create(ctx, app)).Should(Succeed()) - By("Waiting for trait to remain pending and not show in status while component image fails") + By("Waiting for trait to remain pending and show in application detail status while component image fails") Eventually(func(g Gomega) { checkApp := &v1beta1.Application{} g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: app.Name}, checkApp)).Should(Succeed()) @@ -582,11 +583,14 @@ isHealth: *_isHealth | bool traitFound := false for _, traitStatus := range svc.Traits { - if traitStatus.Type == deploymentTraitName { + if traitStatus.Type == deploymentTraitName || traitStatus.Type == cmTraitName { traitFound = true + g.Expect(traitStatus.Healthy).Should(BeFalse()) + g.Expect(traitStatus.Pending).Should(BeTrue()) + g.Expect(traitStatus.Message).Should(ContainSubstring("Waiting for component to be healthy")) } } - g.Expect(traitFound).Should(BeFalse()) + g.Expect(traitFound).Should(BeTrue()) }, 180*time.Second, 5*time.Second).Should(Succeed()) }) }) @@ -703,6 +707,10 @@ outputs: statusConfigMap: { { Type: traitDefName, }, + { + Type: "scaler", + Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":3}`)}, + }, }, }, }, @@ -715,6 +723,10 @@ outputs: statusConfigMap: { checkApp := &v1beta1.Application{} g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "test-postdispatch-app"}, checkApp)).Should(Succeed()) g.Expect(checkApp.Status.Phase).Should(Equal(common.ApplicationRunning)) + + dep := &appsv1.Deployment{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "test-worker"}, dep)).Should(Succeed()) + g.Expect(*dep.Spec.Replicas).Should(Equal(int32(3))) }, 60*time.Second, 3*time.Second).Should(Succeed()) By("Verifying component Deployment is created and healthy") @@ -732,7 +744,7 @@ outputs: statusConfigMap: { g.Expect(status).ShouldNot(BeNil()) replicas, _, _ := unstructured.NestedInt64(status, "replicas") - g.Expect(replicas).Should(Equal(int64(1))) + g.Expect(replicas).Should(Equal(int64(3))) }, 30*time.Second, 2*time.Second).Should(Succeed()) By("Verifying PostDispatch trait ConfigMap was created with status data") @@ -741,8 +753,8 @@ outputs: statusConfigMap: { g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "test-component-status"}, cm)).Should(Succeed()) g.Expect(cm.Data).ShouldNot(BeNil()) g.Expect(cm.Data["componentName"]).Should(Equal("test-component")) - g.Expect(cm.Data["replicas"]).Should(Equal("1")) - g.Expect(cm.Data["readyReplicas"]).Should(Equal("1")) + g.Expect(cm.Data["replicas"]).Should(Equal("3")) + g.Expect(cm.Data["readyReplicas"]).Should(Equal("3")) }, 300*time.Second, 3*time.Second).Should(Succeed()) By("Verifying PostDispatch trait appears in application status") @@ -908,14 +920,15 @@ outputs: marker: { foundPendingTrait := false for _, trait := range svc.Traits { if trait.Type == traitDefName { - // Trait should show as pending and not healthy + // Trait should be pending and not healthy foundPendingTrait = true break } } - // If workflow is running, we will not be able to see the pending trait status yet if checkApp.Status.Phase == common.ApplicationRunningWorkflow { - g.Expect(foundPendingTrait).Should(BeFalse()) + g.Expect(foundPendingTrait).Should(BeTrue()) + g.Expect(svc.Traits[0].Pending).Should(BeTrue()) + g.Expect(svc.Traits[0].Message).Should(ContainSubstring("Waiting for component to be healthy")) } }, 20*time.Second, 500*time.Millisecond).Should(Succeed()) @@ -943,6 +956,7 @@ outputs: marker: { foundTrait = true // Trait should be healthy, not pending, and not waiting anymore g.Expect(trait.Healthy).Should(BeTrue()) + g.Expect(trait.Pending).Should(BeFalse()) break } } @@ -1102,4 +1116,554 @@ outputs: statusConfigMap: { Expect(k8sClient.Delete(ctx, compDef)).Should(Succeed()) }) }) + + Context("Test PostDispatch health status with multiple components", func() { + It("Should mark all components and PostDispatch traits healthy", func() { + deploymentTraitName := "test-deployment-trait-" + randomNamespaceName("") + cmTraitName := "test-cm-trait-" + randomNamespaceName("") + appName := "app-postdispatch-multi-healthy-" + randomNamespaceName("") + + By("Creating PostDispatch deployment trait definition") + deploymentTrait := &v1beta1.TraitDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentTraitName, + Namespace: "vela-system", + }, + Spec: v1beta1.TraitDefinitionSpec{ + Stage: v1beta1.PostDispatch, + Schematic: &common.Schematic{ + CUE: &common.CUE{ + Template: ` +outputs: statusPod: { + apiVersion: "apps/v1" + kind: "Deployment" + metadata: { + name: parameter.name + } + spec: { + replicas: context.output.status.replicas + selector: matchLabels: { + app: parameter.name + } + template: { + metadata: labels: { + app: parameter.name + } + spec: containers: [{ + name: parameter.name + image: parameter.image + }] + } + } +} + +parameter: { + name: string + image: string +} +`, + }, + }, + Status: &common.Status{ + HealthPolicy: `pod: context.outputs.statusPod +ready: { + updatedReplicas: *0 | int + readyReplicas: *0 | int + replicas: *0 | int + observedGeneration: *0 | int +} & { + if pod.status.updatedReplicas != _|_ { + updatedReplicas: pod.status.updatedReplicas + } + if pod.status.readyReplicas != _|_ { + readyReplicas: pod.status.readyReplicas + } + if pod.status.replicas != _|_ { + replicas: pod.status.replicas + } + if pod.status.observedGeneration != _|_ { + observedGeneration: pod.status.observedGeneration + } +} +_isHealth: (pod.spec.replicas == ready.readyReplicas) && (pod.spec.replicas == ready.updatedReplicas) && (pod.spec.replicas == ready.replicas) && (ready.observedGeneration == pod.metadata.generation || ready.observedGeneration > pod.metadata.generation) +isHealth: *_isHealth | bool +if pod.metadata.annotations != _|_ { + if pod.metadata.annotations["app.oam.dev/disable-health-check"] != _|_ { + isHealth: true + } +} +`, + }, + }, + } + Expect(k8sClient.Create(ctx, deploymentTrait)).Should(Succeed()) + + By("Creating PostDispatch configmap trait definition") + cmTrait := &v1beta1.TraitDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmTraitName, + Namespace: "vela-system", + }, + Spec: v1beta1.TraitDefinitionSpec{ + Stage: v1beta1.PostDispatch, + Schematic: &common.Schematic{ + CUE: &common.CUE{ + Template: ` +outputs: statusConfigMap: { + apiVersion: "v1" + kind: "ConfigMap" + metadata: { + name: context.name + "-status" + namespace: context.namespace + } + data: { + replicas: "\(context.output.status.replicas)" + readyReplicas: "\(context.output.status.readyReplicas)" + componentName: context.name + } +} +`, + }, + }, + Status: &common.Status{ + HealthPolicy: `cm: context.outputs.statusConfigMap +_isHealth: cm.data.readyReplicas != "2" +isHealth: *_isHealth | bool +`, + }, + }, + } + Expect(k8sClient.Create(ctx, cmTrait)).Should(Succeed()) + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, deploymentTrait) + _ = k8sClient.Delete(ctx, cmTrait) + }) + + app := &v1beta1.Application{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: namespace, + }, + Spec: v1beta1.ApplicationSpec{ + Components: []common.ApplicationComponent{ + { + Name: "test-deployment-a", + Type: "webservice", + Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21","port":80,"cpu":"100m","memory":"128Mi"}`)}, + Traits: []common.ApplicationTrait{ + {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":3}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment-a","image":"nginx:1.21"}`)}}, + {Type: cmTraitName}, + }, + }, + { + Name: "test-deployment-b", + Type: "webservice", + Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21","port":80,"cpu":"100m","memory":"128Mi"}`)}, + Traits: []common.ApplicationTrait{ + {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":3}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment-b","image":"nginx:1.21"}`)}}, + {Type: cmTraitName}, + }, + }, + }, + }, + } + DeferCleanup(func() { _ = k8sClient.Delete(ctx, app) }) + + By("Creating application with multiple components") + Expect(k8sClient.Create(ctx, app)).Should(Succeed()) + + By("Waiting for application, components, and PostDispatch traits to become healthy") + Eventually(func(g Gomega) { + checkApp := &v1beta1.Application{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: appName}, checkApp)).Should(Succeed()) + g.Expect(checkApp.Status.Phase).Should(Equal(common.ApplicationRunning)) + g.Expect(checkApp.Status.Services).Should(HaveLen(2)) + for _, svc := range checkApp.Status.Services { + g.Expect(svc.Healthy).Should(BeTrue()) + for _, traitStatus := range svc.Traits { + g.Expect(traitStatus.Healthy).Should(BeTrue()) + g.Expect(traitStatus.Pending).Should(BeFalse()) + } + } + }, 180*time.Second, 5*time.Second).Should(Succeed()) + }) + + It("Should show one PostDispatch trait unhealthy while others stay healthy", func() { + deploymentTraitName := "test-deployment-trait-" + randomNamespaceName("") + cmTraitName := "test-cm-trait-" + randomNamespaceName("") + appName := "app-postdispatch-multi-trait-unhealthy-" + randomNamespaceName("") + + By("Creating PostDispatch deployment trait definition") + deploymentTrait := &v1beta1.TraitDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentTraitName, + Namespace: "vela-system", + }, + Spec: v1beta1.TraitDefinitionSpec{ + Stage: v1beta1.PostDispatch, + Schematic: &common.Schematic{ + CUE: &common.CUE{ + Template: ` +outputs: statusPod: { + apiVersion: "apps/v1" + kind: "Deployment" + metadata: { + name: parameter.name + } + spec: { + replicas: context.output.status.replicas + selector: matchLabels: { + app: parameter.name + } + template: { + metadata: labels: { + app: parameter.name + } + spec: containers: [{ + name: parameter.name + image: parameter.image + }] + } + } +} + +parameter: { + name: string + image: string +} +`, + }, + }, + Status: &common.Status{ + HealthPolicy: `pod: context.outputs.statusPod +ready: { + updatedReplicas: *0 | int + readyReplicas: *0 | int + replicas: *0 | int + observedGeneration: *0 | int +} & { + if pod.status.updatedReplicas != _|_ { + updatedReplicas: pod.status.updatedReplicas + } + if pod.status.readyReplicas != _|_ { + readyReplicas: pod.status.readyReplicas + } + if pod.status.replicas != _|_ { + replicas: pod.status.replicas + } + if pod.status.observedGeneration != _|_ { + observedGeneration: pod.status.observedGeneration + } +} +_isHealth: (pod.spec.replicas == ready.readyReplicas) && (pod.spec.replicas == ready.updatedReplicas) && (pod.spec.replicas == ready.replicas) && (ready.observedGeneration == pod.metadata.generation || ready.observedGeneration > pod.metadata.generation) +isHealth: *_isHealth | bool +if pod.metadata.annotations != _|_ { + if pod.metadata.annotations["app.oam.dev/disable-health-check"] != _|_ { + isHealth: true + } +} +`, + }, + }, + } + Expect(k8sClient.Create(ctx, deploymentTrait)).Should(Succeed()) + + By("Creating PostDispatch configmap trait definition") + cmTrait := &v1beta1.TraitDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmTraitName, + Namespace: "vela-system", + }, + Spec: v1beta1.TraitDefinitionSpec{ + Stage: v1beta1.PostDispatch, + Schematic: &common.Schematic{ + CUE: &common.CUE{ + Template: ` +outputs: statusConfigMap: { + apiVersion: "v1" + kind: "ConfigMap" + metadata: { + name: context.name + "-status" + namespace: context.namespace + } + data: { + replicas: "\(context.output.status.replicas)" + readyReplicas: "\(context.output.status.readyReplicas)" + componentName: context.name + } +} +`, + }, + }, + Status: &common.Status{ + HealthPolicy: `cm: context.outputs.statusConfigMap +_isHealth: cm.data.readyReplicas != "2" +isHealth: *_isHealth | bool +`, + }, + }, + } + Expect(k8sClient.Create(ctx, cmTrait)).Should(Succeed()) + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, deploymentTrait) + _ = k8sClient.Delete(ctx, cmTrait) + }) + + app := &v1beta1.Application{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: namespace, + }, + Spec: v1beta1.ApplicationSpec{ + Components: []common.ApplicationComponent{ + { + Name: "test-deployment-a", + Type: "webservice", + Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21","port":80,"cpu":"100m","memory":"128Mi"}`)}, + Traits: []common.ApplicationTrait{ + {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":2}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment-a","image":"nginx:1.21"}`)}}, + {Type: cmTraitName}, + }, + }, + { + Name: "test-deployment-b", + Type: "webservice", + Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21","port":80,"cpu":"100m","memory":"128Mi"}`)}, + Traits: []common.ApplicationTrait{ + {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":3}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment-b","image":"nginx:1.21"}`)}}, + {Type: cmTraitName}, + }, + }, + }, + }, + } + DeferCleanup(func() { _ = k8sClient.Delete(ctx, app) }) + + By("Creating application with a faulty PostDispatch trait") + Expect(k8sClient.Create(ctx, app)).Should(Succeed()) + + By("Waiting for the faulty PostDispatch trait to report unhealthy") + Eventually(func(g Gomega) { + checkApp := &v1beta1.Application{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: appName}, checkApp)).Should(Succeed()) + g.Expect(checkApp.Status.Services).Should(HaveLen(2)) + + for _, svc := range checkApp.Status.Services { + switch svc.Name { + case "test-deployment-a": + g.Expect(svc.Healthy).Should(BeFalse()) + var pdDeployHealthy, pdCMHealthy bool + for _, traitStatus := range svc.Traits { + if traitStatus.Type == deploymentTraitName { + pdDeployHealthy = traitStatus.Healthy + } + if traitStatus.Type == cmTraitName { + pdCMHealthy = traitStatus.Healthy + } + } + g.Expect(pdDeployHealthy).Should(BeTrue()) + g.Expect(pdCMHealthy).Should(BeFalse()) + case "test-deployment-b": + g.Expect(svc.Healthy).Should(BeTrue()) + for _, traitStatus := range svc.Traits { + g.Expect(traitStatus.Healthy).Should(BeTrue()) + g.Expect(traitStatus.Pending).Should(BeFalse()) + } + } + } + }, 240*time.Second, 5*time.Second).Should(Succeed()) + }) + + It("Should keep PostDispatch traits pending for an unhealthy component while other component stays healthy", func() { + deploymentTraitName := "test-deployment-trait-" + randomNamespaceName("") + cmTraitName := "test-cm-trait-" + randomNamespaceName("") + appName := "app-postdispatch-multi-component-unhealthy-" + randomNamespaceName("") + + By("Creating PostDispatch deployment trait definition") + deploymentTrait := &v1beta1.TraitDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentTraitName, + Namespace: "vela-system", + }, + Spec: v1beta1.TraitDefinitionSpec{ + Stage: v1beta1.PostDispatch, + Schematic: &common.Schematic{ + CUE: &common.CUE{ + Template: ` +outputs: statusPod: { + apiVersion: "apps/v1" + kind: "Deployment" + metadata: { + name: parameter.name + } + spec: { + replicas: context.output.status.replicas + selector: matchLabels: { + app: parameter.name + } + template: { + metadata: labels: { + app: parameter.name + } + spec: containers: [{ + name: parameter.name + image: parameter.image + }] + } + } +} + +parameter: { + name: string + image: string +} +`, + }, + }, + Status: &common.Status{ + HealthPolicy: `pod: context.outputs.statusPod +ready: { + updatedReplicas: *0 | int + readyReplicas: *0 | int + replicas: *0 | int + observedGeneration: *0 | int +} & { + if pod.status.updatedReplicas != _|_ { + updatedReplicas: pod.status.updatedReplicas + } + if pod.status.readyReplicas != _|_ { + readyReplicas: pod.status.readyReplicas + } + if pod.status.replicas != _|_ { + replicas: pod.status.replicas + } + if pod.status.observedGeneration != _|_ { + observedGeneration: pod.status.observedGeneration + } +} +_isHealth: (pod.spec.replicas == ready.readyReplicas) && (pod.spec.replicas == ready.updatedReplicas) && (pod.spec.replicas == ready.replicas) && (ready.observedGeneration == pod.metadata.generation || ready.observedGeneration > pod.metadata.generation) +isHealth: *_isHealth | bool +if pod.metadata.annotations != _|_ { + if pod.metadata.annotations["app.oam.dev/disable-health-check"] != _|_ { + isHealth: true + } +} +`, + }, + }, + } + Expect(k8sClient.Create(ctx, deploymentTrait)).Should(Succeed()) + + By("Creating PostDispatch configmap trait definition") + cmTrait := &v1beta1.TraitDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmTraitName, + Namespace: "vela-system", + }, + Spec: v1beta1.TraitDefinitionSpec{ + Stage: v1beta1.PostDispatch, + Schematic: &common.Schematic{ + CUE: &common.CUE{ + Template: ` +outputs: statusConfigMap: { + apiVersion: "v1" + kind: "ConfigMap" + metadata: { + name: context.name + "-status" + namespace: context.namespace + } + data: { + replicas: "\(context.output.status.replicas)" + readyReplicas: "\(context.output.status.readyReplicas)" + componentName: context.name + } +} +`, + }, + }, + Status: &common.Status{ + HealthPolicy: `cm: context.outputs.statusConfigMap +_isHealth: cm.data.readyReplicas != "2" +isHealth: *_isHealth | bool +`, + }, + }, + } + Expect(k8sClient.Create(ctx, cmTrait)).Should(Succeed()) + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, deploymentTrait) + _ = k8sClient.Delete(ctx, cmTrait) + }) + + app := &v1beta1.Application{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: namespace, + }, + Spec: v1beta1.ApplicationSpec{ + Components: []common.ApplicationComponent{ + { + Name: "bad-component", + Type: "webservice", + Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21abc","port":80,"cpu":"100m","memory":"128Mi"}`)}, + Traits: []common.ApplicationTrait{ + {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":1}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment-bad","image":"nginx:1.21"}`)}}, + {Type: cmTraitName}, + }, + }, + { + Name: "good-component", + Type: "webservice", + Properties: &runtime.RawExtension{Raw: []byte(`{"image":"nginx:1.21","port":80,"cpu":"100m","memory":"128Mi"}`)}, + Traits: []common.ApplicationTrait{ + {Type: "scaler", Properties: &runtime.RawExtension{Raw: []byte(`{"replicas":3}`)}}, + {Type: deploymentTraitName, Properties: &runtime.RawExtension{Raw: []byte(`{"name":"trait-deployment-good","image":"nginx:1.21"}`)}}, + {Type: cmTraitName}, + }, + }, + }, + }, + } + DeferCleanup(func() { _ = k8sClient.Delete(ctx, app) }) + + By("Creating application with one unhealthy component") + Expect(k8sClient.Create(ctx, app)).Should(Succeed()) + + By("Waiting for PostDispatch traits to remain pending for the unhealthy component") + Eventually(func(g Gomega) { + checkApp := &v1beta1.Application{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: appName}, checkApp)).Should(Succeed()) + g.Expect(checkApp.Status.Services).Should(HaveLen(2)) + + for _, svc := range checkApp.Status.Services { + switch svc.Name { + case "bad-component": + g.Expect(svc.Healthy).Should(BeFalse()) + for _, traitStatus := range svc.Traits { + if traitStatus.Type == deploymentTraitName || traitStatus.Type == cmTraitName { + g.Expect(traitStatus.Healthy).Should(BeFalse()) + g.Expect(traitStatus.Pending).Should(BeTrue()) + g.Expect(traitStatus.Message).Should(ContainSubstring("Waiting for component to be healthy")) + } + if traitStatus.Type == "scaler" { + g.Expect(traitStatus.Healthy).Should(BeTrue()) + g.Expect(traitStatus.Pending).Should(BeFalse()) + } + } + case "good-component": + g.Expect(svc.Healthy).Should(BeTrue()) + for _, traitStatus := range svc.Traits { + g.Expect(traitStatus.Healthy).Should(BeTrue()) + g.Expect(traitStatus.Pending).Should(BeFalse()) + } + } + } + }, 240*time.Second, 5*time.Second).Should(Succeed()) + }) + }) })