Files
flagger/pkg/canary/deployment_ready.go
Sanskar Jaiswal 757d90121b scheduler: fail canary according to progress deadline
Modify `canary.IsPrimaryReady()` and `canary.Initialize()` to return a
boolean indicating if the error is retriable. Modify the scheduler to
rollback the analysis and mark the Canary object as failed if the above
two functions or `canary.IsCanaryRead()` returns false along with an
error.

Signed-off-by: Sanskar Jaiswal <jaiswalsanskar078@gmail.com>
2024-03-05 00:10:30 +05:30

123 lines
5.2 KiB
Go

/*
Copyright 2020 The Flux authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package canary
import (
"context"
"fmt"
"time"
appsv1 "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1"
)
// IsPrimaryReady checks the primary deployment status and returns an error if
// the deployment is in the middle of a rolling update or if the pods are unhealthy
// it will return a non retryable error if the rolling update is stuck
func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) (bool, error) {
primaryName := fmt.Sprintf("%s-primary", cd.Spec.TargetRef.Name)
primary, err := c.kubeClient.AppsV1().Deployments(cd.Namespace).Get(context.TODO(), primaryName, metav1.GetOptions{})
if err != nil {
return true, fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err)
}
retriable, err := c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return retriable, fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err)
}
if primary.Spec.Replicas == int32p(0) {
return false, fmt.Errorf("halt %s.%s advancement: primary deployment is scaled to zero",
cd.Name, cd.Namespace)
}
return true, nil
}
// IsCanaryReady checks the canary deployment status and returns an error if
// the deployment is in the middle of a rolling update or if the pods are unhealthy
// it will return a non retriable error if the rolling update is stuck
func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error) {
targetName := cd.Spec.TargetRef.Name
canary, err := c.kubeClient.AppsV1().Deployments(cd.Namespace).Get(context.TODO(), targetName, metav1.GetOptions{})
if err != nil {
return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err)
}
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisCanaryReadyThreshold())
if err != nil {
return retryable, fmt.Errorf(
"canary deployment %s.%s not ready: %w",
targetName, cd.Namespace, err,
)
}
return true, nil
}
// isDeploymentReady determines if a deployment is ready by checking the status conditions
// if a deployment has exceeded the progress deadline it returns a non retriable error
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) {
retriable := true
if deployment.Generation <= deployment.Status.ObservedGeneration {
progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
if progress != nil {
// Determine if the deployment is stuck by checking if there is a minimum replicas unavailable condition
// and if the last update time exceeds the deadline
available := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentAvailable)
if available != nil && available.Status == "False" && available.Reason == "MinimumReplicasUnavailable" {
from := available.LastUpdateTime
delta := time.Duration(deadline) * time.Second
retriable = !from.Add(delta).Before(time.Now())
}
}
readyThresholdRatio := float32(readyThreshold) / float32(100)
readyThresholdUpdatedReplicas := int32(float32(deployment.Status.UpdatedReplicas) * readyThresholdRatio)
if progress != nil && progress.Reason == "ProgressDeadlineExceeded" {
return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName())
} else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d out of %d new replicas have been updated",
deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas)
} else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination",
deployment.Status.Replicas-deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < readyThresholdUpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available",
deployment.Status.AvailableReplicas, readyThresholdUpdatedReplicas, readyThreshold)
}
} else {
return true, fmt.Errorf(
"waiting for rollout to finish: observed deployment generation less than desired generation")
}
return true, nil
}
func (c *DeploymentController) getDeploymentCondition(
status appsv1.DeploymentStatus,
conditionType appsv1.DeploymentConditionType,
) *appsv1.DeploymentCondition {
for i := range status.Conditions {
c := status.Conditions[i]
if c.Type == conditionType {
return &c
}
}
return nil
}