mirror of
https://github.com/fluxcd/flagger.git
synced 2026-03-02 17:51:00 +00:00
Resolves #371 --- This adds the support for `corev1.Service` as the `targetRef.kind`, so that we can use Flagger just for canary analysis and traffic-shifting on existing and pre-created services. Flagger doesn't touch deployments and HPAs in this mode. This is useful for keeping full control of the resources backing the service to be canary-released, including pods (behind a ClusterIP service) and external services (behind an ExternalName service). Major use-cases in my mind are: - Canary-release a K8s cluster. You create two clusters and a master cluster. In the master cluster, you create two `ExternalName` services pointing to (the hostname of the loadbalancer of the targeted app instance in) each cluster. Flagger runs on the master cluster and helps safely rolling out a new K8s cluster by doing a canary release on the `ExternalName` service. - You want annotations and labels added to the service for integrating with things like external LBs (without extending Flagger to support customizing any aspect of the K8s service it manages). **Design**: A canary release on a K8s service is almost the same as one on a K8s deployment. The only fundamental difference is that it operates only on a set of K8s services. For example, one may start by creating two Helm releases for `podinfo-blue` and `podinfo-green`, and a K8s service `podinfo`. The `podinfo` service should initially have the same `Spec` as that of `podinfo-blue`. On a new release, you update `podinfo-green`, then trigger Flagger by updating the K8s service `podinfo` so that it points to pods or `externalName` as declared in `podinfo-green`. Flagger does the rest. The end result is that the traffic to `podinfo` is gradually and safely shifted from `podinfo-blue` to `podinfo-green`. **How it works**: Under the hood, Flagger maintains two K8s services, `podinfo-primary` and `podinfo-canary`. Compared to canaries on K8s deployments, it doesn't create the service named `podinfo`, as it is already provided by YOU. 
Once Flagger detects the change in the `podinfo` service, it updates the `podinfo-canary` service and the routes, then analyzes the canary. On successful analysis, it promotes the canary service to the `podinfo-primary` service. You expose the `podinfo` service via any L7 ingress solution or a service mesh so that the traffic is managed by Flagger for safe deployments. **Giving it a try**: To give it a try, create a `Canary` as usual, but with its `targetRef` pointing to a K8s service: ``` apiVersion: flagger.app/v1alpha3 kind: Canary metadata: name: podinfo spec: provider: kubernetes targetRef: apiVersion: core/v1 kind: Service name: podinfo service: port: 9898 canaryAnalysis: # schedule interval (default 60s) interval: 10s # max number of failed checks before rollback threshold: 2 # number of checks to run before rollback iterations: 2 # Prometheus checks based on # http_request_duration_seconds histogram metrics: [] ``` Create a K8s service named `podinfo`, and update it. Now watch for the services `podinfo`, `podinfo-primary`, `podinfo-canary`. Flagger tracks the `podinfo` service for changes. Upon any change, it reconciles the `podinfo-primary` and `podinfo-canary` services. `podinfo-canary` always replicates the latest `podinfo`. In contrast, `podinfo-primary` replicates the latest successful `podinfo-canary`. **Notes**: - For the canary cluster use-case, we would need to write a K8s operator to, e.g. for App Mesh, sync `ExternalName` services to AppMesh `VirtualNode`s. But that's another story!
273 lines
8.7 KiB
Go
273 lines
8.7 KiB
Go
package canary
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/mitchellh/hashstructure"
|
|
ex "github.com/pkg/errors"
|
|
"github.com/weaveworks/flagger/pkg/client/clientset/versioned"
|
|
corev1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/client-go/util/retry"
|
|
|
|
flaggerv1 "github.com/weaveworks/flagger/pkg/apis/flagger/v1alpha3"
|
|
)
|
|
|
|
// SyncStatus encodes the canary pod spec and updates the canary status
|
|
func (c *DeploymentController) SyncStatus(cd *flaggerv1.Canary, status flaggerv1.CanaryStatus) error {
|
|
dep, err := c.kubeClient.AppsV1().Deployments(cd.Namespace).Get(cd.Spec.TargetRef.Name, metav1.GetOptions{})
|
|
if err != nil {
|
|
if errors.IsNotFound(err) {
|
|
return fmt.Errorf("deployment %s.%s not found", cd.Spec.TargetRef.Name, cd.Namespace)
|
|
}
|
|
return ex.Wrap(err, "SyncStatus deployment query error")
|
|
}
|
|
|
|
configs, err := c.configTracker.GetConfigRefs(cd)
|
|
if err != nil {
|
|
return ex.Wrap(err, "SyncStatus configs query error")
|
|
}
|
|
|
|
return syncCanaryStatus(c.flaggerClient, cd, status, dep.Spec.Template, func(cdCopy *flaggerv1.Canary) {
|
|
cdCopy.Status.TrackedConfigs = configs
|
|
})
|
|
}
|
|
|
|
func syncCanaryStatus(flaggerClient versioned.Interface, cd *flaggerv1.Canary, status flaggerv1.CanaryStatus, canaryResource interface{}, setAll func(cdCopy *flaggerv1.Canary)) error {
|
|
hash, err := hashstructure.Hash(canaryResource, nil)
|
|
if err != nil {
|
|
return ex.Wrap(err, "SyncStatus hash error")
|
|
}
|
|
|
|
firstTry := true
|
|
err = retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
|
|
var selErr error
|
|
if !firstTry {
|
|
cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
|
|
if selErr != nil {
|
|
return selErr
|
|
}
|
|
}
|
|
cdCopy := cd.DeepCopy()
|
|
cdCopy.Status.Phase = status.Phase
|
|
cdCopy.Status.CanaryWeight = status.CanaryWeight
|
|
cdCopy.Status.FailedChecks = status.FailedChecks
|
|
cdCopy.Status.Iterations = status.Iterations
|
|
cdCopy.Status.LastAppliedSpec = fmt.Sprintf("%d", hash)
|
|
cdCopy.Status.LastTransitionTime = metav1.Now()
|
|
setAll(cdCopy)
|
|
|
|
if ok, conditions := MakeStatusConditions(cd.Status, status.Phase); ok {
|
|
cdCopy.Status.Conditions = conditions
|
|
}
|
|
|
|
_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
|
|
firstTry = false
|
|
return
|
|
})
|
|
if err != nil {
|
|
return ex.Wrap(err, "SyncStatus")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetStatusFailedChecks updates the canary failed checks counter.
// It delegates to setStatusFailedChecks, which retries on update conflicts.
func (c *DeploymentController) SetStatusFailedChecks(cd *flaggerv1.Canary, val int) error {
	return setStatusFailedChecks(c.flaggerClient, cd, val)
}
|
|
|
|
func setStatusFailedChecks(flaggerClient versioned.Interface, cd *flaggerv1.Canary, val int) error {
|
|
firstTry := true
|
|
err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
|
|
var selErr error
|
|
if !firstTry {
|
|
cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
|
|
if selErr != nil {
|
|
return selErr
|
|
}
|
|
}
|
|
cdCopy := cd.DeepCopy()
|
|
cdCopy.Status.FailedChecks = val
|
|
cdCopy.Status.LastTransitionTime = metav1.Now()
|
|
|
|
_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
|
|
firstTry = false
|
|
return
|
|
})
|
|
if err != nil {
|
|
return ex.Wrap(err, "SetStatusFailedChecks")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetStatusWeight updates the canary status weight value.
// It delegates to setStatusWeight, which retries on update conflicts.
func (c *DeploymentController) SetStatusWeight(cd *flaggerv1.Canary, val int) error {
	return setStatusWeight(c.flaggerClient, cd, val)
}
|
|
|
|
func setStatusWeight(flaggerClient versioned.Interface, cd *flaggerv1.Canary, val int) error {
|
|
firstTry := true
|
|
err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
|
|
var selErr error
|
|
if !firstTry {
|
|
cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
|
|
if selErr != nil {
|
|
return selErr
|
|
}
|
|
}
|
|
cdCopy := cd.DeepCopy()
|
|
cdCopy.Status.CanaryWeight = val
|
|
cdCopy.Status.LastTransitionTime = metav1.Now()
|
|
|
|
_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
|
|
firstTry = false
|
|
return
|
|
})
|
|
if err != nil {
|
|
return ex.Wrap(err, "SetStatusWeight")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetStatusIterations updates the canary status iterations value.
// It delegates to setStatusIterations, which retries on update conflicts.
func (c *DeploymentController) SetStatusIterations(cd *flaggerv1.Canary, val int) error {
	return setStatusIterations(c.flaggerClient, cd, val)
}
|
|
|
|
func setStatusIterations(flaggerClient versioned.Interface, cd *flaggerv1.Canary, val int) error {
|
|
firstTry := true
|
|
err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
|
|
var selErr error
|
|
if !firstTry {
|
|
cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
|
|
if selErr != nil {
|
|
return selErr
|
|
}
|
|
}
|
|
|
|
cdCopy := cd.DeepCopy()
|
|
cdCopy.Status.Iterations = val
|
|
cdCopy.Status.LastTransitionTime = metav1.Now()
|
|
|
|
_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
|
|
firstTry = false
|
|
return
|
|
})
|
|
|
|
if err != nil {
|
|
return ex.Wrap(err, "SetStatusIterations")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetStatusPhase updates the canary status phase.
// It delegates to setStatusPhase, which retries on update conflicts.
func (c *DeploymentController) SetStatusPhase(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error {
	return setStatusPhase(c.flaggerClient, cd, phase)
}
|
|
|
|
func setStatusPhase(flaggerClient versioned.Interface, cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error {
|
|
firstTry := true
|
|
err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
|
|
var selErr error
|
|
if !firstTry {
|
|
cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
|
|
if selErr != nil {
|
|
return selErr
|
|
}
|
|
}
|
|
cdCopy := cd.DeepCopy()
|
|
cdCopy.Status.Phase = phase
|
|
cdCopy.Status.LastTransitionTime = metav1.Now()
|
|
|
|
if phase != flaggerv1.CanaryPhaseProgressing && phase != flaggerv1.CanaryPhaseWaiting {
|
|
cdCopy.Status.CanaryWeight = 0
|
|
cdCopy.Status.Iterations = 0
|
|
}
|
|
|
|
// on promotion set primary spec hash
|
|
if phase == flaggerv1.CanaryPhaseInitialized || phase == flaggerv1.CanaryPhaseSucceeded {
|
|
cdCopy.Status.LastPromotedSpec = cd.Status.LastAppliedSpec
|
|
}
|
|
|
|
if ok, conditions := MakeStatusConditions(cdCopy.Status, phase); ok {
|
|
cdCopy.Status.Conditions = conditions
|
|
}
|
|
|
|
_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
|
|
firstTry = false
|
|
return
|
|
})
|
|
if err != nil {
|
|
return ex.Wrap(err, "SetStatusPhase")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// getStatusCondition returns a condition based on type
|
|
func getStatusCondition(status flaggerv1.CanaryStatus, conditionType flaggerv1.CanaryConditionType) *flaggerv1.CanaryCondition {
|
|
for i := range status.Conditions {
|
|
c := status.Conditions[i]
|
|
if c.Type == conditionType {
|
|
return &c
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MakeStatusCondition updates the canary status conditions based on canary phase
|
|
func MakeStatusConditions(canaryStatus flaggerv1.CanaryStatus,
|
|
phase flaggerv1.CanaryPhase) (bool, []flaggerv1.CanaryCondition) {
|
|
currentCondition := getStatusCondition(canaryStatus, flaggerv1.PromotedType)
|
|
|
|
message := "New deployment detected, starting initialization."
|
|
status := corev1.ConditionUnknown
|
|
switch phase {
|
|
case flaggerv1.CanaryPhaseInitializing:
|
|
status = corev1.ConditionUnknown
|
|
message = "New deployment detected, starting initialization."
|
|
case flaggerv1.CanaryPhaseInitialized:
|
|
status = corev1.ConditionTrue
|
|
message = "Deployment initialization completed."
|
|
case flaggerv1.CanaryPhaseWaiting:
|
|
status = corev1.ConditionUnknown
|
|
message = "Waiting for approval."
|
|
case flaggerv1.CanaryPhaseProgressing:
|
|
status = corev1.ConditionUnknown
|
|
message = "New revision detected, starting canary analysis."
|
|
case flaggerv1.CanaryPhasePromoting:
|
|
status = corev1.ConditionUnknown
|
|
message = "Canary analysis completed, starting primary rolling update."
|
|
case flaggerv1.CanaryPhaseFinalising:
|
|
status = corev1.ConditionUnknown
|
|
message = "Canary analysis completed, routing all traffic to primary."
|
|
case flaggerv1.CanaryPhaseSucceeded:
|
|
status = corev1.ConditionTrue
|
|
message = "Canary analysis completed successfully, promotion finished."
|
|
case flaggerv1.CanaryPhaseFailed:
|
|
status = corev1.ConditionFalse
|
|
message = "Canary analysis failed, deployment scaled to zero."
|
|
}
|
|
|
|
newCondition := &flaggerv1.CanaryCondition{
|
|
Type: flaggerv1.PromotedType,
|
|
Status: status,
|
|
LastUpdateTime: metav1.Now(),
|
|
LastTransitionTime: metav1.Now(),
|
|
Message: message,
|
|
Reason: string(phase),
|
|
}
|
|
|
|
if currentCondition != nil &&
|
|
currentCondition.Status == newCondition.Status &&
|
|
currentCondition.Reason == newCondition.Reason {
|
|
return false, nil
|
|
}
|
|
|
|
if currentCondition != nil && currentCondition.Status == newCondition.Status {
|
|
newCondition.LastTransitionTime = currentCondition.LastTransitionTime
|
|
}
|
|
|
|
return true, []flaggerv1.CanaryCondition{*newCondition}
|
|
}
|