flagger/pkg/canary/status.go
Yusuke Kuoka 1ba595bc6f feat: Canary-release anything behind K8s service
Resolves #371

---

This adds support for `corev1.Service` as the `targetRef.kind`, so that Flagger can be used just for canary analysis and traffic shifting on existing, pre-created services. In this mode, Flagger doesn't touch Deployments or HPAs.

This is useful when you want to keep full control over the resources backing the service being canary-released, including pods (behind a ClusterIP service) and external services (behind an ExternalName service).

The major use-cases I have in mind are:

- Canary-release a K8s cluster. You create two clusters and a master cluster. In the master cluster, you create two `ExternalName` services pointing to (the hostname of the load balancer of the targeted app instance in) each cluster. Flagger runs in the master cluster and helps safely roll out a new K8s cluster by doing a canary release on the `ExternalName` service (see the sketch after this list).
- You want annotations and labels added to the service for integrating with things like external load balancers (without extending Flagger to support customizing every aspect of the K8s service it manages).
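
For the canary-cluster use case, the `ExternalName` services in the master cluster could look roughly like this (a minimal sketch; the service names and load-balancer hostnames are hypothetical placeholders):

```
# Hypothetical per-cluster ExternalName services in the master cluster.
# Each externalName is a placeholder for that cluster's app load balancer hostname.
apiVersion: v1
kind: Service
metadata:
  name: podinfo-blue
spec:
  type: ExternalName
  externalName: podinfo.blue-cluster.example.com
---
apiVersion: v1
kind: Service
metadata:
  name: podinfo-green
spec:
  type: ExternalName
  externalName: podinfo.green-cluster.example.com
```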

**Design**:

A canary release on a K8s service is almost the same as one on a K8s deployment. The only fundamental difference is that it operates on a set of K8s services rather than deployments.

For example, one may start by creating two Helm releases for `podinfo-blue` and `podinfo-green`, and a K8s service `podinfo`. The `podinfo` service should initially have the same `Spec` as that of `podinfo-blue`.

On a new release, you update `podinfo-green`, then trigger Flagger by updating the K8s service `podinfo` so that it points to the pods or `externalName` declared in `podinfo-green`. Flagger does the rest: traffic to `podinfo` is gradually and safely shifted from `podinfo-blue` to `podinfo-green`.
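
As a rough illustration (the selector labels here are hypothetical and depend on what the Helm releases actually create), the user-managed `podinfo` service might start out like this, and switching the selector (or `externalName`) to the green release is what triggers Flagger:

```
# Target service managed by you, not by Flagger.
# Initially it mirrors the podinfo-blue release.
apiVersion: v1
kind: Service
metadata:
  name: podinfo
spec:
  selector:
    app: podinfo
    release: blue   # switch to "green" to start a canary release
  ports:
    - port: 9898
      targetPort: 9898
```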

**How it works**:

Under the hood, Flagger maintains two K8s services, `podinfo-primary` and `podinfo-canary`. Compared to canaries on K8s deployments, it doesn't create the service named `podinfo`, as that one is provided by you.

Once Flagger detects the change in the `podinfo` service, it updates the `podinfo-canary` service and the routes, then analyzes the canary. On successful analysis, it promotes the canary service to the `podinfo-primary` service. You expose the `podinfo` service via any L7 ingress solution or a service mesh so that the traffic is managed by Flagger for safe deployments.
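
The analysis and promotion state is tracked on the Canary's status subresource, the same fields the status code below manipulates. An illustrative snapshot after a successful promotion might look like this (the hash values are placeholders):

```
status:
  phase: Succeeded
  canaryWeight: 0
  failedChecks: 0
  iterations: 0
  lastAppliedSpec: "6802531417780845025"    # hash of the latest target spec (placeholder)
  lastPromotedSpec: "6802531417780845025"   # hash of the last promoted spec (placeholder)
  conditions:
  - type: Promoted
    status: "True"
    reason: Succeeded
    message: Canary analysis completed successfully, promotion finished.
```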

**Giving it a try**:

To give it a try, create a `Canary` as usual, but with its `targetRef` pointing to a K8s service:

```
apiVersion: flagger.app/v1alpha3
kind: Canary
metadata:
  name: podinfo
spec:
  provider: kubernetes
  targetRef:
    apiVersion: core/v1
    kind: Service
    name: podinfo
  service:
    port: 9898
  canaryAnalysis:
    # schedule interval (default 60s)
    interval: 10s
    # max number of failed checks before rollback
    threshold: 2
    # number of checks to run before promotion
    iterations: 2
    # Prometheus checks based on
    # http_request_duration_seconds histogram
    metrics: []
```

Create a K8s service named `podinfo`, then update it. Now watch the services `podinfo`, `podinfo-primary`, and `podinfo-canary`.

Flagger tracks the `podinfo` service for changes. Upon any change, it reconciles the `podinfo-primary` and `podinfo-canary` services: `podinfo-canary` always replicates the latest `podinfo`, while `podinfo-primary` replicates the latest successful `podinfo-canary`.
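
Concretely, after a change to `podinfo` is detected and promoted, the two managed services could end up looking roughly like this (an illustrative sketch: `podinfo-canary` mirrors the latest `podinfo` spec, `podinfo-primary` the last successfully analyzed one, and the selector labels are hypothetical):

```
apiVersion: v1
kind: Service
metadata:
  name: podinfo-canary     # always replicates the latest podinfo spec
spec:
  selector:
    app: podinfo
    release: green
  ports:
    - port: 9898
      targetPort: 9898
---
apiVersion: v1
kind: Service
metadata:
  name: podinfo-primary    # replicates the last successfully analyzed canary spec
spec:
  selector:
    app: podinfo
    release: green         # after promotion; before promotion it still points at "blue"
  ports:
    - port: 9898
      targetPort: 9898
```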

**Notes**:

- For the canary-cluster use case, we would need to write a K8s operator that syncs the `ExternalName` services to mesh-specific resources, e.g. App Mesh `VirtualNode`s. But that's another story!
2019-11-27 09:07:29 +09:00


package canary

import (
	"fmt"

	"github.com/mitchellh/hashstructure"
	ex "github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/util/retry"

	flaggerv1 "github.com/weaveworks/flagger/pkg/apis/flagger/v1alpha3"
	"github.com/weaveworks/flagger/pkg/client/clientset/versioned"
)

// SyncStatus encodes the canary pod spec and updates the canary status
func (c *DeploymentController) SyncStatus(cd *flaggerv1.Canary, status flaggerv1.CanaryStatus) error {
	dep, err := c.kubeClient.AppsV1().Deployments(cd.Namespace).Get(cd.Spec.TargetRef.Name, metav1.GetOptions{})
	if err != nil {
		if errors.IsNotFound(err) {
			return fmt.Errorf("deployment %s.%s not found", cd.Spec.TargetRef.Name, cd.Namespace)
		}
		return ex.Wrap(err, "SyncStatus deployment query error")
	}

	configs, err := c.configTracker.GetConfigRefs(cd)
	if err != nil {
		return ex.Wrap(err, "SyncStatus configs query error")
	}

	return syncCanaryStatus(c.flaggerClient, cd, status, dep.Spec.Template, func(cdCopy *flaggerv1.Canary) {
		cdCopy.Status.TrackedConfigs = configs
	})
}
func syncCanaryStatus(flaggerClient versioned.Interface, cd *flaggerv1.Canary, status flaggerv1.CanaryStatus, canaryResource interface{}, setAll func(cdCopy *flaggerv1.Canary)) error {
	hash, err := hashstructure.Hash(canaryResource, nil)
	if err != nil {
		return ex.Wrap(err, "SyncStatus hash error")
	}

	firstTry := true
	err = retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
		var selErr error
		if !firstTry {
			// on conflict retries, re-fetch the canary so the status update
			// is applied on top of the latest resource version
			cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
			if selErr != nil {
				return selErr
			}
		}

		cdCopy := cd.DeepCopy()
		cdCopy.Status.Phase = status.Phase
		cdCopy.Status.CanaryWeight = status.CanaryWeight
		cdCopy.Status.FailedChecks = status.FailedChecks
		cdCopy.Status.Iterations = status.Iterations
		cdCopy.Status.LastAppliedSpec = fmt.Sprintf("%d", hash)
		cdCopy.Status.LastTransitionTime = metav1.Now()
		setAll(cdCopy)

		if ok, conditions := MakeStatusConditions(cd.Status, status.Phase); ok {
			cdCopy.Status.Conditions = conditions
		}

		_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
		firstTry = false
		return
	})
	if err != nil {
		return ex.Wrap(err, "SyncStatus")
	}
	return nil
}
// SetStatusFailedChecks updates the canary failed checks counter
func (c *DeploymentController) SetStatusFailedChecks(cd *flaggerv1.Canary, val int) error {
	return setStatusFailedChecks(c.flaggerClient, cd, val)
}

func setStatusFailedChecks(flaggerClient versioned.Interface, cd *flaggerv1.Canary, val int) error {
	firstTry := true
	err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
		var selErr error
		if !firstTry {
			cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
			if selErr != nil {
				return selErr
			}
		}

		cdCopy := cd.DeepCopy()
		cdCopy.Status.FailedChecks = val
		cdCopy.Status.LastTransitionTime = metav1.Now()

		_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
		firstTry = false
		return
	})
	if err != nil {
		return ex.Wrap(err, "SetStatusFailedChecks")
	}
	return nil
}

// SetStatusWeight updates the canary status weight value
func (c *DeploymentController) SetStatusWeight(cd *flaggerv1.Canary, val int) error {
	return setStatusWeight(c.flaggerClient, cd, val)
}

func setStatusWeight(flaggerClient versioned.Interface, cd *flaggerv1.Canary, val int) error {
	firstTry := true
	err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
		var selErr error
		if !firstTry {
			cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
			if selErr != nil {
				return selErr
			}
		}

		cdCopy := cd.DeepCopy()
		cdCopy.Status.CanaryWeight = val
		cdCopy.Status.LastTransitionTime = metav1.Now()

		_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
		firstTry = false
		return
	})
	if err != nil {
		return ex.Wrap(err, "SetStatusWeight")
	}
	return nil
}

// SetStatusIterations updates the canary status iterations value
func (c *DeploymentController) SetStatusIterations(cd *flaggerv1.Canary, val int) error {
	return setStatusIterations(c.flaggerClient, cd, val)
}

func setStatusIterations(flaggerClient versioned.Interface, cd *flaggerv1.Canary, val int) error {
	firstTry := true
	err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
		var selErr error
		if !firstTry {
			cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
			if selErr != nil {
				return selErr
			}
		}

		cdCopy := cd.DeepCopy()
		cdCopy.Status.Iterations = val
		cdCopy.Status.LastTransitionTime = metav1.Now()

		_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
		firstTry = false
		return
	})
	if err != nil {
		return ex.Wrap(err, "SetStatusIterations")
	}
	return nil
}
// SetStatusPhase updates the canary status phase
func (c *DeploymentController) SetStatusPhase(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error {
	return setStatusPhase(c.flaggerClient, cd, phase)
}

func setStatusPhase(flaggerClient versioned.Interface, cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error {
	firstTry := true
	err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
		var selErr error
		if !firstTry {
			cd, selErr = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).Get(cd.GetName(), metav1.GetOptions{})
			if selErr != nil {
				return selErr
			}
		}

		cdCopy := cd.DeepCopy()
		cdCopy.Status.Phase = phase
		cdCopy.Status.LastTransitionTime = metav1.Now()

		if phase != flaggerv1.CanaryPhaseProgressing && phase != flaggerv1.CanaryPhaseWaiting {
			cdCopy.Status.CanaryWeight = 0
			cdCopy.Status.Iterations = 0
		}

		// on promotion set primary spec hash
		if phase == flaggerv1.CanaryPhaseInitialized || phase == flaggerv1.CanaryPhaseSucceeded {
			cdCopy.Status.LastPromotedSpec = cd.Status.LastAppliedSpec
		}

		if ok, conditions := MakeStatusConditions(cdCopy.Status, phase); ok {
			cdCopy.Status.Conditions = conditions
		}

		_, err = flaggerClient.FlaggerV1alpha3().Canaries(cd.Namespace).UpdateStatus(cdCopy)
		firstTry = false
		return
	})
	if err != nil {
		return ex.Wrap(err, "SetStatusPhase")
	}
	return nil
}
// getStatusCondition returns a condition based on type
func getStatusCondition(status flaggerv1.CanaryStatus, conditionType flaggerv1.CanaryConditionType) *flaggerv1.CanaryCondition {
	for i := range status.Conditions {
		c := status.Conditions[i]
		if c.Type == conditionType {
			return &c
		}
	}
	return nil
}

// MakeStatusConditions updates the canary status conditions based on canary phase
func MakeStatusConditions(canaryStatus flaggerv1.CanaryStatus,
	phase flaggerv1.CanaryPhase) (bool, []flaggerv1.CanaryCondition) {
	currentCondition := getStatusCondition(canaryStatus, flaggerv1.PromotedType)

	message := "New deployment detected, starting initialization."
	status := corev1.ConditionUnknown
	switch phase {
	case flaggerv1.CanaryPhaseInitializing:
		status = corev1.ConditionUnknown
		message = "New deployment detected, starting initialization."
	case flaggerv1.CanaryPhaseInitialized:
		status = corev1.ConditionTrue
		message = "Deployment initialization completed."
	case flaggerv1.CanaryPhaseWaiting:
		status = corev1.ConditionUnknown
		message = "Waiting for approval."
	case flaggerv1.CanaryPhaseProgressing:
		status = corev1.ConditionUnknown
		message = "New revision detected, starting canary analysis."
	case flaggerv1.CanaryPhasePromoting:
		status = corev1.ConditionUnknown
		message = "Canary analysis completed, starting primary rolling update."
	case flaggerv1.CanaryPhaseFinalising:
		status = corev1.ConditionUnknown
		message = "Canary analysis completed, routing all traffic to primary."
	case flaggerv1.CanaryPhaseSucceeded:
		status = corev1.ConditionTrue
		message = "Canary analysis completed successfully, promotion finished."
	case flaggerv1.CanaryPhaseFailed:
		status = corev1.ConditionFalse
		message = "Canary analysis failed, deployment scaled to zero."
	}

	newCondition := &flaggerv1.CanaryCondition{
		Type:               flaggerv1.PromotedType,
		Status:             status,
		LastUpdateTime:     metav1.Now(),
		LastTransitionTime: metav1.Now(),
		Message:            message,
		Reason:             string(phase),
	}

	// no update needed when the condition status and reason are unchanged
	if currentCondition != nil &&
		currentCondition.Status == newCondition.Status &&
		currentCondition.Reason == newCondition.Reason {
		return false, nil
	}

	// preserve the transition time when only the reason or message changed
	if currentCondition != nil && currentCondition.Status == newCondition.Status {
		newCondition.LastTransitionTime = currentCondition.LastTransitionTime
	}

	return true, []flaggerv1.CanaryCondition{*newCondition}
}