mirror of
https://github.com/fluxcd/flagger.git
synced 2026-04-15 06:57:34 +00:00
Refactor canary analysis
- move CRD metrics and weight setting to canary analysis - add max weight to CRD spec
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
# run tester: kubectl run -n test tester --image=quay.io/stefanprodan/podinfo:1.2.1 -- ./podinfo --port=9898
|
||||
# generate latency: watch curl http://podinfo-canary:9898/delay/1
|
||||
# generate errors: watch curl http://podinfo-canary:9898/status/500
|
||||
# run load test: kubectl run -n test -it --rm --restart=Never hey --image=stefanprodan/loadtest -- sh
|
||||
# generate load: hey -z 2m -h2 -m POST -d '{test: 1}' -c 10 -q 5 http://podinfo:9898/api/echo
|
||||
apiVersion: apps.weave.works/v1beta1
|
||||
kind: Rollout
|
||||
metadata:
|
||||
@@ -12,25 +14,29 @@ metadata:
|
||||
namespace: test
|
||||
spec:
|
||||
targetKind: Deployment
|
||||
virtualService:
|
||||
name: podinfo
|
||||
primary:
|
||||
name: podinfo
|
||||
host: podinfo
|
||||
canary:
|
||||
name: podinfo-canary
|
||||
host: podinfo-canary
|
||||
virtualService:
|
||||
name: podinfo
|
||||
canaryAnalysis:
|
||||
# max traffic percentage routed to canary
|
||||
# percentage (0-100)
|
||||
maxWeight: 100
|
||||
# canary increment step
|
||||
# percentage (0-100)
|
||||
weight: 10
|
||||
metrics:
|
||||
- name: istio_requests_total
|
||||
# minimum req success rate (non 5xx responses)
|
||||
# percentage (0-100)
|
||||
threshold: 99
|
||||
interval: 1m
|
||||
- name: istio_request_duration_seconds_bucket
|
||||
# maximum req duration P99
|
||||
# milliseconds
|
||||
threshold: 500
|
||||
interval: 1m
|
||||
stepWeight: 10
|
||||
metrics:
|
||||
- name: istio_requests_total
|
||||
# minimum req success rate (non 5xx responses)
|
||||
# percentage (0-100)
|
||||
threshold: 99
|
||||
interval: 1m
|
||||
- name: istio_request_duration_seconds_bucket
|
||||
# maximum req duration P99
|
||||
# milliseconds
|
||||
threshold: 500
|
||||
interval: 30s
|
||||
|
||||
@@ -21,10 +21,18 @@ spec:
|
||||
properties:
|
||||
spec:
|
||||
required:
|
||||
- targetKind
|
||||
- virtualService
|
||||
- primary
|
||||
- canary
|
||||
- virtualService
|
||||
- canaryAnalysis
|
||||
properties:
|
||||
targetKind:
|
||||
type: string
|
||||
virtualService:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
primary:
|
||||
properties:
|
||||
name:
|
||||
@@ -37,22 +45,23 @@ spec:
|
||||
type: string
|
||||
host:
|
||||
type: string
|
||||
virtualService:
|
||||
canaryAnalysis:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
weight:
|
||||
maxWeight:
|
||||
type: number
|
||||
metrics:
|
||||
type: array
|
||||
properties:
|
||||
items:
|
||||
type: object
|
||||
stepWeight:
|
||||
type: number
|
||||
metrics:
|
||||
type: array
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
interval:
|
||||
type: string
|
||||
pattern: "^[0-9]+(m)"
|
||||
threshold:
|
||||
type: number
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
interval:
|
||||
type: string
|
||||
pattern: "^[0-9]+(m)"
|
||||
threshold:
|
||||
type: number
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ spec:
|
||||
serviceAccountName: steerer
|
||||
containers:
|
||||
- name: steerer
|
||||
image: stefanprodan/steerer:0.0.1-beta.12
|
||||
image: stefanprodan/steerer:0.0.1-rc.1
|
||||
imagePullPolicy: Always
|
||||
ports:
|
||||
- name: http
|
||||
|
||||
@@ -57,7 +57,7 @@ spec:
|
||||
- http
|
||||
- localhost:9898/readyz
|
||||
failureThreshold: 3
|
||||
periodSeconds: 10
|
||||
periodSeconds: 3
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 2
|
||||
resources:
|
||||
@@ -65,7 +65,7 @@ spec:
|
||||
cpu: 1000m
|
||||
memory: 256Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
cpu: 100m
|
||||
memory: 16Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -3,16 +3,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: podinfo
|
||||
namespace: test
|
||||
annotations:
|
||||
apps.weave.works/progressive: "true"
|
||||
labels:
|
||||
app: podinfo
|
||||
spec:
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
strategy:
|
||||
rollingUpdate:
|
||||
maxSurge: 25%
|
||||
maxUnavailable: 1
|
||||
maxUnavailable: 0
|
||||
type: RollingUpdate
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -48,6 +45,7 @@ spec:
|
||||
- check
|
||||
- http
|
||||
- localhost:9898/healthz
|
||||
initialDelaySeconds: 5
|
||||
failureThreshold: 3
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
@@ -59,11 +57,15 @@ spec:
|
||||
- check
|
||||
- http
|
||||
- localhost:9898/readyz
|
||||
initialDelaySeconds: 5
|
||||
failureThreshold: 3
|
||||
periodSeconds: 10
|
||||
periodSeconds: 3
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
resources:
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 512Mi
|
||||
requests:
|
||||
cpu: 1m
|
||||
memory: 16Mi
|
||||
cpu: 10m
|
||||
memory: 64Mi
|
||||
|
||||
@@ -14,6 +14,8 @@ spec:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
# scale up if usage is above
|
||||
# 99% of the requested CPU (100m)
|
||||
targetAverageUtilization: 99
|
||||
- type: Resource
|
||||
resource:
|
||||
|
||||
19
artifacts/workloads/hpa.yaml
Normal file
19
artifacts/workloads/hpa.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# HorizontalPodAutoscaler for the podinfo Deployment in the test namespace.
# Keeps between 2 and 4 replicas, scaling on average CPU utilization.
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: podinfo
  namespace: test
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: podinfo
  minReplicas: 2
  maxReplicas: 4
  metrics:
  - type: Resource
    resource:
      name: cpu
      # scale up if usage is above
      # 99% of the requested CPU (100m)
      targetAverageUtilization: 99
|
||||
@@ -1,13 +1,10 @@
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
kind: VirtualService
|
||||
metadata:
|
||||
annotations:
|
||||
apps.weave.works/progressive-revision: ""
|
||||
apps.weave.works/progressive-status: ""
|
||||
labels:
|
||||
app: podinfo
|
||||
name: podinfo
|
||||
namespace: test
|
||||
labels:
|
||||
app: podinfo
|
||||
spec:
|
||||
gateways:
|
||||
- public-gateway.istio-system.svc.cluster.local
|
||||
@@ -26,4 +23,7 @@ spec:
|
||||
port:
|
||||
number: 9898
|
||||
weight: 0
|
||||
timeout: 30s
|
||||
timeout: 10s
|
||||
retries:
|
||||
attempts: 3
|
||||
perTryTimeout: 2s
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
apiVersion: v1
|
||||
name: steerer
|
||||
version: 0.0.1
|
||||
appVersion: 0.0.1-beta.12
|
||||
appVersion: 0.0.1-rc.1
|
||||
description: Steerer is a Kubernetes operator that automates the promotion of canary deployments using Istio routing for traffic shifting and Prometheus metrics for canary analysis.
|
||||
|
||||
@@ -22,10 +22,18 @@ spec:
|
||||
properties:
|
||||
spec:
|
||||
required:
|
||||
- targetKind
|
||||
- virtualService
|
||||
- primary
|
||||
- canary
|
||||
- virtualService
|
||||
- canaryAnalysis
|
||||
properties:
|
||||
targetKind:
|
||||
type: string
|
||||
virtualService:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
primary:
|
||||
properties:
|
||||
name:
|
||||
@@ -38,23 +46,23 @@ spec:
|
||||
type: string
|
||||
host:
|
||||
type: string
|
||||
virtualService:
|
||||
canaryAnalysis:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
weight:
|
||||
maxWeight:
|
||||
type: number
|
||||
metrics:
|
||||
type: array
|
||||
properties:
|
||||
items:
|
||||
type: object
|
||||
stepWeight:
|
||||
type: number
|
||||
metrics:
|
||||
type: array
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
interval:
|
||||
type: string
|
||||
pattern: "^[0-9]+(m)"
|
||||
threshold:
|
||||
type: number
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
interval:
|
||||
type: string
|
||||
pattern: "^[0-9]+(m)"
|
||||
threshold:
|
||||
type: number
|
||||
{{- end }}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
image:
|
||||
repository: stefanprodan/steerer
|
||||
tag: 0.0.1-beta.12
|
||||
tag: 0.0.1-rc.1
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
controlLoopInterval: "10s"
|
||||
|
||||
@@ -2,14 +2,14 @@ apiVersion: v1
|
||||
entries:
|
||||
steerer:
|
||||
- apiVersion: v1
|
||||
appVersion: 0.0.1-beta.12
|
||||
created: 2018-09-28T16:49:49.90177054+03:00
|
||||
appVersion: 0.0.1-rc.1
|
||||
created: 2018-09-29T11:08:25.598356915+03:00
|
||||
description: Steerer is a Kubernetes operator that automates the promotion of
|
||||
canary deployments using Istio routing for traffic shifting and Prometheus metrics
|
||||
for canary analysis.
|
||||
digest: 792a2bf520cac415a2e2a4a3b2b7142c9fb09a4737ea7135146bd5796c5f9d94
|
||||
digest: af14826edae5afcda1b2afebf17e3b8007f1d2a35e65093ab32e786a6599b201
|
||||
name: steerer
|
||||
urls:
|
||||
- https://stefanprodan.github.io/steerer/steerer-0.0.1.tgz
|
||||
version: 0.0.1
|
||||
generated: 2018-09-28T16:49:49.900919976+03:00
|
||||
generated: 2018-09-29T11:08:25.597473362+03:00
|
||||
|
||||
Binary file not shown.
@@ -37,8 +37,8 @@ type RolloutSpec struct {
|
||||
TargetKind string `json:"targetKind"`
|
||||
Primary Target `json:"primary"`
|
||||
Canary Target `json:"canary"`
|
||||
CanaryAnalysis CanaryAnalysis `json:"canaryAnalysis"`
|
||||
VirtualService VirtualService `json:"virtualService"`
|
||||
Metrics []Metric `json:"metrics"`
|
||||
}
|
||||
|
||||
type Target struct {
|
||||
@@ -47,8 +47,13 @@ type Target struct {
|
||||
}
|
||||
|
||||
type VirtualService struct {
|
||||
Name string `json:"name"`
|
||||
Weight int `json:"weight"`
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
// CanaryAnalysis groups the settings that drive a canary rollout: how far and
// how fast traffic is shifted to the canary, and which metrics are checked.
type CanaryAnalysis struct {
|
||||
// MaxWeight is the maximum traffic percentage (0-100) routed to the canary.
// The controller falls back to 100 when this value is not greater than zero.
MaxWeight int `json:"maxWeight"`
|
||||
// StepWeight is the canary increment step as a percentage (0-100); on each
// advance it is added to the canary route weight and subtracted from the
// primary route weight.
StepWeight int `json:"stepWeight"`
|
||||
// Metrics lists the checks the controller iterates over when validating
// the canary deployment.
Metrics []Metric `json:"metrics"`
|
||||
}
|
||||
|
||||
type Metric struct {
|
||||
|
||||
@@ -24,6 +24,27 @@ import (
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
)
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *CanaryAnalysis) DeepCopyInto(out *CanaryAnalysis) {
|
||||
// Shallow copy of every field first; after this line out.Metrics still
// shares its backing array with in.Metrics.
*out = *in
|
||||
if in.Metrics != nil {
|
||||
// in/out are deliberately shadowed with pointers to the Metrics fields,
// limited to this if-block.
in, out := &in.Metrics, &out.Metrics
|
||||
// Give out its own backing array of the same length.
*out = make([]Metric, len(*in))
|
||||
// Element-wise value copy.
// NOTE(review): this is a full deep copy only if Metric holds no
// reference-typed fields — Metric's definition is not visible here; confirm.
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CanaryAnalysis.
|
||||
func (in *CanaryAnalysis) DeepCopy() *CanaryAnalysis {
|
||||
// A nil receiver yields a nil copy instead of panicking.
if in == nil {
|
||||
return nil
|
||||
}
|
||||
// Allocate a fresh value and let DeepCopyInto populate it.
out := new(CanaryAnalysis)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Metric) DeepCopyInto(out *Metric) {
|
||||
*out = *in
|
||||
@@ -106,12 +127,8 @@ func (in *RolloutSpec) DeepCopyInto(out *RolloutSpec) {
|
||||
*out = *in
|
||||
out.Primary = in.Primary
|
||||
out.Canary = in.Canary
|
||||
in.CanaryAnalysis.DeepCopyInto(&out.CanaryAnalysis)
|
||||
out.VirtualService = in.VirtualService
|
||||
if in.Metrics != nil {
|
||||
in, out := &in.Metrics, &out.Metrics
|
||||
*out = make([]Metric, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -180,8 +180,8 @@ func (c *Controller) syncHandler(key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
c.logger.Infof("Adding %s.%s to cache", rollout.Name, rollout.Namespace)
|
||||
c.rollouts.Store(fmt.Sprintf("%s.%s", rollout.Name, rollout.Namespace), rollout)
|
||||
c.logger.Infof("Synced %s", key)
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -206,6 +206,11 @@ func (c *Controller) recordEventErrorf(r *rolloutv1.Rollout, template string, ar
|
||||
c.recorder.Event(r, corev1.EventTypeWarning, "Synced", fmt.Sprintf(template, args...))
|
||||
}
|
||||
|
||||
func (c *Controller) recordEventWarningf(r *rolloutv1.Rollout, template string, args ...interface{}) {
|
||||
c.logger.Infof(template, args...)
|
||||
c.recorder.Event(r, corev1.EventTypeWarning, "Synced", fmt.Sprintf(template, args...))
|
||||
}
|
||||
|
||||
func checkCustomResourceType(obj interface{}, logger *zap.SugaredLogger) (rolloutv1.Rollout, bool) {
|
||||
var roll *rolloutv1.Rollout
|
||||
var ok bool
|
||||
|
||||
@@ -32,10 +32,10 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) {
|
||||
return
|
||||
}
|
||||
|
||||
// gate stage: check if primary deployment exists and is healthy
|
||||
primary, ok := c.getDeployment(r, r.Spec.Primary.Name, r.Namespace)
|
||||
if !ok {
|
||||
return
|
||||
// set max weight default value to 100%
|
||||
maxWeight := 100
|
||||
if r.Spec.CanaryAnalysis.MaxWeight > 0 {
|
||||
maxWeight = r.Spec.CanaryAnalysis.MaxWeight
|
||||
}
|
||||
|
||||
// gate stage: check if canary deployment exists and is healthy
|
||||
@@ -44,6 +44,12 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) {
|
||||
return
|
||||
}
|
||||
|
||||
// gate stage: check if primary deployment exists and is healthy
|
||||
primary, ok := c.getDeployment(r, r.Spec.Primary.Name, r.Namespace)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// gate stage: check if virtual service exists
|
||||
// and if it contains weighted destination routes to the primary and canary services
|
||||
vs, primaryRoute, canaryRoute, ok := c.getVirtualService(r)
|
||||
@@ -67,15 +73,15 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) {
|
||||
}
|
||||
|
||||
// routing stage: increase canary traffic percentage
|
||||
if canaryRoute.Weight != 100 {
|
||||
primaryRoute.Weight -= r.Spec.VirtualService.Weight
|
||||
if primaryRoute.Weight > 100 {
|
||||
primaryRoute.Weight = 100
|
||||
}
|
||||
canaryRoute.Weight += r.Spec.VirtualService.Weight
|
||||
if canaryRoute.Weight < maxWeight {
|
||||
primaryRoute.Weight -= r.Spec.CanaryAnalysis.StepWeight
|
||||
if primaryRoute.Weight < 0 {
|
||||
primaryRoute.Weight = 0
|
||||
}
|
||||
canaryRoute.Weight += r.Spec.CanaryAnalysis.StepWeight
|
||||
if primaryRoute.Weight > 100 {
|
||||
primaryRoute.Weight = 100
|
||||
}
|
||||
|
||||
if ok := c.updateVirtualServiceRoutes(r, vs, primaryRoute, canaryRoute); !ok {
|
||||
return
|
||||
@@ -84,7 +90,7 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) {
|
||||
c.recordEventInfof(r, "Advance rollout %s.%s weight %v", r.Name, r.Namespace, canaryRoute.Weight)
|
||||
|
||||
// promotion stage: override primary.template.spec with the canary spec
|
||||
if canaryRoute.Weight == 100 {
|
||||
if canaryRoute.Weight == maxWeight {
|
||||
c.recordEventInfof(r, "Copying %s.%s template spec to %s.%s",
|
||||
canary.GetName(), canary.Namespace, primary.GetName(), primary.Namespace)
|
||||
|
||||
@@ -175,7 +181,7 @@ func (c *Controller) getDeployment(r *rolloutv1.Rollout, name string, namespace
|
||||
}
|
||||
|
||||
if msg, healthy := getDeploymentStatus(dep); !healthy {
|
||||
c.logger.Infof("Halt rollout for %s.%s %s", dep.GetName(), dep.Namespace, msg)
|
||||
c.recordEventWarningf(r, "Halt rollout %s.%s %s", dep.GetName(), dep.Namespace, msg)
|
||||
return nil, false
|
||||
}
|
||||
|
||||
@@ -187,7 +193,7 @@ func (c *Controller) getDeployment(r *rolloutv1.Rollout, name string, namespace
|
||||
}
|
||||
|
||||
func (c *Controller) checkDeploymentMetrics(r *rolloutv1.Rollout) bool {
|
||||
for _, metric := range r.Spec.Metrics {
|
||||
for _, metric := range r.Spec.CanaryAnalysis.Metrics {
|
||||
if metric.Name == "istio_requests_total" {
|
||||
val, err := c.getDeploymentCounter(r.Spec.Canary.Name, r.Namespace, metric.Name, metric.Interval)
|
||||
if err != nil {
|
||||
@@ -195,7 +201,7 @@ func (c *Controller) checkDeploymentMetrics(r *rolloutv1.Rollout) bool {
|
||||
return false
|
||||
}
|
||||
if float64(metric.Threshold) > val {
|
||||
c.recordEventErrorf(r, "Halt rollout %s.%s success rate %.2f%% < %v%%",
|
||||
c.recordEventWarningf(r, "Halt rollout %s.%s success rate %.2f%% < %v%%",
|
||||
r.Name, r.Namespace, val, metric.Threshold)
|
||||
return false
|
||||
}
|
||||
@@ -209,7 +215,7 @@ func (c *Controller) checkDeploymentMetrics(r *rolloutv1.Rollout) bool {
|
||||
}
|
||||
t := time.Duration(metric.Threshold) * time.Millisecond
|
||||
if val > t {
|
||||
c.recordEventErrorf(r, "Halt rollout %s.%s request duration %v > %v",
|
||||
c.recordEventWarningf(r, "Halt rollout %s.%s request duration %v > %v",
|
||||
r.Name, r.Namespace, val, t)
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package version
|
||||
|
||||
var VERSION = "0.0.1-beta.12"
|
||||
var VERSION = "0.0.1-rc.1"
|
||||
var REVISION = "unknown"
|
||||
|
||||
Reference in New Issue
Block a user