diff --git a/artifacts/rollouts/podinfo.yaml b/artifacts/rollouts/podinfo.yaml index a9f4e915..fd6473d2 100644 --- a/artifacts/rollouts/podinfo.yaml +++ b/artifacts/rollouts/podinfo.yaml @@ -2,6 +2,8 @@ # run tester: kubectl run -n test tester --image=quay.io/stefanprodan/podinfo:1.2.1 -- ./podinfo --port=9898 # generate latency: watch curl http://podinfo-canary:9898/delay/1 # generate errors: watch curl http://podinfo-canary:9898/status/500 +# run load test: kubectl run -n test -it --rm --restart=Never hey --image=stefanprodan/loadtest -- sh +# generate load: hey -z 2m -h2 -m POST -d '{test: 1}' -c 10 -q 5 http://podinfo:9898/api/echo apiVersion: apps.weave.works/v1beta1 kind: Rollout metadata: @@ -12,25 +14,29 @@ metadata: namespace: test spec: targetKind: Deployment + virtualService: + name: podinfo primary: name: podinfo host: podinfo canary: name: podinfo-canary host: podinfo-canary - virtualService: - name: podinfo + canaryAnalysis: + # max traffic percentage routed to canary + # percentage (0-100) + maxWeight: 100 # canary increment step # percentage (0-100) - weight: 10 - metrics: - - name: istio_requests_total - # minimum req success rate (non 5xx responses) - # percentage (0-100) - threshold: 99 - interval: 1m - - name: istio_request_duration_seconds_bucket - # maximum req duration P99 - # milliseconds - threshold: 500 - interval: 1m + stepWeight: 10 + metrics: + - name: istio_requests_total + # minimum req success rate (non 5xx responses) + # percentage (0-100) + threshold: 99 + interval: 1m + - name: istio_request_duration_seconds_bucket + # maximum req duration P99 + # milliseconds + threshold: 500 + interval: 30s diff --git a/artifacts/steerer/crd.yaml b/artifacts/steerer/crd.yaml index ef9ffe1a..f0126196 100644 --- a/artifacts/steerer/crd.yaml +++ b/artifacts/steerer/crd.yaml @@ -21,10 +21,18 @@ spec: properties: spec: required: + - targetKind + - virtualService - primary - canary - - virtualService + - canaryAnalysis properties: + targetKind: + type: string + virtualService: + properties: + name: + type: string primary: properties: name: @@ -37,22 +45,23 @@ spec: type: string host: type: string - virtualService: + canaryAnalysis: properties: - name: - type: string - weight: + maxWeight: type: number - metrics: - type: array - properties: - items: - type: object + stepWeight: + type: number + metrics: + type: array properties: - name: - type: string - interval: - type: string - pattern: "^[0-9]+(m)" - threshold: - type: number + items: + type: object + properties: + name: + type: string + interval: + type: string + pattern: "^[0-9]+(m)" + threshold: + type: number + diff --git a/artifacts/steerer/deployment.yaml b/artifacts/steerer/deployment.yaml index 04a95465..c3b76069 100644 --- a/artifacts/steerer/deployment.yaml +++ b/artifacts/steerer/deployment.yaml @@ -22,7 +22,7 @@ spec: serviceAccountName: steerer containers: - name: steerer - image: stefanprodan/steerer:0.0.1-beta.12 + image: stefanprodan/steerer:0.0.1-rc.1 imagePullPolicy: Always ports: - name: http diff --git a/artifacts/workloads/deployment-canary.yaml b/artifacts/workloads/deployment-canary.yaml index c9827df0..5ec7620b 100644 --- a/artifacts/workloads/deployment-canary.yaml +++ b/artifacts/workloads/deployment-canary.yaml @@ -57,7 +57,7 @@ spec: - http - localhost:9898/readyz failureThreshold: 3 - periodSeconds: 10 + periodSeconds: 3 successThreshold: 1 timeoutSeconds: 2 resources: @@ -65,7 +65,7 @@ spec: cpu: 1000m memory: 256Mi requests: - cpu: 10m + cpu: 100m memory: 16Mi volumeMounts: - mountPath: /data diff --git a/artifacts/workloads/deployment.yaml b/artifacts/workloads/deployment.yaml index b035f491..a16eaa2f 100644 --- a/artifacts/workloads/deployment.yaml +++ b/artifacts/workloads/deployment.yaml @@ -3,16 +3,13 @@ kind: Deployment metadata: name: podinfo namespace: test - annotations: - apps.weave.works/progressive: "true" labels: app: podinfo spec: - replicas: 2 + replicas: 1 strategy: rollingUpdate: - maxSurge: 25% - maxUnavailable: 1 + maxUnavailable: 0 type: RollingUpdate selector: matchLabels: @@ -48,6 +45,7 @@ spec: - check - http - localhost:9898/healthz + initialDelaySeconds: 5 failureThreshold: 3 periodSeconds: 10 successThreshold: 1 @@ -59,11 +57,15 @@ spec: - check - http - localhost:9898/readyz + initialDelaySeconds: 5 failureThreshold: 3 - periodSeconds: 10 + periodSeconds: 3 successThreshold: 1 timeoutSeconds: 1 resources: + limits: + cpu: 2000m + memory: 512Mi requests: - cpu: 1m - memory: 16Mi + cpu: 10m + memory: 64Mi diff --git a/artifacts/workloads/hpa-canary.yaml b/artifacts/workloads/hpa-canary.yaml index aeb57387..18bec4b7 100644 --- a/artifacts/workloads/hpa-canary.yaml +++ b/artifacts/workloads/hpa-canary.yaml @@ -14,6 +14,8 @@ spec: - type: Resource resource: name: cpu + # scale up if usage is above + # 99% of the requested CPU (100m) targetAverageUtilization: 99 - type: Resource resource: diff --git a/artifacts/workloads/hpa.yaml b/artifacts/workloads/hpa.yaml new file mode 100644 index 00000000..fa2b5a6f --- /dev/null +++ b/artifacts/workloads/hpa.yaml @@ -0,0 +1,19 @@ +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: podinfo + namespace: test +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: podinfo + minReplicas: 2 + maxReplicas: 4 + metrics: + - type: Resource + resource: + name: cpu + # scale up if usage is above + # 99% of the requested CPU (100m) + targetAverageUtilization: 99 diff --git a/artifacts/workloads/virtual-service.yaml b/artifacts/workloads/virtual-service.yaml index b36e74a8..bfcf7b72 100644 --- a/artifacts/workloads/virtual-service.yaml +++ b/artifacts/workloads/virtual-service.yaml @@ -1,13 +1,10 @@ apiVersion: networking.istio.io/v1alpha3 kind: VirtualService metadata: - annotations: - apps.weave.works/progressive-revision: "" - apps.weave.works/progressive-status: "" - labels: - app: podinfo name: podinfo namespace: test + labels: + app: podinfo spec: gateways: - public-gateway.istio-system.svc.cluster.local @@ -26,4 +23,7 @@ spec: port: number: 9898 weight: 0 - timeout: 30s + timeout: 10s + retries: + attempts: 3 + perTryTimeout: 2s diff --git a/chart/steerer/Chart.yaml b/chart/steerer/Chart.yaml index 65cf60f6..cac84709 100644 --- a/chart/steerer/Chart.yaml +++ b/chart/steerer/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v1 name: steerer version: 0.0.1 -appVersion: 0.0.1-beta.12 +appVersion: 0.0.1-rc.1 description: Steerer is a Kubernetes operator that automates the promotion of canary deployments using Istio routing for traffic shifting and Prometheus metrics for canary analysis. diff --git a/chart/steerer/templates/crd.yaml b/chart/steerer/templates/crd.yaml index 53427b46..153c2f83 100644 --- a/chart/steerer/templates/crd.yaml +++ b/chart/steerer/templates/crd.yaml @@ -22,10 +22,18 @@ spec: properties: spec: required: + - targetKind + - virtualService - primary - canary - - virtualService + - canaryAnalysis properties: + targetKind: + type: string + virtualService: + properties: + name: + type: string primary: properties: name: @@ -38,23 +46,23 @@ spec: type: string host: type: string - virtualService: + canaryAnalysis: properties: - name: - type: string - weight: + maxWeight: type: number - metrics: - type: array - properties: - items: - type: object + stepWeight: + type: number + metrics: + type: array properties: - name: - type: string - interval: - type: string - pattern: "^[0-9]+(m)" - threshold: - type: number + items: + type: object + properties: + name: + type: string + interval: + type: string + pattern: "^[0-9]+(m)" + threshold: + type: number {{- end }} diff --git a/chart/steerer/values.yaml b/chart/steerer/values.yaml index 37da5ada..0d9bb1b6 100644 --- a/chart/steerer/values.yaml +++ b/chart/steerer/values.yaml @@ -2,7 +2,7 @@ image: repository: stefanprodan/steerer - tag: 0.0.1-beta.12 + tag: 0.0.1-rc.1 pullPolicy: IfNotPresent controlLoopInterval: "10s" diff --git a/docs/index.yaml b/docs/index.yaml index 829ce00c..5f71a325 100755 --- a/docs/index.yaml +++ b/docs/index.yaml @@ -2,14 +2,14 @@ apiVersion: v1 entries: steerer: - apiVersion: v1 - appVersion: 0.0.1-beta.12 - created: 2018-09-28T16:49:49.90177054+03:00 + appVersion: 0.0.1-rc.1 + created: 2018-09-29T11:08:25.598356915+03:00 description: Steerer is a Kubernetes operator that automates the promotion of canary deployments using Istio routing for traffic shifting and Prometheus metrics for canary analysis. - digest: 792a2bf520cac415a2e2a4a3b2b7142c9fb09a4737ea7135146bd5796c5f9d94 + digest: af14826edae5afcda1b2afebf17e3b8007f1d2a35e65093ab32e786a6599b201 name: steerer urls: - https://stefanprodan.github.io/steerer/steerer-0.0.1.tgz version: 0.0.1 -generated: 2018-09-28T16:49:49.900919976+03:00 +generated: 2018-09-29T11:08:25.597473362+03:00 diff --git a/docs/steerer-0.0.1.tgz b/docs/steerer-0.0.1.tgz index 61148685..09776f90 100644 Binary files a/docs/steerer-0.0.1.tgz and b/docs/steerer-0.0.1.tgz differ diff --git a/pkg/apis/rollout/v1beta1/types.go b/pkg/apis/rollout/v1beta1/types.go index c773ad71..874d4824 100755 --- a/pkg/apis/rollout/v1beta1/types.go +++ b/pkg/apis/rollout/v1beta1/types.go @@ -37,8 +37,8 @@ type RolloutSpec struct { TargetKind string `json:"targetKind"` Primary Target `json:"primary"` Canary Target `json:"canary"` + CanaryAnalysis CanaryAnalysis `json:"canaryAnalysis"` VirtualService VirtualService `json:"virtualService"` - Metrics []Metric `json:"metrics"` } type Target struct { @@ -47,8 +47,13 @@ type Target struct { } type VirtualService struct { - Name string `json:"name"` - Weight int `json:"weight"` + Name string `json:"name"` +} + +type CanaryAnalysis struct { + MaxWeight int `json:"maxWeight"` + StepWeight int `json:"stepWeight"` + Metrics []Metric `json:"metrics"` } type Metric struct { diff --git a/pkg/apis/rollout/v1beta1/zz_generated.deepcopy.go b/pkg/apis/rollout/v1beta1/zz_generated.deepcopy.go index 01f457a2..7f236063 100644 --- a/pkg/apis/rollout/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/rollout/v1beta1/zz_generated.deepcopy.go @@ -24,6 +24,27 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CanaryAnalysis) DeepCopyInto(out *CanaryAnalysis) { + *out = *in + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = make([]Metric, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CanaryAnalysis. +func (in *CanaryAnalysis) DeepCopy() *CanaryAnalysis { + if in == nil { + return nil + } + out := new(CanaryAnalysis) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Metric) DeepCopyInto(out *Metric) { *out = *in @@ -106,12 +127,8 @@ func (in *RolloutSpec) DeepCopyInto(out *RolloutSpec) { *out = *in out.Primary = in.Primary out.Canary = in.Canary + in.CanaryAnalysis.DeepCopyInto(&out.CanaryAnalysis) out.VirtualService = in.VirtualService - if in.Metrics != nil { - in, out := &in.Metrics, &out.Metrics - *out = make([]Metric, len(*in)) - copy(*out, *in) - } return } diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 76e620e9..3355e2ba 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -180,8 +180,8 @@ func (c *Controller) syncHandler(key string) error { return nil } - c.logger.Infof("Adding %s.%s to cache", rollout.Name, rollout.Namespace) c.rollouts.Store(fmt.Sprintf("%s.%s", rollout.Name, rollout.Namespace), rollout) + c.logger.Infof("Synced %s", key) return nil } @@ -206,6 +206,11 @@ func (c *Controller) recordEventErrorf(r *rolloutv1.Rollout, template string, ar c.recorder.Event(r, corev1.EventTypeWarning, "Synced", fmt.Sprintf(template, args...)) } +func (c *Controller) recordEventWarningf(r *rolloutv1.Rollout, template string, args ...interface{}) { + c.logger.Infof(template, args...) + c.recorder.Event(r, corev1.EventTypeWarning, "Synced", fmt.Sprintf(template, args...)) +} + func checkCustomResourceType(obj interface{}, logger *zap.SugaredLogger) (rolloutv1.Rollout, bool) { var roll *rolloutv1.Rollout var ok bool diff --git a/pkg/controller/deployment.go b/pkg/controller/deployment.go index bc2dbfb4..82076737 100644 --- a/pkg/controller/deployment.go +++ b/pkg/controller/deployment.go @@ -32,10 +32,10 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) { return } - // gate stage: check if primary deployment exists and is healthy - primary, ok := c.getDeployment(r, r.Spec.Primary.Name, r.Namespace) - if !ok { - return + // set max weight default value to 100% + maxWeight := 100 + if r.Spec.CanaryAnalysis.MaxWeight > 0 { + maxWeight = r.Spec.CanaryAnalysis.MaxWeight } // gate stage: check if canary deployment exists and is healthy @@ -44,6 +44,12 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) { return } + // gate stage: check if primary deployment exists and is healthy + primary, ok := c.getDeployment(r, r.Spec.Primary.Name, r.Namespace) + if !ok { + return + } + // gate stage: check if virtual service exists // and if it contains weighted destination routes to the primary and canary services vs, primaryRoute, canaryRoute, ok := c.getVirtualService(r) @@ -67,15 +73,15 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) { } // routing stage: increase canary traffic percentage - if canaryRoute.Weight != 100 { - primaryRoute.Weight -= r.Spec.VirtualService.Weight - if primaryRoute.Weight > 100 { - primaryRoute.Weight = 100 - } - canaryRoute.Weight += r.Spec.VirtualService.Weight + if canaryRoute.Weight < maxWeight { + primaryRoute.Weight -= r.Spec.CanaryAnalysis.StepWeight if primaryRoute.Weight < 0 { primaryRoute.Weight = 0 } + canaryRoute.Weight += r.Spec.CanaryAnalysis.StepWeight + if primaryRoute.Weight > 100 { + primaryRoute.Weight = 100 + } if ok := c.updateVirtualServiceRoutes(r, vs, primaryRoute, canaryRoute); !ok { return @@ -84,7 +90,7 @@ func (c *Controller) advanceDeploymentRollout(name string, namespace string) { c.recordEventInfof(r, "Advance rollout %s.%s weight %v", r.Name, r.Namespace, canaryRoute.Weight) // promotion stage: override primary.template.spec with the canary spec - if canaryRoute.Weight == 100 { + if canaryRoute.Weight == maxWeight { c.recordEventInfof(r, "Copying %s.%s template spec to %s.%s", canary.GetName(), canary.Namespace, primary.GetName(), primary.Namespace) @@ -175,7 +181,7 @@ func (c *Controller) getDeployment(r *rolloutv1.Rollout, name string, namespace } if msg, healthy := getDeploymentStatus(dep); !healthy { - c.logger.Infof("Halt rollout for %s.%s %s", dep.GetName(), dep.Namespace, msg) + c.recordEventWarningf(r, "Halt rollout %s.%s %s", dep.GetName(), dep.Namespace, msg) return nil, false } @@ -187,7 +193,7 @@ func (c *Controller) getDeployment(r *rolloutv1.Rollout, name string, namespace } func (c *Controller) checkDeploymentMetrics(r *rolloutv1.Rollout) bool { - for _, metric := range r.Spec.Metrics { + for _, metric := range r.Spec.CanaryAnalysis.Metrics { if metric.Name == "istio_requests_total" { val, err := c.getDeploymentCounter(r.Spec.Canary.Name, r.Namespace, metric.Name, metric.Interval) if err != nil { @@ -195,7 +201,7 @@ func (c *Controller) checkDeploymentMetrics(r *rolloutv1.Rollout) bool { return false } if float64(metric.Threshold) > val { - c.recordEventErrorf(r, "Halt rollout %s.%s success rate %.2f%% < %v%%", + c.recordEventWarningf(r, "Halt rollout %s.%s success rate %.2f%% < %v%%", r.Name, r.Namespace, val, metric.Threshold) return false } @@ -209,7 +215,7 @@ func (c *Controller) checkDeploymentMetrics(r *rolloutv1.Rollout) bool { } t := time.Duration(metric.Threshold) * time.Millisecond if val > t { - c.recordEventErrorf(r, "Halt rollout %s.%s request duration %v > %v", + c.recordEventWarningf(r, "Halt rollout %s.%s request duration %v > %v", r.Name, r.Namespace, val, t) return false } diff --git a/pkg/version/version.go b/pkg/version/version.go index b4ed4aba..addeb583 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -1,4 +1,4 @@ package version -var VERSION = "0.0.1-beta.12" +var VERSION = "0.0.1-rc.1" var REVISION = "unknown"