mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-03-02 01:10:20 +00:00
merge operators
This commit is contained in:
21
k8s/elasticsearch-cluster.yaml
Normal file
21
k8s/elasticsearch-cluster.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
apiVersion: enterprises.upmc.com/v1
|
||||
kind: ElasticsearchCluster
|
||||
metadata:
|
||||
name: es
|
||||
spec:
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana-oss:6.1.3
|
||||
image-pull-policy: Always
|
||||
cerebro:
|
||||
image: upmcenterprises/cerebro:0.7.2
|
||||
image-pull-policy: Always
|
||||
elastic-search-image: upmcenterprises/docker-elasticsearch-kubernetes:6.1.3_0
|
||||
image-pull-policy: Always
|
||||
client-node-replicas: 2
|
||||
master-node-replicas: 3
|
||||
data-node-replicas: 3
|
||||
network-host: 0.0.0.0
|
||||
use-ssl: false
|
||||
data-volume-size: 10Gi
|
||||
java-options: "-Xms512m -Xmx512m"
|
||||
|
||||
94
k8s/elasticsearch-operator.yaml
Normal file
94
k8s/elasticsearch-operator.yaml
Normal file
@@ -0,0 +1,94 @@
|
||||
# This is mirrored from https://github.com/upmc-enterprises/elasticsearch-operator/blob/master/example/controller.yaml but using the elasticsearch-operator namespace instead of operator
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: elasticsearch-operator
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: elasticsearch-operator
|
||||
namespace: elasticsearch-operator
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: elasticsearch-operator
|
||||
rules:
|
||||
- apiGroups: ["extensions"]
|
||||
resources: ["deployments", "replicasets", "daemonsets"]
|
||||
verbs: ["create", "get", "update", "delete", "list"]
|
||||
- apiGroups: ["apiextensions.k8s.io"]
|
||||
resources: ["customresourcedefinitions"]
|
||||
verbs: ["create", "get", "update", "delete", "list"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["storageclasses"]
|
||||
verbs: ["get", "list", "create", "delete", "deletecollection"]
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumes", "persistentvolumeclaims", "services", "secrets", "configmaps"]
|
||||
verbs: ["create", "get", "update", "delete", "list"]
|
||||
- apiGroups: ["batch"]
|
||||
resources: ["cronjobs", "jobs"]
|
||||
verbs: ["create", "get", "deletecollection", "delete"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["list", "get", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["statefulsets", "deployments"]
|
||||
verbs: ["*"]
|
||||
- apiGroups: ["enterprises.upmc.com"]
|
||||
resources: ["elasticsearchclusters"]
|
||||
verbs: ["*"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: elasticsearch-operator
|
||||
namespace: elasticsearch-operator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: elasticsearch-operator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: elasticsearch-operator
|
||||
namespace: elasticsearch-operator
|
||||
---
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: elasticsearch-operator
|
||||
namespace: elasticsearch-operator
|
||||
spec:
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
name: elasticsearch-operator
|
||||
spec:
|
||||
containers:
|
||||
- name: operator
|
||||
image: upmcenterprises/elasticsearch-operator:0.2.0
|
||||
imagePullPolicy: Always
|
||||
env:
|
||||
- name: NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /live
|
||||
port: 8000
|
||||
initialDelaySeconds: 10
|
||||
timeoutSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: 8000
|
||||
initialDelaySeconds: 10
|
||||
timeoutSeconds: 5
|
||||
serviceAccount: elasticsearch-operator
|
||||
167
k8s/filebeat.yaml
Normal file
167
k8s/filebeat.yaml
Normal file
@@ -0,0 +1,167 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: filebeat-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: filebeat
|
||||
data:
|
||||
filebeat.yml: |-
|
||||
filebeat.config:
|
||||
inputs:
|
||||
# Mounted `filebeat-inputs` configmap:
|
||||
path: ${path.config}/inputs.d/*.yml
|
||||
# Reload inputs configs as they change:
|
||||
reload.enabled: false
|
||||
modules:
|
||||
path: ${path.config}/modules.d/*.yml
|
||||
# Reload module configs as they change:
|
||||
reload.enabled: false
|
||||
|
||||
# To enable hints based autodiscover, remove `filebeat.config.inputs` configuration and uncomment this:
|
||||
#filebeat.autodiscover:
|
||||
# providers:
|
||||
# - type: kubernetes
|
||||
# hints.enabled: true
|
||||
|
||||
processors:
|
||||
- add_cloud_metadata:
|
||||
|
||||
cloud.id: ${ELASTIC_CLOUD_ID}
|
||||
cloud.auth: ${ELASTIC_CLOUD_AUTH}
|
||||
|
||||
output.elasticsearch:
|
||||
hosts: ['${ELASTICSEARCH_HOST:elasticsearch}:${ELASTICSEARCH_PORT:9200}']
|
||||
username: ${ELASTICSEARCH_USERNAME}
|
||||
password: ${ELASTICSEARCH_PASSWORD}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: filebeat-inputs
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: filebeat
|
||||
data:
|
||||
kubernetes.yml: |-
|
||||
- type: docker
|
||||
containers.ids:
|
||||
- "*"
|
||||
processors:
|
||||
- add_kubernetes_metadata:
|
||||
in_cluster: true
|
||||
---
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: filebeat
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: filebeat
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: filebeat
|
||||
spec:
|
||||
serviceAccountName: filebeat
|
||||
terminationGracePeriodSeconds: 30
|
||||
containers:
|
||||
- name: filebeat
|
||||
image: docker.elastic.co/beats/filebeat-oss:7.0.1
|
||||
args: [
|
||||
"-c", "/etc/filebeat.yml",
|
||||
"-e",
|
||||
]
|
||||
env:
|
||||
- name: ELASTICSEARCH_HOST
|
||||
value: elasticsearch-es.default.svc.cluster.local
|
||||
- name: ELASTICSEARCH_PORT
|
||||
value: "9200"
|
||||
- name: ELASTICSEARCH_USERNAME
|
||||
value: elastic
|
||||
- name: ELASTICSEARCH_PASSWORD
|
||||
value: changeme
|
||||
- name: ELASTIC_CLOUD_ID
|
||||
value:
|
||||
- name: ELASTIC_CLOUD_AUTH
|
||||
value:
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
# If using Red Hat OpenShift uncomment this:
|
||||
#privileged: true
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 100Mi
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/filebeat.yml
|
||||
readOnly: true
|
||||
subPath: filebeat.yml
|
||||
- name: inputs
|
||||
mountPath: /usr/share/filebeat/inputs.d
|
||||
readOnly: true
|
||||
- name: data
|
||||
mountPath: /usr/share/filebeat/data
|
||||
- name: varlibdockercontainers
|
||||
mountPath: /var/lib/docker/containers
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
defaultMode: 0600
|
||||
name: filebeat-config
|
||||
- name: varlibdockercontainers
|
||||
hostPath:
|
||||
path: /var/lib/docker/containers
|
||||
- name: inputs
|
||||
configMap:
|
||||
defaultMode: 0600
|
||||
name: filebeat-inputs
|
||||
# data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
|
||||
- name: data
|
||||
hostPath:
|
||||
path: /var/lib/filebeat-data
|
||||
type: DirectoryOrCreate
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: filebeat
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: filebeat
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: filebeat
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: filebeat
|
||||
labels:
|
||||
k8s-app: filebeat
|
||||
rules:
|
||||
- apiGroups: [""] # "" indicates the core API group
|
||||
resources:
|
||||
- namespaces
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- watch
|
||||
- list
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: filebeat
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: filebeat
|
||||
---
|
||||
110
k8s/local-path-storage.yaml
Normal file
110
k8s/local-path-storage.yaml
Normal file
@@ -0,0 +1,110 @@
|
||||
# This is a local copy of:
|
||||
# https://github.com/rancher/local-path-provisioner/blob/master/deploy/local-path-storage.yaml
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: local-path-storage
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: local-path-provisioner-service-account
|
||||
namespace: local-path-storage
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: local-path-provisioner-role
|
||||
namespace: local-path-storage
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes", "persistentvolumeclaims"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["endpoints", "persistentvolumes", "pods"]
|
||||
verbs: ["*"]
|
||||
- apiGroups: [""]
|
||||
resources: ["events"]
|
||||
verbs: ["create", "patch"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["storageclasses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: local-path-provisioner-bind
|
||||
namespace: local-path-storage
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: local-path-provisioner-role
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: local-path-provisioner-service-account
|
||||
namespace: local-path-storage
|
||||
---
|
||||
apiVersion: apps/v1beta2
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: local-path-provisioner
|
||||
namespace: local-path-storage
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: local-path-provisioner
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: local-path-provisioner
|
||||
spec:
|
||||
serviceAccountName: local-path-provisioner-service-account
|
||||
containers:
|
||||
- name: local-path-provisioner
|
||||
image: rancher/local-path-provisioner:v0.0.8
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- local-path-provisioner
|
||||
- --debug
|
||||
- start
|
||||
- --config
|
||||
- /etc/config/config.json
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config/
|
||||
env:
|
||||
- name: POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: local-path-config
|
||||
---
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: local-path
|
||||
provisioner: rancher.io/local-path
|
||||
volumeBindingMode: WaitForFirstConsumer
|
||||
reclaimPolicy: Delete
|
||||
---
|
||||
kind: ConfigMap
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: local-path-config
|
||||
namespace: local-path-storage
|
||||
data:
|
||||
config.json: |-
|
||||
{
|
||||
"nodePathMap":[
|
||||
{
|
||||
"node":"DEFAULT_PATH_FOR_NON_LISTED_NODES",
|
||||
"paths":["/opt/local-path-provisioner"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
356
slides/k8s/operators-design.md
Normal file
356
slides/k8s/operators-design.md
Normal file
@@ -0,0 +1,356 @@
|
||||
## What does it take to write an operator?
|
||||
|
||||
- Writing a quick-and-dirty operator, or a POC/MVP, is easy
|
||||
|
||||
- Writing a robust operator is hard
|
||||
|
||||
- We will describe the general idea
|
||||
|
||||
- We will identify some of the associated challenges
|
||||
|
||||
- We will list a few tools that can help us
|
||||
|
||||
---
|
||||
|
||||
## Top-down vs. bottom-up
|
||||
|
||||
- Both approaches are possible
|
||||
|
||||
- Let's see what they entail, and their respective pros and cons
|
||||
|
||||
---
|
||||
|
||||
## Top-down approach
|
||||
|
||||
- Start with high-level design (see next slide)
|
||||
|
||||
- Pros:
|
||||
|
||||
- can yield cleaner design that will be more robust
|
||||
|
||||
- Cons:
|
||||
|
||||
- must be able to anticipate all the events that might happen
|
||||
|
||||
- design will be better only to the extend of what we anticipated
|
||||
|
||||
- hard to anticipate if we don't have production experience
|
||||
|
||||
---
|
||||
|
||||
## High-level design
|
||||
|
||||
- What are we solving?
|
||||
|
||||
(e.g.: geographic databases backed by PostGIS with Redis caches)
|
||||
|
||||
- What are our use-cases, stories?
|
||||
|
||||
(e.g.: adding/resizing caches and read replicas; load balancing queries)
|
||||
|
||||
- What kind of outage do we want to address?
|
||||
|
||||
(e.g.: loss of individual node, pod, volume)
|
||||
|
||||
- What are our *non-features*, the things we don't want to address?
|
||||
|
||||
(e.g.: loss of datacenter/zone; differentiating between read and write queries;
|
||||
<br/>
|
||||
cache invalidation; upgrading to newer major versions of Redis, PostGIS, PostgreSQL)
|
||||
|
||||
---
|
||||
|
||||
## Low-level design
|
||||
|
||||
- What Custom Resource Definitions do we need?
|
||||
|
||||
(one, many?)
|
||||
|
||||
- How will we store configuration information?
|
||||
|
||||
(part of the CRD spec fields, annotations, other?)
|
||||
|
||||
- Do we need to store state? If so, where?
|
||||
|
||||
- state that is small and doesn't change much can be stored via the Kubernetes API
|
||||
<br/>
|
||||
(e.g.: leader information, configuration, credentials)
|
||||
|
||||
- things that are big and/or change a lot should go elsewhere
|
||||
<br/>
|
||||
(e.g.: metrics, bigger configuration file like GeoIP)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What can we store via the Kubernetes API?
|
||||
|
||||
- The API server stores most Kubernetes resources into etcd
|
||||
|
||||
- Etcd is designed for reliability, not for performance
|
||||
|
||||
- If our storage needs exceed what etcd can offer, we need to use something else:
|
||||
|
||||
- either directly
|
||||
|
||||
- or by extending the API server
|
||||
<br/>(for instance by using the agregation layer, like [metrics server](https://github.com/kubernetes-incubator/metrics-server) does)
|
||||
|
||||
---
|
||||
|
||||
## Bottom-up approach
|
||||
|
||||
- Start with existing Kubernetes resources (Deployment, Stateful Set...)
|
||||
|
||||
- Run the system in production
|
||||
|
||||
- Add scripts, automation, to facilitate day-to-day operations
|
||||
|
||||
- Turn the scripts into an operator
|
||||
|
||||
- Pros: simpler to get started; reflects actual use-cases
|
||||
|
||||
- Cons: can result in convoluted designs requiring extensive refactor
|
||||
|
||||
---
|
||||
|
||||
## General idea
|
||||
|
||||
- Our operator will watch its CRDs *and associated resources*
|
||||
|
||||
- Drawing state diagrams and finite state automata helps a lot
|
||||
|
||||
- It's OK if some transitions lead to a big catch-all "human intervention"
|
||||
|
||||
- Over time, we will learn about new failure modes and add to these diagrams
|
||||
|
||||
- It's OK to start with CRD creation / deletion and prevent any modification
|
||||
|
||||
(that's the easy POC/MVP we were talking about)
|
||||
|
||||
- *Presentation* and *validation* will help our users
|
||||
|
||||
(more on that later)
|
||||
|
||||
---
|
||||
|
||||
## Challenges
|
||||
|
||||
- Reacting to infrastructure disruption can seem hard at first
|
||||
|
||||
- Kubernetes gives us a lot of primitives to help:
|
||||
|
||||
- Pods and Persistent Volumes will *eventually* recover
|
||||
|
||||
- Stateful Sets give us easy ways to "add N copies" of a thing
|
||||
|
||||
- The real challenges come with configuration changes
|
||||
|
||||
(i.e., what to do when our users update our CRDs)
|
||||
|
||||
- Keep in mind that [some] of the [largest] cloud [outages] haven't been caused by [natural catastrophes], or even code bugs, but by configuration changes
|
||||
|
||||
[some]: https://www.datacenterdynamics.com/news/gcp-outage-mainone-leaked-google-cloudflare-ip-addresses-china-telecom/
|
||||
[largest]: https://aws.amazon.com/message/41926/
|
||||
[outages]: https://aws.amazon.com/message/65648/
|
||||
[natural catastrophes]: https://www.datacenterknowledge.com/amazon/aws-says-it-s-never-seen-whole-data-center-go-down
|
||||
|
||||
---
|
||||
|
||||
## Configuration changes
|
||||
|
||||
- It is helpful to analyze and understand how Kubernetes controllers work:
|
||||
|
||||
- watch resource for modifications
|
||||
|
||||
- compare desired state (CRD) and current state
|
||||
|
||||
- issue actions to converge state
|
||||
|
||||
- Configuration changes will probably require *another* state diagram or FSA
|
||||
|
||||
- Again, it's OK to have transitions labeled as "unsupported"
|
||||
|
||||
(i.e. reject some modifications because we can't execute them)
|
||||
|
||||
---
|
||||
|
||||
## Tools
|
||||
|
||||
- CoreOS / RedHat Operator Framework
|
||||
|
||||
[GitHub](https://github.com/operator-framework)
|
||||
|
|
||||
[Blog](https://developers.redhat.com/blog/2018/12/18/introduction-to-the-kubernetes-operator-framework/)
|
||||
|
|
||||
[Intro talk](https://www.youtube.com/watch?v=8k_ayO1VRXE)
|
||||
|
|
||||
[Deep dive talk](https://www.youtube.com/watch?v=fu7ecA2rXmc)
|
||||
|
||||
- Zalando Kubernetes Operator Pythonic Framework (KOPF)
|
||||
|
||||
[GitHub](https://github.com/zalando-incubator/kopf)
|
||||
|
|
||||
[Docs](https://kopf.readthedocs.io/)
|
||||
|
|
||||
[Step-by-step tutorial](https://kopf.readthedocs.io/en/stable/walkthrough/problem/)
|
||||
|
||||
- Mesosphere Kubernetes Universal Declarative Operator (KUDO)
|
||||
|
||||
[GitHub](https://github.com/kudobuilder/kudo)
|
||||
|
|
||||
[Blog](https://mesosphere.com/blog/announcing-maestro-a-declarative-no-code-approach-to-kubernetes-day-2-operators/)
|
||||
|
|
||||
[Docs](https://kudo.dev/)
|
||||
|
|
||||
[Zookeeper example](https://github.com/kudobuilder/frameworks/tree/master/repo/stable/zookeeper)
|
||||
|
||||
---
|
||||
|
||||
## Validation
|
||||
|
||||
- By default, a CRD is "free form"
|
||||
|
||||
(we can put pretty much anything we want in it)
|
||||
|
||||
- When creating a CRD, we can provide an OpenAPI v3 schema
|
||||
([Example](https://github.com/amaizfinance/redis-operator/blob/master/deploy/crds/k8s_v1alpha1_redis_crd.yaml#L34))
|
||||
|
||||
- The API server will then validate resources created/edited with this schema
|
||||
|
||||
- If we need a stronger validation, we can use a Validating Admission Webhook:
|
||||
|
||||
- run an [admission webhook server](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#write-an-admission-webhook-server) to receive validation requests
|
||||
|
||||
- register the webhook by creating a [ValidatingWebhookConfiguration](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#configure-admission-webhooks-on-the-fly)
|
||||
|
||||
- each time the API server receives a request matching the configuration,
|
||||
<br/>the request is sent to our server for validation
|
||||
|
||||
---
|
||||
|
||||
## Presentation
|
||||
|
||||
- By default, `kubectl get mycustomresource` won't display much information
|
||||
|
||||
(just the name and age of each resource)
|
||||
|
||||
- When creating a CRD, we can specify additional columns to print
|
||||
([Example](https://github.com/amaizfinance/redis-operator/blob/master/deploy/crds/k8s_v1alpha1_redis_crd.yaml#L6),
|
||||
[Docs](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#additional-printer-columns))
|
||||
|
||||
- By default, `kubectl describe mycustomresource` will also be generic
|
||||
|
||||
- `kubectl describe` can show events related to our custom resources
|
||||
|
||||
(for that, we need to create Event resources, and fill the `involvedObject` field)
|
||||
|
||||
- For scalable resources, we can define a `scale` sub-resource
|
||||
|
||||
- This will enable the use of `kubectl scale` and other scaling-related operations
|
||||
|
||||
---
|
||||
|
||||
## About scaling
|
||||
|
||||
- It is possible to use the HPA (Horizontal Pod Autoscaler) with CRDs
|
||||
|
||||
- But it is not always desirable
|
||||
|
||||
- The HPA works very well for homogenous, stateless workloads
|
||||
|
||||
- For other workloads, your mileage may vary
|
||||
|
||||
- Some systems can scale across multiple dimensions
|
||||
|
||||
(for instance: increase number of replicas, or number of shards?)
|
||||
|
||||
- If autoscaling is desired, the operator will have to take complex decisions
|
||||
|
||||
(example: Zalando's Elasticsearch Operator ([Video](https://www.youtube.com/watch?v=lprE0J0kAq0)))
|
||||
|
||||
---
|
||||
|
||||
## Versioning
|
||||
|
||||
- As our operator evolves over time, we may have to change the CRD
|
||||
|
||||
(add, remove, change fields)
|
||||
|
||||
- Like every other resource in Kubernetes, [custom resources are versioned](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definition-versioning/
|
||||
)
|
||||
|
||||
- When creating a CRD, we need to specify a *list* of versions
|
||||
|
||||
- Versions can be marked as `stored` and/or `served`
|
||||
|
||||
---
|
||||
|
||||
## Stored version
|
||||
|
||||
- Exactly one version has to be marked as the `stored` version
|
||||
|
||||
- As the name implies, it is the one that will be stored in etcd
|
||||
|
||||
- Resources in storage are never converted automatically
|
||||
|
||||
(we need to read and re-write them ourselves)
|
||||
|
||||
- Yes, this means that we can have different versions in etcd at any time
|
||||
|
||||
- Our code needs to handle all the versions that still exist in storage
|
||||
|
||||
---
|
||||
|
||||
## Served versions
|
||||
|
||||
- By default, the Kubernetes API will serve resources "as-is"
|
||||
|
||||
(using their stored version)
|
||||
|
||||
- It will assume that all versions are compatible storage-wise
|
||||
|
||||
(i.e. that the spec and fields are compatible between versions)
|
||||
|
||||
- We can provide [conversion webhooks](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definition-versioning/#webhook-conversion) to "translate" requests
|
||||
|
||||
(the alternative is to upgrade all stored resources and stop serving old versions)
|
||||
|
||||
---
|
||||
|
||||
## Operator reliability
|
||||
|
||||
- Remember that the operator itself must be resilient
|
||||
|
||||
(e.g.: the node running it can fail)
|
||||
|
||||
- Our operator must be able to restart and recover gracefully
|
||||
|
||||
- Do not store state locally
|
||||
|
||||
(unless we can reconstruct that state when we restart)
|
||||
|
||||
- As indicated earlier, we can use the Kubernetes API to store data:
|
||||
|
||||
- in the custom resources themselves
|
||||
|
||||
- in other resources' annotations
|
||||
|
||||
---
|
||||
|
||||
## Beyond CRDs
|
||||
|
||||
- CRDs cannot use custom storage (e.g. for time series data)
|
||||
|
||||
- CRDs cannot support arbitrary subresources (like logs or exec for Pods)
|
||||
|
||||
- CRDs cannot support protobuf (for faster, more efficient communication)
|
||||
|
||||
- If we need these things, we can use the [aggregation layer](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/apiserver-aggregation/) instead
|
||||
|
||||
- The aggregation layer proxies all requests below a specific path to another server
|
||||
|
||||
(this is used e.g. by the metrics server)
|
||||
|
||||
- [This documentation page](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#choosing-a-method-for-adding-custom-resources) compares the features of CRDs and API aggregation
|
||||
389
slides/k8s/operators.md
Normal file
389
slides/k8s/operators.md
Normal file
@@ -0,0 +1,389 @@
|
||||
# Operators
|
||||
|
||||
- Operators are one of the many ways to extend Kubernetes
|
||||
|
||||
- We will define operators
|
||||
|
||||
- We will see how they work
|
||||
|
||||
- We will install a specific operator (for ElasticSearch)
|
||||
|
||||
- We will use it to provision an ElasticSearch cluster
|
||||
|
||||
---
|
||||
|
||||
## What are operators?
|
||||
|
||||
*An operator represents **human operational knowledge in software,**
|
||||
<br/>
|
||||
to reliably manage an application.
|
||||
— [CoreOS](https://coreos.com/blog/introducing-operators.html)*
|
||||
|
||||
Examples:
|
||||
|
||||
- Deploying and configuring replication with MySQL, PostgreSQL ...
|
||||
|
||||
- Setting up Elasticsearch, Kafka, RabbitMQ, Zookeeper ...
|
||||
|
||||
- Reacting to failures when intervention is needed
|
||||
|
||||
- Scaling up and down these systems
|
||||
|
||||
---
|
||||
|
||||
## What are they made from?
|
||||
|
||||
- Operators combine two things:
|
||||
|
||||
- Custom Resource Definitions
|
||||
|
||||
- controller code watching the corresponding resources and acting upon them
|
||||
|
||||
- A given operator can define one or multiple CRDs
|
||||
|
||||
- The controller code (control loop) typically runs within the cluster
|
||||
|
||||
(running as a Deployment with 1 replica is a common scenario)
|
||||
|
||||
- But it could also run elsewhere
|
||||
|
||||
(nothing mandates that the code run on the cluster, as long as it has API access)
|
||||
|
||||
---
|
||||
|
||||
## Why use operators?
|
||||
|
||||
- Kubernetes gives us Deployments, StatefulSets, Services ...
|
||||
|
||||
- These mechanisms give us building blocks to deploy applications
|
||||
|
||||
- They work great for services that are made of *N* identical containers
|
||||
|
||||
(like stateless ones)
|
||||
|
||||
- They also work great for some stateful applications like Consul, etcd ...
|
||||
|
||||
(with the help of highly persistent volumes)
|
||||
|
||||
- They're not enough for complex services:
|
||||
|
||||
- where different containers have different roles
|
||||
|
||||
- where extra steps have to be taken when scaling or replacing containers
|
||||
|
||||
---
|
||||
|
||||
## Use-cases for operators
|
||||
|
||||
- Systems with primary/secondary replication
|
||||
|
||||
Examples: MariaDB, MySQL, PostgreSQL, Redis ...
|
||||
|
||||
- Systems where different groups of nodes have different roles
|
||||
|
||||
Examples: ElasticSearch, MongoDB ...
|
||||
|
||||
- Systems with complex dependencies (that are themselves managed with operators)
|
||||
|
||||
Examples: Flink or Kafka, which both depend on Zookeeper
|
||||
|
||||
---
|
||||
|
||||
## More use-cases
|
||||
|
||||
- Representing and managing external resources
|
||||
|
||||
(Example: [AWS Service Operator](https://operatorhub.io/operator/alpha/aws-service-operator.v0.0.1))
|
||||
|
||||
- Managing complex cluster add-ons
|
||||
|
||||
(Example: [Istio operator](https://operatorhub.io/operator/beta/istio-operator.0.1.6))
|
||||
|
||||
- Deploying and managing our applications' lifecycles
|
||||
|
||||
(more on that later)
|
||||
|
||||
---
|
||||
|
||||
## How operators work
|
||||
|
||||
- An operator creates one or more CRDs
|
||||
|
||||
(i.e., it creates new "Kinds" of resources on our cluster)
|
||||
|
||||
- The operator also runs a *controller* that will watch its resources
|
||||
|
||||
- Each time we create/update/delete a resource, the controller is notified
|
||||
|
||||
(we could write our own cheap controller with `kubectl get --watch`)
|
||||
|
||||
---
|
||||
|
||||
## One operator in action
|
||||
|
||||
- We will install the UPMC Enterprises ElasticSearch operator
|
||||
|
||||
- This operator requires PersistentVolumes
|
||||
|
||||
- We will install Rancher's [local path storage provisioner](https://github.com/rancher/local-path-provisioner) to automatically create these
|
||||
|
||||
- Then, we will create an ElasticSearch resource
|
||||
|
||||
- The operator will detect that resource and provision the cluster
|
||||
|
||||
---
|
||||
|
||||
## Installing a Persistent Volume provisioner
|
||||
|
||||
(This step can be skipped if you already have a dynamic volume provisioner.)
|
||||
|
||||
- This provisioner creates Persistent Volumes backed by `hostPath`
|
||||
|
||||
(local directories on our nodes)
|
||||
|
||||
- It doesn't require anything special ...
|
||||
|
||||
- ... But losing a node = losing the volumes on that node!
|
||||
|
||||
.exercise[
|
||||
|
||||
- Install the local path storage provisioner:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/local-path-storage.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Making sure we have a default StorageClass
|
||||
|
||||
- The ElasticSearch operator will create StatefulSets
|
||||
|
||||
- These StatefulSets will instantiate PersistentVolumeClaims
|
||||
|
||||
- These PVCs need to be explicitly associated with a StorageClass
|
||||
|
||||
- Or we need to tag a StorageClass to be used as the default one
|
||||
|
||||
.exercise[
|
||||
|
||||
- List StorageClasses:
|
||||
```bash
|
||||
kubectl get storageclasses
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We should see the `local-path` StorageClass.
|
||||
|
||||
---
|
||||
|
||||
## Setting a default StorageClass
|
||||
|
||||
- This is done by adding an annotation to the StorageClass:
|
||||
|
||||
`storageclass.kubernetes.io/is-default-class: true`
|
||||
|
||||
.exercise[
|
||||
|
||||
- Tag the StorageClass so that it's the default one:
|
||||
```bash
|
||||
kubectl annotate storageclass local-path \
|
||||
storageclass.kubernetes.io/is-default-class=true
|
||||
```
|
||||
|
||||
- Check the result:
|
||||
```bash
|
||||
kubectl get storageclasses
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
Now, the StorageClass should have `(default)` next to its name.
|
||||
|
||||
---
|
||||
|
||||
## Install the ElasticSearch operator
|
||||
|
||||
- The operator needs:
|
||||
|
||||
- a Deployment for its controller
|
||||
- a ServiceAccount, ClusterRole, ClusterRoleBinding for permissions
|
||||
- a Namespace
|
||||
|
||||
- We have grouped all the definitions for these resources in a YAML file
|
||||
|
||||
.exercise[
|
||||
|
||||
- Install the operator:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/elasticsearch-operator.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Wait for the operator to be ready
|
||||
|
||||
- Some operators require to create their CRDs separately
|
||||
|
||||
- This operator will create its CRD itself
|
||||
|
||||
(i.e. the CRD is not listed in the YAML that we applied earlier)
|
||||
|
||||
.exercise[
|
||||
|
||||
- Wait until the `elasticsearchclusters` CRD shows up:
|
||||
```bash
|
||||
kubectl get crds
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Create an ElasticSearch resource
|
||||
|
||||
- We can now create a resource with `kind: ElasticsearchCluster`
|
||||
|
||||
- The YAML for that resource will specify all the desired parameters:
|
||||
|
||||
- how many nodes do we want of each type (client, master, data)
|
||||
- image to use
|
||||
- add-ons (kibana, cerebro, ...)
|
||||
- whether to use TLS or not
|
||||
- etc.
|
||||
|
||||
.exercise[
|
||||
|
||||
- Create our ElasticSearch cluster:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/elasticsearch-cluster.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Operator in action
|
||||
|
||||
- Over the next minutes, the operator will create:
|
||||
|
||||
- StatefulSets (one for master nodes, one for data nodes)
|
||||
|
||||
- Deployments (for client nodes; and for add-ons like cerebro and kibana)
|
||||
|
||||
- Services (for all these pods)
|
||||
|
||||
.exercise[
|
||||
|
||||
- Wait for all the StatefulSets to be fully up and running:
|
||||
```bash
|
||||
kubectl get statefulsets -w
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Connecting to our cluster
|
||||
|
||||
- Since connecting directly to the ElasticSearch API is a bit raw,
|
||||
<br/>we'll connect to the cerebro frontend instead
|
||||
|
||||
.exercise[
|
||||
|
||||
- Edit the cerebro service to change its type from ClusterIP to NodePort:
|
||||
```bash
|
||||
kubectl patch svc cerebro-es -p "spec: { type: NodePort }"
|
||||
```
|
||||
|
||||
- Retrieve the NodePort that was allocated:
|
||||
```bash
|
||||
kubectl get svc cerebreo-es
|
||||
```
|
||||
|
||||
- Connect to that port with a browser
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## (Bonus) Setup filebeat
|
||||
|
||||
- Let's send some data to our brand new ElasticSearch cluster!
|
||||
|
||||
- We'll deploy a filebeat DaemonSet to collect node logs
|
||||
|
||||
.exercise[
|
||||
|
||||
- Deploy filebeat:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/filebeat.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We should see at least one index being created in cerebro.
|
||||
|
||||
---
|
||||
|
||||
## (Bonus) Access log data with kibana
|
||||
|
||||
- Let's expose kibana (by making kibana-es a NodePort too)
|
||||
|
||||
- Then access kibana
|
||||
|
||||
- We'll need to configure kibana indexes
|
||||
|
||||
---
|
||||
|
||||
## Deploying our apps with operators
|
||||
|
||||
- It is very simple to deploy with `kubectl run` / `kubectl expose`
|
||||
|
||||
- We can unlock more features by writing YAML and using `kubectl apply`
|
||||
|
||||
- Kustomize or Helm let us deploy in multiple environments
|
||||
|
||||
(and adjust/tweak parameters in each environment)
|
||||
|
||||
- We can also use an operator to deploy our application
|
||||
|
||||
---
|
||||
|
||||
## Pros and cons of deploying with operators
|
||||
|
||||
- The app definition and configuration is persisted in the Kubernetes API
|
||||
|
||||
- Multiple instances of the app can be manipulated with `kubectl get`
|
||||
|
||||
- We can add labels, annotations to the app instances
|
||||
|
||||
- Our controller can execute custom code for any lifecycle event
|
||||
|
||||
- However, we need to write this controller
|
||||
|
||||
- We need to be careful about changes
|
||||
|
||||
(what happens when the resource `spec` is updated?)
|
||||
|
||||
---
|
||||
|
||||
## Operators are not magic
|
||||
|
||||
- Look at the ElasticSearch resource definition
|
||||
|
||||
(`~/container.training/k8s/elasticsearch-cluster.yaml`)
|
||||
|
||||
- What should happen if we flip the `use-tls` flag? Twice?
|
||||
|
||||
- What should happen if we remove / re-add the kibana or cerebro sections?
|
||||
|
||||
- What should happen if we change the number of nodes?
|
||||
|
||||
- What if we want different images or parameters for the different nodes?
|
||||
|
||||
*Operators can be very powerful, iff we know exactly the scenarios that they can handle.*
|
||||
@@ -93,5 +93,3 @@ chapters:
|
||||
- - k8s/lastwords-admin.md
|
||||
- k8s/links.md
|
||||
- shared/thankyou.md
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user