diff --git a/k8s/elasticsearch-cluster.yaml b/k8s/elasticsearch-cluster.yaml new file mode 100644 index 00000000..23d8108d --- /dev/null +++ b/k8s/elasticsearch-cluster.yaml @@ -0,0 +1,21 @@ +apiVersion: enterprises.upmc.com/v1 +kind: ElasticsearchCluster +metadata: + name: es +spec: + kibana: + image: docker.elastic.co/kibana/kibana-oss:6.1.3 + image-pull-policy: Always + cerebro: + image: upmcenterprises/cerebro:0.7.2 + image-pull-policy: Always + elastic-search-image: upmcenterprises/docker-elasticsearch-kubernetes:6.1.3_0 + image-pull-policy: Always + client-node-replicas: 2 + master-node-replicas: 3 + data-node-replicas: 3 + network-host: 0.0.0.0 + use-ssl: false + data-volume-size: 10Gi + java-options: "-Xms512m -Xmx512m" + diff --git a/k8s/elasticsearch-operator.yaml b/k8s/elasticsearch-operator.yaml new file mode 100644 index 00000000..0049541e --- /dev/null +++ b/k8s/elasticsearch-operator.yaml @@ -0,0 +1,94 @@ +# This is mirrored from https://github.com/upmc-enterprises/elasticsearch-operator/blob/master/example/controller.yaml but using the elasticsearch-operator namespace instead of operator +--- +apiVersion: v1 +kind: Namespace +metadata: + name: elasticsearch-operator +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: elasticsearch-operator + namespace: elasticsearch-operator +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: elasticsearch-operator +rules: +- apiGroups: ["extensions"] + resources: ["deployments", "replicasets", "daemonsets"] + verbs: ["create", "get", "update", "delete", "list"] +- apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["create", "get", "update", "delete", "list"] +- apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "create", "delete", "deletecollection"] +- apiGroups: [""] + resources: ["persistentvolumes", "persistentvolumeclaims", "services", "secrets", "configmaps"] + verbs: ["create", "get", 
"update", "delete", "list"] +- apiGroups: ["batch"] + resources: ["cronjobs", "jobs"] + verbs: ["create", "get", "deletecollection", "delete"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["list", "get", "watch"] +- apiGroups: ["apps"] + resources: ["statefulsets", "deployments"] + verbs: ["*"] +- apiGroups: ["enterprises.upmc.com"] + resources: ["elasticsearchclusters"] + verbs: ["*"] +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: elasticsearch-operator + namespace: elasticsearch-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: elasticsearch-operator +subjects: +- kind: ServiceAccount + name: elasticsearch-operator + namespace: elasticsearch-operator +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: elasticsearch-operator + namespace: elasticsearch-operator +spec: + replicas: 1 + template: + metadata: + labels: + name: elasticsearch-operator + spec: + containers: + - name: operator + image: upmcenterprises/elasticsearch-operator:0.2.0 + imagePullPolicy: Always + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - containerPort: 8000 + name: http + livenessProbe: + httpGet: + path: /live + port: 8000 + initialDelaySeconds: 10 + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 8000 + initialDelaySeconds: 10 + timeoutSeconds: 5 + serviceAccount: elasticsearch-operator diff --git a/k8s/filebeat.yaml b/k8s/filebeat.yaml new file mode 100644 index 00000000..690e9613 --- /dev/null +++ b/k8s/filebeat.yaml @@ -0,0 +1,167 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: filebeat-config + namespace: kube-system + labels: + k8s-app: filebeat +data: + filebeat.yml: |- + filebeat.config: + inputs: + # Mounted `filebeat-inputs` configmap: + path: ${path.config}/inputs.d/*.yml + # Reload inputs configs as they change: + reload.enabled: false + modules: + path: ${path.config}/modules.d/*.yml + 
# Reload module configs as they change: + reload.enabled: false + + # To enable hints based autodiscover, remove `filebeat.config.inputs` configuration and uncomment this: + #filebeat.autodiscover: + # providers: + # - type: kubernetes + # hints.enabled: true + + processors: + - add_cloud_metadata: + + cloud.id: ${ELASTIC_CLOUD_ID} + cloud.auth: ${ELASTIC_CLOUD_AUTH} + + output.elasticsearch: + hosts: ['${ELASTICSEARCH_HOST:elasticsearch}:${ELASTICSEARCH_PORT:9200}'] + username: ${ELASTICSEARCH_USERNAME} + password: ${ELASTICSEARCH_PASSWORD} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: filebeat-inputs + namespace: kube-system + labels: + k8s-app: filebeat +data: + kubernetes.yml: |- + - type: docker + containers.ids: + - "*" + processors: + - add_kubernetes_metadata: + in_cluster: true +--- +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: filebeat + namespace: kube-system + labels: + k8s-app: filebeat +spec: + template: + metadata: + labels: + k8s-app: filebeat + spec: + serviceAccountName: filebeat + terminationGracePeriodSeconds: 30 + containers: + - name: filebeat + image: docker.elastic.co/beats/filebeat-oss:7.0.1 + args: [ + "-c", "/etc/filebeat.yml", + "-e", + ] + env: + - name: ELASTICSEARCH_HOST + value: elasticsearch-es.default.svc.cluster.local + - name: ELASTICSEARCH_PORT + value: "9200" + - name: ELASTICSEARCH_USERNAME + value: elastic + - name: ELASTICSEARCH_PASSWORD + value: changeme + - name: ELASTIC_CLOUD_ID + value: + - name: ELASTIC_CLOUD_AUTH + value: + securityContext: + runAsUser: 0 + # If using Red Hat OpenShift uncomment this: + #privileged: true + resources: + limits: + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - name: config + mountPath: /etc/filebeat.yml + readOnly: true + subPath: filebeat.yml + - name: inputs + mountPath: /usr/share/filebeat/inputs.d + readOnly: true + - name: data + mountPath: /usr/share/filebeat/data + - name: varlibdockercontainers + mountPath: 
/var/lib/docker/containers + readOnly: true + volumes: + - name: config + configMap: + defaultMode: 0600 + name: filebeat-config + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: inputs + configMap: + defaultMode: 0600 + name: filebeat-inputs + # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart + - name: data + hostPath: + path: /var/lib/filebeat-data + type: DirectoryOrCreate +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: filebeat +subjects: +- kind: ServiceAccount + name: filebeat + namespace: kube-system +roleRef: + kind: ClusterRole + name: filebeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: filebeat + labels: + k8s-app: filebeat +rules: +- apiGroups: [""] # "" indicates the core API group + resources: + - namespaces + - pods + verbs: + - get + - watch + - list +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: filebeat + namespace: kube-system + labels: + k8s-app: filebeat +--- diff --git a/k8s/local-path-storage.yaml b/k8s/local-path-storage.yaml new file mode 100644 index 00000000..7374a02a --- /dev/null +++ b/k8s/local-path-storage.yaml @@ -0,0 +1,110 @@ +# This is a local copy of: +# https://github.com/rancher/local-path-provisioner/blob/master/deploy/local-path-storage.yaml +--- +apiVersion: v1 +kind: Namespace +metadata: + name: local-path-storage +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: local-path-provisioner-service-account + namespace: local-path-storage +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: local-path-provisioner-role + namespace: local-path-storage +rules: +- apiGroups: [""] + resources: ["nodes", "persistentvolumeclaims"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["endpoints", "persistentvolumes", 
"pods"] + verbs: ["*"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] +- apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: local-path-provisioner-bind + namespace: local-path-storage +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: local-path-provisioner-role +subjects: +- kind: ServiceAccount + name: local-path-provisioner-service-account + namespace: local-path-storage +--- +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: local-path-provisioner + namespace: local-path-storage +spec: + replicas: 1 + selector: + matchLabels: + app: local-path-provisioner + template: + metadata: + labels: + app: local-path-provisioner + spec: + serviceAccountName: local-path-provisioner-service-account + containers: + - name: local-path-provisioner + image: rancher/local-path-provisioner:v0.0.8 + imagePullPolicy: Always + command: + - local-path-provisioner + - --debug + - start + - --config + - /etc/config/config.json + volumeMounts: + - name: config-volume + mountPath: /etc/config/ + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumes: + - name: config-volume + configMap: + name: local-path-config +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: local-path +provisioner: rancher.io/local-path +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: local-path-config + namespace: local-path-storage +data: + config.json: |- + { + "nodePathMap":[ + { + "node":"DEFAULT_PATH_FOR_NON_LISTED_NODES", + "paths":["/opt/local-path-provisioner"] + } + ] + } + diff --git a/slides/k8s/operators-design.md b/slides/k8s/operators-design.md new file mode 100644 index 00000000..9a02f2b8 --- /dev/null +++ b/slides/k8s/operators-design.md @@ -0,0 +1,356 @@ +## What 
does it take to write an operator? + +- Writing a quick-and-dirty operator, or a POC/MVP, is easy + +- Writing a robust operator is hard + +- We will describe the general idea + +- We will identify some of the associated challenges + +- We will list a few tools that can help us + +--- + +## Top-down vs. bottom-up + +- Both approaches are possible + +- Let's see what they entail, and their respective pros and cons + +--- + +## Top-down approach + +- Start with high-level design (see next slide) + +- Pros: + + - can yield cleaner design that will be more robust + +- Cons: + + - must be able to anticipate all the events that might happen + + - design will be better only to the extent of what we anticipated + + - hard to anticipate if we don't have production experience + +--- + +## High-level design + +- What are we solving? + + (e.g.: geographic databases backed by PostGIS with Redis caches) + +- What are our use-cases, stories? + + (e.g.: adding/resizing caches and read replicas; load balancing queries) + +- What kind of outage do we want to address? + + (e.g.: loss of individual node, pod, volume) + +- What are our *non-features*, the things we don't want to address? + + (e.g.: loss of datacenter/zone; differentiating between read and write queries; +
+ cache invalidation; upgrading to newer major versions of Redis, PostGIS, PostgreSQL) + +--- + +## Low-level design + +- What Custom Resource Definitions do we need? + + (one, many?) + +- How will we store configuration information? + + (part of the CRD spec fields, annotations, other?) + +- Do we need to store state? If so, where? + + - state that is small and doesn't change much can be stored via the Kubernetes API +
+ (e.g.: leader information, configuration, credentials) + + - things that are big and/or change a lot should go elsewhere +
+ (e.g.: metrics, bigger configuration file like GeoIP) + +--- + +class: extra-details + +## What can we store via the Kubernetes API? + +- The API server stores most Kubernetes resources into etcd + +- Etcd is designed for reliability, not for performance + +- If our storage needs exceed what etcd can offer, we need to use something else: + + - either directly + + - or by extending the API server +
(for instance by using the aggregation layer, like [metrics server](https://github.com/kubernetes-incubator/metrics-server) does) + +--- + +## Bottom-up approach + +- Start with existing Kubernetes resources (Deployment, Stateful Set...) + +- Run the system in production + +- Add scripts, automation, to facilitate day-to-day operations + +- Turn the scripts into an operator + +- Pros: simpler to get started; reflects actual use-cases + +- Cons: can result in convoluted designs requiring extensive refactor + +--- + +## General idea + +- Our operator will watch its CRDs *and associated resources* + +- Drawing state diagrams and finite state automata helps a lot + +- It's OK if some transitions lead to a big catch-all "human intervention" + +- Over time, we will learn about new failure modes and add to these diagrams + +- It's OK to start with CRD creation / deletion and prevent any modification + + (that's the easy POC/MVP we were talking about) + +- *Presentation* and *validation* will help our users + + (more on that later) + +--- + +## Challenges + +- Reacting to infrastructure disruption can seem hard at first + +- Kubernetes gives us a lot of primitives to help: + + - Pods and Persistent Volumes will *eventually* recover + + - Stateful Sets give us easy ways to "add N copies" of a thing + +- The real challenges come with configuration changes + + (i.e., what to do when our users update our CRDs) + +- Keep in mind that [some] of the [largest] cloud [outages] haven't been caused by [natural catastrophes], or even code bugs, but by configuration changes + +[some]: https://www.datacenterdynamics.com/news/gcp-outage-mainone-leaked-google-cloudflare-ip-addresses-china-telecom/ +[largest]: https://aws.amazon.com/message/41926/ +[outages]: https://aws.amazon.com/message/65648/ +[natural catastrophes]: https://www.datacenterknowledge.com/amazon/aws-says-it-s-never-seen-whole-data-center-go-down + +--- + +## Configuration changes + +- It is helpful to analyze and 
understand how Kubernetes controllers work: + + - watch resource for modifications + + - compare desired state (CRD) and current state + + - issue actions to converge state + +- Configuration changes will probably require *another* state diagram or FSA + +- Again, it's OK to have transitions labeled as "unsupported" + + (i.e. reject some modifications because we can't execute them) + +--- + +## Tools + +- CoreOS / RedHat Operator Framework + + [GitHub](https://github.com/operator-framework) + | + [Blog](https://developers.redhat.com/blog/2018/12/18/introduction-to-the-kubernetes-operator-framework/) + | + [Intro talk](https://www.youtube.com/watch?v=8k_ayO1VRXE) + | + [Deep dive talk](https://www.youtube.com/watch?v=fu7ecA2rXmc) + +- Zalando Kubernetes Operator Pythonic Framework (KOPF) + + [GitHub](https://github.com/zalando-incubator/kopf) + | + [Docs](https://kopf.readthedocs.io/) + | + [Step-by-step tutorial](https://kopf.readthedocs.io/en/stable/walkthrough/problem/) + +- Mesosphere Kubernetes Universal Declarative Operator (KUDO) + + [GitHub](https://github.com/kudobuilder/kudo) + | + [Blog](https://mesosphere.com/blog/announcing-maestro-a-declarative-no-code-approach-to-kubernetes-day-2-operators/) + | + [Docs](https://kudo.dev/) + | + [Zookeeper example](https://github.com/kudobuilder/frameworks/tree/master/repo/stable/zookeeper) + +--- + +## Validation + +- By default, a CRD is "free form" + + (we can put pretty much anything we want in it) + +- When creating a CRD, we can provide an OpenAPI v3 schema + ([Example](https://github.com/amaizfinance/redis-operator/blob/master/deploy/crds/k8s_v1alpha1_redis_crd.yaml#L34)) + +- The API server will then validate resources created/edited with this schema + +- If we need a stronger validation, we can use a Validating Admission Webhook: + + - run an [admission webhook server](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#write-an-admission-webhook-server) to receive 
validation requests + + - register the webhook by creating a [ValidatingWebhookConfiguration](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#configure-admission-webhooks-on-the-fly) + + - each time the API server receives a request matching the configuration, +
the request is sent to our server for validation + +--- + +## Presentation + +- By default, `kubectl get mycustomresource` won't display much information + + (just the name and age of each resource) + +- When creating a CRD, we can specify additional columns to print + ([Example](https://github.com/amaizfinance/redis-operator/blob/master/deploy/crds/k8s_v1alpha1_redis_crd.yaml#L6), + [Docs](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#additional-printer-columns)) + +- By default, `kubectl describe mycustomresource` will also be generic + +- `kubectl describe` can show events related to our custom resources + + (for that, we need to create Event resources, and fill the `involvedObject` field) + +- For scalable resources, we can define a `scale` sub-resource + +- This will enable the use of `kubectl scale` and other scaling-related operations + +--- + +## About scaling + +- It is possible to use the HPA (Horizontal Pod Autoscaler) with CRDs + +- But it is not always desirable + +- The HPA works very well for homogenous, stateless workloads + +- For other workloads, your mileage may vary + +- Some systems can scale across multiple dimensions + + (for instance: increase number of replicas, or number of shards?) 
+ +- If autoscaling is desired, the operator will have to take complex decisions + + (example: Zalando's Elasticsearch Operator ([Video](https://www.youtube.com/watch?v=lprE0J0kAq0))) + +--- + +## Versioning + +- As our operator evolves over time, we may have to change the CRD + + (add, remove, change fields) + +- Like every other resource in Kubernetes, [custom resources are versioned](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definition-versioning/ +) + +- When creating a CRD, we need to specify a *list* of versions + +- Versions can be marked as `stored` and/or `served` + +--- + +## Stored version + +- Exactly one version has to be marked as the `stored` version + +- As the name implies, it is the one that will be stored in etcd + +- Resources in storage are never converted automatically + + (we need to read and re-write them ourselves) + +- Yes, this means that we can have different versions in etcd at any time + +- Our code needs to handle all the versions that still exist in storage + +--- + +## Served versions + +- By default, the Kubernetes API will serve resources "as-is" + + (using their stored version) + +- It will assume that all versions are compatible storage-wise + + (i.e. 
that the spec and fields are compatible between versions) + +- We can provide [conversion webhooks](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definition-versioning/#webhook-conversion) to "translate" requests + + (the alternative is to upgrade all stored resources and stop serving old versions) + +--- + +## Operator reliability + +- Remember that the operator itself must be resilient + + (e.g.: the node running it can fail) + +- Our operator must be able to restart and recover gracefully + +- Do not store state locally + + (unless we can reconstruct that state when we restart) + +- As indicated earlier, we can use the Kubernetes API to store data: + + - in the custom resources themselves + + - in other resources' annotations + +--- + +## Beyond CRDs + +- CRDs cannot use custom storage (e.g. for time series data) + +- CRDs cannot support arbitrary subresources (like logs or exec for Pods) + +- CRDs cannot support protobuf (for faster, more efficient communication) + +- If we need these things, we can use the [aggregation layer](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/apiserver-aggregation/) instead + +- The aggregation layer proxies all requests below a specific path to another server + + (this is used e.g. by the metrics server) + +- [This documentation page](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#choosing-a-method-for-adding-custom-resources) compares the features of CRDs and API aggregation diff --git a/slides/k8s/operators.md b/slides/k8s/operators.md new file mode 100644 index 00000000..fdf6bbf6 --- /dev/null +++ b/slides/k8s/operators.md @@ -0,0 +1,389 @@ +# Operators + +- Operators are one of the many ways to extend Kubernetes + +- We will define operators + +- We will see how they work + +- We will install a specific operator (for ElasticSearch) + +- We will use it to provision an ElasticSearch cluster + +--- + +## What are operators? 
+ +*An operator represents **human operational knowledge in software,** +
+to reliably manage an application. +— [CoreOS](https://coreos.com/blog/introducing-operators.html)* + +Examples: + +- Deploying and configuring replication with MySQL, PostgreSQL ... + +- Setting up Elasticsearch, Kafka, RabbitMQ, Zookeeper ... + +- Reacting to failures when intervention is needed + +- Scaling up and down these systems + +--- + +## What are they made from? + +- Operators combine two things: + + - Custom Resource Definitions + + - controller code watching the corresponding resources and acting upon them + +- A given operator can define one or multiple CRDs + +- The controller code (control loop) typically runs within the cluster + + (running as a Deployment with 1 replica is a common scenario) + +- But it could also run elsewhere + + (nothing mandates that the code run on the cluster, as long as it has API access) + +--- + +## Why use operators? + +- Kubernetes gives us Deployments, StatefulSets, Services ... + +- These mechanisms give us building blocks to deploy applications + +- They work great for services that are made of *N* identical containers + + (like stateless ones) + +- They also work great for some stateful applications like Consul, etcd ... + + (with the help of highly persistent volumes) + +- They're not enough for complex services: + + - where different containers have different roles + + - where extra steps have to be taken when scaling or replacing containers + +--- + +## Use-cases for operators + +- Systems with primary/secondary replication + + Examples: MariaDB, MySQL, PostgreSQL, Redis ... + +- Systems where different groups of nodes have different roles + + Examples: ElasticSearch, MongoDB ... 
+ +- Systems with complex dependencies (that are themselves managed with operators) + + Examples: Flink or Kafka, which both depend on Zookeeper + +--- + +## More use-cases + +- Representing and managing external resources + + (Example: [AWS Service Operator](https://operatorhub.io/operator/alpha/aws-service-operator.v0.0.1)) + +- Managing complex cluster add-ons + + (Example: [Istio operator](https://operatorhub.io/operator/beta/istio-operator.0.1.6)) + +- Deploying and managing our applications' lifecycles + + (more on that later) + +--- + +## How operators work + +- An operator creates one or more CRDs + + (i.e., it creates new "Kinds" of resources on our cluster) + +- The operator also runs a *controller* that will watch its resources + +- Each time we create/update/delete a resource, the controller is notified + + (we could write our own cheap controller with `kubectl get --watch`) + +--- + +## One operator in action + +- We will install the UPMC Enterprises ElasticSearch operator + +- This operator requires PersistentVolumes + +- We will install Rancher's [local path storage provisioner](https://github.com/rancher/local-path-provisioner) to automatically create these + +- Then, we will create an ElasticSearch resource + +- The operator will detect that resource and provision the cluster + +--- + +## Installing a Persistent Volume provisioner + +(This step can be skipped if you already have a dynamic volume provisioner.) + +- This provisioner creates Persistent Volumes backed by `hostPath` + + (local directories on our nodes) + +- It doesn't require anything special ... + +- ... But losing a node = losing the volumes on that node! 
+ +.exercise[ + +- Install the local path storage provisioner: + ```bash + kubectl apply -f ~/container.training/k8s/local-path-storage.yaml + ``` + +] + +--- + +## Making sure we have a default StorageClass + +- The ElasticSearch operator will create StatefulSets + +- These StatefulSets will instantiate PersistentVolumeClaims + +- These PVCs need to be explicitly associated with a StorageClass + +- Or we need to tag a StorageClass to be used as the default one + +.exercise[ + +- List StorageClasses: + ```bash + kubectl get storageclasses + ``` + +] + +We should see the `local-path` StorageClass. + +--- + +## Setting a default StorageClass + +- This is done by adding an annotation to the StorageClass: + + `storageclass.kubernetes.io/is-default-class: true` + +.exercise[ + +- Tag the StorageClass so that it's the default one: + ```bash + kubectl annotate storageclass local-path \ + storageclass.kubernetes.io/is-default-class=true + ``` + +- Check the result: + ```bash + kubectl get storageclasses + ``` + +] + +Now, the StorageClass should have `(default)` next to its name. + +--- + +## Install the ElasticSearch operator + +- The operator needs: + + - a Deployment for its controller + - a ServiceAccount, ClusterRole, ClusterRoleBinding for permissions + - a Namespace + +- We have grouped all the definitions for these resources in a YAML file + +.exercise[ + +- Install the operator: + ```bash + kubectl apply -f ~/container.training/k8s/elasticsearch-operator.yaml + ``` + +] + +--- + +## Wait for the operator to be ready + +- Some operators require to create their CRDs separately + +- This operator will create its CRD itself + + (i.e. 
the CRD is not listed in the YAML that we applied earlier) + +.exercise[ + +- Wait until the `elasticsearchclusters` CRD shows up: + ```bash + kubectl get crds + ``` + +] + +--- + +## Create an ElasticSearch resource + +- We can now create a resource with `kind: ElasticsearchCluster` + +- The YAML for that resource will specify all the desired parameters: + + - how many nodes do we want of each type (client, master, data) + - image to use + - add-ons (kibana, cerebro, ...) + - whether to use TLS or not + - etc. + +.exercise[ + +- Create our ElasticSearch cluster: + ```bash + kubectl apply -f ~/container.training/k8s/elasticsearch-cluster.yaml + ``` + +] + +--- + +## Operator in action + +- Over the next minutes, the operator will create: + + - StatefulSets (one for master nodes, one for data nodes) + + - Deployments (for client nodes; and for add-ons like cerebro and kibana) + + - Services (for all these pods) + +.exercise[ + +- Wait for all the StatefulSets to be fully up and running: + ```bash + kubectl get statefulsets -w + ``` + +] + +--- + +## Connecting to our cluster + +- Since connecting directly to the ElasticSearch API is a bit raw, +
we'll connect to the cerebro frontend instead + +.exercise[ + +- Edit the cerebro service to change its type from ClusterIP to NodePort: + ```bash + kubectl patch svc cerebro-es -p "spec: { type: NodePort }" + ``` + +- Retrieve the NodePort that was allocated: + ```bash + kubectl get svc cerebro-es + ``` + +- Connect to that port with a browser + +] + +--- + +## (Bonus) Setup filebeat + +- Let's send some data to our brand new ElasticSearch cluster! + +- We'll deploy a filebeat DaemonSet to collect node logs + +.exercise[ + +- Deploy filebeat: + ```bash + kubectl apply -f ~/container.training/k8s/filebeat.yaml + ``` + +] + +We should see at least one index being created in cerebro. + +--- + +## (Bonus) Access log data with kibana + +- Let's expose kibana (by making kibana-es a NodePort too) + +- Then access kibana + +- We'll need to configure kibana indexes + +--- + +## Deploying our apps with operators + +- It is very simple to deploy with `kubectl run` / `kubectl expose` + +- We can unlock more features by writing YAML and using `kubectl apply` + +- Kustomize or Helm let us deploy in multiple environments + + (and adjust/tweak parameters in each environment) + +- We can also use an operator to deploy our application + +--- + +## Pros and cons of deploying with operators + +- The app definition and configuration is persisted in the Kubernetes API + +- Multiple instances of the app can be manipulated with `kubectl get` + +- We can add labels, annotations to the app instances + +- Our controller can execute custom code for any lifecycle event + +- However, we need to write this controller + +- We need to be careful about changes + + (what happens when the resource `spec` is updated?) + +--- + +## Operators are not magic + +- Look at the ElasticSearch resource definition + + (`~/container.training/k8s/elasticsearch-cluster.yaml`) + +- What should happen if we flip the `use-ssl` flag? Twice? 
+ +- What should happen if we remove / re-add the kibana or cerebro sections? + +- What should happen if we change the number of nodes? + +- What if we want different images or parameters for the different nodes? + +*Operators can be very powerful, iff we know exactly the scenarios that they can handle.* diff --git a/slides/kadm-fourday.yml b/slides/kadm-fourday.yml index 6b6eb9d4..8c713631 100644 --- a/slides/kadm-fourday.yml +++ b/slides/kadm-fourday.yml @@ -93,5 +93,3 @@ chapters: - - k8s/lastwords-admin.md - k8s/links.md - shared/thankyou.md - -