Merge pull request #120 from paigerube14/container_kill

Container kill
2026-04-15 06:57:28 +00:00 · 2021-07-15 15:07:58 -04:00
parent 76efac8f9b 46a1823291
commit f051c1c30f
7 changed files with 163 additions and 16 deletions
--- a/README.md
+++ b/README.md
@@ -36,6 +36,8 @@ Kraken supports pod, node, time/date and [litmus](https://github.com/litmuschaos

 - [Pod Scenarios](docs/pod_scenarios.md)

+- [Container Scenarios](docs/container_scenarios.md)
+
 - [Node Scenarios](docs/node_scenarios.md)

 - [Time Scenarios](docs/time_scenarios.md)
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -5,7 +5,9 @@ kraken:
    litmus_version: v1.10.0                                # Litmus version to install
    litmus_uninstall: False                                # If you want to uninstall litmus if failure
    chaos_scenarios:                                       # List of policies/chaos scenarios to load
-        -   pod_scenarios:                                 # List of chaos pod scenarios to load
+        -   container_scenarios:                                 # List of chaos container scenarios to load
+            - -    scenarios/container_etcd.yml
+        -   pod_scenarios:
            - -    scenarios/etcd.yml
            - -    scenarios/regex_openshift_pod_kill.yml
              -    scenarios/post_action_regex.py
@@ -19,7 +21,7 @@ kraken:
        -   litmus_scenarios:                              # List of litmus scenarios to load
            - - https://hub.litmuschaos.io/api/chaos/1.10.0?file=charts/generic/node-cpu-hog/rbac.yaml
              - scenarios/node_hog_engine.yaml
-        -   cluster_shut_down_scenarios:
+        - cluster_shut_down_scenarios:
            - - scenarios/cluster_shut_down_scenario.yml
              - scenarios/post_action_shut_down.py
        -   namespace_scenarios:
--- a/docs/container_scenarios.md
+++ b/docs/container_scenarios.md
@@ -0,0 +1,17 @@
+### Container Scenarios
+Kraken uses the `oc exec` command to `kill` specific containers in a pod.
+This can be based on the pod's namespace or labels. If you know the exact object you want to kill, you can also specify the container name or pod name in the scenario yaml file.
+These scenarios are in a simple yaml format that you can manipulate to run your specific tests, or you can use the pre-existing scenarios to see how they work.
+
+####  Example Config
+The following template shows the fields of a basic container chaos scenario config.
+
+```
+scenarios:
+- name: "<Name of scenario>"
+  namespace: "<specific namespace>" # can specify "*" if you want to find in all namespaces
+  label_selector: "<label of pod(s)>"
+  container_name: "<specific container name>"  # This is optional; if omitted, the first container of each selected pod is killed
+  pod_names:  # This is optional, can take out and will select all pods with given namespace and label
+  - <pod_name>
+```
--- a/kraken/kubernetes/client.py
+++ b/kraken/kubernetes/client.py
@@ -89,10 +89,13 @@ def list_killable_nodes(label_selector=None):


 # List pods in the given namespace
-def list_pods(namespace):
+def list_pods(namespace, label_selector=None):
    pods = []
    try:
-        ret = cli.list_namespaced_pod(namespace, pretty=True)
+        if label_selector:
+            ret = cli.list_namespaced_pod(namespace, pretty=True, label_selector=label_selector)
+        else:
+            ret = cli.list_namespaced_pod(namespace, pretty=True)
    except ApiException as e:
        logging.error(
            "Exception when calling \
@@ -116,20 +119,33 @@ def get_all_pods(label_selector=None):


 # Execute command in pod
-def exec_cmd_in_pod(command, pod_name, namespace):
+def exec_cmd_in_pod(command, pod_name, namespace, container=None):

    exec_command = ["bash", "-c", command]
    try:
-        ret = stream(
-            cli.connect_get_namespaced_pod_exec,
-            pod_name,
-            namespace,
-            command=exec_command,
-            stderr=True,
-            stdin=False,
-            stdout=True,
-            tty=False,
-        )
+        if container:
+            ret = stream(
+                cli.connect_get_namespaced_pod_exec,
+                pod_name,
+                namespace,
+                container=container,
+                command=exec_command,
+                stderr=True,
+                stdin=False,
+                stdout=True,
+                tty=False,
+            )
+        else:
+            ret = stream(
+                cli.connect_get_namespaced_pod_exec,
+                pod_name,
+                namespace,
+                command=exec_command,
+                stderr=True,
+                stdin=False,
+                stdout=True,
+                tty=False,
+            )
    except Exception:
        return False
    return ret
--- a/kraken/pod_scenarios/setup.py
+++ b/kraken/pod_scenarios/setup.py
@@ -2,7 +2,11 @@ import logging
 import kraken.invoke.command as runcommand
 import kraken.cerberus.setup as cerberus
 import kraken.post_actions.actions as post_actions
+import kraken.kubernetes.client as kubecli
 import time
+import yaml
+import sys
+import random


 # Run pod based scenarios
@@ -34,3 +38,98 @@ def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_dur
    except Exception as e:
        logging.error("Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e))
    return failed_post_scenarios
+
+
+def container_run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration):
+    """Run every container chaos scenario listed in the kraken config.
+
+    Each entry of ``scenarios_list`` is a list whose first item is the path
+    of a scenario yaml file and whose optional second item is an action that
+    is handed to ``post_actions.run`` before the kill.
+
+    For every scenario in a file the function runs the pre action, kills the
+    container(s), sleeps ``wait_duration`` seconds, checks recovery and
+    publishes the kraken status to cerberus.
+
+    Returns the updated ``failed_post_scenarios`` so the caller can keep
+    accumulating failures across scenario types.
+    """
+    for container_scenario_config in scenarios_list:
+        # Parse the file up front so it is not held open while scenarios run and sleep.
+        # NOTE(review): yaml.full_load assumes the scenario file is trusted operator config.
+        with open(container_scenario_config[0], "r") as f:
+            cont_scenario_config = yaml.full_load(f)
+        for cont_scenario in cont_scenario_config["scenarios"]:
+            if len(container_scenario_config) > 1:
+                pre_action_output = post_actions.run(kubeconfig_path, container_scenario_config[1])
+            else:
+                pre_action_output = ""
+            container_killing_in_pod(cont_scenario)
+            logging.info("Waiting for the specified duration: %s" % (wait_duration))
+            time.sleep(wait_duration)
+            failed_post_scenarios = post_actions.check_recovery(
+                kubeconfig_path, container_scenario_config, failed_post_scenarios, pre_action_output
+            )
+            cerberus.publish_kraken_status(config, failed_post_scenarios)
+            logging.info("")
+    # The caller assigns this result back to its running list of failures.
+    return failed_post_scenarios
+
+
+def container_killing_in_pod(cont_scenario):
+    """Kill one container in each of ``count`` randomly selected pods.
+
+    ``cont_scenario`` is a dict read with these keys and defaults:
+    ``name`` (""), ``namespace`` ("*"), ``label_selector`` (None),
+    ``pod_names`` ([]), ``container_name`` (""), ``action`` ("kill 1")
+    and ``count`` (1).
+
+    Candidate pods are ``pod_names`` when given (a concrete namespace is then
+    required), otherwise the pods matching ``label_selector`` in ``namespace``
+    or in all namespaces when ``namespace`` is "*". In each chosen pod the
+    container named ``container_name`` is killed, or the first container when
+    no name is given. A pod that has no matching container is dropped from
+    the candidates and does not count as a kill.
+
+    Exits the process with status 1 when ``pod_names`` is not a list, when
+    ``pod_names`` is used with namespace "*", or when fewer killable pods
+    than ``count`` are found.
+    """
+    scenario_name = cont_scenario.get("name", "")
+    namespace = cont_scenario.get("namespace", "*")
+    label_selector = cont_scenario.get("label_selector", None)
+    pod_names = cont_scenario.get("pod_names", [])
+    container_name = cont_scenario.get("container_name", "")
+    kill_action = cont_scenario.get("action", "kill 1")
+    kill_count = cont_scenario.get("count", 1)
+    if not isinstance(pod_names, list):
+        logging.error("Please make sure your pod_names are in a list format")
+        sys.exit(1)
+    if pod_names:
+        if namespace == "*":
+            logging.error("You must specify the namespace to kill a container in a specific pod")
+            logging.error("Scenario " + scenario_name + " failed")
+            sys.exit(1)
+        pods = pod_names
+    elif namespace == "*":
+        # returns double array of pod name and namespace
+        pods = kubecli.get_all_pods(label_selector)
+    else:
+        # Only returns pod names
+        pods = kubecli.list_pods(namespace, label_selector)
+
+    # Build [pod name, namespace, container names] for every candidate pod
+    container_pod_list = []
+    for pod in pods:
+        if isinstance(pod, list):
+            pod_name, pod_namespace = pod[0], pod[1]
+        else:
+            pod_name, pod_namespace = pod, namespace
+        # split() without an argument yields [] for empty output instead of [""]
+        container_names = runcommand.invoke(
+            'oc get pods %s -n %s -o jsonpath="{.spec.containers[*].name}"' % (pod_name, pod_namespace)
+        ).split()
+        container_pod_list.append([pod_name, pod_namespace, container_names])
+
+    killed_count = 0
+    while killed_count < kill_count:
+        if not container_pod_list:
+            logging.error("Trying to kill more containers than were found, try lowering kill count")
+            logging.error("Scenario " + scenario_name + " failed")
+            sys.exit(1)
+        selected_container_pod = random.choice(container_pod_list)
+        # Each pod is tried at most once per scenario
+        container_pod_list.remove(selected_container_pod)
+        pod_name, pod_namespace, container_names = selected_container_pod
+        if container_name:
+            target = container_name if container_name in container_names else None
+        else:
+            target = container_names[0] if container_names else None
+        if target is None:
+            # Nothing killable in this pod; pick another one without counting it
+            continue
+        retry_container_killing(kill_action, pod_name, pod_namespace, target)
+        killed_count += 1
+    logging.info("Scenario " + scenario_name + " successfully injected")
+
+
+def retry_container_killing(kill_action, podname, namespace, container_name):
+    """Run ``kill_action`` inside the given container, trying at most 5 times.
+
+    Stops as soon as ``kubecli.exec_cmd_in_pod`` gives back a falsy response.
+    When the response mentions "unauthorized" or "authorization" the next
+    attempt is delayed by 2 seconds; any other response is retried at once.
+    """
+    for _attempt in range(5):
+        logging.info("Killing container %s in pod %s (ns %s)" % (str(container_name), str(podname), str(namespace)))
+        response = kubecli.exec_cmd_in_pod(kill_action, podname, namespace, container_name)
+        # Blank response means it is done
+        if not response:
+            return
+        lowered = response.lower()
+        if "unauthorized" in lowered or "authorization" in lowered:
+            time.sleep(2)
--- a/run_kraken.py
+++ b/run_kraken.py
@@ -119,7 +119,11 @@ def main(cfg):
                            failed_post_scenarios = pod_scenarios.run(
                                kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration
                            )
-
+                        elif scenario_type == "container_scenarios":
+                            logging.info("Running container scenarios")
+                            failed_post_scenarios = pod_scenarios.container_run(
+                                kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration
+                            )
                        # Inject node chaos scenarios specified in the config
                        elif scenario_type == "node_scenarios":
                            logging.info("Running node scenarios")
--- a/scenarios/container_etcd.yml
+++ b/scenarios/container_etcd.yml
@@ -0,0 +1,7 @@
+scenarios:
+- name: "kill etcd container"
+  namespace: "openshift-etcd"
+  label_selector: "k8s-app=etcd"
+  container_name: "etcd"
+  action: "kill 1"
+  count: 1