Merge pull request #120 from paigerube14/container_kill

Container kill
This commit is contained in:
Paige Rubendall
2021-07-15 15:07:58 -04:00
committed by GitHub
7 changed files with 163 additions and 16 deletions

View File

@@ -36,6 +36,8 @@ Kraken supports pod, node, time/date and [litmus](https://github.com/litmuschaos
- [Pod Scenarios](docs/pod_scenarios.md)
- [Container Scenarios](docs/container_scenarios.md)
- [Node Scenarios](docs/node_scenarios.md)
- [Time Scenarios](docs/time_scenarios.md)

View File

@@ -5,7 +5,9 @@ kraken:
litmus_version: v1.10.0 # Litmus version to install
litmus_uninstall: False # If you want to uninstall litmus if failure
chaos_scenarios: # List of policies/chaos scenarios to load
- pod_scenarios: # List of chaos pod scenarios to load
- container_scenarios: # List of chaos container scenarios to load
- - scenarios/container_etcd.yml
- pod_scenarios:
- - scenarios/etcd.yml
- - scenarios/regex_openshift_pod_kill.yml
- scenarios/post_action_regex.py
@@ -19,7 +21,7 @@ kraken:
- litmus_scenarios: # List of litmus scenarios to load
- - https://hub.litmuschaos.io/api/chaos/1.10.0?file=charts/generic/node-cpu-hog/rbac.yaml
- scenarios/node_hog_engine.yaml
- cluster_shut_down_scenarios:
- cluster_shut_down_scenarios:
- - scenarios/cluster_shut_down_scenario.yml
- scenarios/post_action_shut_down.py
- namespace_scenarios:

View File

@@ -0,0 +1,17 @@
### Container Scenarios
Kraken uses the `oc exec` command to `kill` specific containers in a pod.
This can be based on the pod's namespace or labels. If you know the exact object you want to kill, you can also specify the container name or pod name in the scenario yaml file.
These scenarios are in a simple yaml format that you can manipulate to run your specific tests, or you can use the pre-existing scenarios to see how it works.
#### Example Config
The following is a template for a container scenario config.
```
scenarios:
- name: "<Name of scenario>"
namespace: "<specific namespace>" # can specify "*" if you want to find in all namespaces
label_selector: "<label of pod(s)>"
container_name: "<specific container name>" # This is optional; if taken out, the first container listed in each selected pod is killed
pod_names: # This is optional, can take out and will select all pods with given namespace and label
- <pod_name>
```

View File

@@ -89,10 +89,13 @@ def list_killable_nodes(label_selector=None):
# List pods in the given namespace
def list_pods(namespace):
def list_pods(namespace, label_selector=None):
pods = []
try:
ret = cli.list_namespaced_pod(namespace, pretty=True)
if label_selector:
ret = cli.list_namespaced_pod(namespace, pretty=True, label_selector=label_selector)
else:
ret = cli.list_namespaced_pod(namespace, pretty=True)
except ApiException as e:
logging.error(
"Exception when calling \
@@ -116,20 +119,33 @@ def get_all_pods(label_selector=None):
# Execute command in pod
def exec_cmd_in_pod(command, pod_name, namespace, container=None):
    """Run a shell command inside a pod and return its output.

    The command is wrapped as ``bash -c <command>`` and executed through
    ``cli.connect_get_namespaced_pod_exec`` via ``stream``.

    Args:
        command: Shell command string to execute.
        pod_name: Name of the pod to exec into.
        namespace: Namespace the pod lives in.
        container: Optional container name. When truthy it is forwarded to
            the exec call; otherwise no ``container`` argument is passed.

    Returns:
        Whatever ``stream`` returns on success, or ``False`` if the exec
        call raised any exception.
    """
    exec_command = ["bash", "-c", command]
    # Build the keyword arguments once so the two call shapes (with and
    # without an explicit container) share a single stream() invocation.
    exec_kwargs = {
        "command": exec_command,
        "stderr": True,
        "stdin": False,
        "stdout": True,
        "tty": False,
    }
    if container:
        exec_kwargs["container"] = container
    try:
        return stream(cli.connect_get_namespaced_pod_exec, pod_name, namespace, **exec_kwargs)
    except Exception as e:
        # Callers rely on the False return value, so keep it, but leave a
        # trace of the failure instead of discarding it silently.
        logging.debug("Exec in pod %s (ns %s) failed: %s", pod_name, namespace, e)
        return False

View File

@@ -2,7 +2,11 @@ import logging
import kraken.invoke.command as runcommand
import kraken.cerberus.setup as cerberus
import kraken.post_actions.actions as post_actions
import kraken.kubernetes.client as kubecli
import time
import yaml
import sys
import random
# Run pod based scenarios
@@ -34,3 +38,98 @@ def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_dur
except Exception as e:
logging.error("Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e))
return failed_post_scenarios
def container_run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration):
    """Run every container chaos scenario listed in ``scenarios_list``.

    Each entry of ``scenarios_list`` is a sequence whose first item is the
    path to a scenario yaml file and whose optional second item is passed to
    ``post_actions.run`` before the kill is injected.

    Args:
        kubeconfig_path: Passed through to the post-action helpers.
        scenarios_list: Scenario entries as described above.
        config: Passed to ``cerberus.publish_kraken_status``.
        failed_post_scenarios: Accumulator updated by
            ``post_actions.check_recovery`` after each scenario.
        wait_duration: Seconds to sleep after each injected scenario.

    Returns:
        The updated ``failed_post_scenarios`` value, so the caller can keep
        threading it through subsequent scenario types.
    """
    for container_scenario_config in scenarios_list:
        with open(container_scenario_config[0], "r") as f:
            # NOTE(review): yaml.full_load can construct more than plain data;
            # consider yaml.safe_load if scenario files may be untrusted.
            cont_scenario_config = yaml.full_load(f)
        # The file is fully parsed above, so it does not need to stay open
        # while the (slow) scenarios run.
        for cont_scenario in cont_scenario_config["scenarios"]:
            if len(container_scenario_config) > 1:
                pre_action_output = post_actions.run(kubeconfig_path, container_scenario_config[1])
            else:
                pre_action_output = ""
            container_killing_in_pod(cont_scenario)
            logging.info("Waiting for the specified duration: %s" % (wait_duration))
            time.sleep(wait_duration)
            failed_post_scenarios = post_actions.check_recovery(
                kubeconfig_path, container_scenario_config, failed_post_scenarios, pre_action_output
            )
            cerberus.publish_kraken_status(config, failed_post_scenarios)
            logging.info("")
    # The caller assigns this function's result back to its own
    # failed_post_scenarios; without the return it would be reset to None.
    return failed_post_scenarios
def container_killing_in_pod(cont_scenario):
    """Kill containers in randomly chosen pods as described by one scenario.

    Recognised keys of ``cont_scenario`` (all optional):
        name: Scenario name used in log messages (default ``""``).
        namespace: Namespace to search, ``"*"`` for all (default ``"*"``).
        label_selector: Label selector used when listing pods.
        pod_names: Explicit list of pod names; requires a concrete namespace.
        container_name: Container to kill; when empty the first container
            listed for the selected pod is used.
        action: Command executed in the container (default ``"kill 1"``).
        count: Number of containers to kill (default ``1``).

    Exits the process with status 1 when ``pod_names`` is not a list, when
    ``pod_names`` is given together with namespace ``"*"``, or when fewer
    killable containers are found than ``count`` asks for.
    """
    scenario_name = cont_scenario.get("name", "")
    namespace = cont_scenario.get("namespace", "*")
    label_selector = cont_scenario.get("label_selector", None)
    pod_names = cont_scenario.get("pod_names", [])
    container_name = cont_scenario.get("container_name", "")
    kill_action = cont_scenario.get("action", "kill 1")
    kill_count = cont_scenario.get("count", 1)

    if not isinstance(pod_names, list):
        logging.error("Please make sure your pod_names are in a list format")
        sys.exit(1)

    if pod_names:
        if namespace == "*":
            logging.error("You must specify the namespace to kill a container in a specific pod")
            logging.error("Scenario " + scenario_name + " failed")
            sys.exit(1)
        pods = pod_names
    elif namespace == "*":
        # returns double array of pod name and namespace
        pods = kubecli.get_all_pods(label_selector)
    else:
        # Only returns pod names
        pods = kubecli.list_pods(namespace, label_selector)

    # get container and pod name
    container_pod_list = []
    for pod in pods:
        # Entries are either [pod_name, namespace] pairs or bare pod names.
        if isinstance(pod, list):
            pod_name, pod_namespace = pod[0], pod[1]
        else:
            pod_name, pod_namespace = pod, namespace
        container_names = runcommand.invoke(
            'oc get pods %s -n %s -o jsonpath="{.spec.containers[*].name}"' % (pod_name, pod_namespace)
        ).split(" ")
        container_pod_list.append([pod_name, pod_namespace, container_names])

    killed_count = 0
    while killed_count < kill_count:
        if not container_pod_list:
            logging.error("Trying to kill more containers than were found, try lowering kill count")
            logging.error("Scenario " + scenario_name + " failed")
            sys.exit(1)
        selected_container_pod = random.choice(container_pod_list)
        # Each pod is considered at most once.
        container_pod_list.remove(selected_container_pod)
        pod_name, pod_namespace, container_names = selected_container_pod

        if container_name != "":
            target = container_name if container_name in container_names else None
        else:
            target = container_names[0] if container_names else None
        if target is None:
            # Nothing to kill in this pod: do not count it, try another pod.
            continue

        retry_container_killing(kill_action, pod_name, pod_namespace, target)
        killed_count += 1

    logging.info("Scenario " + scenario_name + " successfully injected")
def retry_container_killing(kill_action, podname, namespace, container_name):
    """Run ``kill_action`` in a container, retrying up to five times.

    After each attempt the output of ``kubecli.exec_cmd_in_pod`` is checked:
    a falsy output ends the retries; an output mentioning "unauthorized" or
    "authorization" (case-insensitive) waits two seconds before the next
    attempt; any other output retries immediately.
    """
    max_attempts = 5
    for _ in range(max_attempts):
        logging.info("Killing container %s in pod %s (ns %s)" % (str(container_name), str(podname), str(namespace)))
        output = kubecli.exec_cmd_in_pod(kill_action, podname, namespace, container_name)
        # Blank response means it is done
        if not output:
            return
        lowered = output.lower()
        if "unauthorized" in lowered or "authorization" in lowered:
            time.sleep(2)

View File

@@ -119,7 +119,11 @@ def main(cfg):
failed_post_scenarios = pod_scenarios.run(
kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration
)
elif scenario_type == "container_scenarios":
logging.info("Running container scenarios")
failed_post_scenarios = pod_scenarios.container_run(
kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration
)
# Inject node chaos scenarios specified in the config
elif scenario_type == "node_scenarios":
logging.info("Running node scenarios")

7
scenarios/container_etcd.yml Executable file
View File

@@ -0,0 +1,7 @@
scenarios:
- name: "kill etcd container"
namespace: "openshift-etcd"
label_selector: "k8s-app=etcd"
container_name: "etcd"
action: "kill 1"
count: 1