mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-04-15 06:57:28 +00:00
@@ -36,6 +36,8 @@ Kraken supports pod, node, time/date and [litmus](https://github.com/litmuschaos
|
||||
|
||||
- [Pod Scenarios](docs/pod_scenarios.md)
|
||||
|
||||
- [Container Scenarios](docs/container_scenarios.md)
|
||||
|
||||
- [Node Scenarios](docs/node_scenarios.md)
|
||||
|
||||
- [Time Scenarios](docs/time_scenarios.md)
|
||||
|
||||
@@ -5,7 +5,9 @@ kraken:
|
||||
litmus_version: v1.10.0 # Litmus version to install
|
||||
litmus_uninstall: False # If you want to uninstall litmus if failure
|
||||
chaos_scenarios: # List of policies/chaos scenarios to load
|
||||
- pod_scenarios: # List of chaos pod scenarios to load
|
||||
- container_scenarios: # List of chaos container scenarios to load
|
||||
- - scenarios/container_etcd.yml
|
||||
- pod_scenarios:
|
||||
- - scenarios/etcd.yml
|
||||
- - scenarios/regex_openshift_pod_kill.yml
|
||||
- scenarios/post_action_regex.py
|
||||
@@ -19,7 +21,7 @@ kraken:
|
||||
- litmus_scenarios: # List of litmus scenarios to load
|
||||
- - https://hub.litmuschaos.io/api/chaos/1.10.0?file=charts/generic/node-cpu-hog/rbac.yaml
|
||||
- scenarios/node_hog_engine.yaml
|
||||
- cluster_shut_down_scenarios:
|
||||
- cluster_shut_down_scenarios:
|
||||
- - scenarios/cluster_shut_down_scenario.yml
|
||||
- scenarios/post_action_shut_down.py
|
||||
- namespace_scenarios:
|
||||
|
||||
17
docs/container_scenarios.md
Normal file
17
docs/container_scenarios.md
Normal file
@@ -0,0 +1,17 @@
|
||||
### Container Scenarios
|
||||
Kraken uses the `oc exec` command to `kill` specific containers in a pod.
|
||||
This can be based on the pod's namespace or labels. If you know the exact object you want to kill, you can also specify the container name or pod name in the scenario YAML file.
|
||||
These scenarios are in a simple YAML format that you can manipulate to run your specific tests, or you can use the pre-existing scenarios to see how it works.
|
||||
|
||||
#### Example Config
|
||||
The following are the components of Kubernetes/OpenShift for which a basic chaos scenario config exists today.
|
||||
|
||||
```
|
||||
scenarios:
|
||||
- name: "<Name of scenario>"
|
||||
namespace: "<specific namespace>" # can specify "*" if you want to find in all namespaces
|
||||
label_selector: "<label of pod(s)>"
|
||||
container_name: "<specific container name>" # This is optional; if omitted, the first container listed in each selected pod is killed
|
||||
pod_names: # This is optional, can take out and will select all pods with given namespace and label
|
||||
- <pod_name>
|
||||
```
|
||||
@@ -89,10 +89,13 @@ def list_killable_nodes(label_selector=None):
|
||||
|
||||
|
||||
# List pods in the given namespace
|
||||
def list_pods(namespace):
|
||||
def list_pods(namespace, label_selector=None):
|
||||
pods = []
|
||||
try:
|
||||
ret = cli.list_namespaced_pod(namespace, pretty=True)
|
||||
if label_selector:
|
||||
ret = cli.list_namespaced_pod(namespace, pretty=True, label_selector=label_selector)
|
||||
else:
|
||||
ret = cli.list_namespaced_pod(namespace, pretty=True)
|
||||
except ApiException as e:
|
||||
logging.error(
|
||||
"Exception when calling \
|
||||
@@ -116,20 +119,33 @@ def get_all_pods(label_selector=None):
|
||||
|
||||
|
||||
# Execute command in pod
|
||||
def exec_cmd_in_pod(command, pod_name, namespace, container=None):
    """Run ``command`` through ``bash -c`` inside a pod and return the result.

    Args:
        command: Shell command line, executed as ``bash -c <command>``.
        pod_name: Name of the pod to exec into.
        namespace: Namespace the pod lives in.
        container: Optional container name. It is forwarded to the exec call
            only when truthy; otherwise the argument is omitted entirely.

    Returns:
        Whatever ``stream`` returns for the exec call, or ``False`` if the
        call raised any exception.
    """
    exec_command = ["bash", "-c", command]
    stream_kwargs = {
        "command": exec_command,
        "stderr": True,
        "stdin": False,
        "stdout": True,
        "tty": False,
    }
    # Leave the key out rather than sending container=None, so the request is
    # identical to the one issued before the ``container`` parameter existed.
    if container:
        stream_kwargs["container"] = container
    try:
        ret = stream(cli.connect_get_namespaced_pod_exec, pod_name, namespace, **stream_kwargs)
    except Exception:
        # Deliberately best-effort: failures are reported to the caller as False.
        return False
    return ret
|
||||
|
||||
@@ -2,7 +2,11 @@ import logging
|
||||
import kraken.invoke.command as runcommand
|
||||
import kraken.cerberus.setup as cerberus
|
||||
import kraken.post_actions.actions as post_actions
|
||||
import kraken.kubernetes.client as kubecli
|
||||
import time
|
||||
import yaml
|
||||
import sys
|
||||
import random
|
||||
|
||||
|
||||
# Run pod based scenarios
|
||||
@@ -34,3 +38,98 @@ def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_dur
|
||||
except Exception as e:
|
||||
logging.error("Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e))
|
||||
return failed_post_scenarios
|
||||
|
||||
|
||||
def container_run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration):
    """Inject every container chaos scenario listed in ``scenarios_list``.

    Args:
        kubeconfig_path: Kubeconfig path, forwarded to the post-action helpers.
        scenarios_list: Sequence of entries where ``entry[0]`` is the path of a
            scenario YAML file and the optional ``entry[1]`` is an action handed
            to ``post_actions.run`` before each scenario is injected.
        config: Kraken configuration, forwarded to ``cerberus.publish_kraken_status``.
        failed_post_scenarios: Result of earlier recovery checks; it is replaced
            by the value returned from ``post_actions.check_recovery`` after
            every scenario.
        wait_duration: Seconds to sleep after each scenario has been injected.

    Returns:
        The updated ``failed_post_scenarios`` (unchanged when ``scenarios_list``
        is empty), so callers can keep assigning the result as they do for ``run``.
    """
    for container_scenario_config in scenarios_list:
        # NOTE(review): scenario files are parsed with yaml.full_load; prefer
        # yaml.safe_load if these files can ever come from an untrusted source.
        with open(container_scenario_config[0], "r") as f:
            cont_scenario_config = yaml.full_load(f)
        # The file is fully parsed above, so it is closed before the (slow)
        # kill / sleep / recovery cycle starts.
        for cont_scenario in cont_scenario_config["scenarios"]:
            if len(container_scenario_config) > 1:
                pre_action_output = post_actions.run(kubeconfig_path, container_scenario_config[1])
            else:
                pre_action_output = ""
            container_killing_in_pod(cont_scenario)
            logging.info("Waiting for the specified duration: %s" % (wait_duration))
            time.sleep(wait_duration)
            failed_post_scenarios = post_actions.check_recovery(
                kubeconfig_path, container_scenario_config, failed_post_scenarios, pre_action_output
            )
            cerberus.publish_kraken_status(config, failed_post_scenarios)
            logging.info("")
    return failed_post_scenarios
|
||||
|
||||
|
||||
def _list_container_names(pod_name, namespace):
    """Return the container names that ``oc get pods`` reports for one pod."""
    output = runcommand.invoke(
        'oc get pods %s -n %s -o jsonpath="{.spec.containers[*].name}"' % (pod_name, namespace)
    )
    # split() without an argument also drops empty fields and surrounding
    # whitespace, so empty output yields no names instead of [""].
    return output.split()


def container_killing_in_pod(cont_scenario):
    """Kill containers in randomly selected pods as described by one scenario.

    Args:
        cont_scenario: Mapping with the optional keys ``name``, ``namespace``
            (default ``"*"``), ``label_selector``, ``pod_names`` (list),
            ``container_name``, ``action`` (default ``"kill 1"``) and
            ``count`` (default ``1``).

    Behavior:
        Candidate pods are ``pod_names`` when given (a concrete namespace is
        then required), otherwise the pods found by namespace/label selector.
        Pods are drawn at random without replacement until ``count`` containers
        have been killed. In each drawn pod the container called
        ``container_name`` is targeted, or the first listed container when no
        name is configured. A pod without a matching container is skipped and
        does not count towards ``count``.

    Exits:
        Calls ``sys.exit(1)`` when ``pod_names`` is not a list, when
        ``pod_names`` is given without a namespace, or when the candidate pods
        run out before ``count`` containers were killed.
    """
    scenario_name = cont_scenario.get("name", "")
    namespace = cont_scenario.get("namespace", "*")
    label_selector = cont_scenario.get("label_selector", None)
    pod_names = cont_scenario.get("pod_names", [])
    container_name = cont_scenario.get("container_name", "")
    kill_action = cont_scenario.get("action", "kill 1")
    kill_count = cont_scenario.get("count", 1)

    if not isinstance(pod_names, list):
        logging.error("Please make sure your pod_names are in a list format")
        sys.exit(1)

    if pod_names:
        if namespace == "*":
            logging.error("You must specify the namespace to kill a container in a specific pod")
            logging.error("Scenario " + scenario_name + " failed")
            sys.exit(1)
        pods = pod_names
    elif namespace == "*":
        # returns double array of pod name and namespace
        pods = kubecli.get_all_pods(label_selector)
    else:
        # Only returns pod names
        pods = kubecli.list_pods(namespace, label_selector)

    # Build [pod name, namespace, container names] for every candidate pod
    container_pod_list = []
    for pod in pods:
        if isinstance(pod, list):
            pod_name, pod_namespace = pod[0], pod[1]
        else:
            pod_name, pod_namespace = pod, namespace
        container_pod_list.append([pod_name, pod_namespace, _list_container_names(pod_name, pod_namespace)])

    killed_count = 0
    while killed_count < kill_count:
        if not container_pod_list:
            logging.error("Trying to kill more containers than were found, try lowering kill count")
            logging.error("Scenario " + scenario_name + " failed")
            sys.exit(1)
        selected_container_pod = random.choice(container_pod_list)
        container_pod_list.remove(selected_container_pod)
        pod_name, pod_namespace, container_names = selected_container_pod

        if not container_name:
            # No specific container requested: target the first one listed
            target = container_names[0] if container_names else None
        elif container_name in container_names:
            target = container_name
        else:
            target = None

        if target is None:
            # Nothing was killed in this pod, so it must not count as a kill
            logging.info(
                "No matching container found in pod %s (ns %s), selecting another pod"
                % (str(pod_name), str(pod_namespace))
            )
            continue

        retry_container_killing(kill_action, pod_name, pod_namespace, target)
        killed_count += 1
    logging.info("Scenario " + scenario_name + " successfully injected")
|
||||
|
||||
|
||||
def retry_container_killing(kill_action, podname, namespace, container_name):
    """Run ``kill_action`` in a container, retrying up to five times.

    Each attempt executes the action through ``kubecli.exec_cmd_in_pod``. A
    falsy response is taken to mean the kill is done and ends the retries.
    A response mentioning "unauthorized" or "authorization" is followed by a
    two second pause before the next attempt; any other response is retried
    immediately. Nothing is returned.
    """
    for _attempt in range(5):
        logging.info("Killing container %s in pod %s (ns %s)" % (str(container_name), str(podname), str(namespace)))
        response = kubecli.exec_cmd_in_pod(kill_action, podname, namespace, container_name)
        # Blank response means it is done
        if not response:
            return
        lowered = response.lower()
        if "unauthorized" in lowered or "authorization" in lowered:
            time.sleep(2)
|
||||
|
||||
@@ -119,7 +119,11 @@ def main(cfg):
|
||||
failed_post_scenarios = pod_scenarios.run(
|
||||
kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration
|
||||
)
|
||||
|
||||
elif scenario_type == "container_scenarios":
|
||||
logging.info("Running container scenarios")
|
||||
failed_post_scenarios = pod_scenarios.container_run(
|
||||
kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration
|
||||
)
|
||||
# Inject node chaos scenarios specified in the config
|
||||
elif scenario_type == "node_scenarios":
|
||||
logging.info("Running node scenarios")
|
||||
|
||||
7
scenarios/container_etcd.yml
Executable file
7
scenarios/container_etcd.yml
Executable file
@@ -0,0 +1,7 @@
|
||||
scenarios:
|
||||
- name: "kill etcd container"
|
||||
namespace: "openshift-etcd"
|
||||
label_selector: "k8s-app=etcd"
|
||||
container_name: "etcd"
|
||||
action: "kill 1"
|
||||
count: 1
|
||||
Reference in New Issue
Block a user