mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-14 18:10:00 +00:00
Run all the litmus resources in a single namespace
- This eases usage and debuggability by running the fault injection pods in the same namespace as the other litmus resources. It also eases the deletion process and ensures that no leftover objects remain on the cluster. - This commit also enables users to use the same RBAC template for all the litmus scenarios without having to pull in a specific one for each of the scenarios.
This commit is contained in:
@@ -39,7 +39,6 @@ Instructions on how to setup the config and the options supported can be found a
|
||||
|
||||
|
||||
### Kubernetes/OpenShift chaos scenarios supported
|
||||
Kraken supports pod, node, time/date and [litmus](https://github.com/litmuschaos/litmus) based scenarios.
|
||||
|
||||
- [Pod Scenarios](docs/pod_scenarios.md)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ kraken:
|
||||
exit_on_failure: False # Exit when a post action scenario fails
|
||||
litmus_version: v1.13.6 # Litmus version to install
|
||||
litmus_uninstall: False # Whether to uninstall litmus if a failure occurs
|
||||
litmus_namespace: litmus # Namespace to configure and run litmus based scenarios
|
||||
chaos_scenarios: # List of policies/chaos scenarios to load
|
||||
- container_scenarios: # List of chaos pod scenarios to load
|
||||
- - scenarios/container_etcd.yml
|
||||
@@ -19,11 +20,11 @@ kraken:
|
||||
- time_scenarios: # List of chaos time scenarios to load
|
||||
- scenarios/time_scenarios_example.yml
|
||||
- litmus_scenarios: # List of litmus scenarios to load
|
||||
- - https://hub.litmuschaos.io/api/chaos/1.13.6?file=charts/generic/node-cpu-hog/rbac.yaml
|
||||
- - https://raw.githubusercontent.com/cloud-bulldozer/kraken/master/scenarios/templates/litmus-rbac.yaml
|
||||
- scenarios/node_cpu_hog_engine.yaml
|
||||
- - https://hub.litmuschaos.io/api/chaos/1.13.6?file=charts/generic/node-memory-hog/rbac.yaml
|
||||
- - https://raw.githubusercontent.com/cloud-bulldozer/kraken/master/scenarios/templates/litmus-rbac.yaml
|
||||
- scenarios/node_mem_engine.yaml
|
||||
- - https://hub.litmuschaos.io/api/chaos/1.13.6?file=charts/generic/node-io-stress/rbac.yaml
|
||||
- - https://raw.githubusercontent.com/cloud-bulldozer/kraken/master/scenarios/templates/litmus-rbac.yaml
|
||||
- scenarios/node_io_engine.yaml
|
||||
- cluster_shut_down_scenarios:
|
||||
- - scenarios/cluster_shut_down_scenario.yml
|
||||
|
||||
@@ -4,6 +4,7 @@ kraken:
|
||||
exit_on_failure: False # Exit when a post action scenario fails
|
||||
litmus_version: v1.10.0 # Litmus version to install
|
||||
litmus_uninstall: False # Whether to uninstall litmus if a failure occurs
|
||||
litmus_namespace: litmus # Namespace to configure and run litmus based scenarios
|
||||
chaos_scenarios: # List of policies/chaos scenarios to load
|
||||
- pod_scenarios: # List of chaos pod scenarios to load
|
||||
- - scenarios/etcd.yml
|
||||
|
||||
@@ -8,7 +8,7 @@ import kraken.cerberus.setup as cerberus
|
||||
|
||||
|
||||
# Inject litmus scenarios defined in the config
|
||||
def run(scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_duration):
|
||||
def run(scenarios_list, config, litmus_uninstall, wait_duration, litmus_namespace):
|
||||
# Loop to run the scenarios starts here
|
||||
for l_scenario in scenarios_list:
|
||||
start_time = int(time.time())
|
||||
@@ -25,12 +25,10 @@ def run(scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_durati
|
||||
|
||||
if yaml_item["kind"] == "ChaosEngine":
|
||||
engine_name = yaml_item["metadata"]["name"]
|
||||
namespace = yaml_item["metadata"]["namespace"]
|
||||
litmus_namespaces.append(namespace)
|
||||
experiment_names = yaml_item["spec"]["experiments"]
|
||||
for expr in experiment_names:
|
||||
expr_name = expr["name"]
|
||||
experiment_result = check_experiment(engine_name, expr_name, namespace)
|
||||
experiment_result = check_experiment(engine_name, expr_name, litmus_namespace)
|
||||
if experiment_result:
|
||||
logging.info("Scenario: %s has been successfully injected!" % item)
|
||||
else:
|
||||
@@ -51,28 +49,27 @@ def run(scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_durati
|
||||
except Exception as e:
|
||||
logging.error("Failed to run litmus scenario: %s. Encountered " "the following exception: %s" % (item, e))
|
||||
sys.exit(1)
|
||||
return litmus_namespaces
|
||||
|
||||
|
||||
# Install litmus and wait until pod is running
|
||||
def install_litmus(version):
|
||||
def install_litmus(version, namespace):
|
||||
litmus_install = runcommand.invoke(
|
||||
"kubectl apply -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version
|
||||
"kubectl -n %s apply -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % (namespace, version)
|
||||
)
|
||||
if "unable" in litmus_install:
|
||||
logging.info("Unable to install litmus because " + str(litmus_install))
|
||||
sys.exit(1)
|
||||
|
||||
runcommand.invoke(
|
||||
"oc patch -n litmus deployment.apps/chaos-operator-ce --type=json --patch ' "
|
||||
"oc patch -n %s deployment.apps/chaos-operator-ce --type=json --patch ' "
|
||||
'[ { "op": "add", "path": "/spec/template/spec/containers/0/env/-", '
|
||||
'"value": { "name": "ANALYTICS", "value": "FALSE" } } ]\''
|
||||
'"value": { "name": "ANALYTICS", "value": "FALSE" } } ]\'' % namespace
|
||||
)
|
||||
|
||||
runcommand.invoke("oc wait deploy -n litmus chaos-operator-ce --for=condition=Available")
|
||||
runcommand.invoke("oc wait deploy -n %s chaos-operator-ce --for=condition=Available" % namespace)
|
||||
|
||||
|
||||
def deploy_all_experiments(version_string):
|
||||
def deploy_all_experiments(version_string, namespace):
|
||||
|
||||
if not version_string.startswith("v"):
|
||||
logging.error("Incorrect version string for litmus, needs to start with 'v' " "followed by a number")
|
||||
@@ -80,12 +77,13 @@ def deploy_all_experiments(version_string):
|
||||
version = version_string[1:]
|
||||
|
||||
runcommand.invoke(
|
||||
"kubectl apply -f " "https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" % version
|
||||
"kubectl -n %s apply -f "
|
||||
"https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" % (namespace, version)
|
||||
)
|
||||
|
||||
|
||||
def delete_experiments():
|
||||
runcommand.invoke("kubectl delete chaosengine --all")
|
||||
def delete_experiments(namespace):
|
||||
runcommand.invoke("kubectl -n %s delete chaosengine --all" % namespace)
|
||||
|
||||
|
||||
# Check status of experiment
|
||||
|
||||
@@ -37,6 +37,7 @@ def main(cfg):
|
||||
chaos_scenarios = config["kraken"].get("chaos_scenarios", [])
|
||||
litmus_version = config["kraken"].get("litmus_version", "v1.9.1")
|
||||
litmus_uninstall = config["kraken"].get("litmus_uninstall", False)
|
||||
litmus_namespace = config["kraken"].get("litmus_namespace", "litmus")
|
||||
wait_duration = config["tunings"].get("wait_duration", 60)
|
||||
iterations = config["tunings"].get("iterations", 1)
|
||||
daemon_mode = config["tunings"].get("daemon_mode", False)
|
||||
@@ -101,7 +102,6 @@ def main(cfg):
|
||||
iterations = int(iterations)
|
||||
|
||||
failed_post_scenarios = []
|
||||
litmus_namespaces = []
|
||||
litmus_installed = False
|
||||
|
||||
# Capture the start time
|
||||
@@ -142,12 +142,12 @@ def main(cfg):
|
||||
elif scenario_type == "litmus_scenarios":
|
||||
logging.info("Running litmus scenarios")
|
||||
if not litmus_installed:
|
||||
common_litmus.install_litmus(litmus_version)
|
||||
common_litmus.deploy_all_experiments(litmus_version)
|
||||
common_litmus.install_litmus(litmus_version, litmus_namespace)
|
||||
common_litmus.deploy_all_experiments(litmus_version, litmus_namespace)
|
||||
litmus_installed = True
|
||||
litmus_namespaces = common_litmus.run(
|
||||
scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_duration,
|
||||
)
|
||||
common_litmus.run(
|
||||
scenarios_list, config, litmus_uninstall, wait_duration, litmus_namespace,
|
||||
)
|
||||
|
||||
# Inject cluster shutdown scenarios
|
||||
elif scenario_type == "cluster_shut_down_scenarios":
|
||||
@@ -197,9 +197,8 @@ def main(cfg):
|
||||
sys.exit(1)
|
||||
|
||||
if litmus_uninstall and litmus_installed:
|
||||
for namespace in litmus_namespaces:
|
||||
common_litmus.delete_chaos(namespace)
|
||||
common_litmus.delete_experiments()
|
||||
common_litmus.delete_chaos(litmus_namespace)
|
||||
common_litmus.delete_experiments(litmus_namespace)
|
||||
common_litmus.uninstall_litmus(litmus_version)
|
||||
|
||||
if failed_post_scenarios:
|
||||
|
||||
@@ -2,13 +2,13 @@ apiVersion: litmuschaos.io/v1alpha1
|
||||
kind: ChaosEngine
|
||||
metadata:
|
||||
name: nginx-chaos
|
||||
namespace: default
|
||||
namespace: litmus
|
||||
spec:
|
||||
# It can be true/false
|
||||
annotationCheck: 'false'
|
||||
# It can be active/stop
|
||||
engineState: 'active'
|
||||
chaosServiceAccount: node-cpu-hog-sa
|
||||
chaosServiceAccount: litmus-sa
|
||||
monitoring: false
|
||||
# It can be delete/retain
|
||||
jobCleanUpPolicy: 'delete'
|
||||
@@ -23,7 +23,7 @@ spec:
|
||||
|
||||
# Number of cores of node CPU to be consumed
|
||||
- name: NODE_CPU_CORE
|
||||
value: ''
|
||||
value: '1'
|
||||
|
||||
# percentage of total nodes to target
|
||||
- name: NODES_AFFECTED_PERC
|
||||
@@ -31,4 +31,4 @@ spec:
|
||||
|
||||
# ENTER THE COMMA SEPARATED TARGET NODES NAME
|
||||
- name: TARGET_NODES
|
||||
value: ''
|
||||
value: '<node_name>'
|
||||
|
||||
@@ -2,13 +2,13 @@ apiVersion: litmuschaos.io/v1alpha1
|
||||
kind: ChaosEngine
|
||||
metadata:
|
||||
name: nginx-chaos
|
||||
namespace: default
|
||||
namespace: litmus
|
||||
spec:
|
||||
# It can be delete/retain
|
||||
jobCleanUpPolicy: 'retain'
|
||||
# It can be active/stop
|
||||
engineState: 'active'
|
||||
chaosServiceAccount: node-io-stress-sa
|
||||
chaosServiceAccount: litmus-sa
|
||||
experiments:
|
||||
- name: node-io-stress
|
||||
spec:
|
||||
|
||||
@@ -2,13 +2,13 @@ apiVersion: litmuschaos.io/v1alpha1
|
||||
kind: ChaosEngine
|
||||
metadata:
|
||||
name: nginx-chaos
|
||||
namespace: default
|
||||
namespace: litmus
|
||||
spec:
|
||||
# It can be delete/retain
|
||||
jobCleanUpPolicy: 'retain'
|
||||
# It can be active/stop
|
||||
engineState: 'active'
|
||||
chaosServiceAccount: node-memory-hog-sa
|
||||
chaosServiceAccount: litmus-sa
|
||||
experiments:
|
||||
- name: node-memory-hog
|
||||
spec:
|
||||
|
||||
49
scenarios/templates/litmus-rbac.yaml
Normal file
49
scenarios/templates/litmus-rbac.yaml
Normal file
@@ -0,0 +1,49 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: litmus-sa
|
||||
namespace: litmus
|
||||
labels:
|
||||
name: litmus-sa
|
||||
app.kubernetes.io/part-of: litmus
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: litmus-sa
|
||||
labels:
|
||||
name: litmus-sa
|
||||
app.kubernetes.io/part-of: litmus
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["pods","events"]
|
||||
verbs: ["create","list","get","patch","update","delete","deletecollection"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods/exec","pods/log"]
|
||||
verbs: ["list","get","create"]
|
||||
- apiGroups: ["batch"]
|
||||
resources: ["jobs"]
|
||||
verbs: ["create","list","get","delete","deletecollection"]
|
||||
- apiGroups: ["litmuschaos.io"]
|
||||
resources: ["chaosengines","chaosexperiments","chaosresults"]
|
||||
verbs: ["create","list","get","patch","update"]
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get","list"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: litmus-sa
|
||||
labels:
|
||||
name: litmus-sa
|
||||
app.kubernetes.io/part-of: litmus
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: litmus-sa
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: litmus-sa
|
||||
namespace: litmus
|
||||
Reference in New Issue
Block a user