Run all the litmus resources in a single namespace

- This eases the usage and debuggability by running the fault injection pods in
  the same namespace as other resources of litmus. This will also ease the
  deletion process and ensure that there are no leftover objects on the cluster.

- This commit also enables users to use the same RBAC template for all the litmus
  scenarios without having to pull in a specific one for each of the scenarios.
This commit is contained in:
Naga Ravi Chaitanya Elluri
2021-09-03 12:37:56 -04:00
parent 68a32666cd
commit 5da0b259c5
9 changed files with 82 additions and 35 deletions

View File

@@ -39,7 +39,6 @@ Instructions on how to setup the config and the options supported can be found a
### Kubernetes/OpenShift chaos scenarios supported
Kraken supports pod, node, time/date and [litmus](https://github.com/litmuschaos/litmus) based scenarios.
- [Pod Scenarios](docs/pod_scenarios.md)

View File

@@ -4,6 +4,7 @@ kraken:
exit_on_failure: False # Exit when a post action scenario fails
litmus_version: v1.13.6 # Litmus version to install
litmus_uninstall: False # Whether to uninstall litmus on failure
litmus_namespace: litmus # Namespace to configure and run litmus based scenarios
chaos_scenarios: # List of policies/chaos scenarios to load
- container_scenarios: # List of chaos pod scenarios to load
- - scenarios/container_etcd.yml
@@ -19,11 +20,11 @@ kraken:
- time_scenarios: # List of chaos time scenarios to load
- scenarios/time_scenarios_example.yml
- litmus_scenarios: # List of litmus scenarios to load
- - https://hub.litmuschaos.io/api/chaos/1.13.6?file=charts/generic/node-cpu-hog/rbac.yaml
- - https://raw.githubusercontent.com/cloud-bulldozer/kraken/master/scenarios/templates/litmus-rbac.yaml
- scenarios/node_cpu_hog_engine.yaml
- - https://hub.litmuschaos.io/api/chaos/1.13.6?file=charts/generic/node-memory-hog/rbac.yaml
- - https://raw.githubusercontent.com/cloud-bulldozer/kraken/master/scenarios/templates/litmus-rbac.yaml
- scenarios/node_mem_engine.yaml
- - https://hub.litmuschaos.io/api/chaos/1.13.6?file=charts/generic/node-io-stress/rbac.yaml
- - https://raw.githubusercontent.com/cloud-bulldozer/kraken/master/scenarios/templates/litmus-rbac.yaml
- scenarios/node_io_engine.yaml
- cluster_shut_down_scenarios:
- - scenarios/cluster_shut_down_scenario.yml

View File

@@ -4,6 +4,7 @@ kraken:
exit_on_failure: False # Exit when a post action scenario fails
litmus_version: v1.10.0 # Litmus version to install
litmus_uninstall: False # Whether to uninstall litmus on failure
litmus_namespace: litmus # Namespace to configure and run litmus based scenarios
chaos_scenarios: # List of policies/chaos scenarios to load
- pod_scenarios: # List of chaos pod scenarios to load
- - scenarios/etcd.yml

View File

@@ -8,7 +8,7 @@ import kraken.cerberus.setup as cerberus
# Inject litmus scenarios defined in the config
def run(scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_duration):
def run(scenarios_list, config, litmus_uninstall, wait_duration, litmus_namespace):
# Loop to run the scenarios starts here
for l_scenario in scenarios_list:
start_time = int(time.time())
@@ -25,12 +25,10 @@ def run(scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_durati
if yaml_item["kind"] == "ChaosEngine":
engine_name = yaml_item["metadata"]["name"]
namespace = yaml_item["metadata"]["namespace"]
litmus_namespaces.append(namespace)
experiment_names = yaml_item["spec"]["experiments"]
for expr in experiment_names:
expr_name = expr["name"]
experiment_result = check_experiment(engine_name, expr_name, namespace)
experiment_result = check_experiment(engine_name, expr_name, litmus_namespace)
if experiment_result:
logging.info("Scenario: %s has been successfully injected!" % item)
else:
@@ -51,28 +49,27 @@ def run(scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_durati
except Exception as e:
logging.error("Failed to run litmus scenario: %s. Encountered " "the following exception: %s" % (item, e))
sys.exit(1)
return litmus_namespaces
# Install litmus and wait until pod is running
def install_litmus(version):
def install_litmus(version, namespace):
litmus_install = runcommand.invoke(
"kubectl apply -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version
"kubectl -n %s apply -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % (namespace, version)
)
if "unable" in litmus_install:
logging.info("Unable to install litmus because " + str(litmus_install))
sys.exit(1)
runcommand.invoke(
"oc patch -n litmus deployment.apps/chaos-operator-ce --type=json --patch ' "
"oc patch -n %s deployment.apps/chaos-operator-ce --type=json --patch ' "
'[ { "op": "add", "path": "/spec/template/spec/containers/0/env/-", '
'"value": { "name": "ANALYTICS", "value": "FALSE" } } ]\''
'"value": { "name": "ANALYTICS", "value": "FALSE" } } ]\'' % namespace
)
runcommand.invoke("oc wait deploy -n litmus chaos-operator-ce --for=condition=Available")
runcommand.invoke("oc wait deploy -n %s chaos-operator-ce --for=condition=Available" % namespace)
def deploy_all_experiments(version_string):
def deploy_all_experiments(version_string, namespace):
if not version_string.startswith("v"):
logging.error("Incorrect version string for litmus, needs to start with 'v' " "followed by a number")
@@ -80,12 +77,13 @@ def deploy_all_experiments(version_string):
version = version_string[1:]
runcommand.invoke(
"kubectl apply -f " "https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" % version
"kubectl -n %s apply -f "
"https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" % (namespace, version)
)
def delete_experiments():
runcommand.invoke("kubectl delete chaosengine --all")
def delete_experiments(namespace):
runcommand.invoke("kubectl -n %s delete chaosengine --all" % namespace)
# Check status of experiment

View File

@@ -37,6 +37,7 @@ def main(cfg):
chaos_scenarios = config["kraken"].get("chaos_scenarios", [])
litmus_version = config["kraken"].get("litmus_version", "v1.9.1")
litmus_uninstall = config["kraken"].get("litmus_uninstall", False)
litmus_namespace = config["kraken"].get("litmus_namespace", "litmus")
wait_duration = config["tunings"].get("wait_duration", 60)
iterations = config["tunings"].get("iterations", 1)
daemon_mode = config["tunings"].get("daemon_mode", False)
@@ -101,7 +102,6 @@ def main(cfg):
iterations = int(iterations)
failed_post_scenarios = []
litmus_namespaces = []
litmus_installed = False
# Capture the start time
@@ -142,12 +142,12 @@ def main(cfg):
elif scenario_type == "litmus_scenarios":
logging.info("Running litmus scenarios")
if not litmus_installed:
common_litmus.install_litmus(litmus_version)
common_litmus.deploy_all_experiments(litmus_version)
common_litmus.install_litmus(litmus_version, litmus_namespace)
common_litmus.deploy_all_experiments(litmus_version, litmus_namespace)
litmus_installed = True
litmus_namespaces = common_litmus.run(
scenarios_list, config, litmus_namespaces, litmus_uninstall, wait_duration,
)
common_litmus.run(
scenarios_list, config, litmus_uninstall, wait_duration, litmus_namespace,
)
# Inject cluster shutdown scenarios
elif scenario_type == "cluster_shut_down_scenarios":
@@ -197,9 +197,8 @@ def main(cfg):
sys.exit(1)
if litmus_uninstall and litmus_installed:
for namespace in litmus_namespaces:
common_litmus.delete_chaos(namespace)
common_litmus.delete_experiments()
common_litmus.delete_chaos(litmus_namespace)
common_litmus.delete_experiments(litmus_namespace)
common_litmus.uninstall_litmus(litmus_version)
if failed_post_scenarios:

View File

@@ -2,13 +2,13 @@ apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: nginx-chaos
namespace: default
namespace: litmus
spec:
# It can be true/false
annotationCheck: 'false'
# It can be active/stop
engineState: 'active'
chaosServiceAccount: node-cpu-hog-sa
chaosServiceAccount: litmus-sa
monitoring: false
# It can be delete/retain
jobCleanUpPolicy: 'delete'
@@ -23,7 +23,7 @@ spec:
# Number of cores of node CPU to be consumed
- name: NODE_CPU_CORE
value: ''
value: '1'
# percentage of total nodes to target
- name: NODES_AFFECTED_PERC
@@ -31,4 +31,4 @@ spec:
# ENTER THE COMMA SEPARATED TARGET NODES NAME
- name: TARGET_NODES
value: ''
value: '<node_name>'

View File

@@ -2,13 +2,13 @@ apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: nginx-chaos
namespace: default
namespace: litmus
spec:
# It can be delete/retain
jobCleanUpPolicy: 'retain'
# It can be active/stop
engineState: 'active'
chaosServiceAccount: node-io-stress-sa
chaosServiceAccount: litmus-sa
experiments:
- name: node-io-stress
spec:

View File

@@ -2,13 +2,13 @@ apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: nginx-chaos
namespace: default
namespace: litmus
spec:
# It can be delete/retain
jobCleanUpPolicy: 'retain'
# It can be active/stop
engineState: 'active'
chaosServiceAccount: node-memory-hog-sa
chaosServiceAccount: litmus-sa
experiments:
- name: node-memory-hog
spec:

View File

@@ -0,0 +1,49 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: litmus-sa
namespace: litmus
labels:
name: litmus-sa
app.kubernetes.io/part-of: litmus
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: litmus-sa
labels:
name: litmus-sa
app.kubernetes.io/part-of: litmus
rules:
- apiGroups: [""]
resources: ["pods","events"]
verbs: ["create","list","get","patch","update","delete","deletecollection"]
- apiGroups: [""]
resources: ["pods/exec","pods/log"]
verbs: ["list","get","create"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create","list","get","delete","deletecollection"]
- apiGroups: ["litmuschaos.io"]
resources: ["chaosengines","chaosexperiments","chaosresults"]
verbs: ["create","list","get","patch","update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get","list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: litmus-sa
labels:
name: litmus-sa
app.kubernetes.io/part-of: litmus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: litmus-sa
subjects:
- kind: ServiceAccount
name: litmus-sa
namespace: litmus