mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-14 18:10:00 +00:00
Integrated cerberus for checking cluster health
This commit is contained in:
committed by
Naga Ravi Chaitanya Elluri
parent
b745a0404f
commit
f1c145e942
@@ -37,7 +37,7 @@ $ python3 run_kraken.py --config <config_file_location>
|
||||
The report is generated in the run directory and it contains the information about each chaos scenario injection along with timestamps.
|
||||
|
||||
#### Checking if the cluster is sane after failures injection
|
||||
[Cerberus](https://github.com/openshift-scale/cerberus) can be used to monitor the cluster under test and the aggregated go/no-go signal generated by it can be consumed by Kraken to determine pass/fail i.e make sure the Kubernetes/OpenShift cluste recovered fine after the failure injetion.
|
||||
[Cerberus](https://github.com/openshift-scale/cerberus) can be used to monitor the cluster under test, and the aggregated go/no-go signal generated by it can be consumed by Kraken to determine pass/fail, i.e., to make sure the Kubernetes/OpenShift cluster recovered fine after the failure injection. It is highly recommended to turn on the Cerberus health check feature available in Kraken after installing and setting up Cerberus. To do that, set cerberus_enabled to True and cerberus_url to the URL where Cerberus publishes the go/no-go signal in the config file.
|
||||
|
||||
### Kubernetes/OpenShift chaos scenarios supported
|
||||
Following are the components of Kubernetes/OpenShift for which a basic chaos scenario config exists today. It currently supports only pod-based scenarios; more will be added soon. Adding a new pod-based scenario is as simple as adding a new config under the scenarios directory and defining it in the config.
|
||||
|
||||
@@ -5,5 +5,9 @@ kraken:
|
||||
- scenarios/openshift-kube-apiserver.yml
|
||||
- scenarios/openshift-apiserver.yml
|
||||
|
||||
cerberus:
|
||||
cerberus_enabled: False # Enable it only when Cerberus is already installed
|
||||
cerberus_url: # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal
|
||||
|
||||
tunings:
|
||||
wait_duration: 60 # Duration to wait between each chaos scenario
|
||||
wait_duration: 60 # Duration to wait between each chaos scenario
|
||||
|
||||
@@ -6,6 +6,7 @@ import time
|
||||
import optparse
|
||||
import logging
|
||||
import yaml
|
||||
import requests
|
||||
import kraken.kubernetes.client as kubecli
|
||||
import kraken.invoke.command as runcommand
|
||||
import pyfiglet
|
||||
@@ -23,6 +24,7 @@ def main(cfg):
|
||||
config = yaml.full_load(f)
|
||||
kubeconfig_path = config["kraken"]["kubeconfig_path"]
|
||||
scenarios = config["kraken"]["scenarios"]
|
||||
cerberus_enabled = config["cerberus"]["cerberus_enabled"]
|
||||
wait_duration = config["tunings"]["wait_duration"]
|
||||
|
||||
# Initialize clients
|
||||
@@ -46,8 +48,18 @@ def main(cfg):
|
||||
logging.info("Scenario: %s has been successfully injected!" %(scenario))
|
||||
logging.info("Waiting for the specified duration: %s" %(wait_duration))
|
||||
time.sleep(wait_duration)
|
||||
except:
|
||||
logging.error("Failed to run scenario: %s, please check" %(scenario))
|
||||
if cerberus_enabled:
|
||||
cerberus_url = config["cerberus"]["cerberus_url"]
|
||||
if not cerberus_url:
|
||||
logging.error("url where Cerberus publishes True/False signal is not provided.")
|
||||
sys.exit(1)
|
||||
cerberus_status = requests.get(cerberus_url).content
|
||||
cerberus_status = True if cerberus_status == b'True' else False
|
||||
if not cerberus_status:
|
||||
logging.error("Received a no-go signal from Cerberus, looks like the cluster is unhealthy. Please check the Cerberus report for more details. Test failed.")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
logging.error("Failed to run scenario: %s. Encountered the following exception: %s" %(scenario, e))
|
||||
else:
|
||||
logging.error("Cannot find a config at %s, please check" % (cfg))
|
||||
sys.exit(1)
|
||||
|
||||
Reference in New Issue
Block a user