diff --git a/config/config.yaml b/config/config.yaml index 75a4c0e9..92609841 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,16 +1,19 @@ kraken: kubeconfig_path: /root/.kube/config # Path to kubeconfig exit_on_failure: False # Exit when a post action scenario fails - scenarios: # List of policies/chaos scenarios to load - - - scenarios/etcd.yml - - - scenarios/openshift-kube-apiserver.yml - - - scenarios/openshift-apiserver.yml - - - scenarios/regex_openshift_pod_kill.yml - - scenarios/post_action_regex.py - node_scenarios: # List of chaos node scenarios to load - - scenarios/node_scenarios_example.yml - time_scenarios: # List of chaos time scenarios to load - - scenarios/time_scenarios_example.yml + chaos_scenarios: # List of policies/chaos scenarios to load + - pod_scenarios: # List of chaos pod scenarios to load + - - scenarios/etcd.yml + - - scenarios/regex_openshift_pod_kill.yml + - scenarios/post_action_regex.py + - node_scenarios: # List of chaos node scenarios to load + - scenarios/node_scenarios_example.yml + - pod_scenarios: + - - scenarios/openshift-apiserver.yml + - - scenarios/openshift-kube-apiserver.yml + - time_scenarios: # List of chaos time scenarios to load + - scenarios/time_scenarios_example.yml + cerberus: cerberus_enabled: False # Enable it when cerberus is previously installed cerberus_url: # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal diff --git a/docs/config.md b/docs/config.md index 3b35a3cb..83057776 100644 --- a/docs/config.md +++ b/docs/config.md @@ -4,12 +4,19 @@ Set the scenarios to inject and the tunings like duration to wait between each s ``` kraken: kubeconfig_path: /root/.kube/config # Path to kubeconfig - scenarios: # List of policies/chaos scenarios to load - - scenarios/etcd.yml - - scenarios/openshift-kube-apiserver.yml - - scenarios/openshift-apiserver.yml - node_scenarios: # List of chaos node scenarios to load - - scenarios/node_scenarios_example.yml + exit_on_failure: False # Exit when a post action scenario fails + chaos_scenarios: # List of policies/chaos scenarios to load + - pod_scenarios: # List of chaos pod scenarios to load + - - scenarios/etcd.yml + - - scenarios/regex_openshift_pod_kill.yml + - scenarios/post_action_regex.py + - node_scenarios: # List of chaos node scenarios to load + - scenarios/node_scenarios_example.yml + - pod_scenarios: + - - scenarios/openshift-apiserver.yml + - - scenarios/openshift-kube-apiserver.yml + - time_scenarios: # List of chaos time scenarios to load + - scenarios/time_scenarios_example.yml cerberus: cerberus_enabled: False # Enable it when cerberus is previously installed @@ -19,3 +26,4 @@ tunings: wait_duration: 60 # Duration to wait between each chaos scenario iterations: 1 # Number of times to execute the scenarios daemon_mode: False # Iterations are set to infinity which means that the kraken will cause chaos forever +``` \ No newline at end of file diff --git a/run_kraken.py b/run_kraken.py index 2888f517..0bc1c6fc 100644 --- a/run_kraken.py +++ b/run_kraken.py @@ -152,6 +152,61 @@ def post_actions(kubeconfig_path, scenario, failed_post_scenarios, pre_action_ou return failed_post_scenarios +def pod_scenarios(scenarios_list, config, failed_post_scenarios): + try: + # Loop to run the scenarios starts here + for pod_scenario in scenarios_list: + if len(pod_scenario) > 1: + pre_action_output = run_post_action(kubeconfig_path, pod_scenario[1]) + else: + pre_action_output = '' + runcommand.invoke("powerfulseal autonomous --use-pod-delete-instead-of-ssh-kill" + " --policy-file %s --kubeconfig %s --no-cloud" + " --inventory-kubernetes --headless" + % (pod_scenario[0], kubeconfig_path)) + + logging.info("Scenario: %s has been successfully injected!" % (pod_scenario[0])) + logging.info("Waiting for the specified duration: %s" % (wait_duration)) + time.sleep(wait_duration) + + failed_post_scenarios = post_actions(kubeconfig_path, pod_scenario, + failed_post_scenarios, pre_action_output) + publish_kraken_status(config, failed_post_scenarios) + except Exception as e: + logging.error("Failed to run scenario: %s. Encountered the following " + "exception: %s" % (pod_scenario[0], e)) + return failed_post_scenarios + + +def node_scenarios(scenarios_list, config): + for node_scenario_config in scenarios_list: + with open(node_scenario_config, 'r') as f: + node_scenario_config = yaml.full_load(f) + for node_scenario in node_scenario_config['node_scenarios']: + node_scenario_object = get_node_scenario_object(node_scenario) + if node_scenario['actions']: + for action in node_scenario['actions']: + inject_node_scenario(action, node_scenario, node_scenario_object) + logging.info("Waiting for the specified duration: %s" % (wait_duration)) + time.sleep(wait_duration) + cerberus_integration(config) + logging.info("") + + +def time_scenarios(scenarios_list, config): + for time_scenario_config in scenarios_list: + with open(time_scenario_config, 'r') as f: + scenario_config = yaml.full_load(f) + for time_scenario in scenario_config['time_scenarios']: + object_type, object_names = time_actions.skew_time(time_scenario) + not_reset = time_actions.check_date_time(object_type, object_names) + if len(not_reset) > 0: + logging.info('Object times were not reset') + logging.info("Waiting for the specified duration: %s" % (wait_duration)) + time.sleep(wait_duration) + publish_kraken_status(config, not_reset) + + # Main function def main(cfg): # Start kraken @@ -162,10 +217,9 @@ def main(cfg): if os.path.isfile(cfg): with open(cfg, 'r') as f: config = yaml.full_load(f) + global kubeconfig_path, wait_duration kubeconfig_path = config["kraken"].get("kubeconfig_path", "") - scenarios = config["kraken"].get("scenarios", []) - node_scenarios = config["kraken"].get("node_scenarios", []) - time_scenarios = config['kraken'].get("time_scenarios", []) + chaos_scenarios = config["kraken"].get("chaos_scenarios", []) wait_duration = config["tunings"].get("wait_duration", 60) iterations = config["tunings"].get("iterations", 1) daemon_mode = config["tunings"].get("daemon_mode", False) @@ -205,63 +259,23 @@ def main(cfg): while (int(iteration) < iterations): # Inject chaos scenarios specified in the config logging.info("Executing scenarios for iteration " + str(iteration)) - if scenarios: - try: - # Loop to run the scenarios starts here - for scenario in scenarios: + if chaos_scenarios: + for scenario in chaos_scenarios: + scenario_type = list(scenario.keys())[0] + scenarios_list = scenario[scenario_type] + if scenarios_list: + # Inject pod chaos scenarios specified in the config + if scenario_type == "pod_scenarios": + failed_post_scenarios = pod_scenarios(scenarios_list, config, + failed_post_scenarios) - if len(scenario) > 1: - pre_action_output = run_post_action(kubeconfig_path, scenario[1]) - else: - pre_action_output = '' - runcommand.invoke("powerfulseal autonomous --use-pod-delete-instead-of-ssh-kill" # noqa - " --policy-file %s --kubeconfig %s --no-cloud" - " --inventory-kubernetes --headless" - % (scenario[0], kubeconfig_path)) + # Inject node chaos scenarios specified in the config + elif scenario_type == "node_scenarios": + node_scenarios(scenarios_list, config) - logging.info("Scenario: %s has been successfully injected!" % (scenario[0])) - logging.info("Waiting for the specified duration: %s" % (wait_duration)) - time.sleep(wait_duration) - - failed_post_scenarios = post_actions(kubeconfig_path, scenario, - failed_post_scenarios, - pre_action_output) - publish_kraken_status(config, failed_post_scenarios) - except Exception as e: - logging.error("Failed to run scenario: %s. Encountered the following " - "exception: %s" % (scenario[0], e)) - - # Inject node chaos scenarios specified in the config - if node_scenarios: - for node_scenario_config in node_scenarios: - with open(node_scenario_config, 'r') as f: - node_scenario_config = yaml.full_load(f) - for node_scenario in node_scenario_config['node_scenarios']: - node_scenario_object = get_node_scenario_object(node_scenario) - if node_scenario['actions']: - for action in node_scenario['actions']: - inject_node_scenario(action, node_scenario, - node_scenario_object) - logging.info("Waiting for the specified duration: %s" - % (wait_duration)) - time.sleep(wait_duration) - cerberus_integration(config) - logging.info("") - - # Inject time skew chaos scenarios specified in the config - if time_scenarios: - for time_scenario_config in time_scenarios: - with open(time_scenario_config, 'r') as f: - scenario_config = yaml.full_load(f) - for time_scenario in scenario_config['time_scenarios']: - object_type, object_names = time_actions.skew_time(time_scenario) - not_reset = time_actions.check_date_time(object_type, object_names) - if len(not_reset) > 0: - logging.info('Object times were not reset') - logging.info("Waiting for the specified duration: %s" - % wait_duration) - time.sleep(wait_duration) - publish_kraken_status(config, not_reset) + # Inject time skew chaos scenarios specified in the config + elif scenario_type == "time_scenarios": + time_scenarios(scenarios_list, config) iteration += 1 logging.info("")