mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-14 18:10:00 +00:00
Add duration parameter for node scenarios
This option is enabled only for the node_stop_start scenario, where the user wants to keep the node stopped for a certain duration to understand the impact before starting the node back up. This commit also bumps the timeout for the scenario from 120 seconds to 360 seconds to make sure there is enough time for the node to reach the Ready state on the Kubernetes side after the node is started on the infra side. Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
import kraken.invoke.command as runcommand
|
||||
import kraken.node_actions.common_node_functions as nodeaction
|
||||
from krkn_lib.k8s import KrknKubernetes
|
||||
@@ -18,9 +19,11 @@ class abstract_node_scenarios:
|
||||
pass
|
||||
|
||||
# Node scenario to stop and then start the node
|
||||
def node_stop_start_scenario(self, instance_kill_count, node, timeout):
|
||||
def node_stop_start_scenario(self, instance_kill_count, node, timeout, duration):
|
||||
logging.info("Starting node_stop_start_scenario injection")
|
||||
self.node_stop_scenario(instance_kill_count, node, timeout)
|
||||
logging.info("Waiting for %s seconds before starting the node" % (duration))
|
||||
time.sleep(duration)
|
||||
self.node_start_scenario(instance_kill_count, node, timeout)
|
||||
logging.info("node_stop_start_scenario has been successfully injected!")
|
||||
|
||||
|
||||
@@ -100,6 +100,8 @@ def inject_node_scenario(action, node_scenario, node_scenario_object, kubecli: K
|
||||
)
|
||||
node_name = get_yaml_item_value(node_scenario, "node_name", "")
|
||||
label_selector = get_yaml_item_value(node_scenario, "label_selector", "")
|
||||
if action == "node_stop_start_scenario":
|
||||
duration = get_yaml_item_value(node_scenario, "duration", 120)
|
||||
timeout = get_yaml_item_value(node_scenario, "timeout", 120)
|
||||
service = get_yaml_item_value(node_scenario, "service", "")
|
||||
ssh_private_key = get_yaml_item_value(
|
||||
@@ -121,7 +123,7 @@ def inject_node_scenario(action, node_scenario, node_scenario_object, kubecli: K
|
||||
elif action == "node_stop_scenario":
|
||||
node_scenario_object.node_stop_scenario(run_kill_count, single_node, timeout)
|
||||
elif action == "node_stop_start_scenario":
|
||||
node_scenario_object.node_stop_start_scenario(run_kill_count, single_node, timeout)
|
||||
node_scenario_object.node_stop_start_scenario(run_kill_count, single_node, timeout, duration)
|
||||
elif action == "node_termination_scenario":
|
||||
node_scenario_object.node_termination_scenario(run_kill_count, single_node, timeout)
|
||||
elif action == "node_reboot_scenario":
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
node_scenarios:
|
||||
- actions: # node chaos scenarios to be injected
|
||||
- node_stop_start_scenario
|
||||
- stop_start_kubelet_scenario
|
||||
- node_crash_scenario
|
||||
node_name: # node on which scenario has to be injected; can set multiple names separated by comma
|
||||
label_selector: node-role.kubernetes.io/worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
|
||||
instance_count: 1 # Number of nodes to perform action/select that match the label selector
|
||||
runs: 1 # number of times to inject each scenario under actions (will perform on same node each time)
|
||||
timeout: 120 # duration to wait for completion of node scenario injection
|
||||
cloud_type: aws # cloud type on which Kubernetes/OpenShift runs
|
||||
timeout: 360 # duration to wait for completion of node scenario injection
|
||||
duration: 120 # duration to stop the node before running the start action
|
||||
cloud_type: aws # cloud type on which Kubernetes/OpenShift runs
|
||||
- actions:
|
||||
- node_reboot_scenario
|
||||
node_name:
|
||||
|
||||
@@ -6,3 +6,11 @@ node_scenarios:
|
||||
instance_count: 1
|
||||
timeout: 120
|
||||
cloud_type: azure
|
||||
- actions:
|
||||
- node_stop_start_scenario
|
||||
node_name:
|
||||
label_selector: node-role.kubernetes.io/infra
|
||||
instance_count: 1
|
||||
timeout: 360
|
||||
duration: 120
|
||||
cloud_type: azure
|
||||
|
||||
@@ -5,8 +5,9 @@ node_scenarios:
|
||||
label_selector: node-role.kubernetes.io/worker # When node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection.
|
||||
instance_count: 1 # Number of nodes to perform action/select that match the label selector.
|
||||
runs: 1 # Number of times to inject each scenario under actions (will perform on same node each time).
|
||||
timeout: 120 # Duration to wait for completion of node scenario injection.
|
||||
cloud_type: bm # Cloud type on which Kubernetes/OpenShift runs.
|
||||
timeout: 360 # Duration to wait for completion of node scenario injection.
|
||||
duration: 120 # Duration to stop the node before running the start action
|
||||
cloud_type: bm # Cloud type on which Kubernetes/OpenShift runs.
|
||||
bmc_user: defaultuser # For baremetal (bm) cloud type. The default IPMI username. Optional if specified for all machines.
|
||||
bmc_password: defaultpass # For baremetal (bm) cloud type. The default IPMI password. Optional if specified for all machines.
|
||||
bmc_info: # This section is here to specify baremetal per-machine info, so it is optional if there is no per-machine info.
|
||||
|
||||
@@ -6,3 +6,11 @@ node_scenarios:
|
||||
instance_count: 1
|
||||
timeout: 120
|
||||
cloud_type: gcp
|
||||
- actions:
|
||||
- node_stop_start_scenario
|
||||
node_name:
|
||||
label_selector: node-role.kubernetes.io/worker
|
||||
instance_count: 1
|
||||
timeout: 360
|
||||
duration: 120
|
||||
cloud_type: gcp
|
||||
|
||||
@@ -5,5 +5,6 @@
|
||||
label_selector: "node-role.kubernetes.io/worker" # When node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
|
||||
runs: 1 # Number of times to inject each scenario under actions (will perform on same node each time)
|
||||
instance_count: 1 # Number of nodes to perform action/select that match the label selector
|
||||
timeout: 30 # Duration to wait for completion of node scenario injection
|
||||
skip_openshift_checks: False # Set to True if you don't want to wait for the status of the nodes to change on OpenShift before passing the scenario
|
||||
timeout: 360 # Duration to wait for completion of node scenario injection
|
||||
duration: 120 # Duration to stop the node before running the start action
|
||||
skip_openshift_checks: False # Set to True if you don't want to wait for the status of the nodes to change on OpenShift before passing the scenario
|
||||
|
||||
Reference in New Issue
Block a user