mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-14 09:59:59 +00:00
Adding node_label_selector for pod scenarios (#888)
Some checks failed
Functional & Unit Tests / Functional & Unit Tests (push) Failing after 10m38s
Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped
* Adding node_label_selector for pod scenarios
* using kubernetes function, adding node_name and removing extra config
* adding CI test for custom pod scenario
* fixing comment
* adding test to workflow
* adding list parsing logic for krkn hub
* parsing not needed, as input is always []

Signed-off-by: Sahil Shah <sahshah@redhat.com>
2 .github/workflows/tests.yml vendored
@@ -73,6 +73,7 @@ jobs:
           echo "test_app_outages" >> ./CI/tests/functional_tests
           echo "test_container" >> ./CI/tests/functional_tests
           echo "test_pod" >> ./CI/tests/functional_tests
+          echo "test_customapp_pod" >> ./CI/tests/functional_tests
           echo "test_namespace" >> ./CI/tests/functional_tests
           echo "test_net_chaos" >> ./CI/tests/functional_tests
           echo "test_time" >> ./CI/tests/functional_tests
@@ -108,6 +109,7 @@ jobs:
           echo "test_app_outages" >> ./CI/tests/functional_tests
           echo "test_container" >> ./CI/tests/functional_tests
           echo "test_pod" >> ./CI/tests/functional_tests
+          echo "test_customapp_pod" >> ./CI/tests/functional_tests
           echo "test_namespace" >> ./CI/tests/functional_tests
           echo "test_net_chaos" >> ./CI/tests/functional_tests
           echo "test_time" >> ./CI/tests/functional_tests
18 CI/tests/test_customapp_pod.sh Executable file
@@ -0,0 +1,18 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+function functional_test_customapp_pod_node_selector {
+  export scenario_type="pod_disruption_scenarios"
+  export scenario_file="scenarios/openshift/customapp_pod.yaml"
+  export post_config=""
+  envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml
+
+  python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml
+  echo "Pod disruption with node_label_selector test: Success"
+}
+
+functional_test_customapp_pod_node_selector
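The new CI test drives everything through exported environment variables: `envsubst` renders `scenario_type`, `scenario_file`, and `post_config` into the shared config template to produce the per-scenario config. As a rough illustration only (not part of the PR), the same substitution step in Python with `string.Template`, assuming the template uses `$scenario_type`-style placeholders the way envsubst expects:

```python
# Hedged sketch: a Python stand-in for the envsubst step in the CI script.
# Assumes common_test_config.yaml uses $scenario_type / $scenario_file /
# $post_config placeholders, which envsubst fills from the environment.
import os
from string import Template

os.environ["scenario_type"] = "pod_disruption_scenarios"
os.environ["scenario_file"] = "scenarios/openshift/customapp_pod.yaml"
os.environ["post_config"] = ""

with open("CI/config/common_test_config.yaml") as f:
    template = Template(f.read())

# safe_substitute leaves any unknown placeholders untouched instead of raising
rendered = template.safe_substitute(os.environ)
with open("CI/config/customapp_pod_config.yaml", "w") as f:
    f.write(rendered)
```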
@@ -11,6 +11,8 @@ class InputParams:
         self.label_selector = config["label_selector"] if "label_selector" in config else ""
         self.namespace_pattern = config["namespace_pattern"] if "namespace_pattern" in config else ""
         self.name_pattern = config["name_pattern"] if "name_pattern" in config else ""
+        self.node_label_selector = config["node_label_selector"] if "node_label_selector" in config else ""
+        self.node_names = config["node_names"] if "node_names" in config else []

     namespace_pattern: str
     krkn_pod_recovery_time: int
@@ -18,4 +20,6 @@ class InputParams:
     duration: int
     kill: int
     label_selector: str
     name_pattern: str
+    node_label_selector: str
+    node_names: list
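The new fields follow the file's existing defaulting pattern: missing keys fall back to an empty string or empty list, so node targeting stays opt-in. A minimal standalone sketch of that pattern (the config values below are made up for illustration):

```python
# Hedged sketch of the InputParams defaulting pattern added in the diff.
config = {"namespace_pattern": "kube-system", "name_pattern": ".*", "kill": 1}

# Pattern used in the diff:
node_label_selector = config["node_label_selector"] if "node_label_selector" in config else ""
node_names = config["node_names"] if "node_names" in config else []

# dict.get with a default is the idiomatic equivalent:
assert node_label_selector == config.get("node_label_selector", "")
assert node_names == config.get("node_names", [])
```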
@@ -100,18 +100,86 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
             raise Exception(
                 f"impossible to determine monitor parameters, check {kill_scenario} configuration"
             )

+    def _select_pods_with_field_selector(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str, node_name: str = None):
+        """Helper function to select pods using either label_selector or name_pattern with field_selector, optionally filtered by node"""
+        # Combine field selectors if node targeting is specified
+        if node_name:
+            node_field_selector = f"spec.nodeName={node_name}"
+            if field_selector:
+                combined_field_selector = f"{field_selector},{node_field_selector}"
+            else:
+                combined_field_selector = node_field_selector
+        else:
+            combined_field_selector = field_selector
+
+        if label_selector:
+            return kubecli.select_pods_by_namespace_pattern_and_label(
+                label_selector=label_selector,
+                namespace_pattern=namespace,
+                field_selector=combined_field_selector
+            )
+        else:  # name_pattern
+            return kubecli.select_pods_by_name_pattern_and_namespace_pattern(
+                pod_name_pattern=name_pattern,
+                namespace_pattern=namespace,
+                field_selector=combined_field_selector
+            )
+
-    def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None):
+    def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None, quiet: bool = False):
         if label_selector and name_pattern:
             logging.error('Only one of name pattern or label pattern can be specified')
-        elif label_selector:
-            pods = kubecli.select_pods_by_namespace_pattern_and_label(label_selector=label_selector, namespace_pattern=namespace, field_selector=field_selector)
-        elif name_pattern:
-            pods = kubecli.select_pods_by_name_pattern_and_namespace_pattern(pod_name_pattern=name_pattern, namespace_pattern=namespace, field_selector=field_selector)
-        else:
-            logging.error('Name pattern or label pattern must be specified')
-        return pods
+            return []
+
+        if not label_selector and not name_pattern:
+            logging.error('Name pattern or label pattern must be specified')
+            return []
+
+        # If specific node names are provided, make multiple calls with field selector
+        if node_names:
+            if not quiet:
+                logging.info(f"Targeting pods on {len(node_names)} specific nodes")
+            all_pods = []
+            for node_name in node_names:
+                pods = self._select_pods_with_field_selector(
+                    name_pattern, label_selector, namespace, kubecli, field_selector, node_name
+                )
+                if pods:
+                    all_pods.extend(pods)
+            if not quiet:
+                logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
+            return all_pods
+
+        # Node label selector approach - use field selectors
+        if node_label_selector:
+            # Get nodes matching the label selector first
+            nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector)
+            if not nodes_with_label:
+                logging.info(f"No nodes found with label selector: {node_label_selector}")
+                return []
+
+            if not quiet:
+                logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
+            # Use field selector for each node
+            all_pods = []
+            for node_name in nodes_with_label:
+                pods = self._select_pods_with_field_selector(
+                    name_pattern, label_selector, namespace, kubecli, field_selector, node_name
+                )
+                if pods:
+                    all_pods.extend(pods)
+            if not quiet:
+                logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
+            return all_pods
+
+        # Standard pod selection (no node targeting)
+        return self._select_pods_with_field_selector(
+            name_pattern, label_selector, namespace, kubecli, field_selector
+        )

     def killing_pods(self, config: InputParams, kubecli: KrknKubernetes):
         # region Select target pods
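The key mechanic in `_select_pods_with_field_selector` is that Kubernetes field selectors compose as a comma-separated AND list, so an existing filter such as `status.phase=Running` can be pinned to one node by appending `spec.nodeName=<node>`. A standalone sketch of just that combination logic (the function name here is illustrative, not a krkn API):

```python
# Hedged sketch of the field-selector combination the helper performs.
def combine_field_selectors(field_selector: str, node_name: str | None = None) -> str:
    """Comma-join an existing field selector with a spec.nodeName clause."""
    if not node_name:
        return field_selector
    node_clause = f"spec.nodeName={node_name}"
    # Comma = logical AND in Kubernetes field selectors
    return f"{field_selector},{node_clause}" if field_selector else node_clause

# Example: restrict running pods to a single node.
assert combine_field_selectors("status.phase=Running", "worker-0") == \
    "status.phase=Running,spec.nodeName=worker-0"
assert combine_field_selectors("", "worker-0") == "spec.nodeName=worker-0"
assert combine_field_selectors("status.phase=Running") == "status.phase=Running"
```

Looping one API call per node (rather than one call with a multi-node selector) is necessary because `spec.nodeName` only supports equality, not set membership.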
@@ -120,7 +188,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
         if not namespace:
             logging.error('Namespace pattern must be specified')

-        pods = self.get_pods(config.name_pattern, config.label_selector, config.namespace_pattern, kubecli, field_selector="status.phase=Running")
+        pods = self.get_pods(config.name_pattern, config.label_selector, config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
         pods_count = len(pods)
         if len(pods) < config.kill:
             logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
@@ -129,23 +197,22 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):

         random.shuffle(pods)
         for i in range(config.kill):
             pod = pods[i]
-            logging.info(pod)
             logging.info(f'Deleting pod {pod[0]}')
             kubecli.delete_pod(pod[0], pod[1])

-        self.wait_for_pods(config.label_selector, config.name_pattern, config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli)
+        self.wait_for_pods(config.label_selector, config.name_pattern, config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
         return 0

     def wait_for_pods(
-        self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes
+        self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
     ):
         timeout = False
         start_time = datetime.now()

         while not timeout:
-            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector, namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli)
+            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector, namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
             if pod_count == len(pods):
                 return
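Threading the node filters into `wait_for_pods` matters because recovery is judged against the same pod population that was disrupted: the loop polls until the running-pod count returns to its pre-kill value. A generic sketch of that poll-until-recovered pattern, assuming a caller-supplied `count_running_pods()` callable (hypothetical, not a krkn API):

```python
# Hedged sketch of the recovery-wait loop: poll until the running-pod
# count returns to its pre-disruption value or a timeout elapses.
import time
from datetime import datetime, timedelta
from typing import Callable

def wait_for_recovery(count_running_pods: Callable[[], int],
                      expected_count: int,
                      wait_timeout_seconds: int,
                      poll_interval: float = 5.0) -> bool:
    """Return True if the pod count recovers before the timeout."""
    deadline = datetime.now() + timedelta(seconds=wait_timeout_seconds)
    while datetime.now() < deadline:
        if count_running_pods() == expected_count:
            return True
        time.sleep(poll_interval)
    return False
```

The `quiet=True` flag on the polled `get_pods` call keeps the per-iteration "Targeting pods on N nodes" messages out of the logs during this loop.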
@@ -1,6 +1,15 @@
 # yaml-language-server: $schema=../plugin.schema.json
 - id: kill-pods
   config:
-    namespace_pattern: ^acme-air$
+    namespace_pattern: "kube-system"
     name_pattern: .*
-    krkn_pod_recovery_time: 120
+    krkn_pod_recovery_time: 60
+    kill: 1 # num of pods to kill
+    # Not needed by default, but can be used if you want to target pods on specific nodes
+    # Option 1: Target pods on nodes with specific labels [master/worker nodes]
+    node_label_selector: node-role.kubernetes.io/control-plane= # Target control-plane nodes (works on both k8s and openshift)
+    # Option 2: Target pods on specific nodes (testing mixed node types)
+    # node_names:
+    #   - ip-10-0-31-8.us-east-2.compute.internal # Worker node 1
+    #   - ip-10-0-48-188.us-east-2.compute.internal # Worker node 2
+    #   - ip-10-0-14-59.us-east-2.compute.internal # Master node 1
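Before running the scenario, it can help to confirm that the configured `node_label_selector` actually matches nodes in the target cluster; an empty match means `get_pods` returns no targets. A hedged sketch using the official kubernetes Python client (not part of the PR):

```python
# Hedged sketch: list the nodes a node_label_selector would match,
# using the official kubernetes Python client.
from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
v1 = client.CoreV1Api()
nodes = v1.list_node(label_selector="node-role.kubernetes.io/control-plane=")
print("matching nodes:", [n.metadata.name for n in nodes.items])
```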