diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c42d61c4..db761db2 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -73,6 +73,7 @@ jobs:
           echo "test_app_outages" >> ./CI/tests/functional_tests
           echo "test_container" >> ./CI/tests/functional_tests
           echo "test_pod" >> ./CI/tests/functional_tests
+          echo "test_customapp_pod" >> ./CI/tests/functional_tests
           echo "test_namespace" >> ./CI/tests/functional_tests
           echo "test_net_chaos" >> ./CI/tests/functional_tests
           echo "test_time" >> ./CI/tests/functional_tests
@@ -108,6 +109,7 @@ jobs:
           echo "test_app_outages" >> ./CI/tests/functional_tests
           echo "test_container" >> ./CI/tests/functional_tests
           echo "test_pod" >> ./CI/tests/functional_tests
+          echo "test_customapp_pod" >> ./CI/tests/functional_tests
           echo "test_namespace" >> ./CI/tests/functional_tests
           echo "test_net_chaos" >> ./CI/tests/functional_tests
           echo "test_time" >> ./CI/tests/functional_tests
diff --git a/CI/tests/test_customapp_pod.sh b/CI/tests/test_customapp_pod.sh
new file mode 100755
index 00000000..c07869c8
--- /dev/null
+++ b/CI/tests/test_customapp_pod.sh
@@ -0,0 +1,18 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+function functional_test_customapp_pod_node_selector {
+  export scenario_type="pod_disruption_scenarios"
+  export scenario_file="scenarios/openshift/customapp_pod.yaml"
+  export post_config=""
+  envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml
+
+  python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml
+  echo "Pod disruption with node_label_selector test: Success"
+}
+
+functional_test_customapp_pod_node_selector
diff --git a/krkn/scenario_plugins/pod_disruption/models/models.py b/krkn/scenario_plugins/pod_disruption/models/models.py
index c1c26c8c..6c4f2246 100644
--- a/krkn/scenario_plugins/pod_disruption/models/models.py
+++ b/krkn/scenario_plugins/pod_disruption/models/models.py
@@ -11,6 +11,8 @@ class InputParams:
         self.label_selector = config["label_selector"] if "label_selector" in config else ""
         self.namespace_pattern = config["namespace_pattern"] if "namespace_pattern" in config else ""
         self.name_pattern = config["name_pattern"] if "name_pattern" in config else ""
+        self.node_label_selector = config["node_label_selector"] if "node_label_selector" in config else ""
+        self.node_names = config["node_names"] if "node_names" in config else []
 
     namespace_pattern: str
     krkn_pod_recovery_time: int
@@ -18,4 +20,6 @@ class InputParams:
     duration: int
     kill: int
     label_selector: str
-    name_pattern: str
\ No newline at end of file
+    name_pattern: str
+    node_label_selector: str
+    node_names: list
\ No newline at end of file
diff --git a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py
index 692adc66..105b27ec 100644
--- a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py
+++ b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py
@@ -100,18 +100,86 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
             raise Exception(
                 f"impossible to determine monitor parameters, check {kill_scenario} configuration"
            )
+
+    def _select_pods_with_field_selector(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str, node_name: str = None):
+        """Helper function to select pods using either label_selector or name_pattern with field_selector, optionally filtered by node"""
+        # Combine field selectors if node targeting is specified
+        if node_name:
+            node_field_selector = f"spec.nodeName={node_name}"
+            if field_selector:
+                combined_field_selector = f"{field_selector},{node_field_selector}"
+            else:
+                combined_field_selector = node_field_selector
+        else:
+            combined_field_selector = field_selector
+        if label_selector:
+            return kubecli.select_pods_by_namespace_pattern_and_label(
+                label_selector=label_selector,
+                namespace_pattern=namespace,
+                field_selector=combined_field_selector
+            )
+        else: # name_pattern
+            return kubecli.select_pods_by_name_pattern_and_namespace_pattern(
+                pod_name_pattern=name_pattern,
+                namespace_pattern=namespace,
+                field_selector=combined_field_selector
+            )
 
-    def get_pods(self, name_pattern, label_selector,namespace, kubecli: KrknKubernetes, field_selector: str =None):
+    def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None, quiet: bool = False):
         if label_selector and name_pattern:
             logging.error('Only, one of name pattern or label pattern can be specified')
-        elif label_selector:
-            pods = kubecli.select_pods_by_namespace_pattern_and_label(label_selector=label_selector,namespace_pattern=namespace, field_selector=field_selector)
-        elif name_pattern:
-            pods = kubecli.select_pods_by_name_pattern_and_namespace_pattern(pod_name_pattern=name_pattern, namespace_pattern=namespace, field_selector=field_selector)
-        else:
+            return []
+
+        if not label_selector and not name_pattern:
             logging.error('Name pattern or label pattern must be specified ')
-        return pods
+            return []
+
+        # If specific node names are provided, make multiple calls with field selector
+        if node_names:
+            if not quiet:
+                logging.info(f"Targeting pods on {len(node_names)} specific nodes")
+            all_pods = []
+            for node_name in node_names:
+                pods = self._select_pods_with_field_selector(
+                    name_pattern, label_selector, namespace, kubecli, field_selector, node_name
+                )
+
+                if pods:
+                    all_pods.extend(pods)
+
+            if not quiet:
+                logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
+            return all_pods
+
+        # Node label selector approach - use field selectors
+        if node_label_selector:
+            # Get nodes matching the label selector first
+            nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector)
+            if not nodes_with_label:
+                logging.info(f"No nodes found with label selector: {node_label_selector}")
+                return []
+
+            if not quiet:
+                logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
+            # Use field selector for each node
+            all_pods = []
+            for node_name in nodes_with_label:
+                pods = self._select_pods_with_field_selector(
+                    name_pattern, label_selector, namespace, kubecli, field_selector, node_name
+                )
+
+                if pods:
+                    all_pods.extend(pods)
+
+            if not quiet:
+                logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
+            return all_pods
+
+        # Standard pod selection (no node targeting)
+        return self._select_pods_with_field_selector(
+            name_pattern, label_selector, namespace, kubecli, field_selector
+        )
 
     def killing_pods(self, config: InputParams, kubecli: KrknKubernetes):
         # region Select target pods
@@ -120,7 +188,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 
         if not namespace:
             logging.error('Namespace pattern must be specified')
-        pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running")
+        pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
         pods_count = len(pods)
         if len(pods) < config.kill:
             logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
@@ -129,23 +197,22 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
         random.shuffle(pods)
         for i in range(config.kill):
-
             pod = pods[i]
             logging.info(pod)
             logging.info(f'Deleting pod {pod[0]}')
             kubecli.delete_pod(pod[0], pod[1])
 
-        self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli)
+        self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
         return 0
 
     def wait_for_pods(
-        self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes
+        self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
     ):
         timeout = False
         start_time = datetime.now()
         while not timeout:
-            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli)
+            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
 
             if pod_count == len(pods):
                 return
diff --git a/scenarios/openshift/customapp_pod.yaml b/scenarios/openshift/customapp_pod.yaml
index b060119a..d3abe869 100644
--- a/scenarios/openshift/customapp_pod.yaml
+++ b/scenarios/openshift/customapp_pod.yaml
@@ -1,6 +1,15 @@
 # yaml-language-server: $schema=../plugin.schema.json
 - id: kill-pods
   config:
-    namespace_pattern: ^acme-air$
+    namespace_pattern: "kube-system"
     name_pattern: .*
-    krkn_pod_recovery_time: 120
\ No newline at end of file
+    krkn_pod_recovery_time: 60
+    kill: 1 # num of pods to kill
+    # Not needed by default, but can be used if you want to target pods on specific nodes
+    # Option 1: Target pods on nodes with specific labels [master/worker nodes]
+    node_label_selector: node-role.kubernetes.io/control-plane= # Target control-plane nodes (works on both k8s and openshift)
+    # Option 2: Target pods on specific nodes (testing mixed node types)
+    # node_names:
+    #   - ip-10-0-31-8.us-east-2.compute.internal # Worker node 1
+    #   - ip-10-0-48-188.us-east-2.compute.internal # Worker node 2
+    #   - ip-10-0-14-59.us-east-2.compute.internal # Master node 1
\ No newline at end of file