From 543729b18ac4b10a2460654170c2e41f7a4d6c1c Mon Sep 17 00:00:00 2001 From: Tullio Sebastiani Date: Wed, 8 Oct 2025 22:10:27 +0200 Subject: [PATCH] Add exclude_label functionality to pod disruption scenarios (#910) * kill pod exclude label Signed-off-by: Tullio Sebastiani * config alignment Signed-off-by: Tullio Sebastiani --------- Signed-off-by: Tullio Sebastiani --- .../pod_disruption/models/models.py | 4 +++- .../pod_disruption_scenario_plugin.py | 14 ++++++++++++-- scenarios/openshift/etcd.yml | 1 + scenarios/openshift/openshift-apiserver.yml | 1 + scenarios/openshift/openshift-kube-apiserver.yml | 1 + scenarios/openshift/prom_kill.yml | 3 ++- scenarios/openshift/regex_openshift_pod_kill.yml | 1 + 7 files changed, 21 insertions(+), 4 deletions(-) diff --git a/krkn/scenario_plugins/pod_disruption/models/models.py b/krkn/scenario_plugins/pod_disruption/models/models.py index 6c4f2246..b763fdd5 100644 --- a/krkn/scenario_plugins/pod_disruption/models/models.py +++ b/krkn/scenario_plugins/pod_disruption/models/models.py @@ -13,6 +13,7 @@ class InputParams: self.name_pattern = config["name_pattern"] if "name_pattern" in config else "" self.node_label_selector = config["node_label_selector"] if "node_label_selector" in config else "" self.node_names = config["node_names"] if "node_names" in config else [] + self.exclude_label = config["exclude_label"] if "exclude_label" in config else "" namespace_pattern: str krkn_pod_recovery_time: int @@ -22,4 +23,5 @@ class InputParams: label_selector: str name_pattern: str node_label_selector: str - node_names: list \ No newline at end of file + node_names: list + exclude_label: str \ No newline at end of file diff --git a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py index 07b5156d..fcaf1c85 100644 --- a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py +++ b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py @@ -191,6 +191,13 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): logging.error('Namespace pattern must be specified') pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names) + exclude_pods = set() + if config.exclude_label: + _exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names) + for pod in _exclude_pods: + exclude_pods.add(pod[0]) + + pods_count = len(pods) if len(pods) < config.kill: logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format( @@ -201,8 +208,11 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): for i in range(config.kill): pod = pods[i] logging.info(pod) - logging.info(f'Deleting pod {pod[0]}') - kubecli.delete_pod(pod[0], pod[1]) + if pod[0] in exclude_pods: + logging.info(f"Excluding {pod[0]} from chaos") + else: + logging.info(f'Deleting pod {pod[0]}') + kubecli.delete_pod(pod[0], pod[1]) self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names) return 0 diff --git a/scenarios/openshift/etcd.yml b/scenarios/openshift/etcd.yml index bb91b399..b5916373 100755 --- a/scenarios/openshift/etcd.yml +++ b/scenarios/openshift/etcd.yml @@ -4,3 +4,4 @@ namespace_pattern: ^openshift-etcd$ label_selector: k8s-app=etcd krkn_pod_recovery_time: 120 + exclude_label: "" # excludes pods marked with this label from chaos diff --git a/scenarios/openshift/openshift-apiserver.yml b/scenarios/openshift/openshift-apiserver.yml index bd8458ad..dabc4c79 100755 --- a/scenarios/openshift/openshift-apiserver.yml +++ b/scenarios/openshift/openshift-apiserver.yml @@ -4,4 +4,5 @@ namespace_pattern: ^openshift-apiserver$ label_selector: app=openshift-apiserver-a krkn_pod_recovery_time: 120 + exclude_label: "" # excludes pods marked with this label from chaos diff --git a/scenarios/openshift/openshift-kube-apiserver.yml b/scenarios/openshift/openshift-kube-apiserver.yml index 324653fa..decee177 100755 --- a/scenarios/openshift/openshift-kube-apiserver.yml +++ b/scenarios/openshift/openshift-kube-apiserver.yml @@ -4,4 +4,5 @@ namespace_pattern: ^openshift-kube-apiserver$ label_selector: app=openshift-kube-apiserver krkn_pod_recovery_time: 120 + exclude_label: "" # excludes pods marked with this label from chaos diff --git a/scenarios/openshift/prom_kill.yml b/scenarios/openshift/prom_kill.yml index d1b89570..e1337404 100644 --- a/scenarios/openshift/prom_kill.yml +++ b/scenarios/openshift/prom_kill.yml @@ -2,4 +2,5 @@ config: namespace_pattern: ^openshift-monitoring$ label_selector: statefulset.kubernetes.io/pod-name=prometheus-k8s-0 - krkn_pod_recovery_time: 120 \ No newline at end of file + krkn_pod_recovery_time: 120 + exclude_label: "" # excludes pods marked with this label from chaos \ No newline at end of file diff --git a/scenarios/openshift/regex_openshift_pod_kill.yml b/scenarios/openshift/regex_openshift_pod_kill.yml index 377cd829..5aab9484 100755 --- a/scenarios/openshift/regex_openshift_pod_kill.yml +++ b/scenarios/openshift/regex_openshift_pod_kill.yml @@ -5,3 +5,4 @@ name_pattern: .* kill: 3 krkn_pod_recovery_time: 120 + exclude_label: "" # excludes pods marked with this label from chaos