Add exclude_label functionality to pod disruption scenarios (#910)

* kill pod exclude label

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* config alignment

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

---------

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
Author: Tullio Sebastiani
Date: 2025-10-08 22:10:27 +02:00
Committed by: GitHub
Parent: a0ea4dc749
Commit: 543729b18a
7 changed files with 21 additions and 4 deletions

@@ -13,6 +13,7 @@ class InputParams:
         self.name_pattern = config["name_pattern"] if "name_pattern" in config else ""
         self.node_label_selector = config["node_label_selector"] if "node_label_selector" in config else ""
         self.node_names = config["node_names"] if "node_names" in config else []
+        self.exclude_label = config["exclude_label"] if "exclude_label" in config else ""
     namespace_pattern: str
     krkn_pod_recovery_time: int
@@ -22,4 +23,5 @@ class InputParams:
     label_selector: str
     name_pattern: str
     node_label_selector: str
-    node_names: list
+    node_names: list
+    exclude_label: str
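
For context, a minimal standalone sketch of how the new field behaves when a scenario config omits it (the class name and key-handling pattern mirror the diff above; everything else is illustrative):

class InputParams:
    def __init__(self, config: dict):
        # same "key present -> value, otherwise default" pattern as in the diff
        self.label_selector = config["label_selector"] if "label_selector" in config else ""
        self.exclude_label = config["exclude_label"] if "exclude_label" in config else ""

params = InputParams({"label_selector": "app=demo"})
assert params.exclude_label == ""  # key omitted -> empty string, exclusion disabled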

@@ -191,6 +191,13 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
             logging.error('Namespace pattern must be specified')
         pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
+        exclude_pods = set()
+        if config.exclude_label:
+            _exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
+            for pod in _exclude_pods:
+                exclude_pods.add(pod[0])
         pods_count = len(pods)
         if len(pods) < config.kill:
             logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
@@ -201,8 +208,11 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
         for i in range(config.kill):
             pod = pods[i]
             logging.info(pod)
-            logging.info(f'Deleting pod {pod[0]}')
-            kubecli.delete_pod(pod[0], pod[1])
+            if pod[0] in exclude_pods:
+                logging.info(f"Excluding {pod[0]} from chaos")
+            else:
+                logging.info(f'Deleting pod {pod[0]}')
+                kubecli.delete_pod(pod[0], pod[1])
         self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
         return 0
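
The control flow introduced above, isolated from the plugin: a minimal sketch that uses plain (name, namespace) tuples and a stand-in delete function instead of the real kubecli client; all names below are hypothetical.

def disrupt(pods, exclude_pods, kill, delete_pod):
    """Delete at most `kill` pods, skipping any whose name is in exclude_pods."""
    for name, namespace in pods[:kill]:
        if name in exclude_pods:
            print(f"Excluding {name} from chaos")
        else:
            print(f"Deleting pod {name}")
            delete_pod(name, namespace)

# Pods that matched exclude_label land in exclude_pods and are spared. Note that an
# excluded pod still occupies one of the `kill` candidate slots, so a run may delete
# fewer than `kill` pods rather than picking a replacement victim.
disrupt(
    pods=[("etcd-guard-0", "openshift-etcd"), ("etcd-0", "openshift-etcd")],
    exclude_pods={"etcd-guard-0"},
    kill=2,
    delete_pod=lambda name, namespace: None,  # stand-in for kubecli.delete_pod
)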

@@ -4,3 +4,4 @@
     namespace_pattern: ^openshift-etcd$
     label_selector: k8s-app=etcd
     krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos

@@ -4,4 +4,5 @@
     namespace_pattern: ^openshift-apiserver$
     label_selector: app=openshift-apiserver-a
     krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos

@@ -4,4 +4,5 @@
     namespace_pattern: ^openshift-kube-apiserver$
     label_selector: app=openshift-kube-apiserver
     krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos

@@ -2,4 +2,5 @@
   config:
     namespace_pattern: ^openshift-monitoring$
     label_selector: statefulset.kubernetes.io/pod-name=prometheus-k8s-0
-    krkn_pod_recovery_time: 120
+    krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos

@@ -5,3 +5,4 @@
     name_pattern: .*
     kill: 3
     krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos