adding ibm power using request calls (#923 )

Signed-off-by: Paige Patton <prubenda@redhat.com>
Adds an exclude label for node scenarios (#929 )
2026-02-19 20:40:33 +00:00 · 2025-10-28 12:57:20 -04:00 · 2025-10-28 16:55:16 +01:00 · 2025-10-24 13:21:01 -04:00 · 2025-10-17 17:06:35 +02:00 · 2025-10-17 12:27:53 +02:00
41 changed files with 1004 additions and 795 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+*   @paigerube14 @tsebastiani @chaitanyaenr
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -16,14 +16,19 @@ jobs:
        uses: redhat-chaos/actions/kind@main
      - name: Deploy prometheus & Port Forwarding
        uses: redhat-chaos/actions/prometheus@main
-      
      - name: Deploy Elasticsearch
        with:
-          ELASTIC_URL: ${{ vars.ELASTIC_URL }}
-          ELASTIC_PORT: ${{ vars.ELASTIC_PORT }}
-          ELASTIC_USER: ${{ vars.ELASTIC_USER }}
-          ELASTIC_PASSWORD: ${{ vars.ELASTIC_PASSWORD }}
+          ELASTIC_PORT: ${{ env.ELASTIC_PORT }}
+          RUN_ID: ${{ github.run_id }}
        uses: redhat-chaos/actions/elastic@main
+      - name: Download elastic password
+        uses: actions/download-artifact@v4
+        with:
+          name: elastic_password_${{ github.run_id }}
+      - name: Set elastic password on env
+        run: |
+          ELASTIC_PASSWORD=$(cat elastic_password.txt)
+          echo "ELASTIC_PASSWORD=$ELASTIC_PASSWORD" >> "$GITHUB_ENV"
      - name: Install Python
        uses: actions/setup-python@v4
        with:
@@ -37,7 +42,8 @@ jobs:

      - name: Deploy test workloads
        run: |
-          es_pod_name=$(kubectl get pods -l "app.kubernetes.io/instance=elasticsearch" -o name)
+          es_pod_name=$(kubectl get pods -l "app=elasticsearch-master" -o name)
+          echo "POD_NAME: $es_pod_name"
          kubectl --namespace default port-forward $es_pod_name 9200 &
          prom_name=$(kubectl get pods -n monitoring -l "app.kubernetes.io/name=prometheus" -o name)
          kubectl --namespace monitoring port-forward $prom_name 9090 &
@@ -68,11 +74,13 @@ jobs:
            yq -i '.elastic.elastic_port=9200' CI/config/common_test_config.yaml
            yq -i '.elastic.elastic_url="https://localhost"' CI/config/common_test_config.yaml
            yq -i '.elastic.enable_elastic=True' CI/config/common_test_config.yaml
+            yq -i '.elastic.password="${{env.ELASTIC_PASSWORD}}"' CI/config/common_test_config.yaml
            yq -i '.performance_monitoring.prometheus_url="http://localhost:9090"' CI/config/common_test_config.yaml
            echo "test_service_hijacking" > ./CI/tests/functional_tests
            echo "test_app_outages" >> ./CI/tests/functional_tests
            echo "test_container"      >> ./CI/tests/functional_tests
            echo "test_pod" >> ./CI/tests/functional_tests
+            echo "test_customapp_pod" >> ./CI/tests/functional_tests
            echo "test_namespace"      >> ./CI/tests/functional_tests
            echo "test_net_chaos"      >> ./CI/tests/functional_tests
            echo "test_time"           >> ./CI/tests/functional_tests
@@ -98,6 +106,7 @@ jobs:
          yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml
          yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml
          yq -i '.elastic.enable_elastic=True' CI/config/common_test_config.yaml
+          yq -i '.elastic.password="${{env.ELASTIC_PASSWORD}}"' CI/config/common_test_config.yaml
          yq -i '.elastic.elastic_port=9200' CI/config/common_test_config.yaml
          yq -i '.elastic.elastic_url="https://localhost"' CI/config/common_test_config.yaml
          yq -i '.performance_monitoring.prometheus_url="http://localhost:9090"' CI/config/common_test_config.yaml
@@ -108,6 +117,7 @@ jobs:
          echo "test_app_outages" >> ./CI/tests/functional_tests
          echo "test_container"      >> ./CI/tests/functional_tests
          echo "test_pod" >> ./CI/tests/functional_tests
+          echo "test_customapp_pod" >> ./CI/tests/functional_tests
          echo "test_namespace"      >> ./CI/tests/functional_tests
          echo "test_net_chaos"      >> ./CI/tests/functional_tests
          echo "test_time"           >> ./CI/tests/functional_tests
--- a/CI/config/common_test_config.yaml
+++ b/CI/config/common_test_config.yaml
@@ -32,7 +32,7 @@ telemetry:
    api_url: https://yvnn4rfoi7.execute-api.us-west-2.amazonaws.com/test #telemetry service endpoint
    username: $TELEMETRY_USERNAME                                      # telemetry service username
    password: $TELEMETRY_PASSWORD                                      # telemetry service password
-    prometheus_namespace: 'prometheus-k8s'                                # prometheus namespace
+    prometheus_namespace: 'monitoring'                                # prometheus namespace
    prometheus_pod_name: 'prometheus-kind-prometheus-kube-prome-prometheus-0'                                 # prometheus pod_name
    prometheus_container_name: 'prometheus'
    prometheus_backup: True                                 # enables/disables prometheus data collection
--- a/CI/tests/test_customapp_pod.sh
+++ b/CI/tests/test_customapp_pod.sh
@@ -0,0 +1,18 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+function functional_test_customapp_pod_node_selector {
+  export scenario_type="pod_disruption_scenarios"
+  export scenario_file="scenarios/openshift/customapp_pod.yaml"
+  export post_config=""
+  envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml
+
+  python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml
+  echo "Pod disruption with node_label_selector test: Success"
+}
+
+functional_test_customapp_pod_node_selector
--- a/README.md
+++ b/README.md
@@ -22,14 +22,8 @@ Kraken injects deliberate failures into Kubernetes clusters to check if it is re
 Instructions on how to setup, configure and run Kraken can be found in the [documentation](https://krkn-chaos.dev/docs/).


-### Blogs and other useful resources
- Blog post on introduction to Kraken: https://www.openshift.com/blog/introduction-to-kraken-a-chaos-tool-for-openshift/kubernetes
- Discussion and demo on how Kraken can be leveraged to ensure OpenShift is reliable, performant and scalable: https://www.youtube.com/watch?v=s1PvupI5sD0&ab_channel=OpenShift
- Blog post emphasizing the importance of making Chaos part of Performance and Scale runs to mimic the production environments: https://www.openshift.com/blog/making-chaos-part-of-kubernetes/openshift-performance-and-scalability-tests
- Blog post on findings from Chaos test runs: https://cloud.redhat.com/blog/openshift/kubernetes-chaos-stories
- Discussion with CNCF TAG App Delivery on Krkn workflow, features and addition to CNCF sandbox: [Github](https://github.com/cncf/sandbox/issues/44), [Tracker](https://github.com/cncf/tag-app-delivery/issues/465), [recording](https://www.youtube.com/watch?v=nXQkBFK_MWc&t=722s)
- Blog post on supercharging chaos testing using AI integration in Krkn: https://www.redhat.com/en/blog/supercharging-chaos-testing-using-ai
- Blog post announcing Krkn joining CNCF Sandbox: https://www.redhat.com/en/blog/krknchaos-joining-cncf-sandbox
+### Blogs, podcasts and interviews
+Additional resources, including blog posts, podcasts, and community interviews, can be found on the [website](https://krkn-chaos.dev/blog)


 ### Roadmap
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -8,50 +8,50 @@ kraken:
    signal_address: 0.0.0.0                                # Signal listening address
    port: 8081                                             # Signal port
    chaos_scenarios:
-        # List of policies/chaos scenarios to load
-        - hog_scenarios:
-            - scenarios/kube/cpu-hog.yml
-            - scenarios/kube/memory-hog.yml
-            - scenarios/kube/io-hog.yml
-        - application_outages_scenarios:
-            - scenarios/openshift/app_outage.yaml
-        - container_scenarios:                             # List of chaos pod scenarios to load
-            - scenarios/openshift/container_etcd.yml
-        - pod_network_scenarios:
-              - scenarios/openshift/network_chaos_ingress.yml
-              - scenarios/openshift/pod_network_outage.yml
-        - pod_disruption_scenarios:
-            - scenarios/openshift/etcd.yml
-            - scenarios/openshift/regex_openshift_pod_kill.yml
-            - scenarios/openshift/prom_kill.yml
-            - scenarios/openshift/openshift-apiserver.yml
-            - scenarios/openshift/openshift-kube-apiserver.yml
-        - node_scenarios:                                  # List of chaos node scenarios to load
-            - scenarios/openshift/aws_node_scenarios.yml
-            - scenarios/openshift/vmware_node_scenarios.yml
-            - scenarios/openshift/ibmcloud_node_scenarios.yml
-        - time_scenarios:                                  # List of chaos time scenarios to load
-            - scenarios/openshift/time_scenarios_example.yml
-        - cluster_shut_down_scenarios:
-            - scenarios/openshift/cluster_shut_down_scenario.yml
-        - service_disruption_scenarios:
-             - scenarios/openshift/regex_namespace.yaml
-             - scenarios/openshift/ingress_namespace.yaml
-        - zone_outages_scenarios:
-            - scenarios/openshift/zone_outage.yaml
-        - pvc_scenarios:
-            - scenarios/openshift/pvc_scenario.yaml
-        - network_chaos_scenarios:
-            - scenarios/openshift/network_chaos.yaml
-        - service_hijacking_scenarios:
-              - scenarios/kube/service_hijacking.yaml
-        - syn_flood_scenarios:
-              - scenarios/kube/syn_flood.yaml
-        -  network_chaos_ng_scenarios:
+       # List of policies/chaos scenarios to load
+       - hog_scenarios:
+           - scenarios/kube/cpu-hog.yml
+           - scenarios/kube/memory-hog.yml
+           - scenarios/kube/io-hog.yml
+       - application_outages_scenarios:
+           - scenarios/openshift/app_outage.yaml
+       - container_scenarios:                             # List of chaos pod scenarios to load
+           - scenarios/openshift/container_etcd.yml
+       - pod_network_scenarios:
+             - scenarios/openshift/network_chaos_ingress.yml
+             - scenarios/openshift/pod_network_outage.yml
+       - pod_disruption_scenarios:
+           - scenarios/openshift/etcd.yml
+           - scenarios/openshift/regex_openshift_pod_kill.yml
+           - scenarios/openshift/prom_kill.yml
+           - scenarios/openshift/openshift-apiserver.yml
+           - scenarios/openshift/openshift-kube-apiserver.yml
+       - node_scenarios:                                  # List of chaos node scenarios to load
+           - scenarios/openshift/aws_node_scenarios.yml
+           - scenarios/openshift/vmware_node_scenarios.yml
+           - scenarios/openshift/ibmcloud_node_scenarios.yml
+       - time_scenarios:                                  # List of chaos time scenarios to load
+           - scenarios/openshift/time_scenarios_example.yml
+       - cluster_shut_down_scenarios:
+           - scenarios/openshift/cluster_shut_down_scenario.yml
+       - service_disruption_scenarios:
+            - scenarios/openshift/regex_namespace.yaml
+            - scenarios/openshift/ingress_namespace.yaml
+       - zone_outages_scenarios:
+           - scenarios/openshift/zone_outage.yaml
+       - pvc_scenarios:
+           - scenarios/openshift/pvc_scenario.yaml
+       - network_chaos_scenarios:
+           - scenarios/openshift/network_chaos.yaml
+       - service_hijacking_scenarios:
+             - scenarios/kube/service_hijacking.yaml
+       - syn_flood_scenarios:
+             - scenarios/kube/syn_flood.yaml
+       - network_chaos_ng_scenarios:
               - scenarios/kube/pod-network-filter.yml
               - scenarios/kube/node-network-filter.yml
-        -  kubevirt_vm_outage:
-               - scenarios/kubevirt/kubevirt-vm-outage.yaml
+       -  kubevirt_vm_outage:
+              - scenarios/kubevirt/kubevirt-vm-outage.yaml

 cerberus:
    cerberus_enabled: False                                # Enable it when cerberus is previously installed
@@ -79,7 +79,7 @@ elastic:
    telemetry_index: "krkn-telemetry"

 tunings:
-    wait_duration: 60                                      # Duration to wait between each chaos scenario
+    wait_duration: 1                                      # Duration to wait between each chaos scenario
    iterations: 1                                          # Number of times to execute the scenarios
    daemon_mode: False                                     # Iterations are set to infinity which means that the kraken will cause chaos forever
 telemetry:
--- a/krkn/prometheus/client.py
+++ b/krkn/prometheus/client.py
@@ -75,10 +75,12 @@ def alerts(
 def critical_alerts(
    prom_cli: KrknPrometheus,
    summary: ChaosRunAlertSummary,
+    elastic: KrknElastic,
    run_id,
    scenario,
    start_time,
    end_time,
+    elastic_alerts_index
 ):
    summary.scenario = scenario
    summary.run_id = run_id
@@ -113,7 +115,6 @@ def critical_alerts(
            summary.chaos_alerts.append(alert)

    post_critical_alerts = prom_cli.process_query(query)
-
    for alert in post_critical_alerts:
        if "metric" in alert:
            alertname = (
@@ -136,6 +137,21 @@ def critical_alerts(
            )
            alert = ChaosRunAlert(alertname, alertstate, namespace, severity)
            summary.post_chaos_alerts.append(alert)
+            if elastic:
+                elastic_alert = ElasticAlert(
+                    run_uuid=run_id,
+                    severity=severity,
+                    alert=alertname,
+                    created_at=end_time,
+                    namespace=namespace,
+                    alertstate=alertstate,
+                    phase="post_chaos"
+                )
+                result = elastic.push_alert(elastic_alert, elastic_alerts_index)
+                if result == -1:
+                    logging.error("failed to save alert on ElasticSearch")
+                pass
+

    during_critical_alerts_count = len(during_critical_alerts)
    post_critical_alerts_count = len(post_critical_alerts)
@@ -149,8 +165,8 @@ def critical_alerts(

    if not firing_alerts:
        logging.info("No critical alerts are firing!!")
-
-
+    
+   
 def metrics(
    prom_cli: KrknPrometheus,
    elastic: KrknElastic,
@@ -252,6 +268,14 @@ def metrics(
                        metric[k] = v
                        metric['timestamp'] = str(datetime.datetime.now())
                    metrics_list.append(metric.copy())
+        if telemetry_json['virt_checks']:
+            for virt_check in telemetry_json["virt_checks"]:
+                    metric_name = "virt_check_recovery"
+                    metric = {"metricName": metric_name}
+                    for k,v in virt_check.items():
+                        metric[k] = v
+                        metric['timestamp'] = str(datetime.datetime.now())
+                    metrics_list.append(metric.copy())

        save_metrics = False
        if elastic is not None and elastic_metrics_index is not None:
--- a/krkn/scenario_plugins/container/container_scenario_plugin.py
+++ b/krkn/scenario_plugins/container/container_scenario_plugin.py
@@ -37,9 +37,11 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
                    snapshot = future_snapshot.result()
                    result = snapshot.get_pods_status()
                    scenario_telemetry.affected_pods = result
-
-        except (RuntimeError, Exception):
-            logging.error("ContainerScenarioPlugin exiting due to Exception %s")
+                    if len(result.unrecovered) > 0:
+                        logging.info("ContainerScenarioPlugin failed with unrecovered containers")
+                        return 1
+        except (RuntimeError, Exception) as e:
+            logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e)
            return 1
        else:
            return 0
--- a/krkn/scenario_plugins/kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py
+++ b/krkn/scenario_plugins/kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py
@@ -149,44 +149,48 @@ class KubevirtVmOutageScenarioPlugin(AbstractScenarioPlugin):
            disable_auto_restart = params.get("disable_auto_restart", False)
            
            if not vm_name:
-                raise Exception("vm_name parameter is required")
+                logging.error("vm_name parameter is required")
+                return 1
+            self.pods_status = PodsStatus()
            vmis_list = self.get_vmis(vm_name,namespace)
-            if len(vmis_list) == 0:
-                raise Exception(f"No matching VMs with name {vm_name} in namespace {namespace}")
-            rand_int = random.randint(0, len(vmis_list) - 1)
-            vmi = vmis_list[rand_int]
+            for _ in range(kill_count):
                
-            logging.info(f"Starting KubeVirt VM outage scenario for VM: {vm_name} in namespace: {namespace}")
-            vmi_name = vmi.get("metadata").get("name")
-            if not self.validate_environment(vmi_name, namespace):
-                return self.pods_status
-                
-            vmi = self.get_vmi(vmi_name, namespace)
-            self.affected_pod = AffectedPod(
-                pod_name=vmi_name,
-                namespace=namespace,
-            )
-            if not vmi:
-                logging.error(f"VMI {vm_name} not found in namespace {namespace}")
-                return self.pods_status
-                
-            self.original_vmi = vmi
-            logging.info(f"Captured initial state of VMI: {vm_name}")
-            result = self.delete_vmi(vmi_name, namespace, disable_auto_restart)
-            if result != 0:
-                return self.pods_status
+                rand_int = random.randint(0, len(vmis_list) - 1)
+                vmi = vmis_list[rand_int]
+                    
+                logging.info(f"Starting KubeVirt VM outage scenario for VM: {vm_name} in namespace: {namespace}")
+                vmi_name = vmi.get("metadata").get("name")
+                if not self.validate_environment(vmi_name, namespace):
+                    return 1
+                    
+                vmi = self.get_vmi(vmi_name, namespace)
+                self.affected_pod = AffectedPod(
+                    pod_name=vmi_name,
+                    namespace=namespace,
+                )
+                if not vmi:
+                    logging.error(f"VMI {vm_name} not found in namespace {namespace}")
+                    return 1
+                    
+                self.original_vmi = vmi
+                logging.info(f"Captured initial state of VMI: {vm_name}")
+                result = self.delete_vmi(vmi_name, namespace, disable_auto_restart)
+                if result != 0:
+                    self.pods_status.unrecovered.append(self.affected_pod)
+                    continue

-            result = self.wait_for_running(vmi_name,namespace, timeout)
-            if result != 0:
-                return self.pods_status
-            
-            self.affected_pod.total_recovery_time = (
-                self.affected_pod.pod_readiness_time
-                + self.affected_pod.pod_rescheduling_time
-            )
+                result = self.wait_for_running(vmi_name,namespace, timeout)
+                if result != 0:
+                    self.pods_status.unrecovered.append(self.affected_pod)
+                    continue
+                
+                self.affected_pod.total_recovery_time = (
+                    self.affected_pod.pod_readiness_time
+                    + self.affected_pod.pod_rescheduling_time
+                )

-            self.pods_status.recovered.append(self.affected_pod)
-            logging.info(f"Successfully completed KubeVirt VM outage scenario for VM: {vm_name}")
+                self.pods_status.recovered.append(self.affected_pod)
+                logging.info(f"Successfully completed KubeVirt VM outage scenario for VM: {vm_name}")
            
            return self.pods_status
            
@@ -316,13 +320,13 @@ class KubevirtVmOutageScenarioPlugin(AbstractScenarioPlugin):
                time.sleep(1)
                
            logging.error(f"Timed out waiting for VMI {vm_name} to be deleted")
-            self.pods_status.unrecovered = self.affected_pod
+            self.pods_status.unrecovered.append(self.affected_pod)
            return 1
            
        except Exception as e:
            logging.error(f"Error deleting VMI {vm_name}: {e}")
            log_exception(e)
-            self.pods_status.unrecovered = self.affected_pod
+            self.pods_status.unrecovered.append(self.affected_pod)
            return 1

    def wait_for_running(self, vm_name: str, namespace: str, timeout: int = 120) -> int: 
--- a/krkn/scenario_plugins/native/pod_network_outage/kubernetes_functions.py
+++ b/krkn/scenario_plugins/native/pod_network_outage/kubernetes_functions.py
@@ -23,8 +23,7 @@ def create_job(batch_cli, body, namespace="default"):
    """

    try:
-        api_response = batch_cli.create_namespaced_job(
-            body=body, namespace=namespace)
+        api_response = batch_cli.create_namespaced_job(body=body, namespace=namespace)
        return api_response
    except ApiException as api:
        logging.warning(
@@ -71,7 +70,8 @@ def create_pod(cli, body, namespace, timeout=120):
        end_time = time.time() + timeout
        while True:
            pod_stat = cli.read_namespaced_pod(
-                name=body["metadata"]["name"], namespace=namespace)
+                name=body["metadata"]["name"], namespace=namespace
+            )
            if pod_stat.status.phase == "Running":
                break
            if time.time() > end_time:
@@ -121,16 +121,18 @@ def exec_cmd_in_pod(cli, command, pod_name, namespace, container=None):
    return ret


-def list_pods(cli, namespace, label_selector=None):
+def list_pods(cli, namespace, label_selector=None, exclude_label=None):
    """
-    Function used to list pods in a given namespace and having a certain label
+    Function used to list pods in a given namespace and having a certain label and excluding pods with exclude_label
+    and excluding pods with exclude_label
    """

    pods = []
    try:
        if label_selector:
            ret = cli.list_namespaced_pod(
-                namespace, pretty=True, label_selector=label_selector)
+                namespace, pretty=True, label_selector=label_selector
+            )
        else:
            ret = cli.list_namespaced_pod(namespace, pretty=True)
    except ApiException as e:
@@ -140,7 +142,16 @@ def list_pods(cli, namespace, label_selector=None):
            % e
        )
        raise e
+
    for pod in ret.items:
+        # Skip pods with the exclude label if specified
+        if exclude_label and pod.metadata.labels:
+            exclude_key, exclude_value = exclude_label.split("=", 1)
+            if (
+                exclude_key in pod.metadata.labels
+                and pod.metadata.labels[exclude_key] == exclude_value
+            ):
+                continue
        pods.append(pod.metadata.name)

    return pods
@@ -152,8 +163,7 @@ def get_job_status(batch_cli, name, namespace="default"):
    """

    try:
-        return batch_cli.read_namespaced_job_status(
-            name=name, namespace=namespace)
+        return batch_cli.read_namespaced_job_status(name=name, namespace=namespace)
    except Exception as e:
        logging.error(
            "Exception when calling \
@@ -169,7 +179,10 @@ def get_pod_log(cli, name, namespace="default"):
    """

    return cli.read_namespaced_pod_log(
-        name=name, namespace=namespace, _return_http_data_only=True, _preload_content=False
+        name=name,
+        namespace=namespace,
+        _return_http_data_only=True,
+        _preload_content=False,
    )


@@ -191,7 +204,8 @@ def delete_job(batch_cli, name, namespace="default"):
            name=name,
            namespace=namespace,
            body=client.V1DeleteOptions(
-                propagation_policy="Foreground", grace_period_seconds=0),
+                propagation_policy="Foreground", grace_period_seconds=0
+            ),
        )
        logging.debug("Job deleted. status='%s'" % str(api_response.status))
        return api_response
@@ -247,11 +261,8 @@ def get_node(node_name, label_selector, instance_kill_count, cli):
        )
    nodes = list_ready_nodes(cli, label_selector)
    if not nodes:
-        raise Exception(
-            "Ready nodes with the provided label selector do not exist")
-    logging.info(
-        "Ready nodes with the label selector %s: %s" % (label_selector, nodes)
-    )
+        raise Exception("Ready nodes with the provided label selector do not exist")
+    logging.info("Ready nodes with the label selector %s: %s" % (label_selector, nodes))
    number_of_nodes = len(nodes)
    if instance_kill_count == number_of_nodes:
        return nodes
--- a/krkn/scenario_plugins/native/pod_network_outage/pod_network_outage_plugin.py
+++ b/krkn/scenario_plugins/native/pod_network_outage/pod_network_outage_plugin.py
@@ -19,7 +19,11 @@ from . import cerberus


 def get_test_pods(
-    pod_name: str, pod_label: str, namespace: str, kubecli: KrknKubernetes
+    pod_name: str,
+    pod_label: str,
+    namespace: str,
+    kubecli: KrknKubernetes,
+    exclude_label: str = None,
 ) -> typing.List[str]:
    """
    Function that returns a list of pods to apply network policy
@@ -38,11 +42,16 @@ def get_test_pods(
        kubecli (KrknKubernetes)
            - Object to interact with Kubernetes Python client

+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        pod names (string) in the namespace
    """
    pods_list = []
-    pods_list = kubecli.list_pods(label_selector=pod_label, namespace=namespace)
+    pods_list = kubecli.list_pods(
+        label_selector=pod_label, namespace=namespace, exclude_label=exclude_label
+    )
    if pod_name and pod_name not in pods_list:
        raise Exception("pod name not found in namespace ")
    elif pod_name and pod_name in pods_list:
@@ -226,6 +235,10 @@ def apply_outage_policy(

        image (string)
            - Image of network chaos tool
+            
+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        The name of the job created that executes the commands on a node
        for ingress chaos scenario
@@ -324,6 +337,9 @@ def apply_ingress_policy(
        test_execution (String)
            - The order in which the filters are applied

+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        The name of the job created that executes the traffic shaping
        filter
@@ -407,6 +423,9 @@ def apply_net_policy(
        test_execution (String)
            - The order in which the filters are applied

+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        The name of the job created that executes the traffic shaping
        filter
@@ -466,6 +485,9 @@ def get_ingress_cmd(
        duration (str):
            - Duration for which the traffic control is to be done

+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        str: ingress filter
    """
@@ -517,6 +539,9 @@ def get_egress_cmd(
        duration (str):
            - Duration for which the traffic control is to be done

+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        str: egress filter
    """
@@ -652,6 +677,10 @@ def list_bridges(node: str, pod_template, kubecli: KrknKubernetes, image: str) -

        image (string)
            - Image of network chaos tool
+            
+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        List of bridges on the node.
    """
@@ -829,6 +858,9 @@ def check_bridge_interface(
        kubecli (KrknKubernetes)
            - Object to interact with Kubernetes Python client

+        exclude_label (string)
+            - pods matching this label will be excluded from the outage
+
    Returns:
        Returns True if the bridge is found in the  node.
    """
@@ -922,6 +954,15 @@ class InputParams:
        },
    )

+    exclude_label: typing.Optional[str] = field(
+        default=None,
+        metadata={
+            "name": "Exclude label",
+            "description": "Kubernetes label selector for pods to exclude from the chaos. "
+            "Pods matching this label will be excluded even if they match the label_selector",
+        },
+    )
+
    kraken_config: typing.Dict[str, typing.Any] = field(
        default=None,
        metadata={
@@ -1055,7 +1096,11 @@ def pod_outage(

        br_name = get_bridge_name(api_ext, custom_obj)
        pods_list = get_test_pods(
-            test_pod_name, test_label_selector, test_namespace, kubecli
+            test_pod_name,
+            test_label_selector,
+            test_namespace,
+            kubecli,
+            params.exclude_label,
        )

        while not len(pods_list) <= params.instance_count:
@@ -1176,6 +1221,15 @@ class EgressParams:
        },
    )

+    exclude_label: typing.Optional[str] = field(
+        default=None,
+        metadata={
+            "name": "Exclude label",
+            "description": "Kubernetes label selector for pods to exclude from the chaos. "
+            "Pods matching this label will be excluded even if they match the label_selector",
+        },
+    )
+
    kraken_config: typing.Dict[str, typing.Any] = field(
        default=None,
        metadata={
@@ -1314,7 +1368,11 @@ def pod_egress_shaping(

        br_name = get_bridge_name(api_ext, custom_obj)
        pods_list = get_test_pods(
-            test_pod_name, test_label_selector, test_namespace, kubecli
+            test_pod_name,
+            test_label_selector,
+            test_namespace,
+            kubecli,
+            params.exclude_label,
        )

        while not len(pods_list) <= params.instance_count:
@@ -1450,6 +1508,15 @@ class IngressParams:
        },
    )

+    exclude_label: typing.Optional[str] = field(
+        default=None,
+        metadata={
+            "name": "Exclude label",
+            "description": "Kubernetes label selector for pods to exclude from the chaos. "
+            "Pods matching this label will be excluded even if they match the label_selector",
+        },
+    )
+
    kraken_config: typing.Dict[str, typing.Any] = field(
        default=None,
        metadata={
@@ -1589,7 +1656,11 @@ def pod_ingress_shaping(

        br_name = get_bridge_name(api_ext, custom_obj)
        pods_list = get_test_pods(
-            test_pod_name, test_label_selector, test_namespace, kubecli
+            test_pod_name,
+            test_label_selector,
+            test_namespace,
+            kubecli,
+            params.exclude_label,
        )

        while not len(pods_list) <= params.instance_count:
--- a/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_filter.py
+++ b/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_filter.py
@@ -1,3 +1,4 @@
+import logging
 import queue
 import time

@@ -12,10 +13,9 @@ from krkn.scenario_plugins.network_chaos_ng.models import (
 from krkn.scenario_plugins.network_chaos_ng.modules.abstract_network_chaos_module import (
    AbstractNetworkChaosModule,
 )
-from krkn.scenario_plugins.network_chaos_ng.modules.utils import log_info
+from krkn.scenario_plugins.network_chaos_ng.modules.utils import log_info, log_error
 from krkn.scenario_plugins.network_chaos_ng.modules.utils_network_filter import (
    deploy_network_filter_pod,
-    get_default_interface,
    generate_namespaced_rules,
    apply_network_rules,
    clean_network_rules_namespaced,
@@ -56,23 +56,28 @@ class PodNetworkFilterModule(AbstractNetworkChaosModule):
                pod_name,
                self.kubecli.get_lib_kubernetes(),
                container_name,
+                host_network=False,
            )

            if len(self.config.interfaces) == 0:
-                interfaces = [
-                    get_default_interface(
-                        pod_name,
-                        self.config.namespace,
-                        self.kubecli.get_lib_kubernetes(),
+                interfaces = (
+                    self.kubecli.get_lib_kubernetes().list_pod_network_interfaces(
+                        target, self.config.namespace
                    )
-                ]
+                )

+                if len(interfaces) == 0:
+                    log_error(
+                        "no network interface found in pod, impossible to execute the network filter scenario",
+                        parallel,
+                        pod_name,
+                    )
+                    return
                log_info(
-                    f"detected default interface {interfaces[0]}",
+                    f"detected network interfaces: {','.join(interfaces)}",
                    parallel,
                    pod_name,
                )
-
            else:
                interfaces = self.config.interfaces

--- a/krkn/scenario_plugins/network_chaos_ng/modules/utils.py
+++ b/krkn/scenario_plugins/network_chaos_ng/modules/utils.py
@@ -11,7 +11,7 @@ def log_info(message: str, parallel: bool = False, node_name: str = ""):
        logging.info(message)


-def log_error(self, message: str, parallel: bool = False, node_name: str = ""):
+def log_error(message: str, parallel: bool = False, node_name: str = ""):
    """
    log helper method for ERROR severity to be used in the scenarios
    """
@@ -21,7 +21,7 @@ def log_error(self, message: str, parallel: bool = False, node_name: str = ""):
        logging.error(message)


-def log_warning(self, message: str, parallel: bool = False, node_name: str = ""):
+def log_warning(message: str, parallel: bool = False, node_name: str = ""):
    """
    log helper method for WARNING severity to be used in the scenarios
    """
--- a/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_filter.py
+++ b/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_filter.py
@@ -54,6 +54,7 @@ def deploy_network_filter_pod(
    pod_name: str,
    kubecli: KrknKubernetes,
    container_name: str = "fedora",
+    host_network: bool = True,
 ):
    file_loader = FileSystemLoader(os.path.abspath(os.path.dirname(__file__)))
    env = Environment(loader=file_loader, autoescape=True)
@@ -78,17 +79,16 @@ def deploy_network_filter_pod(
            toleration["value"] = value
        tolerations.append(toleration)

-
    pod_body = yaml.safe_load(
        pod_template.render(
            pod_name=pod_name,
            namespace=config.namespace,
-            host_network=True,
+            host_network=host_network,
            target=target_node,
            container_name=container_name,
            workload_image=config.image,
            taints=tolerations,
-            service_account=config.service_account
+            service_account=config.service_account,
        )
    )

--- a/krkn/scenario_plugins/node_actions/abstract_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/abstract_node_scenarios.py
@@ -60,7 +60,7 @@ class abstract_node_scenarios:
        pass

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        pass

    # Node scenario to stop the kubelet
--- a/krkn/scenario_plugins/node_actions/alibaba_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/alibaba_node_scenarios.py
@@ -316,7 +316,7 @@ class alibaba_node_scenarios(abstract_node_scenarios):
            self.affected_nodes_status.affected_nodes.append(affected_node)

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/aws_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/aws_node_scenarios.py
@@ -358,7 +358,7 @@ class aws_node_scenarios(abstract_node_scenarios):
            self.affected_nodes_status.affected_nodes.append(affected_node)

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/az_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/az_node_scenarios.py
@@ -308,7 +308,7 @@ class azure_node_scenarios(abstract_node_scenarios):


    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/bm_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/bm_node_scenarios.py
@@ -214,7 +214,7 @@ class bm_node_scenarios(abstract_node_scenarios):
        logging.info("Node termination scenario is not supported on baremetal")

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/common_node_functions.py
+++ b/krkn/scenario_plugins/node_actions/common_node_functions.py
@@ -16,20 +16,22 @@ def get_node_by_name(node_name_list, kubecli: KrknKubernetes):
            )
            return
    return node_name_list
-        
+

 # Pick a random node with specified label selector
 def get_node(label_selector, instance_kill_count, kubecli: KrknKubernetes):

-    label_selector_list  = label_selector.split(",")
+    label_selector_list = label_selector.split(",")
    nodes = []
-    for label_selector in label_selector_list: 
+    for label_selector in label_selector_list:
        nodes.extend(kubecli.list_killable_nodes(label_selector))
    if not nodes:
        raise Exception("Ready nodes with the provided label selector do not exist")
-    logging.info("Ready nodes with the label selector %s: %s" % (label_selector_list, nodes))
+    logging.info(
+        "Ready nodes with the label selector %s: %s" % (label_selector_list, nodes)
+    )
    number_of_nodes = len(nodes)
-    if instance_kill_count == number_of_nodes:
+    if instance_kill_count == number_of_nodes or instance_kill_count == 0:
        return nodes
    nodes_to_return = []
    for i in range(instance_kill_count):
@@ -38,23 +40,30 @@ def get_node(label_selector, instance_kill_count, kubecli: KrknKubernetes):
        nodes.remove(node_to_add)
    return nodes_to_return

+
 # krkn_lib
 # Wait until the node status becomes Ready
-def wait_for_ready_status(node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None):
-    affected_node =  kubecli.watch_node_status(node, "True", timeout, affected_node)
+def wait_for_ready_status(
+    node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None
+):
+    affected_node = kubecli.watch_node_status(node, "True", timeout, affected_node)
    return affected_node
-   
+

 # krkn_lib
 # Wait until the node status becomes Not Ready
-def wait_for_not_ready_status(node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None):
+def wait_for_not_ready_status(
+    node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None
+):
    affected_node = kubecli.watch_node_status(node, "False", timeout, affected_node)
    return affected_node
-    
+

 # krkn_lib
 # Wait until the node status becomes Unknown
-def wait_for_unknown_status(node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None):
+def wait_for_unknown_status(
+    node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None
+):
    affected_node = kubecli.watch_node_status(node, "Unknown", timeout, affected_node)
    return affected_node

--- a/krkn/scenario_plugins/node_actions/docker_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/docker_node_scenarios.py
@@ -120,7 +120,7 @@ class docker_node_scenarios(abstract_node_scenarios):
                raise e

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/gcp_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/gcp_node_scenarios.py
@@ -321,7 +321,7 @@ class gcp_node_scenarios(abstract_node_scenarios):
            self.affected_nodes_status.affected_nodes.append(affected_node)

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/general_cloud_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/general_cloud_node_scenarios.py
@@ -39,7 +39,7 @@ class general_node_scenarios(abstract_node_scenarios):
        )

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        logging.info(
            "Node reboot is not set up yet for this cloud type,"
            " no action is going to be taken"
--- a/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
@@ -338,7 +338,7 @@ class ibm_node_scenarios(abstract_node_scenarios):
            logging.error("node_stop_scenario injection failed!")


-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        try:
            instance_id = self.ibmcloud.get_instance_id(node)
            for _ in range(instance_kill_count):
--- a/krkn/scenario_plugins/node_actions/ibmcloud_power_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/ibmcloud_power_node_scenarios.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python
+import time
+from os import environ
+from dataclasses import dataclass
+import logging
+
+from krkn_lib.k8s import KrknKubernetes
+import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
+from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
+    abstract_node_scenarios,
+)
+import requests
+import sys
+import json
+
+
+#  -o, --operation string   Operation to be done in a PVM server instance. 
+# Valid values are: hard-reboot, immediate-shutdown, soft-reboot, reset-state, start, stop.
+
+from krkn_lib.models.k8s import AffectedNodeStatus, AffectedNode
+
+
+class IbmCloudPower:
+    def __init__(self):
+        """
+        Initialize the ibm cloud client by using the the env variables:
+            'IBMC_APIKEY' 'IBMC_URL'
+        """
+        self.api_key = environ.get("IBMC_APIKEY")
+        self.service_url = environ.get("IBMC_POWER_URL")
+        self.CRN = environ.get("IBMC_POWER_CRN")
+        self.cloud_instance_id = self.CRN.split(":")[-3]
+        print(self.cloud_instance_id)
+        self.headers = None
+        self.token = None
+        if not self.api_key:
+            raise Exception("Environmental variable 'IBMC_APIKEY' is not set")
+        if not self.service_url:
+            raise Exception("Environmental variable 'IBMC_POWER_URL' is not set")
+        if not self.CRN:
+            raise Exception("Environmental variable 'IBMC_POWER_CRN' is not set")
+        try:
+            self.authenticate()
+            
+        except Exception as e:
+            logging.error("error authenticating" + str(e))
+    
+    def authenticate(self):
+        url = "https://iam.cloud.ibm.com/identity/token"
+        iam_auth_headers = {
+            "content-type": "application/x-www-form-urlencoded",
+            "accept": "application/json",
+        }
+        data = {
+            "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
+            "apikey": self.api_key,
+        }
+
+        response = self._make_request("POST", url, data=data, headers=iam_auth_headers)
+        if response.status_code == 200:
+            self.token = response.json()
+            self.headers = {
+                "Authorization": f"Bearer {self.token['access_token']}",
+                "Content-Type": "application/json",
+                "CRN": self.CRN,
+            }
+        else:
+            logging.error(f"Authentication Error: {response.status_code}")
+            return None, None
+            
+
+    def _make_request(self,method, url, data=None, headers=None):
+        try:
+            response = requests.request(method, url, data=data, headers=headers)
+            response.raise_for_status()
+            return response
+        except Exception as e:
+            raise Exception(f"API Error: {e}")
+
+    # Get the instance ID of the node
+    def get_instance_id(self, node_name):
+
+        url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/"
+        response = self._make_request("GET", url, headers=self.headers)
+        for node in response.json()["pvmInstances"]:
+            if node_name == node["serverName"]:
+                return node["pvmInstanceID"]
+        logging.error("Couldn't find node with name " + str(node_name) + ", you could try another region")
+        sys.exit(1)
+
+    def delete_instance(self, instance_id):
+        """
+        Deletes the Instance whose name is given by 'instance_id'
+        """
+        try:
+            url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/{instance_id}/action"
+            self._make_request("POST", url, headers=self.headers, data=json.dumps({"action": "immediate-shutdown"}))
+            logging.info("Deleted Instance -- '{}'".format(instance_id))
+        except Exception as e:
+            logging.info("Instance '{}' could not be deleted. ".format(instance_id))
+            return False
+
+    def reboot_instances(self, instance_id, soft=False):
+        """
+        Reboots the Instance whose name is given by 'instance_id'. Returns True if successful, or
+        returns False if the Instance is not powered on
+        """
+
+        try:
+            if soft:
+                action = "soft-reboot"
+            else:
+                action = "hard-reboot"
+            url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/{instance_id}/action"
+            self._make_request("POST", url, headers=self.headers, data=json.dumps({"action": action}))
+            logging.info("Reset Instance -- '{}'".format(instance_id))
+            return True
+        except Exception as e:
+            logging.info("Instance '{}' could not be rebooted".format(instance_id))
+            return False
+
+    def stop_instances(self, instance_id):
+        """
+        Stops the Instance whose name is given by 'instance_id'. Returns True if successful, or
+        returns False if the Instance is already stopped
+        """
+
+        try:
+            url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/{instance_id}/action"
+            self._make_request("POST", url, headers=self.headers, data=json.dumps({"action": "stop"}))
+            logging.info("Stopped Instance -- '{}'".format(instance_id))
+            return True
+        except Exception as e:
+            logging.info("Instance '{}' could not be stopped".format(instance_id))
+            logging.info("error" + str(e))
+            return False
+
+    def start_instances(self, instance_id):
+        """
+        Stops the Instance whose name is given by 'instance_id'. Returns True if successful, or
+        returns False if the Instance is already running
+        """
+
+        try:
+            url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/{instance_id}/action"
+            self._make_request("POST", url, headers=self.headers, data=json.dumps({"action": "start"}))
+            logging.info("Started Instance -- '{}'".format(instance_id))
+            return True
+        except Exception as e:
+            logging.info("Instance '{}' could not start running".format(instance_id))
+            return False
+
+    def list_instances(self):
+        """
+        Returns a list of Instances present in the datacenter
+        """
+        instance_names = []
+        try:
+            url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/"
+            response = self._make_request("GET", url, headers=self.headers)
+            for pvm in response.json()["pvmInstances"]:
+                instance_names.append({"serverName": pvm.serverName, "pvmInstanceID": pvm.pvmInstanceID})
+        except Exception as e:
+            logging.error("Error listing out instances: " + str(e))
+            sys.exit(1)
+        return instance_names
+
+    def find_id_in_list(self, name, vpc_list):
+        for vpc in vpc_list:
+            if vpc["vpc_name"] == name:
+                return vpc["vpc_id"]
+
+    def get_instance_status(self, instance_id):
+        """
+        Returns the status of the Instance whose name is given by 'instance_id'
+        """
+
+        try:
+            url = f"{self.service_url}/pcloud/v1/cloud-instances/{self.cloud_instance_id}/pvm-instances/{instance_id}"
+            response = self._make_request("GET", url, headers=self.headers)
+            state = response.json()["status"]
+            return state
+        except Exception as e:
+            logging.error(
+                "Failed to get node instance status %s. Encountered following "
+                "exception: %s." % (instance_id, e)
+            )
+            return None
+
+    def wait_until_deleted(self, instance_id, timeout, affected_node=None):
+        """
+        Waits until the instance is deleted or until the timeout. Returns True if
+        the instance is successfully deleted, else returns False
+        """
+        start_time = time.time()
+        time_counter = 0
+        vpc = self.get_instance_status(instance_id)
+        while vpc is not None:
+            vpc = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still being deleted, sleeping for 5 seconds"
+                % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still not deleted in allotted time" % instance_id
+                )
+                return False
+        end_time = time.time()
+        if affected_node:
+            affected_node.set_affected_node_status("terminated", end_time - start_time)
+        return True
+
+    def wait_until_running(self, instance_id, timeout, affected_node=None):
+        """
+        Waits until the Instance switches to running state or until the timeout.
+        Returns True if the Instance switches to running, else returns False
+        """
+        start_time = time.time()
+        time_counter = 0
+        status = self.get_instance_status(instance_id)
+        while status != "ACTIVE":
+            status = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still not running, sleeping for 5 seconds" % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still not ready in allotted time" % instance_id
+                )
+                return False
+        end_time = time.time()
+        if affected_node:
+            affected_node.set_affected_node_status("running", end_time - start_time)
+        return True
+
+    def wait_until_stopped(self, instance_id, timeout, affected_node):
+        """
+        Waits until the Instance switches to stopped state or until the timeout.
+        Returns True if the Instance switches to stopped, else returns False
+        """
+        start_time = time.time()
+        time_counter = 0
+        status = self.get_instance_status(instance_id)
+        while status != "STOPPED":
+            status = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still not stopped, sleeping for 5 seconds" % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still not stopped in allotted time" % instance_id
+                )
+                return False
+        end_time = time.time()
+        print('affected_node' + str(affected_node))
+        if affected_node:
+            affected_node.set_affected_node_status("stopped", end_time - start_time)
+        return True
+
+
+    def wait_until_rebooted(self, instance_id, timeout, affected_node):
+        """
+        Waits until the Instance switches to restarting state and then running state or until the timeout.
+        Returns True if the Instance switches back to running, else returns False
+        """
+
+        time_counter = 0
+        status = self.get_instance_status(instance_id)
+        while status == "HARD_REBOOT" or status == "SOFT_REBOOT":
+            status = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still restarting, sleeping for 5 seconds" % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still restarting after allotted time" % instance_id
+                )
+                return False
+        self.wait_until_running(instance_id, timeout, affected_node)
+        print('affected_node' + str(affected_node))
+        return True
+
+
+@dataclass
+class ibmcloud_power_node_scenarios(abstract_node_scenarios):
+    def __init__(self, kubecli: KrknKubernetes, node_action_kube_check: bool, affected_nodes_status: AffectedNodeStatus, disable_ssl_verification: bool):
+        super().__init__(kubecli, node_action_kube_check, affected_nodes_status)
+        self.ibmcloud_power = IbmCloudPower()
+        
+        self.node_action_kube_check = node_action_kube_check
+
+    def node_start_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud_power.get_instance_id( node)
+            affected_node = AffectedNode(node, node_id=instance_id)
+            for _ in range(instance_kill_count):
+                logging.info("Starting node_start_scenario injection")
+                logging.info("Starting the node %s " % (node))
+                
+                if instance_id:
+                    vm_started = self.ibmcloud_power.start_instances(instance_id)
+                    if vm_started:
+                        self.ibmcloud_power.wait_until_running(instance_id, timeout, affected_node)
+                        if self.node_action_kube_check: 
+                            nodeaction.wait_for_ready_status(
+                                node, timeout, self.kubecli, affected_node
+                            )
+                    logging.info(
+                        "Node with instance ID: %s is in running state" % node
+                    )
+                    logging.info(
+                        "node_start_scenario has been successfully injected!"
+                    )
+                else:
+                    logging.error(
+                        "Failed to find node that matched instances on ibm cloud in region"
+                    )
+
+        except Exception as e:
+            logging.error("Failed to start node instance. Test Failed")
+            logging.error("node_start_scenario injection failed!")
+        self.affected_nodes_status.affected_nodes.append(affected_node)
+
+
+    def node_stop_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud_power.get_instance_id(node)
+            for _ in range(instance_kill_count):
+                affected_node = AffectedNode(node, instance_id)
+                logging.info("Starting node_stop_scenario injection")
+                logging.info("Stopping the node %s " % (node))
+                vm_stopped = self.ibmcloud_power.stop_instances(instance_id)
+                if vm_stopped:
+                    self.ibmcloud_power.wait_until_stopped(instance_id, timeout, affected_node)
+                logging.info(
+                    "Node with instance ID: %s is in stopped state" % node
+                )
+                logging.info(
+                    "node_stop_scenario has been successfully injected!"
+                )
+        except Exception as e:
+            logging.error("Failed to stop node instance. Test Failed")
+            logging.error("node_stop_scenario injection failed!")
+
+
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
+        try:
+            instance_id = self.ibmcloud_power.get_instance_id(node)
+            for _ in range(instance_kill_count):
+                affected_node = AffectedNode(node, node_id=instance_id)
+                logging.info("Starting node_reboot_scenario injection")
+                logging.info("Rebooting the node %s " % (node))
+                self.ibmcloud_power.reboot_instances(instance_id, soft_reboot)
+                self.ibmcloud_power.wait_until_rebooted(instance_id, timeout, affected_node)
+                if self.node_action_kube_check:
+                    nodeaction.wait_for_unknown_status(
+                        node, timeout, affected_node
+                    )
+                    nodeaction.wait_for_ready_status(
+                        node, timeout, affected_node
+                    )
+                logging.info(
+                    "Node with instance ID: %s has rebooted successfully" % node
+                )
+                logging.info(
+                    "node_reboot_scenario has been successfully injected!"
+                )
+
+        except Exception as e:
+            logging.error("Failed to reboot node instance. Test Failed")
+            logging.error("node_reboot_scenario injection failed!")
+
+
+    def node_terminate_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud_power.get_instance_id(node)
+            for _ in range(instance_kill_count):
+                affected_node = AffectedNode(node, node_id=instance_id)
+                logging.info(
+                    "Starting node_termination_scenario injection by first stopping the node"
+                )
+                logging.info("Deleting the node with instance ID: %s " % (node))
+                self.ibmcloud_power.delete_instance(instance_id)
+                self.ibmcloud_power.wait_until_deleted(node, timeout, affected_node)
+                logging.info(
+                    "Node with instance ID: %s has been released" % node
+                )
+                logging.info(
+                    "node_terminate_scenario has been successfully injected!"
+                )
+        except Exception as e:
+            logging.error("Failed to terminate node instance. Test Failed")
+            logging.error("node_terminate_scenario injection failed!")
+
--- a/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py
+++ b/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py
@@ -22,8 +22,16 @@ from krkn.scenario_plugins.node_actions.gcp_node_scenarios import gcp_node_scena
 from krkn.scenario_plugins.node_actions.general_cloud_node_scenarios import (
    general_node_scenarios,
 )
-from krkn.scenario_plugins.node_actions.vmware_node_scenarios import vmware_node_scenarios
-from krkn.scenario_plugins.node_actions.ibmcloud_node_scenarios import ibm_node_scenarios
+from krkn.scenario_plugins.node_actions.vmware_node_scenarios import (
+    vmware_node_scenarios,
+)
+from krkn.scenario_plugins.node_actions.ibmcloud_node_scenarios import (
+    ibm_node_scenarios,
+)
+
+from krkn.scenario_plugins.node_actions.ibmcloud_power_node_scenarios import (
+     ibmcloud_power_node_scenarios,
+)
 node_general = False


@@ -63,29 +71,39 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
    def get_node_scenario_object(self, node_scenario, kubecli: KrknKubernetes):
        affected_nodes_status = AffectedNodeStatus()

-        node_action_kube_check = get_yaml_item_value(node_scenario,"kube_check",True)
+        node_action_kube_check = get_yaml_item_value(node_scenario, "kube_check", True)
        if (
            "cloud_type" not in node_scenario.keys()
            or node_scenario["cloud_type"] == "generic"
        ):
            global node_general
            node_general = True
-            return general_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status)
+            return general_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        if node_scenario["cloud_type"].lower() == "aws":
-            return aws_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status)
+            return aws_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif node_scenario["cloud_type"].lower() == "gcp":
-            return gcp_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status)
+            return gcp_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif node_scenario["cloud_type"].lower() == "openstack":
            from krkn.scenario_plugins.node_actions.openstack_node_scenarios import (
                openstack_node_scenarios,
            )

-            return openstack_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status)
+            return openstack_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif (
            node_scenario["cloud_type"].lower() == "azure"
            or node_scenario["cloud_type"].lower() == "az"
        ):
-            return azure_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status)
+            return azure_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif (
            node_scenario["cloud_type"].lower() == "alibaba"
            or node_scenario["cloud_type"].lower() == "alicloud"
@@ -94,7 +112,9 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
                alibaba_node_scenarios,
            )

-            return alibaba_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status)
+            return alibaba_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif node_scenario["cloud_type"].lower() == "bm":
            from krkn.scenario_plugins.node_actions.bm_node_scenarios import (
                bm_node_scenarios,
@@ -106,22 +126,31 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
                node_scenario.get("bmc_password", None),
                kubecli,
                node_action_kube_check,
-                affected_nodes_status
+                affected_nodes_status,
            )
        elif node_scenario["cloud_type"].lower() == "docker":
-            return docker_node_scenarios(kubecli,node_action_kube_check,
-                affected_nodes_status)
+            return docker_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif (
            node_scenario["cloud_type"].lower() == "vsphere"
            or node_scenario["cloud_type"].lower() == "vmware"
        ):
-            return vmware_node_scenarios(kubecli, node_action_kube_check,affected_nodes_status)
+            return vmware_node_scenarios(
+                kubecli, node_action_kube_check, affected_nodes_status
+            )
        elif (
            node_scenario["cloud_type"].lower() == "ibm"
            or node_scenario["cloud_type"].lower() == "ibmcloud"
        ):
            disable_ssl_verification = get_yaml_item_value(node_scenario, "disable_ssl_verification", True)
            return ibm_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status, disable_ssl_verification)
+        elif (
+            node_scenario["cloud_type"].lower() == "ibmpower"
+            or node_scenario["cloud_type"].lower() == "ibmcloudpower"
+        ):
+            disable_ssl_verification = get_yaml_item_value(node_scenario, "disable_ssl_verification", True)
+            return ibmcloud_power_node_scenarios(kubecli, node_action_kube_check, affected_nodes_status, disable_ssl_verification)
        else:
            logging.error(
                "Cloud type "
@@ -139,16 +168,22 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
            )

    def inject_node_scenario(
-        self, action, node_scenario, node_scenario_object, kubecli: KrknKubernetes, scenario_telemetry: ScenarioTelemetry
+        self,
+        action,
+        node_scenario,
+        node_scenario_object,
+        kubecli: KrknKubernetes,
+        scenario_telemetry: ScenarioTelemetry,
    ):
-        
+
        # Get the node scenario configurations for setting nodes
-       
+
        instance_kill_count = get_yaml_item_value(node_scenario, "instance_count", 1)
        node_name = get_yaml_item_value(node_scenario, "node_name", "")
        label_selector = get_yaml_item_value(node_scenario, "label_selector", "")
+        exclude_label = get_yaml_item_value(node_scenario, "exclude_label", "")
        parallel_nodes = get_yaml_item_value(node_scenario, "parallel", False)
-        
+
        # Get the node to apply the scenario
        if node_name:
            node_name_list = node_name.split(",")
@@ -157,11 +192,22 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
            nodes = common_node_functions.get_node(
                label_selector, instance_kill_count, kubecli
            )
-        
-        # GCP api doesn't support multiprocessing calls, will only actually run 1 
+            if exclude_label:
+                exclude_nodes = common_node_functions.get_node(
+                    exclude_label, 0, kubecli
+                )
+
+                for node in nodes:
+                    if node in exclude_nodes:
+                        logging.info(
+                            f"excluding node {node} with exclude label {exclude_nodes}"
+                        )
+                        nodes.remove(node)
+
+        # GCP api doesn't support multiprocessing calls, will only actually run 1
        if parallel_nodes:
            self.multiprocess_nodes(nodes, node_scenario_object, action, node_scenario)
-        else: 
+        else:
            for single_node in nodes:
                self.run_node(single_node, node_scenario_object, action, node_scenario)
        affected_nodes_status = node_scenario_object.affected_nodes_status
@@ -171,14 +217,21 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
        try:
            # pool object with number of element
            pool = ThreadPool(processes=len(nodes))
-    
-            pool.starmap(self.run_node,zip(nodes, repeat(node_scenario_object), repeat(action), repeat(node_scenario)))
+
+            pool.starmap(
+                self.run_node,
+                zip(
+                    nodes,
+                    repeat(node_scenario_object),
+                    repeat(action),
+                    repeat(node_scenario),
+                ),
+            )

            pool.close()
        except Exception as e:
            logging.info("Error on pool multiprocessing: " + str(e))

-
    def run_node(self, single_node, node_scenario_object, action, node_scenario):
        # Get the scenario specifics for running action nodes
        run_kill_count = get_yaml_item_value(node_scenario, "runs", 1)
@@ -186,6 +239,7 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):

        timeout = get_yaml_item_value(node_scenario, "timeout", 120)
        service = get_yaml_item_value(node_scenario, "service", "")
+        soft_reboot = get_yaml_item_value(node_scenario, "soft_reboot", False)
        ssh_private_key = get_yaml_item_value(
            node_scenario, "ssh_private_key", "~/.ssh/id_rsa"
        )
@@ -216,11 +270,12 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
                )
            elif action == "node_reboot_scenario":
                node_scenario_object.node_reboot_scenario(
-                    run_kill_count, single_node, timeout
+                    run_kill_count, single_node, timeout, soft_reboot
                )
            elif action == "node_disk_detach_attach_scenario":
                node_scenario_object.node_disk_detach_attach_scenario(
-                    run_kill_count, single_node, timeout, duration)
+                    run_kill_count, single_node, timeout, duration
+                )
            elif action == "stop_start_kubelet_scenario":
                node_scenario_object.stop_start_kubelet_scenario(
                    run_kill_count, single_node, timeout
@@ -248,9 +303,7 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
                else:
                    if not node_scenario["helper_node_ip"]:
                        logging.error("Helper node IP address is not provided")
-                        raise Exception(
-                            "Helper node IP address is not provided"
-                        )
+                        raise Exception("Helper node IP address is not provided")
                    node_scenario_object.helper_node_stop_start_scenario(
                        run_kill_count, node_scenario["helper_node_ip"], timeout
                    )
@@ -270,6 +323,5 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
                    % action
                )

-
    def get_scenario_types(self) -> list[str]:
        return ["node_scenarios"]
--- a/krkn/scenario_plugins/node_actions/openstack_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/openstack_node_scenarios.py
@@ -171,7 +171,7 @@ class openstack_node_scenarios(abstract_node_scenarios):
            self.affected_nodes_status.affected_nodes.append(affected_node)

    # Node scenario to reboot the node
-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        for _ in range(instance_kill_count):
            affected_node = AffectedNode(node)
            try:
--- a/krkn/scenario_plugins/node_actions/vmware_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/vmware_node_scenarios.py
@@ -432,7 +432,7 @@ class vmware_node_scenarios(abstract_node_scenarios):
            )
                

-    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+    def node_reboot_scenario(self, instance_kill_count, node, timeout, soft_reboot=False):
        try:
            for _ in range(instance_kill_count):
                affected_node = AffectedNode(node)
--- a/krkn/scenario_plugins/pod_disruption/models/models.py
+++ b/krkn/scenario_plugins/pod_disruption/models/models.py
@@ -11,6 +11,9 @@ class InputParams:
            self.label_selector = config["label_selector"] if "label_selector" in config else ""
            self.namespace_pattern = config["namespace_pattern"] if "namespace_pattern" in config else ""
            self.name_pattern = config["name_pattern"] if "name_pattern" in config else ""
+            self.node_label_selector = config["node_label_selector"] if "node_label_selector" in config else ""
+            self.node_names = config["node_names"] if "node_names" in config else []
+            self.exclude_label = config["exclude_label"] if "exclude_label" in config else ""

    namespace_pattern: str
    krkn_pod_recovery_time: int
@@ -18,4 +21,7 @@ class InputParams:
    duration: int
    kill: int
    label_selector: str
-    name_pattern: str
+    name_pattern: str
+    node_label_selector: str
+    node_names: list
+    exclude_label: str
--- a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py
+++ b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py
@@ -47,7 +47,9 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
                    snapshot = future_snapshot.result()
                    result = snapshot.get_pods_status()
                    scenario_telemetry.affected_pods = result
-
+                    if len(result.unrecovered) > 0:
+                        logging.info("PodDisruptionScenarioPlugin failed with unrecovered pods")
+                        return 1

        except (RuntimeError, Exception) as e:
            logging.error("PodDisruptionScenariosPlugin exiting due to Exception %s" % e)
@@ -100,18 +102,86 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
            raise Exception(
                f"impossible to determine monitor parameters, check {kill_scenario} configuration"
            )
+    
+    def _select_pods_with_field_selector(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str, node_name: str = None):
+        """Helper function to select pods using either label_selector or name_pattern with field_selector, optionally filtered by node"""
+        # Combine field selectors if node targeting is specified
+        if node_name:
+            node_field_selector = f"spec.nodeName={node_name}"
+            if field_selector:
+                combined_field_selector = f"{field_selector},{node_field_selector}"
+            else:
+                combined_field_selector = node_field_selector
+        else:
+            combined_field_selector = field_selector
        
+        if label_selector:
+            return kubecli.select_pods_by_namespace_pattern_and_label(
+                label_selector=label_selector, 
+                namespace_pattern=namespace, 
+                field_selector=combined_field_selector
+            )
+        else:  # name_pattern
+            return kubecli.select_pods_by_name_pattern_and_namespace_pattern(
+                pod_name_pattern=name_pattern, 
+                namespace_pattern=namespace, 
+                field_selector=combined_field_selector
+            )

-    def get_pods(self, name_pattern, label_selector,namespace, kubecli: KrknKubernetes, field_selector: str =None): 
+    def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None, quiet: bool = False): 
        if label_selector and name_pattern: 
            logging.error('Only, one of name pattern or label pattern can be specified')
-        elif label_selector:
-            pods = kubecli.select_pods_by_namespace_pattern_and_label(label_selector=label_selector,namespace_pattern=namespace, field_selector=field_selector)
-        elif name_pattern:
-            pods = kubecli.select_pods_by_name_pattern_and_namespace_pattern(pod_name_pattern=name_pattern, namespace_pattern=namespace, field_selector=field_selector)
-        else: 
+            return []
+        
+        if not label_selector and not name_pattern:
            logging.error('Name pattern or label pattern must be specified ')
-        return pods
+            return []
+        
+        # If specific node names are provided, make multiple calls with field selector
+        if node_names:
+            if not quiet:
+                logging.info(f"Targeting pods on {len(node_names)} specific nodes")
+            all_pods = []
+            for node_name in node_names:
+                pods = self._select_pods_with_field_selector(
+                    name_pattern, label_selector, namespace, kubecli, field_selector, node_name
+                )
+                
+                if pods:
+                    all_pods.extend(pods)
+            
+            if not quiet:
+                logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
+            return all_pods
+        
+        #  Node label selector approach - use field selectors
+        if node_label_selector:
+            # Get nodes matching the label selector first
+            nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector)
+            if not nodes_with_label:
+                logging.info(f"No nodes found with label selector: {node_label_selector}")
+                return []
+            
+            if not quiet:
+                logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
+            # Use field selector for each node
+            all_pods = []
+            for node_name in nodes_with_label:
+                pods = self._select_pods_with_field_selector(
+                    name_pattern, label_selector, namespace, kubecli, field_selector, node_name
+                )
+                
+                if pods:
+                    all_pods.extend(pods)
+            
+            if not quiet:
+                logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
+            return all_pods
+        
+        # Standard pod selection (no node targeting)
+        return self._select_pods_with_field_selector(
+            name_pattern, label_selector, namespace, kubecli, field_selector
+        )
    
    def killing_pods(self, config: InputParams, kubecli: KrknKubernetes):
        # region Select target pods
@@ -120,7 +190,14 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
        if not namespace: 
            logging.error('Namespace pattern must be specified')

-        pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running")
+        pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
+        exclude_pods = set()
+        if config.exclude_label:
+            _exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
+            for pod in _exclude_pods:
+                exclude_pods.add(pod[0])
+
+
        pods_count = len(pods)
        if len(pods) < config.kill:
            logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
@@ -129,23 +206,25 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
        
        random.shuffle(pods)
        for i in range(config.kill):
-
            pod = pods[i]
            logging.info(pod)
-            logging.info(f'Deleting pod {pod[0]}')
-            kubecli.delete_pod(pod[0], pod[1])
+            if pod[0] in exclude_pods:
+                logging.info(f"Excluding {pod[0]} from chaos")
+            else:
+                logging.info(f'Deleting pod {pod[0]}')
+                kubecli.delete_pod(pod[0], pod[1])
        
-        self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli)
+        self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
        return 0

    def wait_for_pods(
-        self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes
+        self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
    ):
        timeout = False
        start_time = datetime.now()

        while not timeout:
-            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli)
+            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
            if pod_count == len(pods):
                return
               
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ google-cloud-compute==1.22.0
 ibm_cloud_sdk_core==3.18.0
 ibm_vpc==0.20.0
 jinja2==3.1.6
-krkn-lib==5.1.5
+krkn-lib==5.1.9
 lxml==5.1.0
 kubernetes==28.1.0
 numpy==1.26.4
--- a/run_kraken.py
+++ b/run_kraken.py
@@ -375,10 +375,12 @@ def main(options, command: Optional[str]) -> int:
                            prometheus_plugin.critical_alerts(
                                prometheus,
                                summary,
+                                elastic_search,
                                run_uuid,
                                scenario_type,
                                start_time,
                                datetime.datetime.now(),
+                                elastic_alerts_index
                            )

                            chaos_output.critical_alerts = summary
--- a/scenarios/openshift/customapp_pod.yaml
+++ b/scenarios/openshift/customapp_pod.yaml
@@ -1,6 +1,15 @@
 # yaml-language-server: $schema=../plugin.schema.json
 - id: kill-pods
  config:
-    namespace_pattern: ^acme-air$
+    namespace_pattern: "kube-system"
    name_pattern: .*
-    krkn_pod_recovery_time: 120
+    krkn_pod_recovery_time: 60
+    kill: 1     # num of pods to kill
+    #Not needed by default, but can be used if you want to target pods on specific nodes
+    # Option 1: Target pods on nodes with specific labels [master/worker nodes]
+    node_label_selector: node-role.kubernetes.io/control-plane=      # Target control-plane nodes (works on both k8s and openshift) 
+    # Option 2: Target pods of specific nodes (testing mixed node types)
+    # node_names: 
+    #   - ip-10-0-31-8.us-east-2.compute.internal      # Worker node 1
+    #   - ip-10-0-48-188.us-east-2.compute.internal    # Worker node 2  
+    #   - ip-10-0-14-59.us-east-2.compute.internal     # Master node 1
--- a/scenarios/openshift/etcd.yml
+++ b/scenarios/openshift/etcd.yml
@@ -4,3 +4,4 @@
    namespace_pattern: ^openshift-etcd$
    label_selector: k8s-app=etcd
    krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos
--- a/scenarios/openshift/openshift-apiserver.yml
+++ b/scenarios/openshift/openshift-apiserver.yml
@@ -4,4 +4,5 @@
    namespace_pattern: ^openshift-apiserver$
    label_selector: app=openshift-apiserver-a
    krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos

--- a/scenarios/openshift/openshift-kube-apiserver.yml
+++ b/scenarios/openshift/openshift-kube-apiserver.yml
@@ -4,4 +4,5 @@
    namespace_pattern: ^openshift-kube-apiserver$
    label_selector: app=openshift-kube-apiserver
    krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos

--- a/scenarios/openshift/prom_kill.yml
+++ b/scenarios/openshift/prom_kill.yml
@@ -2,4 +2,5 @@
  config:
    namespace_pattern: ^openshift-monitoring$
    label_selector: statefulset.kubernetes.io/pod-name=prometheus-k8s-0
-    krkn_pod_recovery_time: 120
+    krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos
--- a/scenarios/openshift/regex_openshift_pod_kill.yml
+++ b/scenarios/openshift/regex_openshift_pod_kill.yml
@@ -5,3 +5,4 @@
    name_pattern: .*
    kill: 3
    krkn_pod_recovery_time: 120
+    exclude_label: "" # excludes pods marked with this label from chaos
--- a/scenarios/plugin.schema.README.md
+++ b/scenarios/plugin.schema.README.md
@@ -1,5 +0,0 @@
-This file is generated by running the "plugins" module in the kraken project:
-
-```
-python -m kraken.plugins >scenarios/plugin.schema.json
-```
--- a/scenarios/plugin.schema.json
+++ b/scenarios/plugin.schema.json
@@ -1,584 +0,0 @@
-{
-	"$id": "https://github.com/redhat-chaos/krkn/",
-	"$schema": "https://json-schema.org/draft/2020-12/schema",
-	"title": "Kraken Arcaflow scenarios",
-	"description": "Serial execution of Arcaflow Python plugins. See https://github.com/arcaflow for details.",
-	"type": "array",
-	"minContains": 1,
-	"items": {
-		"oneOf": [
-			{
-				"type": "object",
-				"properties": {
-					"id": {
-						"type": "string",
-						"const": "run_python"
-					},
-					"config": {
-						"$defs": {
-							"RunPythonFileInput": {
-								"type": "object",
-								"properties": {
-									"filename": {
-										"type": "string"
-									}
-								},
-								"required": [
-									"filename"
-								],
-								"additionalProperties": false,
-								"dependentRequired": {}
-							}
-						},
-						"type": "object",
-						"properties": {
-							"filename": {
-								"type": "string"
-							}
-						},
-						"required": [
-							"filename"
-						],
-						"additionalProperties": false,
-						"dependentRequired": {}
-					}
-				},
-				"required": [
-					"id",
-					"config"
-				]
-			},
-			{
-				"type": "object",
-				"title": "pod_network_outage Arcaflow scenarios",
-				"properties": {
-					"id": {
-						"type": "string",
-						"const": "pod_network_outage"
-					},
-					"config": {
-						"$defs": {
-							"InputParams": {
-								"type": "object",
-								"properties": {
-									"namespace": {
-										"type": "string",
-										"minLength": 1,
-										"title": "Namespace",
-										"description": "Namespace of the pod to which filter need to be appliedfor details."
-									},
-									"image": {
-										"type": "string",
-										"minLength": 1,
-										"title": "Image",
-										"default": "image: quay.io/krkn-chaos/krkn:tools",
-										"description": "Image of the krkn tools to run network outage."
-									},
-									"direction": {
-										"type": "array",
-										"items": {
-											"type": "string"
-										},
-										"default": [
-											"ingress",
-											"egress"
-										],
-										"title": "Direction",
-										"description": "List of directions to apply filtersDefault both egress and ingress."
-									},
-									"ingress_ports": {
-										"type": "array",
-										"items": {
-											"type": "integer"
-										},
-										"default": [],
-										"title": "Ingress ports",
-										"description": "List of ports to block traffic onDefault [], i.e. all ports"
-									},
-									"egress_ports": {
-										"type": "array",
-										"items": {
-											"type": "integer"
-										},
-										"default": [],
-										"title": "Egress ports",
-										"description": "List of ports to block traffic onDefault [], i.e. all ports"
-									},
-									"kubeconfig_path": {
-										"type": "string",
-										"title": "Kubeconfig path",
-										"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
-									},
-									"pod_name": {
-										"type": "string",
-										"title": "Pod name",
-										"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
-									},
-									"label_selector": {
-										"type": "string",
-										"title": "Label selector",
-										"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
-									},
-									"kraken_config": {
-										"type": "string",
-										"title": "Kraken Config",
-										"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
-									},
-									"test_duration": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 120,
-										"title": "Test duration",
-										"description": "Duration for which each step of the ingress chaos testing is to be performed."
-									},
-									"wait_duration": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 300,
-										"title": "Wait Duration",
-										"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
-									},
-									"instance_count": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 1,
-										"title": "Instance Count",
-										"description": "Number of pods to perform action/select that match the label selector."
-									}
-								},
-								"required": [
-									"namespace"
-								],
-								"additionalProperties": false,
-								"dependentRequired": {}
-							}
-						},
-						"type": "object",
-						"properties": {
-							"namespace": {
-								"type": "string",
-								"minLength": 1,
-								"title": "Namespace",
-								"description": "Namespace of the pod to which filter need to be appliedfor details."
-							},
-							"direction": {
-								"type": "array",
-								"items": {
-									"type": "string"
-								},
-								"default": [
-									"ingress",
-									"egress"
-								],
-								"title": "Direction",
-								"description": "List of directions to apply filtersDefault both egress and ingress."
-							},
-							"ingress_ports": {
-								"type": "array",
-								"items": {
-									"type": "integer"
-								},
-								"default": [],
-								"title": "Ingress ports",
-								"description": "List of ports to block traffic onDefault [], i.e. all ports"
-							},
-							"egress_ports": {
-								"type": "array",
-								"items": {
-									"type": "integer"
-								},
-								"default": [],
-								"title": "Egress ports",
-								"description": "List of ports to block traffic onDefault [], i.e. all ports"
-							},
-							"kubeconfig_path": {
-								"type": "string",
-								"title": "Kubeconfig path",
-								"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
-							},
-							"pod_name": {
-								"type": "string",
-								"title": "Pod name",
-								"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
-							},
-							"label_selector": {
-								"type": "string",
-								"title": "Label selector",
-								"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
-							},
-							"kraken_config": {
-								"type": "string",
-								"title": "Kraken Config",
-								"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
-							},
-							"test_duration": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 120,
-								"title": "Test duration",
-								"description": "Duration for which each step of the ingress chaos testing is to be performed."
-							},
-							"wait_duration": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 300,
-								"title": "Wait Duration",
-								"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
-							},
-							"instance_count": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 1,
-								"title": "Instance Count",
-								"description": "Number of pods to perform action/select that match the label selector."
-							}
-						},
-						"required": [
-							"namespace"
-						],
-						"additionalProperties": false,
-						"dependentRequired": {}
-					}
-				},
-				"required": [
-					"id",
-					"config"
-				]
-			},
-			{
-				"type": "object",
-				"title": "pod_egress_shaping Arcaflow scenarios",
-				"properties": {
-					"id": {
-						"type": "string",
-						"const": "pod_egress_shaping"
-					},
-					"config": {
-						"$defs": {
-							"EgressParams": {
-								"type": "object",
-								"properties": {
-									"namespace": {
-										"type": "string",
-										"minLength": 1,
-										"title": "Namespace",
-										"description": "Namespace of the pod to which filter need to be appliedfor details."
-									},
-									"image": {
-										"type": "string",
-										"minLength": 1,
-										"title": "Image",
-										"default": "image: quay.io/krkn-chaos/krkn:tools",
-										"description": "Image of the krkn tools to run network outage."
-									},
-									"kubeconfig_path": {
-										"type": "string",
-										"title": "Kubeconfig path",
-										"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
-									},
-									"pod_name": {
-										"type": "string",
-										"title": "Pod name",
-										"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
-									},
-									"label_selector": {
-										"type": "string",
-										"title": "Label selector",
-										"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
-									},
-									"kraken_config": {
-										"type": "string",
-										"title": "Kraken Config",
-										"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
-									},
-									"test_duration": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 90,
-										"title": "Test duration",
-										"description": "Duration for which each step of the ingress chaos testing is to be performed."
-									},
-									"wait_duration": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 300,
-										"title": "Wait Duration",
-										"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
-									},
-									"instance_count": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 1,
-										"title": "Instance Count",
-										"description": "Number of pods to perform action/select that match the label selector."
-									},
-									"execution_type": {
-										"type": "string",
-										"default": "parallel",
-										"title": "Execution Type",
-										"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
-									},
-									"network_params": {
-										"type": "object",
-										"propertyNames": {},
-										"additionalProperties": {
-											"type": "string"
-										},
-										"title": "Network Parameters",
-										"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
-									}
-								},
-								"required": [
-									"namespace"
-								],
-								"additionalProperties": false,
-								"dependentRequired": {}
-							}
-						},
-						"type": "object",
-						"properties": {
-							"namespace": {
-								"type": "string",
-								"minLength": 1,
-								"title": "Namespace",
-								"description": "Namespace of the pod to which filter need to be appliedfor details."
-							},
-							"kubeconfig_path": {
-								"type": "string",
-								"title": "Kubeconfig path",
-								"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
-							},
-							"pod_name": {
-								"type": "string",
-								"title": "Pod name",
-								"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
-							},
-							"label_selector": {
-								"type": "string",
-								"title": "Label selector",
-								"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
-							},
-							"kraken_config": {
-								"type": "string",
-								"title": "Kraken Config",
-								"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
-							},
-							"test_duration": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 90,
-								"title": "Test duration",
-								"description": "Duration for which each step of the ingress chaos testing is to be performed."
-							},
-							"wait_duration": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 300,
-								"title": "Wait Duration",
-								"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
-							},
-							"instance_count": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 1,
-								"title": "Instance Count",
-								"description": "Number of pods to perform action/select that match the label selector."
-							},
-							"execution_type": {
-								"type": "string",
-								"default": "parallel",
-								"title": "Execution Type",
-								"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
-							},
-							"network_params": {
-								"type": "object",
-								"propertyNames": {},
-								"additionalProperties": {
-									"type": "string"
-								},
-								"title": "Network Parameters",
-								"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
-							}
-						},
-						"required": [
-							"namespace"
-						],
-						"additionalProperties": false,
-						"dependentRequired": {}
-					}
-				},
-				"required": [
-					"id",
-					"config"
-				]
-			},
-			{
-				"type": "object",
-				"title": "pod_ingress_shaping Arcaflow scenarios",
-				"properties": {
-					"id": {
-						"type": "string",
-						"const": "pod_ingress_shaping"
-					},
-					"config": {
-						"$defs": {
-							"IngressParams": {
-								"type": "object",
-								"properties": {
-									"namespace": {
-										"type": "string",
-										"minLength": 1,
-										"title": "Namespace",
-										"description": "Namespace of the pod to which filter need to be appliedfor details."
-									},
-									"image": {
-										"type": "string",
-										"minLength": 1,
-										"title": "Image",
-										"default": "image: quay.io/krkn-chaos/krkn:tools",
-										"description": "Image of the krkn tools to run network outage."
-									},
-									"network_params": {
-										"type": "object",
-										"propertyNames": {},
-										"additionalProperties": {
-											"type": "string"
-										},
-										"title": "Network Parameters",
-										"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
-									},
-									"kubeconfig_path": {
-										"type": "string",
-										"title": "Kubeconfig path",
-										"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
-									},
-									"pod_name": {
-										"type": "string",
-										"title": "Pod name",
-										"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
-									},
-									"label_selector": {
-										"type": "string",
-										"title": "Label selector",
-										"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
-									},
-									"kraken_config": {
-										"type": "string",
-										"title": "Kraken Config",
-										"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
-									},
-									"test_duration": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 90,
-										"title": "Test duration",
-										"description": "Duration for which each step of the ingress chaos testing is to be performed."
-									},
-									"wait_duration": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 300,
-										"title": "Wait Duration",
-										"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
-									},
-									"instance_count": {
-										"type": "integer",
-										"minimum": 1,
-										"default": 1,
-										"title": "Instance Count",
-										"description": "Number of pods to perform action/select that match the label selector."
-									},
-									"execution_type": {
-										"type": "string",
-										"default": "parallel",
-										"title": "Execution Type",
-										"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
-									}
-								},
-								"required": [
-									"namespace"
-								],
-								"additionalProperties": false,
-								"dependentRequired": {}
-							}
-						},
-						"type": "object",
-						"properties": {
-							"namespace": {
-								"type": "string",
-								"minLength": 1,
-								"title": "Namespace",
-								"description": "Namespace of the pod to which filter need to be appliedfor details."
-							},
-							"network_params": {
-								"type": "object",
-								"propertyNames": {},
-								"additionalProperties": {
-									"type": "string"
-								},
-								"title": "Network Parameters",
-								"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
-							},
-							"kubeconfig_path": {
-								"type": "string",
-								"title": "Kubeconfig path",
-								"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
-							},
-							"pod_name": {
-								"type": "string",
-								"title": "Pod name",
-								"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
-							},
-							"label_selector": {
-								"type": "string",
-								"title": "Label selector",
-								"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
-							},
-							"kraken_config": {
-								"type": "string",
-								"title": "Kraken Config",
-								"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
-							},
-							"test_duration": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 90,
-								"title": "Test duration",
-								"description": "Duration for which each step of the ingress chaos testing is to be performed."
-							},
-							"wait_duration": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 300,
-								"title": "Wait Duration",
-								"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
-							},
-							"instance_count": {
-								"type": "integer",
-								"minimum": 1,
-								"default": 1,
-								"title": "Instance Count",
-								"description": "Number of pods to perform action/select that match the label selector."
-							},
-							"execution_type": {
-								"type": "string",
-								"default": "parallel",
-								"title": "Execution Type",
-								"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
-							}
-						},
-						"required": [
-							"namespace"
-						],
-						"additionalProperties": false,
-						"dependentRequired": {}
-					}
-				},
-				"required": [
-					"id",
-					"config"
-				]
-			}
-		]
-	}
-}
--- a/tests/test_pod_network_outage.py
+++ b/tests/test_pod_network_outage.py
@@ -0,0 +1,93 @@
+import unittest
+from unittest.mock import MagicMock, patch
+import sys
+import os
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from krkn.scenario_plugins.native.pod_network_outage.kubernetes_functions import (
+    list_pods,
+)
+from krkn.scenario_plugins.native.pod_network_outage.pod_network_outage_plugin import (
+    get_test_pods,
+)
+
+
+class TestPodNetworkOutage(unittest.TestCase):
+    def test_list_pods_with_exclude_label(self):
+        """Test that list_pods correctly excludes pods with matching exclude_label"""
+        # Create mock pod items
+        pod1 = MagicMock()
+        pod1.metadata.name = "pod1"
+        pod1.metadata.labels = {"app": "test", "skip": "true"}
+
+        pod2 = MagicMock()
+        pod2.metadata.name = "pod2"
+        pod2.metadata.labels = {"app": "test"}
+
+        pod3 = MagicMock()
+        pod3.metadata.name = "pod3"
+        pod3.metadata.labels = {"app": "test", "skip": "false"}
+
+        # Create mock API response
+        mock_response = MagicMock()
+        mock_response.items = [pod1, pod2, pod3]
+
+        # Create mock client
+        mock_cli = MagicMock()
+        mock_cli.list_namespaced_pod.return_value = mock_response
+
+        # Test without exclude_label
+        result = list_pods(mock_cli, "test-namespace", "app=test")
+        self.assertEqual(result, ["pod1", "pod2", "pod3"])
+
+        # Test with exclude_label
+        result = list_pods(mock_cli, "test-namespace", "app=test", "skip=true")
+        self.assertEqual(result, ["pod2", "pod3"])
+
+    def test_get_test_pods_with_exclude_label(self):
+        """Test that get_test_pods passes exclude_label to list_pods correctly"""
+        # Create mock kubecli
+        mock_kubecli = MagicMock()
+        mock_kubecli.list_pods.return_value = ["pod2", "pod3"]
+
+        # Test get_test_pods with exclude_label
+        result = get_test_pods(
+            None, "app=test", "test-namespace", mock_kubecli, "skip=true"
+        )
+
+        # Verify list_pods was called with the correct parameters
+        mock_kubecli.list_pods.assert_called_once_with(
+            label_selector="app=test",
+            namespace="test-namespace",
+            exclude_label="skip=true",
+        )
+
+        # Verify the result
+        self.assertEqual(result, ["pod2", "pod3"])
+
+    def test_get_test_pods_with_pod_name_and_exclude_label(self):
+        """Test that get_test_pods prioritizes pod_name over label filters"""
+        # Create mock kubecli
+        mock_kubecli = MagicMock()
+        mock_kubecli.list_pods.return_value = ["pod1", "pod2", "pod3"]
+
+        # Test get_test_pods with both pod_name and exclude_label
+        # The pod_name should take precedence
+        result = get_test_pods(
+            "pod1", "app=test", "test-namespace", mock_kubecli, "skip=true"
+        )
+
+        # Verify list_pods was called with the correct parameters
+        mock_kubecli.list_pods.assert_called_once_with(
+            label_selector="app=test",
+            namespace="test-namespace",
+            exclude_label="skip=true",
+        )
+
+        # Verify the result contains only the specified pod
+        self.assertEqual(result, ["pod1"])
+
+
+if __name__ == "__main__":
+    unittest.main()
Author	SHA1	Message	Date
Paige Patton	3eea42770f	adding ibm power using request calls (#923 ) Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 8m56s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-10-28 12:57:20 -04:00
Tullio Sebastiani	77a46e3869	Adds an exclude label for node scenarios (#929 ) * added exclude label for node scenarios Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * pipeline fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> --------- Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>	2025-10-28 16:55:16 +01:00
Paige Patton	b801308d4a	Setting config back to all scenarios running Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 9m4s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-10-24 13:21:01 -04:00
Tullio Sebastiani	97f4c1fd9c	main github action fix Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 4m55s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> main github action fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> elastic password Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> typo Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> config fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>	2025-10-17 17:06:35 +02:00
Tullio Sebastiani	c54390d8b1	pod network filter ingress fix (#925 ) * pod network filter ingress fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * increasing lib version Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> --------- Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>	2025-10-17 12:27:53 +02:00
Tullio Sebastiani	543729b18a	Add exclude_label functionality to pod disruption scenarios (#910 ) Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 9m15s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped * kill pod exclude label Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * config alignment Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> --------- Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>	2025-10-08 22:10:27 +02:00
Paige Patton	a0ea4dc749	adding virt checks to metric info (#918 ) Signed-off-by: Paige Patton <prubenda@redhat.com> Co-authored-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>	2025-10-08 15:43:48 -04:00
Paige Patton	a5459792ef	adding critical alerts to post to elastic search Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-10-08 15:38:20 -04:00
Tullio Sebastiani	d434bb26fa	Feature/add exclude label pod network chaos (#921 ) * feat: Add exclude_label feature to pod network outage scenarios This feature enables filtering out specific pods from network outage chaos testing based on label selectors. Users can now target all pods in a namespace except critical ones by specifying exclude_label. - Added exclude_label parameter to list_pods() function - Updated get_test_pods() to pass the exclude parameter - Added exclude_label field to all relevant plugin classes - Updated schema.json with the new parameter - Added documentation and examples - Created comprehensive unit tests Signed-off-by: Priyansh Saxena <130545865+Transcendental-Programmer@users.noreply.github.com> * krkn-lib update Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * removed plugin schema Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> --------- Signed-off-by: Priyansh Saxena <130545865+Transcendental-Programmer@users.noreply.github.com> Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> Co-authored-by: Priyansh Saxena <130545865+Transcendental-Programmer@users.noreply.github.com>	2025-10-08 16:01:41 +02:00
Paige Patton	fee41d404e	adding code owners (#920 ) Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 11m6s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-10-06 16:03:13 -04:00
Tullio Sebastiani	8663ee8893	new elasticsearch action (#919 ) fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>	2025-10-06 12:58:26 -04:00
Paige Patton	a072f0306a	adding failure if unrecoverd pod (#908 ) Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 10m48s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-09-17 11:59:45 -04:00
Paige Patton	8221392356	adding kill count Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 9m29s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-09-17 09:46:32 -04:00
Sahil Shah	671fc581dd	Adding node_label_selector for pod scenarios (#888 ) Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 10m38s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped * Adding node_label_selector for pod scenarios Signed-off-by: Sahil Shah <sahshah@redhat.com> * using kubernetes function, adding node_name and removing extra config Signed-off-by: Sahil Shah <sahshah@redhat.com> * adding CI test for custom pod scenario Signed-off-by: Sahil Shah <sahshah@redhat.com> * fixing comment * adding test to workflow * adding list parsing logic for krkn hub * parsing not needed, as input is always [] --------- Signed-off-by: Sahil Shah <sahshah@redhat.com>	2025-09-15 16:52:08 -04:00
Naga Ravi Chaitanya Elluri	11508ce017	Deprecate blog post links in favor of the website Some checks failed Functional & Unit Tests / Functional & Unit Tests (push) Failing after 9m40s Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>	2025-09-08 15:04:53 -04:00
Paige Patton	0d78139fb6	increasing krkn lib version (#906 ) Signed-off-by: Paige Patton <prubenda@redhat.com>	2025-09-08 09:05:53 -04:00