Move IBM Cloud node scenarios out of the native plugin into node_actions

Signed-off-by: Paige Patton <prubenda@redhat.com>
2026-02-14 18:10:00 +00:00 · 2025-03-04 17:16:54 -05:00
parent ce8593f2f0
commit 0eba329305
9 changed files with 392 additions and 790 deletions
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -24,12 +24,10 @@ kraken:
            - scenarios/openshift/prom_kill.yml
            - scenarios/openshift/openshift-apiserver.yml
            - scenarios/openshift/openshift-kube-apiserver.yml
-        - vmware_node_scenarios:
-            - scenarios/openshift/vmware_node_scenarios.yml
-        - ibmcloud_node_scenarios:
-            - scenarios/openshift/ibmcloud_node_scenarios.yml
        - node_scenarios:                                  # List of chaos node scenarios to load
            - scenarios/openshift/aws_node_scenarios.yml
+            - scenarios/openshift/vmware_node_scenarios.yml
+            - scenarios/openshift/ibmcloud_node_scenarios.yml
        - time_scenarios:                                  # List of chaos time scenarios to load
            - scenarios/openshift/time_scenarios_example.yml
        - cluster_shut_down_scenarios:
--- a/krkn/scenario_plugins/native/native_scenario_plugin.py
+++ b/krkn/scenario_plugins/native/native_scenario_plugin.py
@@ -49,7 +49,6 @@ class NativeScenarioPlugin(AbstractScenarioPlugin):
        return [
            "pod_disruption_scenarios",
            "pod_network_scenarios",
-            "ibmcloud_node_scenarios",
        ]

    def start_monitoring(self, pool: PodsMonitorPool, scenarios: list[Any]):
--- a/krkn/scenario_plugins/native/node_scenarios/ibmcloud_plugin.py
+++ b/krkn/scenario_plugins/native/node_scenarios/ibmcloud_plugin.py
@@ -1,589 +0,0 @@
-#!/usr/bin/env python
-import time
-import typing
-from os import environ
-from dataclasses import dataclass, field
-from traceback import format_exc
-import logging
-from krkn.scenario_plugins.native.node_scenarios import (
-    kubernetes_functions as kube_helper,
-)
-from arcaflow_plugin_sdk import validation, plugin
-from kubernetes import client, watch
-from ibm_vpc import VpcV1
-from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
-import sys
-
-
-class IbmCloud:
-    def __init__(self):
-        """
-        Initialize the ibm cloud client by using the the env variables:
-            'IBMC_APIKEY' 'IBMC_URL'
-        """
-        apiKey = environ.get("IBMC_APIKEY")
-        service_url = environ.get("IBMC_URL")
-        if not apiKey:
-            raise Exception("Environmental variable 'IBMC_APIKEY' is not set")
-        if not service_url:
-            raise Exception("Environmental variable 'IBMC_URL' is not set")
-        try:
-            authenticator = IAMAuthenticator(apiKey)
-            self.service = VpcV1(authenticator=authenticator)
-
-            self.service.set_service_url(service_url)
-        except Exception as e:
-            logging.error("error authenticating" + str(e))
-
-
-    # Get the instance ID of the node
-    def get_instance_id(self, node_name):
-        node_list = self.list_instances()
-        for node in node_list:
-            if node_name == node["vpc_name"]:
-                return node["vpc_id"]
-        logging.error("Couldn't find node with name " + str(node_name) + ", you could try another region")
-        sys.exit(1)
-
-    def delete_instance(self, instance_id):
-        """
-        Deletes the Instance whose name is given by 'instance_id'
-        """
-        try:
-            self.service.delete_instance(instance_id)
-            logging.info("Deleted Instance -- '{}'".format(instance_id))
-        except Exception as e:
-            logging.info("Instance '{}' could not be deleted. ".format(instance_id))
-            return False
-
-    def reboot_instances(self, instance_id):
-        """
-        Reboots the Instance whose name is given by 'instance_id'. Returns True if successful, or
-        returns False if the Instance is not powered on
-        """
-
-        try:
-            self.service.create_instance_action(
-                instance_id,
-                type="reboot",
-            )
-            logging.info("Reset Instance -- '{}'".format(instance_id))
-            return True
-        except Exception as e:
-            logging.info("Instance '{}' could not be rebooted".format(instance_id))
-            return False
-
-    def stop_instances(self, instance_id):
-        """
-        Stops the Instance whose name is given by 'instance_id'. Returns True if successful, or
-        returns False if the Instance is already stopped
-        """
-
-        try:
-            self.service.create_instance_action(
-                instance_id,
-                type="stop",
-            )
-            logging.info("Stopped Instance -- '{}'".format(instance_id))
-            return True
-        except Exception as e:
-            logging.info("Instance '{}' could not be stopped".format(instance_id))
-            logging.info("error" + str(e))
-            return False
-
-    def start_instances(self, instance_id):
-        """
-        Stops the Instance whose name is given by 'instance_id'. Returns True if successful, or
-        returns False if the Instance is already running
-        """
-
-        try:
-            self.service.create_instance_action(
-                instance_id,
-                type="start",
-            )
-            logging.info("Started Instance -- '{}'".format(instance_id))
-            return True
-        except Exception as e:
-            logging.info("Instance '{}' could not start running".format(instance_id))
-            return False
-
-    def list_instances(self):
-        """
-        Returns a list of Instances present in the datacenter
-        """
-        instance_names = []
-        try:
-            instances_result = self.service.list_instances().get_result()
-            instances_list = instances_result["instances"]
-            for vpc in instances_list:
-                instance_names.append({"vpc_name": vpc["name"], "vpc_id": vpc["id"]})
-            starting_count = instances_result["total_count"]
-            while instances_result["total_count"] == instances_result["limit"]:
-                instances_result = self.service.list_instances(
-                    start=starting_count
-                ).get_result()
-                instances_list = instances_result["instances"]
-                starting_count += instances_result["total_count"]
-                for vpc in instances_list:
-                    instance_names.append({"vpc_name": vpc.name, "vpc_id": vpc.id})
-        except Exception as e:
-            logging.error("Error listing out instances: " + str(e))
-            sys.exit(1)
-        return instance_names
-
-    def find_id_in_list(self, name, vpc_list):
-        for vpc in vpc_list:
-            if vpc["vpc_name"] == name:
-                return vpc["vpc_id"]
-
-    def get_instance_status(self, instance_id):
-        """
-        Returns the status of the Instance whose name is given by 'instance_id'
-        """
-
-        try:
-            instance = self.service.get_instance(instance_id).get_result()
-            state = instance["status"]
-            return state
-        except Exception as e:
-            logging.error(
-                "Failed to get node instance status %s. Encountered following "
-                "exception: %s." % (instance_id, e)
-            )
-            return None
-
-    def wait_until_deleted(self, instance_id, timeout):
-        """
-        Waits until the instance is deleted or until the timeout. Returns True if
-        the instance is successfully deleted, else returns False
-        """
-
-        time_counter = 0
-        vpc = self.get_instance_status(instance_id)
-        while vpc is not None:
-            vpc = self.get_instance_status(instance_id)
-            logging.info(
-                "Instance %s is still being deleted, sleeping for 5 seconds"
-                % instance_id
-            )
-            time.sleep(5)
-            time_counter += 5
-            if time_counter >= timeout:
-                logging.info(
-                    "Instance %s is still not deleted in allotted time" % instance_id
-                )
-                return False
-        return True
-
-    def wait_until_running(self, instance_id, timeout):
-        """
-        Waits until the Instance switches to running state or until the timeout.
-        Returns True if the Instance switches to running, else returns False
-        """
-
-        time_counter = 0
-        status = self.get_instance_status(instance_id)
-        while status != "running":
-            status = self.get_instance_status(instance_id)
-            logging.info(
-                "Instance %s is still not running, sleeping for 5 seconds" % instance_id
-            )
-            time.sleep(5)
-            time_counter += 5
-            if time_counter >= timeout:
-                logging.info(
-                    "Instance %s is still not ready in allotted time" % instance_id
-                )
-                return False
-        return True
-
-    def wait_until_stopped(self, instance_id, timeout):
-        """
-        Waits until the Instance switches to stopped state or until the timeout.
-        Returns True if the Instance switches to stopped, else returns False
-        """
-
-        time_counter = 0
-        status = self.get_instance_status(instance_id)
-        while status != "stopped":
-            status = self.get_instance_status(instance_id)
-            logging.info(
-                "Instance %s is still not stopped, sleeping for 5 seconds" % instance_id
-            )
-            time.sleep(5)
-            time_counter += 5
-            if time_counter >= timeout:
-                logging.info(
-                    "Instance %s is still not stopped in allotted time" % instance_id
-                )
-                return False
-        return True
-
-    def wait_until_rebooted(self, instance_id, timeout):
-        """
-        Waits until the Instance switches to restarting state and then running state or until the timeout.
-        Returns True if the Instance switches back to running, else returns False
-        """
-
-        time_counter = 0
-        status = self.get_instance_status(instance_id)
-        while status == "starting":
-            status = self.get_instance_status(instance_id)
-            logging.info(
-                "Instance %s is still restarting, sleeping for 5 seconds" % instance_id
-            )
-            time.sleep(5)
-            time_counter += 5
-            if time_counter >= timeout:
-                logging.info(
-                    "Instance %s is still restarting after allotted time" % instance_id
-                )
-                return False
-        self.wait_until_running(instance_id, timeout)
-        return True
-
-
-@dataclass
-class Node:
-    name: str
-
-
-@dataclass
-class NodeScenarioSuccessOutput:
-
-    nodes: typing.Dict[int, Node] = field(
-        metadata={
-            "name": "Nodes started/stopped/terminated/rebooted",
-            "description": """Map between timestamps and the pods started/stopped/terminated/rebooted.
-                        The timestamp is provided in nanoseconds""",
-        }
-    )
-    action: kube_helper.Actions = field(
-        metadata={
-            "name": "The action performed on the node",
-            "description": """The action performed or attempted to be performed on the node. Possible values
-                        are : Start, Stop, Terminate, Reboot""",
-        }
-    )
-
-
-@dataclass
-class NodeScenarioErrorOutput:
-
-    error: str
-    action: kube_helper.Actions = field(
-        metadata={
-            "name": "The action performed on the node",
-            "description": """The action attempted to be performed on the node. Possible values are : Start
-                        Stop, Terminate, Reboot""",
-        }
-    )
-
-
-@dataclass
-class NodeScenarioConfig:
-
-    name: typing.Annotated[
-        typing.Optional[str],
-        validation.required_if_not("label_selector"),
-        validation.required_if("skip_openshift_checks"),
-    ] = field(
-        default=None,
-        metadata={
-            "name": "Name",
-            "description": "Name(s) for target nodes. Required if label_selector is not set.",
-        },
-    )
-
-    runs: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
-        default=1,
-        metadata={
-            "name": "Number of runs per node",
-            "description": "Number of times to inject each scenario under actions (will perform on same node each time)",
-        },
-    )
-
-    label_selector: typing.Annotated[
-        typing.Optional[str], validation.min(1), validation.required_if_not("name")
-    ] = field(
-        default=None,
-        metadata={
-            "name": "Label selector",
-            "description": "Kubernetes label selector for the target nodes. Required if name is not set.\n"
-            "See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ for details.",
-        },
-    )
-
-    timeout: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
-        default=180,
-        metadata={
-            "name": "Timeout",
-            "description": "Timeout to wait for the target pod(s) to be removed in seconds.",
-        },
-    )
-
-    instance_count: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
-        default=1,
-        metadata={
-            "name": "Instance Count",
-            "description": "Number of nodes to perform action/select that match the label selector.",
-        },
-    )
-
-    skip_openshift_checks: typing.Optional[bool] = field(
-        default=False,
-        metadata={
-            "name": "Skip Openshift Checks",
-            "description": "Skip checking the status of the openshift nodes.",
-        },
-    )
-
-    kubeconfig_path: typing.Optional[str] = field(
-        default=None,
-        metadata={
-            "name": "Kubeconfig path",
-            "description": "Path to your Kubeconfig file. Defaults to ~/.kube/config.\n"
-            "See https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for "
-            "details.",
-        },
-    )
-
-
-@plugin.step(
-    id="ibmcloud-node-start",
-    name="Start the node",
-    description="Start the node(s) by starting the Ibmcloud Instance on which the node is configured",
-    outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
-)
-def node_start(
-    cfg: NodeScenarioConfig,
-) -> typing.Tuple[
-    str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
-]:
-    with kube_helper.setup_kubernetes(None) as cli:
-        ibmcloud = IbmCloud()
-        core_v1 = client.CoreV1Api(cli)
-        watch_resource = watch.Watch()
-        node_list = kube_helper.get_node_list(cfg, kube_helper.Actions.START, core_v1)
-        node_name_id_list = ibmcloud.list_instances()
-        nodes_started = {}
-        for name in node_list:
-            try:
-                for _ in range(cfg.runs):
-                    logging.info("Starting node_start_scenario injection")
-                    logging.info("Starting the node %s " % (name))
-                    instance_id = ibmcloud.find_id_in_list(name, node_name_id_list)
-                    if instance_id:
-                        vm_started = ibmcloud.start_instances(instance_id)
-                        if vm_started:
-                            ibmcloud.wait_until_running(instance_id, cfg.timeout)
-                            if not cfg.skip_openshift_checks:
-                                kube_helper.wait_for_ready_status(
-                                    name, cfg.timeout, watch_resource, core_v1
-                                )
-                            nodes_started[int(time.time_ns())] = Node(name=name)
-                        logging.info(
-                            "Node with instance ID: %s is in running state" % name
-                        )
-                        logging.info(
-                            "node_start_scenario has been successfully injected!"
-                        )
-                    else:
-                        logging.error(
-                            "Failed to find node that matched instances on ibm cloud in region"
-                        )
-                        return "error", NodeScenarioErrorOutput(
-                            "No matching vpc with node name " + name,
-                            kube_helper.Actions.START,
-                        )
-            except Exception as e:
-                logging.error("Failed to start node instance. Test Failed")
-                logging.error("node_start_scenario injection failed!")
-                return "error", NodeScenarioErrorOutput(
-                    format_exc(), kube_helper.Actions.START
-                )
-
-    return "success", NodeScenarioSuccessOutput(
-        nodes_started, kube_helper.Actions.START
-    )
-
-
-@plugin.step(
-    id="ibmcloud-node-stop",
-    name="Stop the node",
-    description="Stop the node(s) by starting the Ibmcloud Instance on which the node is configured",
-    outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
-)
-def node_stop(
-    cfg: NodeScenarioConfig,
-) -> typing.Tuple[
-    str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
-]:
-    with kube_helper.setup_kubernetes(None) as cli:
-        ibmcloud = IbmCloud()
-        core_v1 = client.CoreV1Api(cli)
-        watch_resource = watch.Watch()
-        logging.info("set up done")
-        node_list = kube_helper.get_node_list(cfg, kube_helper.Actions.STOP, core_v1)
-        logging.info("set node list" + str(node_list))
-        node_name_id_list = ibmcloud.list_instances()
-        logging.info("node names" + str(node_name_id_list))
-        nodes_stopped = {}
-        for name in node_list:
-            try:
-                for _ in range(cfg.runs):
-                    logging.info("Starting node_stop_scenario injection")
-                    logging.info("Stopping the node %s " % (name))
-                    instance_id = ibmcloud.find_id_in_list(name, node_name_id_list)
-                    if instance_id:
-                        vm_stopped = ibmcloud.stop_instances(instance_id)
-                        if vm_stopped:
-                            ibmcloud.wait_until_stopped(instance_id, cfg.timeout)
-                            if not cfg.skip_openshift_checks:
-                                kube_helper.wait_for_ready_status(
-                                    name, cfg.timeout, watch_resource, core_v1
-                                )
-                            nodes_stopped[int(time.time_ns())] = Node(name=name)
-                        logging.info(
-                            "Node with instance ID: %s is in stopped state" % name
-                        )
-                        logging.info(
-                            "node_stop_scenario has been successfully injected!"
-                        )
-                    else:
-                        logging.error(
-                            "Failed to find node that matched instances on ibm cloud in region"
-                        )
-                        return "error", NodeScenarioErrorOutput(
-                            "No matching vpc with node name " + name,
-                            kube_helper.Actions.STOP,
-                        )
-            except Exception as e:
-                logging.error("Failed to stop node instance. Test Failed")
-                logging.error("node_stop_scenario injection failed!")
-                return "error", NodeScenarioErrorOutput(
-                    format_exc(), kube_helper.Actions.STOP
-                )
-
-        return "success", NodeScenarioSuccessOutput(
-            nodes_stopped, kube_helper.Actions.STOP
-        )
-
-
-@plugin.step(
-    id="ibmcloud-node-reboot",
-    name="Reboot Ibmcloud Instance",
-    description="Reboot the node(s) by starting the Ibmcloud Instance on which the node is configured",
-    outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
-)
-def node_reboot(
-    cfg: NodeScenarioConfig,
-) -> typing.Tuple[
-    str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
-]:
-    with kube_helper.setup_kubernetes(None) as cli:
-        ibmcloud = IbmCloud()
-        core_v1 = client.CoreV1Api(cli)
-        watch_resource = watch.Watch()
-        node_list = kube_helper.get_node_list(cfg, kube_helper.Actions.REBOOT, core_v1)
-        node_name_id_list = ibmcloud.list_instances()
-        nodes_rebooted = {}
-        for name in node_list:
-            try:
-                for _ in range(cfg.runs):
-                    logging.info("Starting node_reboot_scenario injection")
-                    logging.info("Rebooting the node %s " % (name))
-                    instance_id = ibmcloud.find_id_in_list(name, node_name_id_list)
-                    if instance_id:
-                        ibmcloud.reboot_instances(instance_id)
-                        ibmcloud.wait_until_rebooted(instance_id, cfg.timeout)
-                        if not cfg.skip_openshift_checks:
-                            kube_helper.wait_for_unknown_status(
-                                name, cfg.timeout, watch_resource, core_v1
-                            )
-                            kube_helper.wait_for_ready_status(
-                                name, cfg.timeout, watch_resource, core_v1
-                            )
-                        nodes_rebooted[int(time.time_ns())] = Node(name=name)
-                        logging.info(
-                            "Node with instance ID: %s has rebooted successfully" % name
-                        )
-                        logging.info(
-                            "node_reboot_scenario has been successfully injected!"
-                        )
-                    else:
-                        logging.error(
-                            "Failed to find node that matched instances on ibm cloud in region"
-                        )
-                        return "error", NodeScenarioErrorOutput(
-                            "No matching vpc with node name " + name,
-                            kube_helper.Actions.REBOOT,
-                        )
-            except Exception as e:
-                logging.error("Failed to reboot node instance. Test Failed")
-                logging.error("node_reboot_scenario injection failed!")
-                return "error", NodeScenarioErrorOutput(
-                    format_exc(), kube_helper.Actions.REBOOT
-                )
-
-    return "success", NodeScenarioSuccessOutput(
-        nodes_rebooted, kube_helper.Actions.REBOOT
-    )
-
-
-@plugin.step(
-    id="ibmcloud-node-terminate",
-    name="Reboot Ibmcloud Instance",
-    description="Wait for node to be deleted",
-    outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
-)
-def node_terminate(
-    cfg: NodeScenarioConfig,
-) -> typing.Tuple[
-    str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
-]:
-    with kube_helper.setup_kubernetes(None) as cli:
-        ibmcloud = IbmCloud()
-        core_v1 = client.CoreV1Api(cli)
-        node_list = kube_helper.get_node_list(
-            cfg, kube_helper.Actions.TERMINATE, core_v1
-        )
-        node_name_id_list = ibmcloud.list_instances()
-        nodes_terminated = {}
-        for name in node_list:
-            try:
-                for _ in range(cfg.runs):
-                    logging.info(
-                        "Starting node_termination_scenario injection by first stopping the node"
-                    )
-                    instance_id = ibmcloud.find_id_in_list(name, node_name_id_list)
-                    logging.info("Deleting the node with instance ID: %s " % (name))
-                    if instance_id:
-                        ibmcloud.delete_instance(instance_id)
-                        ibmcloud.wait_until_released(name, cfg.timeout)
-                        nodes_terminated[int(time.time_ns())] = Node(name=name)
-                        logging.info(
-                            "Node with instance ID: %s has been released" % name
-                        )
-                        logging.info(
-                            "node_terminate_scenario has been successfully injected!"
-                        )
-                    else:
-                        logging.error(
-                            "Failed to find instances that matched the node specifications on ibm cloud in the set region"
-                        )
-                        return "error", NodeScenarioErrorOutput(
-                            "No matching vpc with node name " + name,
-                            kube_helper.Actions.TERMINATE,
-                        )
-            except Exception as e:
-                logging.error("Failed to terminate node instance. Test Failed")
-                logging.error("node_terminate_scenario injection failed!")
-                return "error", NodeScenarioErrorOutput(
-                    format_exc(), kube_helper.Actions.TERMINATE
-                )
-
-    return "success", NodeScenarioSuccessOutput(
-        nodes_terminated, kube_helper.Actions.TERMINATE
-    )
--- a/krkn/scenario_plugins/native/node_scenarios/kubernetes_functions.py
+++ b/krkn/scenario_plugins/native/node_scenarios/kubernetes_functions.py
@@ -1,179 +0,0 @@
-from kubernetes import config, client
-from kubernetes.client.rest import ApiException
-import logging
-import random
-from enum import Enum
-
-
-class Actions(Enum):
-    """
-    This enumeration indicates different kinds of node operations
-    """
-
-    START = "Start"
-    STOP = "Stop"
-    TERMINATE = "Terminate"
-    REBOOT = "Reboot"
-
-
-def setup_kubernetes(kubeconfig_path):
-    """
-    Sets up the Kubernetes client
-    """
-
-    if kubeconfig_path is None:
-        kubeconfig_path = config.KUBE_CONFIG_DEFAULT_LOCATION
-    kubeconfig = config.kube_config.KubeConfigMerger(kubeconfig_path)
-
-    if kubeconfig.config is None:
-        raise Exception(
-            "Invalid kube-config file: %s. " "No configuration found." % kubeconfig_path
-        )
-    loader = config.kube_config.KubeConfigLoader(
-        config_dict=kubeconfig.config,
-    )
-    client_config = client.Configuration()
-    loader.load_and_set(client_config)
-    return client.ApiClient(configuration=client_config)
-
-
-def list_killable_nodes(core_v1, label_selector=None):
-    """
-    Returns a list of nodes that can be stopped/reset/released
-    """
-
-    nodes = []
-    try:
-        if label_selector:
-            ret = core_v1.list_node(pretty=True, label_selector=label_selector)
-        else:
-            ret = core_v1.list_node(pretty=True)
-    except ApiException as e:
-        logging.error("Exception when calling CoreV1Api->list_node: %s\n" % e)
-        raise e
-    for node in ret.items:
-        for cond in node.status.conditions:
-            if str(cond.type) == "Ready" and str(cond.status) == "True":
-                nodes.append(node.metadata.name)
-    return nodes
-
-
-def list_startable_nodes(core_v1, label_selector=None):
-    """
-    Returns a list of nodes that can be started
-    """
-
-    nodes = []
-    try:
-        if label_selector:
-            ret = core_v1.list_node(pretty=True, label_selector=label_selector)
-        else:
-            ret = core_v1.list_node(pretty=True)
-    except ApiException as e:
-        logging.error("Exception when calling CoreV1Api->list_node: %s\n" % e)
-        raise e
-    for node in ret.items:
-        for cond in node.status.conditions:
-            if str(cond.type) == "Ready" and str(cond.status) != "True":
-                nodes.append(node.metadata.name)
-    return nodes
-
-
-def get_node_list(cfg, action, core_v1):
-    """
-    Returns a list of nodes to be used in the node scenarios. The list returned is constructed as follows:
-        - If the key 'name' is present in the node scenario config, the value is extracted and split into
-          a list
-        - Each node in the list is fed to the get_node function which checks if the node is killable or
-          fetches the node using the label selector
-    """
-
-    def get_node(node_name, label_selector, instance_kill_count, action, core_v1):
-        list_nodes_func = (
-            list_startable_nodes if action == Actions.START else list_killable_nodes
-        )
-        if node_name in list_nodes_func(core_v1):
-            return [node_name]
-        elif node_name:
-            logging.info(
-                "Node with provided node_name does not exist or the node might "
-                "be in NotReady state."
-            )
-        nodes = list_nodes_func(core_v1, label_selector)
-        if not nodes:
-            raise Exception("Ready nodes with the provided label selector do not exist")
-        logging.info(
-            "Ready nodes with the label selector %s: %s" % (label_selector, nodes)
-        )
-        number_of_nodes = len(nodes)
-        if instance_kill_count == number_of_nodes:
-            return nodes
-        nodes_to_return = []
-        for i in range(instance_kill_count):
-            node_to_add = nodes[random.randint(0, len(nodes) - 1)]
-            nodes_to_return.append(node_to_add)
-            nodes.remove(node_to_add)
-        return nodes_to_return
-
-    if cfg.name:
-        input_nodes = cfg.name.split(",")
-    else:
-        input_nodes = [""]
-    scenario_nodes = set()
-
-    if cfg.skip_openshift_checks:
-        scenario_nodes = input_nodes
-    else:
-        for node in input_nodes:
-            nodes = get_node(
-                node, cfg.label_selector, cfg.instance_count, action, core_v1
-            )
-            scenario_nodes.update(nodes)
-
-    return list(scenario_nodes)
-
-
-def watch_node_status(node, status, timeout, watch_resource, core_v1):
-    """
-    Monitor the status of a node for change
-    """
-    count = timeout
-    for event in watch_resource.stream(
-        core_v1.list_node,
-        field_selector=f"metadata.name={node}",
-        timeout_seconds=timeout,
-    ):
-        conditions = [
-            status
-            for status in event["object"].status.conditions
-            if status.type == "Ready"
-        ]
-        if conditions[0].status == status:
-            watch_resource.stop()
-            break
-        else:
-            count -= 1
-            logging.info("Status of node " + node + ": " + str(conditions[0].status))
-        if not count:
-            watch_resource.stop()
-
-
-def wait_for_ready_status(node, timeout, watch_resource, core_v1):
-    """
-    Wait until the node status becomes Ready
-    """
-    watch_node_status(node, "True", timeout, watch_resource, core_v1)
-
-
-def wait_for_not_ready_status(node, timeout, watch_resource, core_v1):
-    """
-    Wait until the node status becomes Not Ready
-    """
-    watch_node_status(node, "False", timeout, watch_resource, core_v1)
-
-
-def wait_for_unknown_status(node, timeout, watch_resource, core_v1):
-    """
-    Wait until the node status becomes Unknown
-    """
-    watch_node_status(node, "Unknown", timeout, watch_resource, core_v1)
--- a/krkn/scenario_plugins/native/plugins.py
+++ b/krkn/scenario_plugins/native/plugins.py
@@ -12,7 +12,6 @@ from krkn.scenario_plugins.native.pod_network_outage.pod_network_outage_plugin i
 from krkn.scenario_plugins.native.pod_network_outage.pod_network_outage_plugin import (
    pod_egress_shaping,
 )
-import krkn.scenario_plugins.native.node_scenarios.ibmcloud_plugin as ibmcloud_plugin
 from krkn.scenario_plugins.native.pod_network_outage.pod_network_outage_plugin import (
    pod_ingress_shaping,
 )
@@ -157,10 +156,6 @@ PLUGINS = Plugins(
        ),
        PluginStep(wait_for_pods, ["error"]),
        PluginStep(run_python_file, ["error"]),
-        PluginStep(ibmcloud_plugin.node_start, ["error"]),
-        PluginStep(ibmcloud_plugin.node_stop, ["error"]),
-        PluginStep(ibmcloud_plugin.node_reboot, ["error"]),
-        PluginStep(ibmcloud_plugin.node_terminate, ["error"]),
        PluginStep(network_chaos, ["error"]),
        PluginStep(pod_outage, ["error"]),
        PluginStep(pod_egress_shaping, ["error"]),
--- a/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python
+import time
+import typing
+from os import environ
+from dataclasses import dataclass, field
+from traceback import format_exc
+import logging
+
+from krkn_lib.k8s import KrknKubernetes
+import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
+from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
+    abstract_node_scenarios,
+)
+from kubernetes import client, watch
+from ibm_vpc import VpcV1
+from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
+import sys
+
+from krkn_lib.models.k8s import AffectedNodeStatus, AffectedNode
+
+
+class IbmCloud:
+    def __init__(self):
+        """
+        Initialize the IBM Cloud client using the env variables:
+            'IBMC_APIKEY' 'IBMC_URL'
+        """
+        apiKey = environ.get("IBMC_APIKEY")
+        service_url = environ.get("IBMC_URL")
+        if not apiKey:
+            raise Exception("Environmental variable 'IBMC_APIKEY' is not set")
+        if not service_url:
+            raise Exception("Environmental variable 'IBMC_URL' is not set")
+        try:
+            authenticator = IAMAuthenticator(apiKey)
+            self.service = VpcV1(authenticator=authenticator)
+
+            self.service.set_service_url(service_url)
+        except Exception as e:
+            logging.error("error authenticating" + str(e))
+
+
+    # Get the instance ID of the node
+    def get_instance_id(self, node_name):
+        node_list = self.list_instances()
+        for node in node_list:
+            if node_name == node["vpc_name"]:
+                return node["vpc_id"]
+        logging.error("Couldn't find node with name " + str(node_name) + ", you could try another region")
+        sys.exit(1)
+
+    def delete_instance(self, instance_id):
+        """
+        Deletes the Instance whose name is given by 'instance_id'
+        """
+        try:
+            self.service.delete_instance(instance_id)
+            logging.info("Deleted Instance -- '{}'".format(instance_id))
+        except Exception as e:
+            logging.info("Instance '{}' could not be deleted. ".format(instance_id))
+            return False
+
+    def reboot_instances(self, instance_id):
+        """
+        Reboots the Instance whose name is given by 'instance_id'. Returns True if successful, or
+        returns False if the Instance is not powered on
+        """
+
+        try:
+            self.service.create_instance_action(
+                instance_id,
+                type="reboot",
+            )
+            logging.info("Reset Instance -- '{}'".format(instance_id))
+            return True
+        except Exception as e:
+            logging.info("Instance '{}' could not be rebooted".format(instance_id))
+            return False
+
+    def stop_instances(self, instance_id):
+        """
+        Stops the Instance whose name is given by 'instance_id'. Returns True if successful, or
+        returns False if the Instance is already stopped
+        """
+
+        try:
+            self.service.create_instance_action(
+                instance_id,
+                type="stop",
+            )
+            logging.info("Stopped Instance -- '{}'".format(instance_id))
+            return True
+        except Exception as e:
+            logging.info("Instance '{}' could not be stopped".format(instance_id))
+            logging.info("error" + str(e))
+            return False
+
+    def start_instances(self, instance_id):
+        """
+        Starts the Instance whose name is given by 'instance_id'. Returns True if successful, or
+        returns False if the Instance is already running
+        """
+
+        try:
+            self.service.create_instance_action(
+                instance_id,
+                type="start",
+            )
+            logging.info("Started Instance -- '{}'".format(instance_id))
+            return True
+        except Exception as e:
+            logging.info("Instance '{}' could not start running".format(instance_id))
+            return False
+
+    def list_instances(self):
+        """
+        Returns a list of Instances present in the datacenter
+        """
+        instance_names = []
+        try:
+            instances_result = self.service.list_instances().get_result()
+            instances_list = instances_result["instances"]
+            for vpc in instances_list:
+                instance_names.append({"vpc_name": vpc["name"], "vpc_id": vpc["id"]})
+            starting_count = instances_result["total_count"]
+            while instances_result["total_count"] == instances_result["limit"]:
+                instances_result = self.service.list_instances(
+                    start=starting_count
+                ).get_result()
+                instances_list = instances_result["instances"]
+                starting_count += instances_result["total_count"]
+                for vpc in instances_list:
+                    instance_names.append({"vpc_name": vpc.name, "vpc_id": vpc.id})
+        except Exception as e:
+            logging.error("Error listing out instances: " + str(e))
+            sys.exit(1)
+        return instance_names
+
+    def find_id_in_list(self, name, vpc_list):
+        for vpc in vpc_list:
+            if vpc["vpc_name"] == name:
+                return vpc["vpc_id"]
+
+    def get_instance_status(self, instance_id):
+        """
+        Returns the status of the Instance whose name is given by 'instance_id'
+        """
+
+        try:
+            instance = self.service.get_instance(instance_id).get_result()
+            state = instance["status"]
+            return state
+        except Exception as e:
+            logging.error(
+                "Failed to get node instance status %s. Encountered following "
+                "exception: %s." % (instance_id, e)
+            )
+            return None
+
+    def wait_until_deleted(self, instance_id, timeout, affected_node=None):
+        """
+        Waits until the instance is deleted or until the timeout. Returns True if
+        the instance is successfully deleted, else returns False
+        """
+        start_time = time.time()
+        time_counter = 0
+        vpc = self.get_instance_status(instance_id)
+        while vpc is not None:
+            vpc = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still being deleted, sleeping for 5 seconds"
+                % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still not deleted in allotted time" % instance_id
+                )
+                return False
+        end_time = time.time()
+        if affected_node:
+            affected_node.set_affected_node_status("terminated", end_time - start_time)
+        return True
+
+    def wait_until_running(self, instance_id, timeout, affected_node=None):
+        """
+        Waits until the Instance switches to running state or until the timeout.
+        Returns True if the Instance switches to running, else returns False
+        """
+        start_time = time.time()
+        time_counter = 0
+        status = self.get_instance_status(instance_id)
+        while status != "running":
+            status = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still not running, sleeping for 5 seconds" % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still not ready in allotted time" % instance_id
+                )
+                return False
+        end_time = time.time()
+        if affected_node:
+            affected_node.set_affected_node_status("running", end_time - start_time)
+        return True
+
+    def wait_until_stopped(self, instance_id, timeout, affected_node):
+        """
+        Waits until the Instance switches to stopped state or until the timeout.
+        Returns True if the Instance switches to stopped, else returns False
+        """
+        start_time = time.time()
+        time_counter = 0
+        status = self.get_instance_status(instance_id)
+        while status != "stopped":
+            status = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still not stopped, sleeping for 5 seconds" % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still not stopped in allotted time" % instance_id
+                )
+                return False
+        end_time = time.time()
+        if affected_node:
+            affected_node.set_affected_node_status("stopped", end_time - start_time)
+        return True
+
+
+    def wait_until_rebooted(self, instance_id, timeout, affected_node):
+        """
+        Waits until the Instance switches to restarting state and then running state or until the timeout.
+        Returns True if the Instance switches back to running, else returns False
+        """
+
+        time_counter = 0
+        status = self.get_instance_status(instance_id)
+        while status == "starting":
+            status = self.get_instance_status(instance_id)
+            logging.info(
+                "Instance %s is still restarting, sleeping for 5 seconds" % instance_id
+            )
+            time.sleep(5)
+            time_counter += 5
+            if time_counter >= timeout:
+                logging.info(
+                    "Instance %s is still restarting after allotted time" % instance_id
+                )
+                return False
+        self.wait_until_running(instance_id, timeout, affected_node)
+        return True
+
+
+@dataclass
+class ibm_node_scenarios(abstract_node_scenarios):
+    def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
+        super().__init__(kubecli, affected_nodes_status)
+        self.ibmcloud = IbmCloud()
+
+    def node_start_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud.get_instance_id( node)
+            affected_node = AffectedNode(node, node_id=instance_id)
+            for _ in range(instance_kill_count):
+                logging.info("Starting node_start_scenario injection")
+                logging.info("Starting the node %s " % (node))
+                
+                if instance_id:
+                    vm_started = self.ibmcloud.start_instances(instance_id)
+                    if vm_started:
+                        self.ibmcloud.wait_until_running(instance_id, timeout, affected_node)
+                        nodeaction.wait_for_ready_status(
+                            node, timeout, self.kubecli, affected_node
+                        )
+                    logging.info(
+                        "Node with instance ID: %s is in running state" % node
+                    )
+                    logging.info(
+                        "node_start_scenario has been successfully injected!"
+                    )
+                else:
+                    logging.error(
+                        "Failed to find node that matched instances on ibm cloud in region"
+                    )
+
+        except Exception as e:
+            logging.error("Failed to start node instance. Test Failed")
+            logging.error("node_start_scenario injection failed!")
+        self.affected_nodes_status.affected_nodes.append(affected_node)
+
+
+    def node_stop_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud.get_instance_id(node)
+            for _ in range(instance_kill_count):
+                affected_node = AffectedNode(node, instance_id)
+                logging.info("Starting node_stop_scenario injection")
+                logging.info("Stopping the node %s " % (node))
+                vm_stopped = self.ibmcloud.stop_instances(instance_id)
+                if vm_stopped:
+                    self.ibmcloud.wait_until_stopped(instance_id, timeout, affected_node)
+                logging.info(
+                    "Node with instance ID: %s is in stopped state" % node
+                )
+                logging.info(
+                    "node_stop_scenario has been successfully injected!"
+                )
+        except Exception as e:
+            logging.error("Failed to stop node instance. Test Failed")
+            logging.error("node_stop_scenario injection failed!")
+
+
+    def node_reboot_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud.get_instance_id(node)
+            for _ in range(instance_kill_count):
+                affected_node = AffectedNode(node, node_id=instance_id)
+                logging.info("Starting node_reboot_scenario injection")
+                logging.info("Rebooting the node %s " % (node))
+                self.ibmcloud.reboot_instances(instance_id)
+                self.ibmcloud.wait_until_rebooted(instance_id, timeout)
+                nodeaction.wait_for_unknown_status(
+                    node, timeout, affected_node
+                )
+                nodeaction.wait_for_ready_status(
+                    node, timeout, affected_node
+                )
+                logging.info(
+                    "Node with instance ID: %s has rebooted successfully" % node
+                )
+                logging.info(
+                    "node_reboot_scenario has been successfully injected!"
+                )
+
+        except Exception as e:
+            logging.error("Failed to reboot node instance. Test Failed")
+            logging.error("node_reboot_scenario injection failed!")
+
+
+    def node_terminate_scenario(self, instance_kill_count, node, timeout):
+        try:
+            instance_id = self.ibmcloud.get_instance_id(node)
+            for _ in range(instance_kill_count):
+                affected_node = AffectedNode(node, node_id=instance_id)
+                logging.info(
+                    "Starting node_termination_scenario injection by first stopping the node"
+                )
+                logging.info("Deleting the node with instance ID: %s " % (node))
+                self.ibmcloud.delete_instance(instance_id)
+                self.ibmcloud.wait_until_deleted(node, timeout, affected_node)
+                logging.info(
+                    "Node with instance ID: %s has been released" % node
+                )
+                logging.info(
+                    "node_terminate_scenario has been successfully injected!"
+                )
+        except Exception as e:
+            logging.error("Failed to terminate node instance. Test Failed")
+            logging.error("node_terminate_scenario injection failed!")
+
--- a/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py
+++ b/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py
@@ -23,7 +23,7 @@ from krkn.scenario_plugins.node_actions.general_cloud_node_scenarios import (
    general_node_scenarios,
 )
 from krkn.scenario_plugins.node_actions.vmware_node_scenarios import vmware_node_scenarios
-
+from krkn.scenario_plugins.node_actions.ibmcloud_node_scenarios import ibm_node_scenarios
 node_general = False


@@ -113,6 +113,11 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
            or node_scenario["cloud_type"].lower() == "vmware"
        ):
            return vmware_node_scenarios(kubecli, affected_nodes_status)
+        elif (
+            node_scenario["cloud_type"].lower() == "ibm"
+            or node_scenario["cloud_type"].lower() == "ibmcloud"
+        ):
+            return ibm_node_scenarios(kubecli, affected_nodes_status)
        else:
            logging.error(
                "Cloud type "
--- a/krkn/scenario_plugins/shut_down/shut_down_scenario_plugin.py
+++ b/krkn/scenario_plugins/shut_down/shut_down_scenario_plugin.py
@@ -13,7 +13,7 @@ from krkn.scenario_plugins.node_actions.aws_node_scenarios import AWS
 from krkn.scenario_plugins.node_actions.az_node_scenarios import Azure
 from krkn.scenario_plugins.node_actions.gcp_node_scenarios import GCP
 from krkn.scenario_plugins.node_actions.openstack_node_scenarios import OPENSTACKCLOUD
-from krkn.scenario_plugins.native.node_scenarios.ibmcloud_plugin import IbmCloud
+from krkn.scenario_plugins.node_actions.ibmcloud_node_scenarios import IbmCloud

 import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction

--- a/scenarios/openshift/ibmcloud_node_scenarios.yml
+++ b/scenarios/openshift/ibmcloud_node_scenarios.yml
@@ -1,10 +1,16 @@
-# yaml-language-server: $schema=../plugin.schema.json
- id: <ibmcloud-node-terminate/ibmcloud-node-reboot/ibmcloud-node-stop/ibmcloud-node-start>
-  config:
-    name: ""        
-    label_selector: "node-role.kubernetes.io/worker"    # When node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection 
-    runs: 1                             # Number of times to inject each scenario under actions (will perform on same node each time)                                                           
-    instance_count: 1                   # Number of nodes to perform action/select that match the label selector                                             
-    timeout: 360                         # Duration to wait for completion of node scenario injection
-    duration: 120                       # Duration to stop the node before running the start action 
-    skip_openshift_checks: False        # Set to True if you don't want to wait for the status of the nodes to change on OpenShift before passing the scenario 
+node_scenarios:
+  - actions:
+    - node_stop_start_scenario
+    node_name:
+    label_selector: node-role.kubernetes.io/worker
+    instance_count: 1
+    timeout: 360
+    duration: 120
+    cloud_type: ibm
+  - actions:
+    - node_reboot_scenario
+    node_name:
+    label_selector: node-role.kubernetes.io/worker
+    instance_count: 1
+    timeout: 120
+    cloud_type: ibm