Compare commits


22 Commits

Author SHA1 Message Date
Paige Patton
21ab8d475d adding vsphere updates to non native
Signed-off-by: Paige Patton <prubenda@redhat.com>
2025-01-31 15:21:48 -05:00
Tullio Sebastiani
b024cfde19 Hog scenario porting from arcaflow to native (#748)
* added new native hog scenario

* removed arcaflow dependency + legacy hog scenarios

* config update

* changed hog configuration structure + added average samples

* fix on cpu count

* removes tripledes warning

* changed selector format

* changed selector syntax

* number of nodes option

* documentation

* functional tests

* exception handling on hog deployment thread

Signed-off-by: Paige Patton <prubenda@redhat.com>
2025-01-31 13:45:59 -05:00
Tullio Sebastiani
c7e068a562 Hog scenario porting from arcaflow to native (#748)
* added new native hog scenario

* removed arcaflow dependency + legacy hog scenarios

* config update

* changed hog configuration structure + added average samples

* fix on cpu count

* removes tripledes warning

* changed selector format

* changed selector syntax

* number of nodes option

* documentation

* functional tests

* exception handling on hog deployment thread
2025-01-31 17:01:26 +01:00
Tullio Sebastiani
64cfd2ca4d fixes krknctl describe bug
2025-01-20 09:43:59 -05:00
Naga Ravi Chaitanya Elluri
9cb701a616 Convert thresholds to float
This is needed to avoid issues due to comparing two different data types:
TypeError: Invalid comparison between dtype=float64 and str. This commit also
avoids setting defaults for the thresholds to make it mandatory for the users
to define them as it plays a key role in determining the outliers.

Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>
2025-01-13 15:47:33 -05:00
dependabot[bot]
0372013b67 Bump jinja2 from 3.1.4 to 3.1.5 (#745)
Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5.
- [Release notes](https://github.com/pallets/jinja/releases)
- [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5)

---
updated-dependencies:
- dependency-name: jinja2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-08 09:54:26 +01:00
Tullio Sebastiani
4fea1a354d added krknctl types to krkn baseimage for global variables (#741)
* added krknctl types to krkn baseimage for global variables

fixed

* fixed dockerfile

* dockerfile compile script

fix
2025-01-07 10:12:37 -05:00
Pablo Méndez Hernández
667798d588 Change API from 'Google API Client' to 'Google Cloud Python Client' (#723)
* Document how to use Google's credentials associated with a user account

Signed-off-by: Pablo Méndez Hernández <pablomh@redhat.com>

* Change API from 'Google API Client' to 'Google Cloud Python Client'

According to the 'Google API Client' GH page:

```
This library is considered complete and is in maintenance mode. This means
that we will address critical bugs and security issues but will not add any
new features.

This library is officially supported by Google. However, the maintainers of
this repository recommend using Cloud Client Libraries for Python, where
possible, for new code development.
```

So change the code accordingly to adapt it to 'Google Cloud Python Client'.

Signed-off-by: Pablo Méndez Hernández <pablomh@redhat.com>

---------

Signed-off-by: Pablo Méndez Hernández <pablomh@redhat.com>
2024-12-12 22:34:45 -05:00
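For illustration only (not part of this changeset): the migration pattern the commit describes, moving a Compute Engine call from the discovery-based 'Google API Client' to the 'Google Cloud Python Client'. Project, zone, and instance names are placeholders.
```
# Before: 'Google API Client' (complete, in maintenance mode)
from googleapiclient import discovery

compute = discovery.build("compute", "v1")
compute.instances().stop(
    project="my-project", zone="us-central1-a", instance="node-1"
).execute()

# After: 'Google Cloud Python Client' (recommended for new code)
from google.cloud import compute_v1

client = compute_v1.InstancesClient()
client.stop(project="my-project", zone="us-central1-a", instance="node-1")
```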
jtydlack
0c30d89a1b Add node_disk_detach_attach_scenario for aws under node scenarios
Resolves #678

Signed-off-by: jtydlack <139967002+jtydlack@users.noreply.github.com>

Add functions for aws detach disk scenario

Signed-off-by: jtydlack <139967002+jtydlack@users.noreply.github.com>

Add detach disk scenario in node scenario

Signed-off-by: jtydlack <139967002+jtydlack@users.noreply.github.com>

Add disk_detach_attach_scenario in docs

Signed-off-by: jtydlack <139967002+jtydlack@users.noreply.github.com>
2024-12-10 09:21:05 -05:00
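As a rough sketch, the AWS side of a disk detach/attach scenario boils down to calls like these (assuming boto3; volume ID, instance ID, device name, and region are placeholders, and krkn's actual implementation may differ):
```
import boto3

ec2 = boto3.client("ec2", region_name="us-west-2")

# Detach a non-root data volume from the target node...
ec2.detach_volume(VolumeId="vol-0123456789abcdef0")
ec2.get_waiter("volume_available").wait(VolumeIds=["vol-0123456789abcdef0"])

# ...then re-attach it once the chaos duration has elapsed.
ec2.attach_volume(
    VolumeId="vol-0123456789abcdef0",
    InstanceId="i-0123456789abcdef0",
    Device="/dev/xvdf",
)
```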
Paige Patton
2ba20fa483 adding code block 2024-12-05 12:37:43 -05:00
Paige Patton
97035a765c adding get node name list changes
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-11-26 10:34:25 -05:00
Paige Patton
10ba53574e not equal to gcp
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-11-15 09:31:09 -07:00
Paige Patton
0ecba41082 adding multi label comment 2024-11-12 10:34:09 -07:00
Paige Patton
491f59d152 few small changes
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-11-12 10:34:09 -07:00
Tullio Sebastiani
2549c9a146 bump werkzeug to 3.0.6 to fix cve on krkn-hub baseimage 2024-11-12 09:42:50 -07:00
Henrick Goldwurm
949f1f09e0 Add support for user-provided default network ACL (#731)
* Add support for user-provided default network ACL

Signed-off-by: henrick <self@thehenrick.com>

* Add logs to notify user when their provided acl is used

Signed-off-by: henrick <self@thehenrick.com>

* Update docs to include optional default_acl_id parameter in zone_outage

Signed-off-by: henrick <self@thehenrick.com>

---------

Signed-off-by: henrick <self@thehenrick.com>
Co-authored-by: henrick <self@thehenrick.com>
2024-11-06 12:58:25 -05:00
Naga Ravi Chaitanya Elluri
959766254d Update status of the relevant work items under roadmap
Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>
2024-11-04 08:36:11 -05:00
Paige Patton
0e68dedb12 adding ibm shut down scenario (#697)
rh-pre-commit.version: 2.2.0
rh-pre-commit.check-secrets: ENABLED

Signed-off-by: Auto User <auto@users.noreply.github.com>
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-11-01 15:16:07 -04:00
Tullio Sebastiani
34a676a795 block_size parameter for dd (#719)
removed log

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-28 11:45:33 -04:00
Naga Ravi Chaitanya Elluri
e5c5b35db3 Update kube-burner references to krkn
Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>
2024-10-28 11:03:52 -04:00
Pablo Méndez Hernández
93d2e60386 Fix typo in docs index
Replace "oraganization" with "organization" in table of contents.

Signed-off-by: Pablo Méndez Hernández <pablomh@redhat.com>
2024-10-24 15:10:55 -04:00
Naga Ravi Chaitanya Elluri
462c9ac67e Rename test suite name to chaos-krkn
This is needed for the TRT/component readiness integration to improve
dashboard readability and tie results back to chaos.

Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>
2024-10-21 14:38:37 -04:00
78 changed files with 1657 additions and 2078 deletions

View File

@@ -13,6 +13,7 @@ jobs:
- name: Build the Docker images
if: startsWith(github.ref, 'refs/tags')
run: |
./containers/compile_dockerfile.sh
docker build --no-cache -t quay.io/krkn-chaos/krkn containers/ --build-arg TAG=${GITHUB_REF#refs/tags/}
docker tag quay.io/krkn-chaos/krkn quay.io/redhat-chaos/krkn
docker tag quay.io/krkn-chaos/krkn quay.io/krkn-chaos/krkn:${GITHUB_REF#refs/tags/}
@@ -21,6 +22,7 @@ jobs:
- name: Test Build the Docker images
if: ${{ github.event_name == 'pull_request' }}
run: |
./containers/compile_dockerfile.sh
docker build --no-cache -t quay.io/krkn-chaos/krkn containers/ --build-arg PR_NUMBER=${{ github.event.pull_request.number }}
- name: Login in quay
if: startsWith(github.ref, 'refs/tags')

View File

@@ -84,9 +84,9 @@ jobs:
echo "test_namespace" >> ./CI/tests/functional_tests
echo "test_net_chaos" >> ./CI/tests/functional_tests
echo "test_time" >> ./CI/tests/functional_tests
echo "test_arca_cpu_hog" >> ./CI/tests/functional_tests
echo "test_arca_memory_hog" >> ./CI/tests/functional_tests
echo "test_arca_io_hog" >> ./CI/tests/functional_tests
echo "test_cpu_hog" >> ./CI/tests/functional_tests
echo "test_memory_hog" >> ./CI/tests/functional_tests
echo "test_io_hog" >> ./CI/tests/functional_tests
# Push on main only steps + all other functional to collect coverage
@@ -113,9 +113,9 @@ jobs:
echo "test_namespace" >> ./CI/tests/functional_tests
echo "test_net_chaos" >> ./CI/tests/functional_tests
echo "test_time" >> ./CI/tests/functional_tests
echo "test_arca_cpu_hog" >> ./CI/tests/functional_tests
echo "test_arca_memory_hog" >> ./CI/tests/functional_tests
echo "test_arca_io_hog" >> ./CI/tests/functional_tests
echo "test_cpu_hog" >> ./CI/tests/functional_tests
echo "test_memory_hog" >> ./CI/tests/functional_tests
echo "test_io_hog" >> ./CI/tests/functional_tests
# Final common steps
- name: Run Functional tests

View File

@@ -1,19 +0,0 @@
set -xeEo pipefail
source CI/tests/common.sh
trap error ERR
trap finish EXIT
function functional_test_arca_cpu_hog {
yq -i '.input_list[0].node_selector={"kubernetes.io/hostname":"kind-worker2"}' scenarios/kube/cpu-hog/input.yaml
export scenario_type="hog_scenarios"
export scenario_file="scenarios/kube/cpu-hog/input.yaml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/arca_cpu_hog.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/arca_cpu_hog.yaml
echo "Arcaflow CPU Hog: Success"
}
functional_test_arca_cpu_hog

View File

@@ -1,19 +0,0 @@
set -xeEo pipefail
source CI/tests/common.sh
trap error ERR
trap finish EXIT
function functional_test_arca_io_hog {
yq -i '.input_list[0].node_selector={"kubernetes.io/hostname":"kind-worker2"}' scenarios/kube/io-hog/input.yaml
export scenario_type="hog_scenarios"
export scenario_file="scenarios/kube/io-hog/input.yaml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/arca_io_hog.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/arca_io_hog.yaml
echo "Arcaflow IO Hog: Success"
}
functional_test_arca_io_hog

View File

@@ -1,19 +0,0 @@
set -xeEo pipefail
source CI/tests/common.sh
trap error ERR
trap finish EXIT
function functional_test_arca_memory_hog {
yq -i '.input_list[0].node_selector={"kubernetes.io/hostname":"kind-worker2"}' scenarios/kube/memory-hog/input.yaml
export scenario_type="hog_scenarios"
export scenario_file="scenarios/kube/memory-hog/input.yaml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/arca_memory_hog.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/arca_memory_hog.yaml
echo "Arcaflow Memory Hog: Success"
}
functional_test_arca_memory_hog

CI/tests/test_cpu_hog.sh Normal file
View File

@@ -0,0 +1,20 @@
set -xeEo pipefail
source CI/tests/common.sh
trap error ERR
trap finish EXIT
function functional_test_cpu_hog {
yq -i '.node_selector="kubernetes.io/hostname=kind-worker2"' scenarios/kube/cpu-hog.yml
export scenario_type="hog_scenarios"
export scenario_file="scenarios/kube/cpu-hog.yml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/cpu_hog.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/cpu_hog.yaml
echo "CPU Hog: Success"
}
functional_test_cpu_hog

CI/tests/test_io_hog.sh Normal file
View File

@@ -0,0 +1,19 @@
set -xeEo pipefail
source CI/tests/common.sh
trap error ERR
trap finish EXIT
function functional_test_io_hog {
yq -i '.node_selector="kubernetes.io/hostname=kind-worker2"' scenarios/kube/io-hog.yml
export scenario_type="hog_scenarios"
export scenario_file="scenarios/kube/io-hog.yml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/io_hog.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/io_hog.yaml
echo "IO Hog: Success"
}
functional_test_io_hog

View File

@@ -0,0 +1,19 @@
set -xeEo pipefail
source CI/tests/common.sh
trap error ERR
trap finish EXIT
function functional_test_memory_hog {
yq -i '.node_selector="kubernetes.io/hostname=kind-worker2"' scenarios/kube/memory-hog.yml
export scenario_type="hog_scenarios"
export scenario_file="scenarios/kube/memory-hog.yml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/memory_hog.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/memory_hog.yaml
echo "Memory Hog: Success"
}
functional_test_memory_hog

View File

@@ -55,7 +55,7 @@ Scenario type | Kubernetes
[Container Scenarios](docs/container_scenarios.md) | :heavy_check_mark: |
[Node Scenarios](docs/node_scenarios.md) | :heavy_check_mark: |
[Time Scenarios](docs/time_scenarios.md) | :heavy_check_mark: |
[Hog Scenarios: CPU, Memory](docs/arcaflow_scenarios.md) | :heavy_check_mark: |
[Hog Scenarios: CPU, Memory](docs/hog_scenarios.md) | :heavy_check_mark: |
[Cluster Shut Down Scenarios](docs/cluster_shut_down_scenarios.md) | :heavy_check_mark: |
[Service Disruption Scenarios](docs/service_disruption_scenarios.md.md) | :heavy_check_mark: |
[Zone Outage Scenarios](docs/zone_outage.md) | :heavy_check_mark: |

View File

@@ -6,10 +6,11 @@ Following are a list of enhancements that we are planning to work on adding supp
- [x] [Centralized storage for chaos experiments artifacts](https://github.com/krkn-chaos/krkn/issues/423)
- [ ] [Support for causing DNS outages](https://github.com/krkn-chaos/krkn/issues/394)
- [x] [Chaos recommender](https://github.com/krkn-chaos/krkn/tree/main/utils/chaos-recommender) to suggest scenarios having probability of impacting the service under test using profiling results
- [ ] Chaos AI integration to improve and automate test coverage
- [ ] Chaos AI integration to improve test coverage while reducing fault space to save costs and execution time
- [x] [Support for pod level network traffic shaping](https://github.com/krkn-chaos/krkn/issues/393)
- [ ] [Ability to visualize the metrics that are being captured by Kraken and stored in Elasticsearch](https://github.com/krkn-chaos/krkn/issues/124)
- [ ] Support for running all the scenarios of Kraken on Kubernetes distribution - see https://github.com/krkn-chaos/krkn/issues/185, https://github.com/redhat-chaos/krkn/issues/186
- [ ] Continue to improve [Chaos Testing Guide](https://krkn-chaos.github.io/krkn) in terms of adding best practices, test environment recommendations and scenarios to make sure the OpenShift platform, as well the applications running on top it, are resilient and performant under chaotic conditions.
- [ ] [Switch documentation references to Kubernetes](https://github.com/krkn-chaos/krkn/issues/495)
- [ ] [OCP and Kubernetes functionalities segregation](https://github.com/krkn-chaos/krkn/issues/497)
- [x] Support for running all the scenarios of Kraken on Kubernetes distribution - see https://github.com/krkn-chaos/krkn/issues/185, https://github.com/redhat-chaos/krkn/issues/186
- [x] Continue to improve [Chaos Testing Guide](https://krkn-chaos.github.io/krkn) in terms of adding best practices, test environment recommendations and scenarios to make sure the OpenShift platform, as well the applications running on top it, are resilient and performant under chaotic conditions.
- [x] [Switch documentation references to Kubernetes](https://github.com/krkn-chaos/krkn/issues/495)
- [x] [OCP and Kubernetes functionalities segregation](https://github.com/krkn-chaos/krkn/issues/497)
- [x] [Krknctl - client for running Krkn scenarios with ease](https://github.com/krkn-chaos/krknctl)

View File

@@ -9,10 +9,9 @@ kraken:
chaos_scenarios:
# List of policies/chaos scenarios to load
- hog_scenarios:
- scenarios/kube/cpu-hog/input.yaml
- scenarios/kube/memory-hog/input.yaml
- scenarios/kube/io-hog/input.yaml
- scenarios/kube/io-hog/input.yaml
- scenarios/kube/cpu-hog.yml
- scenarios/kube/memory-hog.yml
- scenarios/kube/io-hog.yml
- application_outages_scenarios:
- scenarios/openshift/app_outage.yaml
- container_scenarios: # List of chaos pod scenarios to load

View File

@@ -49,6 +49,11 @@ RUN python3.9 -m ensurepip
RUN pip3.9 install -r requirements.txt
RUN pip3.9 install jsonschema
LABEL krknctl.title.global="Krkn Base Image"
LABEL krknctl.description.global="This is the krkn base image."
LABEL krknctl.input_fields.global='$KRKNCTL_INPUT'
RUN chown -R krkn:krkn /home/krkn && chmod 755 /home/krkn
USER krkn
ENTRYPOINT ["python3.9", "run_kraken.py"]

View File

@@ -0,0 +1,5 @@
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
export KRKNCTL_INPUT=$(cat krknctl-input.json|tr -d "\n")
envsubst '${KRKNCTL_INPUT}' < Dockerfile.template > Dockerfile

View File

@@ -0,0 +1,396 @@
[
{
"name": "cerberus-enabled",
"short_description": "Enable Cerberus",
"description": "Enables Cerberus Support",
"variable": "CERBERUS_ENABLED",
"type": "enum",
"default": "False",
"allowed_values": "True,False",
"separator": ",",
"required": "false"
},
{
"name": "cerberus-url",
"short_description": "Cerberus URL",
"description": "Cerberus http url",
"variable": "CERBERUS_URL",
"type": "string",
"default": "http://0.0.0.0:8080",
"validator": "^(http|https):\/\/.*",
"required": "false"
},
{
"name": "distribution",
"short_description": "Orchestrator distribution",
"description": "Selects the orchestrator distribution",
"variable": "DISTRIBUTION",
"type": "enum",
"default": "openshift",
"allowed_values": "openshift,kubernetes",
"separator": ",",
"required": "false"
},
{
"name": "krkn-kubeconfig",
"short_description": "Krkn kubeconfig path",
"description": "Sets the path where krkn will search for kubeconfig (in container)",
"variable": "KRKN_KUBE_CONFIG",
"type": "string",
"default": "/home/krkn/.kube/config",
"required": "false"
},
{
"name": "wait-duration",
"short_description": "Post chaos wait duration",
"description": "waits for a certain amount of time after the scenario",
"variable": "WAIT_DURATION",
"type": "number",
"default": "1"
},
{
"name": "iterations",
"short_description": "Chaos scenario iterations",
"description": "number of times the same chaos scenario will be executed",
"variable": "ITERATIONS",
"type": "number",
"default": "1"
},
{
"name": "daemon-mode",
"short_description": "Sets krkn daemon mode",
"description": "if set the scenario will execute forever",
"variable": "DAEMON_MODE",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "uuid",
"short_description": "Sets krkn run uuid",
"description": "sets krkn run uuid instead of generating it",
"variable": "UUID",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "capture-metrics",
"short_description": "Enables metrics capture",
"description": "Enables metrics capture",
"variable": "CAPTURE_METRICS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "enable-alerts",
"short_description": "Enables cluster alerts check",
"description": "Enables cluster alerts check",
"variable": "ENABLE_ALERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "alerts-path",
"short_description": "Cluster alerts path file (in container)",
"description": "Enables cluster alerts check",
"variable": "ALERTS_PATH",
"type": "string",
"default": "config/alerts.yaml",
"required": "false"
},
{
"name": "enable-es",
"short_description": "Enables elastic search data collection",
"description": "Enables elastic search data collection",
"variable": "ENABLE_ES",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-server",
"short_description": "Elasticsearch instance URL",
"description": "Elasticsearch instance URL",
"variable": "ES_SERVER",
"type": "string",
"default": "http://0.0.0.0",
"validator": "^(http|https):\/\/.*",
"required": "false"
},
{
"name": "es-port",
"short_description": "Elasticsearch instance port",
"description": "Elasticsearch instance port",
"variable": "ES_PORT",
"type": "number",
"default": "443",
"required": "false"
},
{
"name": "es-username",
"short_description": "Elasticsearch instance username",
"description": "Elasticsearch instance username",
"variable": "ES_USERNAME",
"type": "string",
"default": "elastic",
"required": "false"
},
{
"name": "es-password",
"short_description": "Elasticsearch instance password",
"description": "Elasticsearch instance password",
"variable": "ES_PASSWORD",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "es-verify-certs",
"short_description": "Enables elasticsearch TLS certificate verification",
"description": "Enables elasticsearch TLS certificate verification",
"variable": "ES_VERIFY_CERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-collect-metrics",
"short_description": "Enables metrics collection on elastic search",
"description": "Enables metrics collection on elastic search",
"variable": "ES_COLLECT_METRICS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-collect-alerts",
"short_description": "Enables alerts collection on elastic search",
"description": "Enables alerts collection on elastic search",
"variable": "ES_COLLECT_ALERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-metrics-index",
"short_description": "Elasticsearch metrics index",
"description": "Index name for metrics in Elasticsearch",
"variable": "ES_METRICS_INDEX",
"type": "string",
"default": "krkn-metrics",
"required": "false"
},
{
"name": "es-alerts-index",
"short_description": "Elasticsearch alerts index",
"description": "Index name for alerts in Elasticsearch",
"variable": "ES_ALERTS_INDEX",
"type": "string",
"default": "krkn-alerts",
"required": "false"
},
{
"name": "es-telemetry-index",
"short_description": "Elasticsearch telemetry index",
"description": "Index name for telemetry in Elasticsearch",
"variable": "ES_TELEMETRY_INDEX",
"type": "string",
"default": "krkn-telemetry",
"required": "false"
},
{
"name": "check-critical-alerts",
"short_description": "Check critical alerts",
"description": "Enables checking for critical alerts",
"variable": "CHECK_CRITICAL_ALERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-enabled",
"short_description": "Enable telemetry",
"description": "Enables telemetry support",
"variable": "TELEMETRY_ENABLED",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-api-url",
"short_description": "Telemetry API URL",
"description": "API endpoint for telemetry data",
"variable": "TELEMETRY_API_URL",
"type": "string",
"default": "https://ulnmf9xv7j.execute-api.us-west-2.amazonaws.com/production",
"validator": "^(http|https):\/\/.*",
"required": "false"
},
{
"name": "telemetry-username",
"short_description": "Telemetry username",
"description": "Username for telemetry authentication",
"variable": "TELEMETRY_USERNAME",
"type": "string",
"default": "redhat-chaos",
"required": "false"
},
{
"name": "telemetry-password",
"short_description": "Telemetry password",
"description": "Password for telemetry authentication",
"variable": "TELEMETRY_PASSWORD",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "telemetry-prometheus-backup",
"short_description": "Prometheus backup for telemetry",
"description": "Enables Prometheus backup for telemetry",
"variable": "TELEMETRY_PROMETHEUS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "True",
"required": "false"
},
{
"name": "telemetry-full-prometheus-backup",
"short_description": "Full Prometheus backup",
"description": "Enables full Prometheus backup for telemetry",
"variable": "TELEMETRY_FULL_PROMETHEUS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-backup-threads",
"short_description": "Telemetry backup threads",
"description": "Number of threads for telemetry backup",
"variable": "TELEMETRY_BACKUP_THREADS",
"type": "number",
"default": "5",
"required": "false"
},
{
"name": "telemetry-archive-path",
"short_description": "Telemetry archive path",
"description": "Path to save telemetry archive",
"variable": "TELEMETRY_ARCHIVE_PATH",
"type": "string",
"default": "/tmp",
"required": "false"
},
{
"name": "telemetry-max-retries",
"short_description": "Telemetry max retries",
"description": "Maximum retries for telemetry operations",
"variable": "TELEMETRY_MAX_RETRIES",
"type": "number",
"default": "0",
"required": "false"
},
{
"name": "telemetry-run-tag",
"short_description": "Telemetry run tag",
"description": "Tag for telemetry run",
"variable": "TELEMETRY_RUN_TAG",
"type": "string",
"default": "chaos",
"required": "false"
},
{
"name": "telemetry-group",
"short_description": "Telemetry group",
"description": "Group name for telemetry data",
"variable": "TELEMETRY_GROUP",
"type": "string",
"default": "default",
"required": "false"
},
{
"name": "telemetry-archive-size",
"short_description": "Telemetry archive size",
"description": "Maximum size for telemetry archives",
"variable": "TELEMETRY_ARCHIVE_SIZE",
"type": "number",
"default": "1000",
"required": "false"
},
{
"name": "telemetry-logs-backup",
"short_description": "Telemetry logs backup",
"description": "Enables logs backup for telemetry",
"variable": "TELEMETRY_LOGS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-filter-pattern",
"short_description": "Telemetry filter pattern",
"description": "Filter pattern for telemetry logs",
"variable": "TELEMETRY_FILTER_PATTERN",
"type": "string",
"default": "[\"(\\\\w{3}\\\\s\\\\d{1,2}\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d+).+\",\"kinit (\\\\d+/\\\\d+/\\\\d+\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2})\\\\s+\",\"(\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d+Z).+\"]",
"required": "false"
},
{
"name": "telemetry-cli-path",
"short_description": "Telemetry CLI path (oc)",
"description": "Path to telemetry CLI tool (oc)",
"variable": "TELEMETRY_CLI_PATH",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "telemetry-events-backup",
"short_description": "Telemetry events backup",
"description": "Enables events backup for telemetry",
"variable": "TELEMETRY_EVENTS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "True",
"required": "false"
},
{
"name": "krkn-debug",
"short_description": "Krkn debug mode",
"description": "Enables debug mode for Krkn",
"variable": "KRKN_DEBUG",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
}
]

View File

@@ -38,11 +38,11 @@ A couple of [alert profiles](https://github.com/redhat-chaos/krkn/tree/main/conf
severity: critical
```
Kube-burner supports setting the severity for the alerts with each one having different effects:
Krkn supports setting the severity for the alerts with each one having different effects:
```
info: Prints an info message with the alarm description to stdout. By default all expressions have this severity.
warning: Prints a warning message with the alarm description to stdout.
error: Prints an error message with the alarm description to stdout and makes kube-burner rc = 1
error: Prints an error message with the alarm description to stdout and sets Krkn rc = 1
critical: Prints a fatal message with the alarm description to stdout and exits execution immediately with rc != 0
```
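For example, a profile entry combining an expression with one of these severities might look like the following sketch (the expression is illustrative, not from the shipped profiles):
```
- expr: up{job="apiserver"} == 0
  description: API server is down
  severity: critical
```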

View File

@@ -1,70 +0,0 @@
## Arcaflow Scenarios
Arcaflow is a workflow engine in development which provides the ability to execute workflow steps in sequence, in parallel, repeatedly, etc. The main difference to competitors such as Netflix Conductor is the ability to run ad-hoc workflows without an infrastructure setup required.
The engine uses containers to execute plugins and runs them either locally in Docker/Podman or remotely on a Kubernetes cluster. The workflow system is strongly typed and allows for generating JSON schema and OpenAPI documents for all data formats involved.
### Available Scenarios
#### Hog scenarios:
- [CPU Hog](arcaflow_scenarios/cpu_hog.md)
- [Memory Hog](arcaflow_scenarios/memory_hog.md)
- [I/O Hog](arcaflow_scenarios/io_hog.md)
### Prerequisites
Arcaflow supports three deployment technologies:
- Docker
- Podman
- Kubernetes
#### Docker
In order to run Arcaflow Scenarios with the Docker deployer, be sure that:
- Docker is correctly installed in your Operating System (to find instructions on how to install docker please refer to [Docker Documentation](https://www.docker.com/))
- The Docker daemon is running
#### Podman
The podman deployer is built around the podman CLI and doesn't necessarily need to run alongside the podman daemon.
To run Arcaflow Scenarios in your Operating system be sure that:
- podman is correctly installed in your Operating System (to find instructions on how to install podman refer to [Podman Documentation](https://podman.io/))
- the podman CLI is in your shell PATH
#### Kubernetes
The kubernetes deployer directly integrates the Kubernetes API client and needs only a valid kubeconfig file and a reachable Kubernetes/OpenShift cluster.
### Usage
To enable arcaflow scenarios edit the kraken config file, go to the section `kraken -> chaos_scenarios` of the yaml structure
and add a new element to the list named `arcaflow_scenarios` then add the desired scenario
pointing to the `input.yaml` file.
```
kraken:
...
chaos_scenarios:
- arcaflow_scenarios:
- scenarios/arcaflow/cpu-hog/input.yaml
```
#### input.yaml
The implemented scenarios can be found in *scenarios/arcaflow/<scenario_name>* folder.
The entrypoint of each scenario is the *input.yaml* file.
In this file there are all the options to set up the scenario accordingly to the desired target
### config.yaml
The arcaflow config file. Here you can set the arcaflow deployer and the arcaflow log level.
The supported deployers are:
- Docker
- Podman (podman daemon not needed, suggested option)
- Kubernetes
The supported log levels are:
- debug
- info
- warning
- error
### workflow.yaml
This file contains the steps that will be executed to perform the scenario against the target.
Each step is represented by a container that will be executed from the deployer and its options.
Note that we provide the scenarios as a template, but they can be manipulated to define more complex workflows.
To have more details regarding the arcaflow workflows architecture and syntax it is suggested to refer to the [Arcaflow Documentation](https://arcalot.io/arcaflow/).
This edit is no longer in quay image
Working on fix in ticket: https://issues.redhat.com/browse/CHAOS-494
This will affect all versions 4.12 and higher of OpenShift

View File

@@ -1,19 +0,0 @@
# CPU Hog
This scenario is based on the arcaflow [arcaflow-plugin-stressng](https://github.com/arcalot/arcaflow-plugin-stressng) plugin.
The purpose of this scenario is to create cpu pressure on a particular node of the Kubernetes/OpenShift cluster for a time span.
To enable this plugin add the pointer to the scenario input file `scenarios/arcaflow/cpu-hog/input.yaml` as described in the
Usage section.
This scenario takes a list of objects named `input_list` with the following properties:
- **kubeconfig :** *string* the kubeconfig needed by the deployer to deploy the sysbench plugin in the target cluster
- **namespace :** *string* the namespace where the scenario container will be deployed
**Note:** this parameter will be automatically filled by kraken if the `kubeconfig_path` property is correctly set
- **node_selector :** *key-value map* the node label that will be used as `nodeSelector` by the pod to target a specific cluster node
- **duration :** *string* stop stress test after N seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y.
- **cpu_count :** *int* the number of CPU cores to be used (0 means all)
- **cpu_method :** *string* a fine-grained control of which cpu stressors to use (ackermann, cfloat etc. see [manpage](https://manpages.org/sysbench) for all the cpu_method options)
- **cpu_load_percentage :** *int* the CPU load by percentage
To perform several load tests in the same run simultaneously (eg. stress two or more nodes in the same run) add another item
to the `input_list` with the same properties (and eventually different values eg. different node_selectors
to schedule the pod on different nodes). To reduce (or increase) the parallelism change the value `parallelism` in `workload.yaml` file

View File

@@ -1,21 +0,0 @@
# I/O Hog
This scenario is based on the arcaflow [arcaflow-plugin-stressng](https://github.com/arcalot/arcaflow-plugin-stressng) plugin.
The purpose of this scenario is to create disk pressure on a particular node of the Kubernetes/OpenShift cluster for a time span.
The scenario allows to attach a node path to the pod as a `hostPath` volume.
To enable this plugin add the pointer to the scenario input file `scenarios/arcaflow/io-hog/input.yaml` as described in the
Usage section.
This scenario takes a list of objects named `input_list` with the following properties:
- **kubeconfig :** *string* the kubeconfig needed by the deployer to deploy the sysbench plugin in the target cluster
- **namespace :** *string* the namespace where the scenario container will be deployed
**Note:** this parameter will be automatically filled by kraken if the `kubeconfig_path` property is correctly set
- **node_selector :** *key-value map* the node label that will be used as `nodeSelector` by the pod to target a specific cluster node
- **duration :** *string* stop stress test after N seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y.
- **target_pod_folder :** *string* the path in the pod where the volume is mounted
- **target_pod_volume :** *object* the `hostPath` volume definition in the [Kubernetes/OpenShift](https://docs.openshift.com/container-platform/3.11/install_config/persistent_storage/using_hostpath.html) format, that will be attached to the pod as a volume
- **io_write_bytes :** *string* writes N bytes for each hdd process. The size can be expressed as % of free space on the file system or in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g
- **io_block_size :** *string* size of each write in bytes. Size can be from 1 byte to 4m.
To perform several load tests in the same run simultaneously (eg. stress two or more nodes in the same run) add another item
to the `input_list` with the same properties (and eventually different values eg. different node_selectors
to schedule the pod on different nodes). To reduce (or increase) the parallelism change the value `parallelism` in `workload.yaml` file

View File

@@ -1,18 +0,0 @@
# Memory Hog
This scenario is based on the arcaflow [arcaflow-plugin-stressng](https://github.com/arcalot/arcaflow-plugin-stressng) plugin.
The purpose of this scenario is to create Virtual Memory pressure on a particular node of the Kubernetes/OpenShift cluster for a time span.
To enable this plugin add the pointer to the scenario input file `scenarios/arcaflow/memory-hog/input.yaml` as described in the
Usage section.
This scenario takes a list of objects named `input_list` with the following properties:
- **kubeconfig :** *string* the kubeconfig needed by the deployer to deploy the sysbench plugin in the target cluster
- **namespace :** *string* the namespace where the scenario container will be deployed
**Note:** this parameter will be automatically filled by kraken if the `kubeconfig_path` property is correctly set
- **node_selector :** *key-value map* the node label that will be used as `nodeSelector` by the pod to target a specific cluster node
- **duration :** *string* stop stress test after N seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y.
- **vm_bytes :** *string* N bytes per vm process or percentage of memory used (using the % symbol). The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
- **vm_workers :** *int* Number of VM stressors to be run (0 means 1 stressor per CPU)
To perform several load tests in the same run simultaneously (eg. stress two or more nodes in the same run) add another item
to the `input_list` with the same properties (and eventually different values eg. different node_selectors
to schedule the pod on different nodes). To reduce (or increase) the parallelism change the value `parallelism` in `workload.yaml` file

View File

@@ -13,13 +13,26 @@ Supported Cloud Providers:
**NOTE**: For clusters with AWS make sure [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) is installed and properly [configured](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html) using an AWS account
## GCP
**NOTE**: For clusters with GCP make sure [GCP CLI](https://cloud.google.com/sdk/docs/install#linux) is installed.
A google service account is required to give proper authentication to GCP for node actions. See [here](https://cloud.google.com/docs/authentication/getting-started) for how to create a service account.
In order to set up Application Default Credentials (ADC) for use by Cloud Client Libraries, you can provide either service account credentials or the credentials associated with your user account:
**NOTE**: A user with 'resourcemanager.projects.setIamPolicy' permission is required to grant project-level permissions to the service account.
- Using service account credentials:
After creating the service account you will need to enable the account using the following: ```export GOOGLE_APPLICATION_CREDENTIALS="<serviceaccount.json>"```
A google service account is required to give proper authentication to GCP for node actions. See [here](https://cloud.google.com/docs/authentication/getting-started) for how to create a service account.
**NOTE**: A user with 'resourcemanager.projects.setIamPolicy' permission is required to grant project-level permissions to the service account.
After creating the service account you will need to enable the account using the following: ```export GOOGLE_APPLICATION_CREDENTIALS="<serviceaccount.json>"```
- Using the credentials associated with your user account:
1. Make sure that the [GCP CLI](https://cloud.google.com/sdk/docs/install#linux) is installed and [initialized](https://cloud.google.com/sdk/docs/initializing) by running:
```gcloud init```
2. Create local authentication credentials for your user account:
```gcloud auth application-default login```
## Openstack
@@ -32,6 +45,7 @@ After creating the service account you will need to enable the account using the
To run properly, the service principal requires the “Azure Active Directory Graph/Application.ReadWrite.OwnedBy” API permission and the “User Access Administrator” role.
Before running you will need to set the following:
1. ```export AZURE_SUBSCRIPTION_ID=<subscription_id>```
2. ```export AZURE_TENANT_ID=<tenant_id>```
@@ -66,9 +80,10 @@ Set the following environment variables
These are the credentials that you would normally use to access the vSphere client.
## IBMCloud
If no api key is set up with proper VPC resource permissions, use the following to create:
If no API key is set up with proper VPC resource permissions, use the following to create it:
* Access group
* Service id with the following access
* With policy **VPC Infrastructure Services**

View File

@@ -8,6 +8,7 @@ Current accepted cloud types:
* [GCP](cloud_setup.md#gcp)
* [AWS](cloud_setup.md#aws)
* [Openstack](cloud_setup.md#openstack)
* [IBMCloud](cloud_setup.md#ibmcloud)
```

docs/hog_scenarios.md Normal file
View File

@@ -0,0 +1,49 @@
### Hog Scenarios
Hog Scenarios are designed to push the limits of memory, CPU, or I/O on one or more nodes in your cluster.
They also serve to evaluate whether your cluster can withstand rogue pods that excessively consume resources
without any limits.
These scenarios involve deploying one or more workloads in the cluster. Based on the specific configuration,
these workloads will use a predetermined amount of resources for a specified duration.
#### Common options
| Option | Type | Description |
|---|---|---|
|`duration`| number | the duration of the stress test in seconds |
|`workers`| number (Optional) | the number of threads instantiated by stress-ng, if left empty the number of workers will match the number of available cores in the node. |
|`hog-type`| string (Enum) | can be cpu, memory or io. |
|`image`| string | the container image of the stress workload |
|`namespace`| string | the namespace where the stress workload will be deployed |
|`node-selector`| string (Optional) | defines the node selector for choosing target nodes. If not specified, one schedulable node in the cluster will be chosen at random. If multiple nodes match the selector, all of them will be subjected to stress. If number-of-nodes is specified, that many nodes will be randomly selected from those identified by the selector. |
|`number-of-nodes`| number (Optional) | restricts the number of selected nodes by the selector|
#### `cpu-hog` options
| Option | Type | Description |
|---|--------|---|
|`cpu-load-percentage`| number | the amount of cpu that will be consumed by the hog|
|`cpu-method`| string | reflects the cpu load strategy adopted by stress-ng, please refer to the stress-ng documentation for all the available options|
#### `io-hog` options
| Option | Type | Description |
|-----------------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `io-block-size` |string| the block size written by the stressor |
| `io-write-bytes` |string| the total amount of data that will be written by the stressor. The size can be specified as % of free space on the file system or in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g |
| `io-target-pod-folder` |string| the folder where the volume will be mounted in the pod |
| `io-target-pod-volume`| dictionary | the pod volume definition that will be stressed by the scenario. |
> [!CAUTION]
> Modifying the structure of `io-target-pod-volume` might alter how the hog operates, potentially rendering it ineffective.
#### `memory-hog` options
| Option | Type | Description |
|---|---|---|
|`memory-vm-bytes`| string | the amount of memory that the scenario will try to hog. The size can be specified as % of free space on the file system or in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g |
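Putting the options together, a cpu-hog scenario file might look like this sketch (values and the image reference are illustrative, not taken from this changeset):
```
duration: 60
workers: ''                          # empty: one worker per available core
hog-type: cpu
image: quay.io/krkn-chaos/krkn-hog   # placeholder image reference
namespace: default
node-selector: "node-role.kubernetes.io/worker="
number-of-nodes: 1
cpu-load-percentage: 80
cpu-method: all
```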

View File

@@ -11,7 +11,7 @@
* [Scenarios](#scenarios)
* [Test Environment Recommendations - how and where to run chaos tests](#test-environment-recommendations---how-and-where-to-run-chaos-tests)
* [Chaos testing in Practice](#chaos-testing-in-practice)
* [OpenShift oraganization](#openshift-organization)
* [OpenShift organization](#openshift-organization)
* [startx-lab](#startx-lab)

View File

@@ -18,7 +18,7 @@ network_chaos: # Scenario to create an outage
```
##### Sample scenario config for ingress traffic shaping (using a plugin)
'''
```
- id: network_chaos
config:
node_interface_name: # Dictionary with key as node name(s) and value as a list of its interfaces to test
@@ -35,7 +35,7 @@ network_chaos: # Scenario to create an outage
bandwidth: 10mbit
wait_duration: 120
test_duration: 60
'''
```
Note: For ingress traffic shaping, ensure that your node doesn't have any [IFB](https://wiki.linuxfoundation.org/networking/ifb) interfaces already present. The scenario relies on creating IFBs to do the shaping, and they are deleted at the end of the scenario.

View File

@@ -4,7 +4,7 @@ The following node chaos scenarios are supported:
1. **node_start_scenario**: Scenario to start the node instance.
2. **node_stop_scenario**: Scenario to stop the node instance.
3. **node_stop_start_scenario**: Scenario to stop and then start the node instance. Not supported on VMware.
3. **node_stop_start_scenario**: Scenario to stop the node instance for specified duration and then start the node instance. Not supported on VMware.
4. **node_termination_scenario**: Scenario to terminate the node instance.
5. **node_reboot_scenario**: Scenario to reboot the node instance.
6. **stop_kubelet_scenario**: Scenario to stop the kubelet of the node instance.
@@ -12,6 +12,7 @@ The following node chaos scenarios are supported:
8. **restart_kubelet_scenario**: Scenario to restart the kubelet of the node instance.
9. **node_crash_scenario**: Scenario to crash the node instance.
10. **stop_start_helper_node_scenario**: Scenario to stop and start the helper node and check service status.
11. **node_disk_detach_attach_scenario**: Scenario to detach node disk for specified duration.
**NOTE**: If the node does not recover from the node_crash_scenario injection, reboot the node to get it back to Ready state.
@@ -20,6 +21,8 @@ The following node chaos scenarios are supported:
, node_reboot_scenario and stop_start_kubelet_scenario are supported on AWS, Azure, OpenStack, BareMetal, GCP
, VMware and Alibaba.
**NOTE**: node_disk_detach_attach_scenario is supported only on AWS and cannot detach root disk.
#### AWS
@@ -57,6 +60,8 @@ kind was primarily designed for testing Kubernetes itself, but may be used for l
#### GCP
Cloud setup instructions can be found [here](cloud_setup.md#gcp). Sample scenario config can be found [here](https://github.com/krkn-chaos/krkn/blob/main/scenarios/openshift/gcp_node_scenarios.yml).
NOTE: The parallel option is not available for GCP; the API doesn't perform operations at the same time
#### Openstack
@@ -88,12 +93,7 @@ How to set up Alibaba cli to run node scenarios is defined [here](cloud_setup.md
#### VMware
How to set up VMware vSphere to run node scenarios is defined [here](cloud_setup.md#vmware)
This cloud type uses a different configuration style, see actions below and [example config file](../scenarios/openshift/vmware_node_scenarios.yml)
- vmware-node-terminate
- vmware-node-reboot
- vmware-node-stop
- vmware-node-start
See [example config file](../scenarios/openshift/vmware_node_scenarios.yml)

View File

@@ -13,10 +13,12 @@ zone_outage: # Scenario to create an out
duration: 600 # Duration in seconds after which the zone will be back online.
vpc_id: # Cluster virtual private network to target.
subnet_id: [subnet1, subnet2] # List of subnet-id's to deny both ingress and egress traffic.
default_acl_id: acl-xxxxxxxx # (Optional) ID of an existing network ACL to use instead of creating a new one. If provided, this ACL will not be deleted after the scenario.
```
**NOTE**: vpc_id and subnet_id can be obtained from the cloud web console by selecting one of the instances in the targeted zone ( us-west-2a for example ).
**NOTE**: Multiple zones will experience downtime in case of targeting multiple subnets which might have an impact on the cluster health especially if the zones have control plane components deployed.
**NOTE**: default_acl_id can be obtained from the AWS VPC Console by selecting "Network ACLs" from the left sidebar ( the ID will be in the format 'acl-xxxxxxxx' ). Make sure the selected ACL has the desired ingress/egress rules for your outage scenario ( i.e., deny all ).
##### Debugging steps in case of failures
In case of failures during the steps which revert back the network acl to allow traffic and bring back the cluster nodes in the zone, the nodes in the particular zone will be in `NotReady` condition. Here is how to fix it:
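Under the hood, the ACL swap described above amounts to something like this boto3 sketch (IDs are placeholders; a real implementation matches the association's SubnetId and restores the original ACL afterwards):
```
import boto3

ec2 = boto3.client("ec2")

# Look up the ACL currently associated with the target subnet.
acls = ec2.describe_network_acls(
    Filters=[{"Name": "association.subnet-id", "Values": ["subnet1"]}]
)
association_id = acls["NetworkAcls"][0]["Associations"][0]["NetworkAclAssociationId"]

# Point the association at the deny-all (or user-provided default_acl_id) ACL.
ec2.replace_network_acl_association(
    AssociationId=association_id, NetworkAclId="acl-xxxxxxxx"
)
```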

View File

@@ -29,9 +29,9 @@ def calculate_zscores(data):
def identify_outliers(data, threshold):
outliers_cpu = data[data["CPU"] > threshold]["Service"].tolist()
outliers_memory = data[data["Memory"] > threshold]["Service"].tolist()
outliers_network = data[data["Network"] > threshold]["Service"].tolist()
outliers_cpu = data[data["CPU"] > float(threshold)]["Service"].tolist()
outliers_memory = data[data["Memory"] > float(threshold)]["Service"].tolist()
outliers_network = data[data["Network"] > float(threshold)]["Service"].tolist()
return outliers_cpu, outliers_memory, outliers_network
@@ -39,13 +39,13 @@ def identify_outliers(data, threshold):
def get_services_above_heatmap_threshold(dataframe, cpu_threshold, mem_threshold):
# Filter the DataFrame based on CPU_HEATMAP and MEM_HEATMAP thresholds
filtered_df = dataframe[
((dataframe["CPU"] / dataframe["CPU_LIMITS"]) > cpu_threshold)
((dataframe["CPU"] / dataframe["CPU_LIMITS"]) > float(cpu_threshold))
]
# Get the lists of services
cpu_services = filtered_df["service"].tolist()
filtered_df = dataframe[
((dataframe["MEM"] / dataframe["MEM_LIMITS"]) > mem_threshold)
((dataframe["MEM"] / dataframe["MEM_LIMITS"]) > float(mem_threshold))
]
mem_services = filtered_df["service"].tolist()
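A minimal reproduction of the failure mode this hunk addresses (assuming pandas; the data is made up):
```
import pandas as pd

data = pd.DataFrame({"Service": ["etcd", "dns"], "CPU": [2.5, 0.1]})
threshold = "2"  # thresholds arriving as strings from config/CLI

# Comparing a float64 column against a str raises:
# TypeError: Invalid comparison between dtype=float64 and str
try:
    data[data["CPU"] > threshold]
except TypeError as err:
    print(err)

# Casting first, as the diff does, restores the comparison.
print(data[data["CPU"] > float(threshold)]["Service"].tolist())  # ['etcd']
```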

View File

@@ -56,6 +56,7 @@ class AbstractScenarioPlugin(ABC):
scenario_telemetries: list[ScenarioTelemetry] = []
failed_scenarios = []
wait_duration = krkn_config["tunings"]["wait_duration"]
events_backup = krkn_config["telemetry"]["events_backup"]
for scenario_config in scenarios_list:
if isinstance(scenario_config, list):
logging.error(
@@ -99,13 +100,15 @@ class AbstractScenarioPlugin(ABC):
int(scenario_telemetry.start_timestamp),
int(scenario_telemetry.end_timestamp),
)
utils.populate_cluster_events(
scenario_telemetry,
parsed_scenario_config,
telemetry.get_lib_kubernetes(),
int(scenario_telemetry.start_timestamp),
int(scenario_telemetry.end_timestamp),
)
if events_backup:
utils.populate_cluster_events(
scenario_telemetry,
parsed_scenario_config,
telemetry.get_lib_kubernetes(),
int(scenario_telemetry.start_timestamp),
int(scenario_telemetry.end_timestamp),
)
if scenario_telemetry.exit_status != 0:
failed_scenarios.append(scenario_config)
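The flag read above comes from the main krkn config; a minimal excerpt with only the keys this hunk touches (values illustrative):
```
tunings:
  wait_duration: 60      # seconds to wait after each scenario
telemetry:
  events_backup: False   # when False, cluster events are not collected per scenario
```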

View File

@@ -1,197 +0,0 @@
import logging
import os
from pathlib import Path
import arcaflow
import yaml
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
from krkn.scenario_plugins.arcaflow.context_auth import ContextAuth
class ArcaflowScenarioPlugin(AbstractScenarioPlugin):
def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
try:
engine_args = self.build_args(scenario)
status_code = self.run_workflow(
engine_args, lib_telemetry.get_lib_kubernetes().get_kubeconfig_path()
)
return status_code
except Exception as e:
logging.error("ArcaflowScenarioPlugin exiting due to Exception %s" % e)
return 1
def get_scenario_types(self) -> [str]:
return ["hog_scenarios", "arcaflow_scenario"]
def run_workflow(
self, engine_args: arcaflow.EngineArgs, kubeconfig_path: str
) -> int:
self.set_arca_kubeconfig(engine_args, kubeconfig_path)
exit_status = arcaflow.run(engine_args)
return exit_status
def build_args(self, input_file: str) -> arcaflow.EngineArgs:
"""sets the kubeconfig parsed by setArcaKubeConfig as an input to the arcaflow workflow"""
current_path = Path().resolve()
context = f"{current_path}/{Path(input_file).parent}"
workflow = f"{context}/workflow.yaml"
config = f"{context}/config.yaml"
if not os.path.exists(context):
raise Exception(
"context folder for arcaflow workflow not found: {}".format(context)
)
if not os.path.exists(input_file):
raise Exception(
"input file for arcaflow workflow not found: {}".format(input_file)
)
if not os.path.exists(workflow):
raise Exception(
"workflow file for arcaflow workflow not found: {}".format(workflow)
)
if not os.path.exists(config):
raise Exception(
"configuration file for arcaflow workflow not found: {}".format(config)
)
engine_args = arcaflow.EngineArgs()
engine_args.context = context
engine_args.config = config
engine_args.workflow = workflow
engine_args.input = f"{current_path}/{input_file}"
return engine_args
def set_arca_kubeconfig(
self, engine_args: arcaflow.EngineArgs, kubeconfig_path: str
):
context_auth = ContextAuth()
if not os.path.exists(kubeconfig_path):
raise Exception("kubeconfig not found in {}".format(kubeconfig_path))
with open(kubeconfig_path, "r") as stream:
try:
kubeconfig = yaml.safe_load(stream)
context_auth.fetch_auth_data(kubeconfig)
except Exception as e:
logging.error(
"impossible to read kubeconfig file in: {}".format(kubeconfig_path)
)
raise e
kubeconfig_str = self.set_kubeconfig_auth(kubeconfig, context_auth)
with open(engine_args.input, "r") as stream:
input_file = yaml.safe_load(stream)
if "input_list" in input_file and isinstance(
input_file["input_list"], list
):
for index, _ in enumerate(input_file["input_list"]):
if isinstance(input_file["input_list"][index], dict):
input_file["input_list"][index]["kubeconfig"] = kubeconfig_str
else:
input_file["kubeconfig"] = kubeconfig_str
stream.close()
with open(engine_args.input, "w") as stream:
yaml.safe_dump(input_file, stream)
with open(engine_args.config, "r") as stream:
config_file = yaml.safe_load(stream)
if config_file["deployers"]["image"]["deployer_name"] == "kubernetes":
kube_connection = self.set_kubernetes_deployer_auth(
config_file["deployers"]["image"]["connection"], context_auth
)
config_file["deployers"]["image"]["connection"] = kube_connection
with open(engine_args.config, "w") as stream:
yaml.safe_dump(config_file, stream, explicit_start=True, width=4096)
def set_kubernetes_deployer_auth(
self, deployer: any, context_auth: ContextAuth
) -> any:
if context_auth.clusterHost is not None:
deployer["host"] = context_auth.clusterHost
if context_auth.clientCertificateData is not None:
deployer["cert"] = context_auth.clientCertificateData
if context_auth.clientKeyData is not None:
deployer["key"] = context_auth.clientKeyData
if context_auth.clusterCertificateData is not None:
deployer["cacert"] = context_auth.clusterCertificateData
if context_auth.username is not None:
deployer["username"] = context_auth.username
if context_auth.password is not None:
deployer["password"] = context_auth.password
if context_auth.bearerToken is not None:
deployer["bearerToken"] = context_auth.bearerToken
return deployer
def set_kubeconfig_auth(self, kubeconfig: any, context_auth: ContextAuth) -> str:
"""
Builds an arcaflow-compatible kubeconfig representation and returns it as a string.
In order to run arcaflow plugins in kubernetes/openshift the kubeconfig must contain client certificate/key
and server certificate base64 encoded within the kubeconfig file itself in *-data fields. That is not always the
case; in fact the kubeconfig may contain filesystem paths to those files, so this function builds an arcaflow-compatible
kubeconfig file and returns it as a string that can be safely included in input.yaml
"""
if "current-context" not in kubeconfig.keys():
raise Exception(
"invalid kubeconfig file, impossible to determine current-context"
)
user_id = None
cluster_id = None
user_name = None
cluster_name = None
current_context = kubeconfig["current-context"]
for context in kubeconfig["contexts"]:
if context["name"] == current_context:
user_name = context["context"]["user"]
cluster_name = context["context"]["cluster"]
if user_name is None:
raise Exception(
"user not set for context {} in kubeconfig file".format(current_context)
)
if cluster_name is None:
raise Exception(
"cluster not set for context {} in kubeconfig file".format(
current_context
)
)
for index, user in enumerate(kubeconfig["users"]):
if user["name"] == user_name:
user_id = index
for index, cluster in enumerate(kubeconfig["clusters"]):
if cluster["name"] == cluster_name:
cluster_id = index
if user_id is None:
    raise Exception(
        "user {} not found in kubeconfig users".format(user_name)
    )
if cluster_id is None:
    raise Exception(
        "no cluster {} found in kubeconfig clusters".format(cluster_name)
    )
if "client-certificate" in kubeconfig["users"][user_id]["user"]:
kubeconfig["users"][user_id]["user"][
"client-certificate-data"
] = context_auth.clientCertificateDataBase64
del kubeconfig["users"][user_id]["user"]["client-certificate"]
if "client-key" in kubeconfig["users"][user_id]["user"]:
kubeconfig["users"][user_id]["user"][
"client-key-data"
] = context_auth.clientKeyDataBase64
del kubeconfig["users"][user_id]["user"]["client-key"]
if "certificate-authority" in kubeconfig["clusters"][cluster_id]["cluster"]:
kubeconfig["clusters"][cluster_id]["cluster"][
"certificate-authority-data"
] = context_auth.clusterCertificateDataBase64
del kubeconfig["clusters"][cluster_id]["cluster"]["certificate-authority"]
kubeconfig_str = yaml.dump(kubeconfig)
return kubeconfig_str
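The conversion described in the docstring above reduces to swapping each path field for its base64-encoded *-data twin. A minimal standalone sketch of that idea (the helper name embed_file_as_data and the kubeconfig.yaml path are illustrative, not part of the krkn codebase):

import base64
import yaml

def embed_file_as_data(section: dict, path_key: str, data_key: str) -> None:
    # replace a filesystem-path field (e.g. client-certificate) with the
    # base64-encoded file content in the corresponding *-data field
    if path_key in section:
        with open(section[path_key], "rb") as f:
            section[data_key] = base64.b64encode(f.read()).decode("ascii")
        del section[path_key]

with open("kubeconfig.yaml") as f:  # illustrative path
    kubeconfig = yaml.safe_load(f)
user = kubeconfig["users"][0]["user"]
embed_file_as_data(user, "client-certificate", "client-certificate-data")
embed_file_as_data(user, "client-key", "client-key-data")
print(yaml.dump(kubeconfig))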


@@ -1,142 +0,0 @@
import os
import base64
class ContextAuth:
clusterCertificate: str = None
clusterCertificateData: str = None
clusterHost: str = None
clientCertificate: str = None
clientCertificateData: str = None
clientKey: str = None
clientKeyData: str = None
clusterName: str = None
username: str = None
password: str = None
bearerToken: str = None
# TODO: integrate in krkn-lib-kubernetes in the next iteration
@property
def clusterCertificateDataBase64(self):
if self.clusterCertificateData is not None:
return base64.b64encode(bytes(self.clusterCertificateData, "utf8")).decode(
"ascii"
)
return
@property
def clientCertificateDataBase64(self):
if self.clientCertificateData is not None:
return base64.b64encode(bytes(self.clientCertificateData, "utf8")).decode(
"ascii"
)
return
@property
def clientKeyDataBase64(self):
if self.clientKeyData is not None:
return base64.b64encode(bytes(self.clientKeyData, "utf-8")).decode("ascii")
return
def fetch_auth_data(self, kubeconfig: any):
context_username = None
current_context = kubeconfig.get("current-context")
if current_context is None:
raise Exception("no current-context found in kubeconfig")
for context in kubeconfig["contexts"]:
if context["name"] == current_context:
context_username = context["context"]["user"]
self.clusterName = context["context"]["cluster"]
if context_username is None:
raise Exception("user not found for context {0}".format(current_context))
if self.clusterName is None:
raise Exception("cluster not found for context {0}".format(current_context))
cluster_id = None
user_id = None
for index, user in enumerate(kubeconfig["users"]):
if user["name"] == context_username:
user_id = index
if user_id is None:
raise Exception(
"user {0} not found in kubeconfig users".format(context_username)
)
for index, cluster in enumerate(kubeconfig["clusters"]):
if cluster["name"] == self.clusterName:
cluster_id = index
if cluster_id is None:
raise Exception(
"no cluster {} found in kubeconfig users".format(self.clusterName)
)
user = kubeconfig["users"][user_id]["user"]
cluster = kubeconfig["clusters"][cluster_id]["cluster"]
# sets cluster api URL
self.clusterHost = cluster["server"]
# client certificates
if "client-key" in user:
try:
self.clientKey = user["client-key"]
self.clientKeyData = self.read_file(user["client-key"])
except Exception as e:
raise e
if "client-key-data" in user:
try:
self.clientKeyData = base64.b64decode(user["client-key-data"]).decode(
"utf-8"
)
except Exception as e:
raise Exception("impossible to decode client-key-data")
if "client-certificate" in user:
try:
self.clientCertificate = user["client-certificate"]
self.clientCertificateData = self.read_file(user["client-certificate"])
except Exception as e:
raise e
if "client-certificate-data" in user:
try:
self.clientCertificateData = base64.b64decode(
user["client-certificate-data"]
).decode("utf-8")
except Exception as e:
raise Exception("impossible to decode client-certificate-data")
# cluster certificate authority
if "certificate-authority" in cluster:
try:
self.clusterCertificate = cluster["certificate-authority"]
self.clusterCertificateData = self.read_file(
cluster["certificate-authority"]
)
except Exception as e:
raise e
if "certificate-authority-data" in cluster:
try:
self.clusterCertificateData = base64.b64decode(
cluster["certificate-authority-data"]
).decode("utf-8")
except Exception as e:
raise Exception("impossible to decode certificate-authority-data")
if "username" in user:
self.username = user["username"]
if "password" in user:
self.password = user["password"]
if "token" in user:
self.bearerToken = user["token"]
def read_file(self, filename: str) -> str:
if not os.path.exists(filename):
raise Exception("file not found {0} ".format(filename))
with open(filename, "rb") as file_stream:
return file_stream.read().decode("utf-8")
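For reference, a typical round trip through this class looks roughly as follows (the kubeconfig path is illustrative; any of the fixtures used by the tests below would do):

import yaml

auth = ContextAuth()
with open("kubeconfig.yaml") as f:  # illustrative path
    auth.fetch_auth_data(yaml.safe_load(f))
# after fetch_auth_data the decoded material is available directly
print(auth.clusterHost)
print(auth.clientCertificateDataBase64)  # base64 form, ready for *-data fields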


@@ -1,19 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIDBjCCAe6gAwIBAgIBATANBgkqhkiG9w0BAQsFADAVMRMwEQYDVQQDEwptaW5p
a3ViZUNBMB4XDTIzMDMxMzE1NDAxM1oXDTMzMDMxMTE1NDAxM1owFTETMBEGA1UE
AxMKbWluaWt1YmVDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMnz
U/gIbJBRGOgNYVKX2fV03ANOwnM4VjquR28QMAdxURqgOFZ6IxYNysHEyxxE9I+I
DAm9hi4vQPbOX7FlxUezuzw+ExEfa6RRJ+n+AGJOV1lezCVph6OaJxB1+L1UqaDZ
eM3B4cUf/iCc5Y4bs927+CBG3MJL/jmCVPCO+MiSn/l73PXSFNJAYMvRj42zkXqD
CVG9CwY2vWgZnnzl01l7jNGtie871AmV2uqKakJrQ2ILhD+8fZk4jE5JBDTCZnqQ
pXIc+vERNKLUS8cvjO6Ux8dMv/Z7+xonpXOU59LlpUdHWP9jgCvMTwiOriwqGjJ+
pQJWpX9Dm+oxJiVOJzsCAwEAAaNhMF8wDgYDVR0PAQH/BAQDAgKkMB0GA1UdJQQW
MBQGCCsGAQUFBwMCBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW
BBQU9pDMtbayJdNM6bp0IG8dcs15qTANBgkqhkiG9w0BAQsFAAOCAQEAtl9TVKPA
hTnPODqv0AGTqreS9kLg4WUUjZRaPUkPWmtCoTh2Yf55nRWdHOHeZnCWDSg24x42
lpt+13IdqKew1RKTpKCTkicMFi090A01bYu/w39Cm6nOAA5h8zkgSkV5czvQotuV
SoN2vB+nbuY28ah5PkdqjMHEZbNwa59cgEke8wB1R1DWFQ/pqflrH2v9ACAuY+5Q
i673tA6CXrb1YfaCQnVBzcfvjGS1MqShPKpOLMF+/GccPczNimaBxMnKvYLvf3pN
qEUrJC00mAcein8HmxR2Xz8wredbMUUyrQxW29pZJwfGE5GU0olnlsA0lZLbTwio
xoolo5y+fsK/dA==
-----END CERTIFICATE-----


@@ -1,19 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIDITCCAgmgAwIBAgIBAjANBgkqhkiG9w0BAQsFADAVMRMwEQYDVQQDEwptaW5p
a3ViZUNBMB4XDTIzMDUwMTA4NTc0N1oXDTI2MDUwMTA4NTc0N1owMTEXMBUGA1UE
ChMOc3lzdGVtOm1hc3RlcnMxFjAUBgNVBAMTDW1pbmlrdWJlLXVzZXIwggEiMA0G
CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC0b7uy9nQYrh7uC5NODve7dFNLAgo5
pWRS6Kx13ULA55gOpieZiI5/1jwUBjOz0Hhl5QAdHC1HDNu5wf4MmwIEheuq3kMA
mfuvNxW2BnWSDuXyUMlBfqlwg5o6W8ndEWaK33D7wd2WQsSsAnhQPJSjnzWKvWKq
+Kbcygc4hdss/ZWN+SXLTahNpHBw0sw8AcJqddNeXs2WI5GdZmbXL4QZI36EaNUm
m4xKmKRKYIP9wYkmXOV/D2h1meM44y4lul5v2qvo6I+umJ84q4W1/W1vVmAzyVfL
v1TQCUx8cpKMHzw3ma6CTBCtU3Oq9HKHBnf8GyHZicmV7ESzf/phJu4ZAgMBAAGj
YDBeMA4GA1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUH
AwIwDAYDVR0TAQH/BAIwADAfBgNVHSMEGDAWgBQU9pDMtbayJdNM6bp0IG8dcs15
qTANBgkqhkiG9w0BAQsFAAOCAQEABNzEQQMYUcLsBASHladEjr46avKn7gREfaDl
Y5PBvgCPP42q/sW/9iCNY3UpT9TJZWM6s01+0p6I96jYbRQER1NX7O4OgQYHmFw2
PF6UOG2vMo54w11OvL7sbr4d+nkE6ItdM9fLDIJ3fEOYJZkSoxhOL/U3jSjIl7Wu
KCIlpM/M/gcZ4w2IvcLrWtvswbFNUd+dwQfBGcQTmSQDOLE7MqSvzYAkeNv73GLB
ieba7gs/PmoTFsf9nW60iXymDDF4MtODn15kqT/y1uD6coujmiEiIomBfxqAkUCU
0ciP/KF5oOEMmMedm7/peQxaRTMdRSk4yu7vbj/BxnTcj039Qg==
-----END CERTIFICATE-----


@@ -1,27 +0,0 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEAtG+7svZ0GK4e7guTTg73u3RTSwIKOaVkUuisdd1CwOeYDqYn
mYiOf9Y8FAYzs9B4ZeUAHRwtRwzbucH+DJsCBIXrqt5DAJn7rzcVtgZ1kg7l8lDJ
QX6pcIOaOlvJ3RFmit9w+8HdlkLErAJ4UDyUo581ir1iqvim3MoHOIXbLP2Vjfkl
y02oTaRwcNLMPAHCanXTXl7NliORnWZm1y+EGSN+hGjVJpuMSpikSmCD/cGJJlzl
fw9odZnjOOMuJbpeb9qr6OiPrpifOKuFtf1tb1ZgM8lXy79U0AlMfHKSjB88N5mu
gkwQrVNzqvRyhwZ3/Bsh2YnJlexEs3/6YSbuGQIDAQABAoIBAQCdJxPb8zt6o2zc
98f8nJy378D7+3LccmjGrVBH98ZELXIKkDy9RGqYfQcmiaBOZKv4U1OeBwSIdXKK
f6O9ZuSC/AEeeSbyRysmmFuYhlewNrmgKyyelqsNDBIv8fIHUTh2i9Xj8B4G2XBi
QGR5vcnYGLqRdBGTx63Nb0iKuksDCwPAuPA/e0ySz9HdWL1j4bqpVSYsOIXsqTDr
CVnxUeSIL0fFQnRm3IASXQD7zdq9eEFX7vESeleZoz8qNcKb4Na/C3N6crScjgH7
qyNZ2zNLfy1LT84k8uc1TMX2KcEVEmfdDv5cCnUH2ic12CwXMZ0vgId5LJTaHx4x
ytIQIe5hAoGBANB+TsRXP4KzcjZlUUfiAp/pWUM4kVktbsfZa1R2NEuIGJUxPk3P
7WS0WX5W75QKRg+UWTubg5kfd0f9fklLgofmliBnY/HrpgdyugJmUZBgzIxmy0k+
aCe0biD1gULfyyrKtfe8k5wRFstzhfGszlOf2ebR87sSVNBuF2lEwPTvAoGBAN2M
0/XrsodGU4B9Mj86Go2gb2k2WU2izI0cO+tm2S5U5DvKmVEnmjXfPRaOFj2UUQjo
cljnDAinbN+O0+Inc35qsEeYdAIepNAPglzcpfTHagja9mhx2idLYTXGhbZLL+Ei
TRzMyP27NF+GVVfYU/cA86ns6NboG6spohmnqh13AoGAKPc4aNGv0/GIVnHP56zb
0SnbdR7PSFNp+fCZay4Slmi2U9IqKMXbIjdhgjZ4uoDORU9jvReQYuzQ1h9TyfkB
O8yt4M4P0D/6DmqXa9NI4XJznn6wIMMXWf3UybsTW913IQBVgsjVxAuDjBQ11Eec
/sdg3D6SgkZWzeFjzjZJJ5cCgYBSYVg7fE3hERxhjawOaJuRCBQFSklAngVzfwkk
yhR9ruFC/l2uGIy19XFwnprUgP700gIa3qbR3PeV1TUiRcsjOaacqKqSUzSzjODL
iNxIvZHHAyxWv+b/b38REOWNWD3QeAG2cMtX1bFux7OaO31VPkxcZhRaPOp05cE5
yudtlwKBgDBbR7RLYn03OPm3NDBLLjTybhD8Iu8Oj7UeNCiEWAdZpqIKYnwSxMzQ
kdo4aTENA/seEwq+XDV7TwbUIFFJg5gDXIhkcK2c9kiO2bObCAmKpBlQCcrp0a5X
NSBk1N/ZG/Qhqns7z8k01KN4LNcdpRoNiYYPgY+p3xbY8+nWhv+q
-----END RSA PRIVATE KEY-----


@@ -1,98 +0,0 @@
import os
import unittest
import yaml
from .context_auth import ContextAuth
class TestCurrentContext(unittest.TestCase):
def get_kubeconfig_with_data(self) -> str:
"""
This function returns a test kubeconfig file as a string.
:return: a test kubeconfig file in string format (for unit testing purposes)
""" # NOQA
return """apiVersion: v1
clusters:
- cluster:
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM5ekNDQWQrZ0F3SUJBZ0lVV01PTVBNMVUrRi9uNXN6TSthYzlMcGZISHB3d0RRWUpLb1pJaHZjTkFRRUwKQlFBd0hqRWNNQm9HQTFVRUF3d1RhM1ZpZFc1MGRTNXNiMk5oYkdSdmJXRnBiakFlRncweU1URXlNRFl4T0RBdwpNRFJhRncwek1URXlNRFF4T0RBd01EUmFNQjR4SERBYUJnTlZCQU1NRTJ0MVluVnVkSFV1Ykc5allXeGtiMjFoCmFXNHdnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFDNExhcG00SDB0T1NuYTNXVisKdzI4a0tOWWRwaHhYOUtvNjUwVGlOK2c5ZFNQU3VZK0V6T1JVOWVONlgyWUZkMEJmVFNodno4Y25rclAvNysxegpETEoxQ3MwRi9haEV3ZDQxQXN5UGFjbnRiVE80dGRLWm9POUdyODR3YVdBN1hSZmtEc2ZxRGN1YW5UTmVmT1hpCkdGbmdDVzU5Q285M056alB1eEFrakJxdVF6eE5GQkgwRlJPbXJtVFJ4cnVLZXo0aFFuUW1OWEFUNnp0M21udzMKWUtWTzU4b2xlcUxUcjVHNlRtVFQyYTZpVGdtdWY2N0cvaVZlalJGbkw3YkNHWmgzSjlCSTNMcVpqRzE4dWxvbgpaVDdQcGQrQTlnaTJOTm9UZlI2TVB5SndxU1BCL0xZQU5ZNGRoZDVJYlVydDZzbmViTlRZSHV2T0tZTDdNTWRMCmVMSzFBZ01CQUFHakxUQXJNQWtHQTFVZEV3UUNNQUF3SGdZRFZSMFJCQmN3RllJVGEzVmlkVzUwZFM1c2IyTmgKYkdSdmJXRnBiakFOQmdrcWhraUc5dzBCQVFzRkFBT0NBUUVBQTVqUHVpZVlnMExySE1PSkxYY0N4d3EvVzBDNApZeFpncVd3VHF5VHNCZjVKdDlhYTk0SkZTc2dHQWdzUTN3NnA2SlBtL0MyR05MY3U4ZWxjV0E4UXViQWxueXRRCnF1cEh5WnYrZ08wMG83TXdrejZrTUxqQVZ0QllkRzJnZ21FRjViTEk5czBKSEhjUGpHUkl1VHV0Z0tHV1dPWHgKSEg4T0RzaG9wZHRXMktrR2c2aThKaEpYaWVIbzkzTHptM00xRUNGcXAvMEdtNkN1RFphVVA2SGpJMWRrYllLdgpsSHNVZ1U1SmZjSWhNYmJLdUllTzRkc1YvT3FHcm9iNW5vcmRjaExBQmRDTnc1cmU5T1NXZGZ1VVhSK0ViZVhrCjVFM0tFYzA1RGNjcGV2a1NTdlJ4SVQrQzNMOTltWGcxL3B5NEw3VUhvNFFLTXlqWXJXTWlLRlVKV1E9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
server: https://127.0.0.1:6443
name: default
contexts:
- context:
cluster: default
namespace: default
user: testuser
name: default
current-context: default
kind: Config
preferences: {}
users:
- name: testuser
user:
client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM5ekNDQWQrZ0F3SUJBZ0lVV01PTVBNMVUrRi9uNXN6TSthYzlMcGZISHB3d0RRWUpLb1pJaHZjTkFRRUwKQlFBd0hqRWNNQm9HQTFVRUF3d1RhM1ZpZFc1MGRTNXNiMk5oYkdSdmJXRnBiakFlRncweU1URXlNRFl4T0RBdwpNRFJhRncwek1URXlNRFF4T0RBd01EUmFNQjR4SERBYUJnTlZCQU1NRTJ0MVluVnVkSFV1Ykc5allXeGtiMjFoCmFXNHdnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFDNExhcG00SDB0T1NuYTNXVisKdzI4a0tOWWRwaHhYOUtvNjUwVGlOK2c5ZFNQU3VZK0V6T1JVOWVONlgyWUZkMEJmVFNodno4Y25rclAvNysxegpETEoxQ3MwRi9haEV3ZDQxQXN5UGFjbnRiVE80dGRLWm9POUdyODR3YVdBN1hSZmtEc2ZxRGN1YW5UTmVmT1hpCkdGbmdDVzU5Q285M056alB1eEFrakJxdVF6eE5GQkgwRlJPbXJtVFJ4cnVLZXo0aFFuUW1OWEFUNnp0M21udzMKWUtWTzU4b2xlcUxUcjVHNlRtVFQyYTZpVGdtdWY2N0cvaVZlalJGbkw3YkNHWmgzSjlCSTNMcVpqRzE4dWxvbgpaVDdQcGQrQTlnaTJOTm9UZlI2TVB5SndxU1BCL0xZQU5ZNGRoZDVJYlVydDZzbmViTlRZSHV2T0tZTDdNTWRMCmVMSzFBZ01CQUFHakxUQXJNQWtHQTFVZEV3UUNNQUF3SGdZRFZSMFJCQmN3RllJVGEzVmlkVzUwZFM1c2IyTmgKYkdSdmJXRnBiakFOQmdrcWhraUc5dzBCQVFzRkFBT0NBUUVBQTVqUHVpZVlnMExySE1PSkxYY0N4d3EvVzBDNApZeFpncVd3VHF5VHNCZjVKdDlhYTk0SkZTc2dHQWdzUTN3NnA2SlBtL0MyR05MY3U4ZWxjV0E4UXViQWxueXRRCnF1cEh5WnYrZ08wMG83TXdrejZrTUxqQVZ0QllkRzJnZ21FRjViTEk5czBKSEhjUGpHUkl1VHV0Z0tHV1dPWHgKSEg4T0RzaG9wZHRXMktrR2c2aThKaEpYaWVIbzkzTHptM00xRUNGcXAvMEdtNkN1RFphVVA2SGpJMWRrYllLdgpsSHNVZ1U1SmZjSWhNYmJLdUllTzRkc1YvT3FHcm9iNW5vcmRjaExBQmRDTnc1cmU5T1NXZGZ1VVhSK0ViZVhrCjVFM0tFYzA1RGNjcGV2a1NTdlJ4SVQrQzNMOTltWGcxL3B5NEw3VUhvNFFLTXlqWXJXTWlLRlVKV1E9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
client-key-data: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2QUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktZd2dnU2lBZ0VBQW9JQkFRQzRMYXBtNEgwdE9TbmEKM1dWK3cyOGtLTllkcGh4WDlLbzY1MFRpTitnOWRTUFN1WStFek9SVTllTjZYMllGZDBCZlRTaHZ6OGNua3JQLwo3KzF6RExKMUNzMEYvYWhFd2Q0MUFzeVBhY250YlRPNHRkS1pvTzlHcjg0d2FXQTdYUmZrRHNmcURjdWFuVE5lCmZPWGlHRm5nQ1c1OUNvOTNOempQdXhBa2pCcXVRenhORkJIMEZST21ybVRSeHJ1S2V6NGhRblFtTlhBVDZ6dDMKbW53M1lLVk81OG9sZXFMVHI1RzZUbVRUMmE2aVRnbXVmNjdHL2lWZWpSRm5MN2JDR1poM0o5QkkzTHFaakcxOAp1bG9uWlQ3UHBkK0E5Z2kyTk5vVGZSNk1QeUp3cVNQQi9MWUFOWTRkaGQ1SWJVcnQ2c25lYk5UWUh1dk9LWUw3Ck1NZExlTEsxQWdNQkFBRUNnZ0VBQ28rank4NW5ueVk5L2l6ZjJ3cjkzb2J3OERaTVBjYnIxQURhOUZYY1hWblEKT2c4bDZhbU9Ga2tiU0RNY09JZ0VDdkx6dEtXbmQ5OXpydU5sTEVtNEdmb0trNk5kK01OZEtKRUdoZHE5RjM1Qgpqdi91R1owZTIyRE5ZLzFHNVdDTE5DcWMwQkVHY2RFOTF0YzJuMlppRVBTNWZ6WVJ6L1k4cmJ5K1NqbzJkWE9RCmRHYWRlUFplbi9UbmlHTFlqZWhrbXZNQjJvU0FDbVMycTd2OUNrcmdmR1RZbWJzeGVjSU1QK0JONG9KS3BOZ28KOUpnRWJ5SUxkR1pZS2pQb2lLaHNjMVhmSy8zZStXSmxuYjJBaEE5Y1JMUzhMcDdtcEYySWp4SjNSNE93QTg3WQpNeGZvZWFGdnNuVUFHWUdFWFo4Z3BkWmhQMEoxNWRGdERjajIrcngrQVFLQmdRRDFoSE9nVGdFbERrVEc5bm5TCjE1eXYxRzUxYnJMQU1UaWpzNklEMU1qelhzck0xY2ZvazVaaUlxNVJsQ3dReTlYNDdtV1RhY0lZRGR4TGJEcXEKY0IydjR5Wm1YK1VleGJ3cDU1OWY0V05HdzF5YzQrQjdaNFF5aTRFelN4WmFjbldjMnBzcHJMUFVoOUFXRXVNcApOaW1vcXNiVGNnNGs5QWRxeUIrbWhIWmJRUUtCZ1FEQUNzU09qNXZMU1VtaVpxYWcrOVMySUxZOVNOdDZzS1VyCkprcjdCZEVpN3N2YmU5cldRR2RBb0xkQXNzcU94aENydmtPNkpSSHB1YjlRRjlYdlF4Riszc2ZpZm4yYkQ0ZloKMlVsclA1emF3RlNrNDNLbjdMZzRscURpaVUxVGlqTkJBL3dUcFlmbTB4dW5WeFRWNDZpNVViQW1XRk12TWV0bQozWUZYQmJkK2RRS0JnRGl6Q1B6cFpzeEcrazAwbUxlL2dYajl4ekNwaXZCbHJaM29teTdsVWk4YUloMmg5VlBaCjJhMzZNbVcyb1dLVG9HdW5xcCtibWU1eUxRRGlFcjVQdkJ0bGl2V3ppYmRNbFFMY2Nlcnpveml4WDA4QU5WUnEKZUpZdnIzdklDSGFFM25LRjdiVjNJK1NlSk1ra1BYL0QrV1R4WTQ5clZLYm1FRnh4c1JXRW04ekJBb0dBWEZ3UgpZanJoQTZqUW1DRmtYQ0loa0NJMVkwNEorSHpDUXZsY3NGT0EzSnNhUWduVUdwekl5OFUvdlFiLzhpQ0IzZ2RZCmpVck16YXErdnVkbnhYVnRFYVpWWGJIVitPQkVSdHFBdStyUkprZS9yYm1SNS84cUxsVUxOVWd4ZjA4RkRXeTgKTERxOUhKOUZPbnJnRTJvMU9FTjRRMGpSWU81U041dXFXODd0REEwQ2dZQXpXbk1KSFgrbmlyMjhRRXFyVnJKRAo4ZUEwOHIwWTJRMDhMRlcvMjNIVWQ4WU12VnhTUTdwcUwzaE41RXVJQ2dCbEpGVFI3TndBREo3eDY2M002akFMCm1DNlI4dWxSZStwa08xN2Y0UUs3MnVRanJGZEhESnlXQmdDL0RKSkV6d1dwY0Q4VVNPK3A5bVVIbllLTUJTOEsKTVB1ejYrZ3h0VEtsRU5pZUVacXhxZz09Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K
username: testuser
password: testpassword
token: sha256~fFyEqjf1xxFMO0tbEyGRvWeNOd7QByuEgS4hyEq_A9o
""" # NOQA
def get_kubeconfig_with_paths(self) -> str:
"""
This function returns a test kubeconfig file as a string.
:return: a test kubeconfig file in string format (for unit testing purposes)
""" # NOQA
return """apiVersion: v1
clusters:
- cluster:
certificate-authority: fixtures/ca.crt
server: https://127.0.0.1:6443
name: default
contexts:
- context:
cluster: default
namespace: default
user: testuser
name: default
current-context: default
kind: Config
preferences: {}
users:
- name: testuser
user:
client-certificate: fixtures/client.crt
client-key: fixtures/client.key
username: testuser
password: testpassword
token: sha256~fFyEqjf1xxFMO0tbEyGRvWeNOd7QByuEgS4hyEq_A9o
""" # NOQA
def test_current_context(self):
cwd = os.getcwd()
current_context_data = ContextAuth()
data = yaml.safe_load(self.get_kubeconfig_with_data())
current_context_data.fetch_auth_data(data)
self.assertIsNotNone(current_context_data.clusterCertificateData)
self.assertIsNotNone(current_context_data.clientCertificateData)
self.assertIsNotNone(current_context_data.clientKeyData)
self.assertIsNotNone(current_context_data.username)
self.assertIsNotNone(current_context_data.password)
self.assertIsNotNone(current_context_data.bearerToken)
self.assertIsNotNone(current_context_data.clusterHost)
current_context_no_data = ContextAuth()
data = yaml.safe_load(self.get_kubeconfig_with_paths())
current_context_no_data.fetch_auth_data(data)
self.assertIsNotNone(current_context_no_data.clusterCertificate)
self.assertIsNotNone(current_context_no_data.clusterCertificateData)
self.assertIsNotNone(current_context_no_data.clientCertificate)
self.assertIsNotNone(current_context_no_data.clientCertificateData)
self.assertIsNotNone(current_context_no_data.clientKey)
self.assertIsNotNone(current_context_no_data.clientKeyData)
self.assertIsNotNone(current_context_no_data.username)
self.assertIsNotNone(current_context_no_data.password)
self.assertIsNotNone(current_context_no_data.bearerToken)
self.assertIsNotNone(current_context_no_data.clusterHost)


@@ -0,0 +1,142 @@
import copy
import logging
import queue
import random
import re
import threading
import time
import yaml
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
from krkn_lib.models.krkn import HogConfig, HogType
from krkn_lib.models.k8s import NodeResources
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.utils import get_random_string
from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
class HogsScenarioPlugin(AbstractScenarioPlugin):
def run(self, run_uuid: str, scenario: str, krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry) -> int:
try:
with open(scenario, "r") as f:
    scenario_data = yaml.full_load(f)
scenario_config = HogConfig.from_yaml_dict(scenario_data)
has_selector = True
if not scenario_config.node_selector or not re.match("^.+=.*$", scenario_config.node_selector):
    if scenario_config.node_selector:
        logging.warning(f"node selector {scenario_config.node_selector} is not in the right format (key=value)")
    has_selector = False
    node_selector = ""
else:
    node_selector = scenario_config.node_selector
available_nodes = lib_telemetry.get_lib_kubernetes().list_schedulable_nodes(node_selector)
if len(available_nodes) == 0:
raise Exception("no available nodes to schedule workload")
if not has_selector:
    # if no selector is specified, picks a random node among the available ones
    available_nodes = [random.choice(available_nodes)]
if scenario_config.number_of_nodes and len(available_nodes) > scenario_config.number_of_nodes:
available_nodes = random.sample(available_nodes, scenario_config.number_of_nodes)
exception_queue = queue.Queue()
self.run_scenario(scenario_config, lib_telemetry.get_lib_kubernetes(), available_nodes, exception_queue)
return 0
except Exception as e:
logging.error(f"scenario exception: {e}")
return 1
def get_scenario_types(self) -> list[str]:
return ["hog_scenarios"]
def run_scenario_worker(self, config: HogConfig,
lib_k8s: KrknKubernetes, node: str,
exception_queue: queue.Queue):
try:
if not config.workers:
config.workers = lib_k8s.get_node_cpu_count(node)
logging.info(f"[{node}] detected {config.workers} cpus for node {node}")
logging.info(f"[{node}] workers number: {config.workers}")
# using kubernetes.io/hostname = <node_name> selector to
# precisely deploy each workload on each selected node
config.node_selector = f"kubernetes.io/hostname={node}"
pod_name = f"{config.type.value}-hog-{get_random_string(5)}"
node_resources_start = lib_k8s.get_node_resources_info(node)
lib_k8s.deploy_hog(pod_name, config)
start = time.time()
# waiting 3 seconds before starting sample collection
time.sleep(3)
node_resources_end = lib_k8s.get_node_resources_info(node)
samples: list[NodeResources] = []
avg_node_resources = NodeResources()
while time.time() - start < config.duration - 1:
samples.append(lib_k8s.get_node_resources_info(node))
max_wait = 30
wait = 0
logging.info(f"[{node}] waiting {max_wait} up to seconds pod: {pod_name} namespace: {config.namespace} to finish")
while lib_k8s.is_pod_running(pod_name, config.namespace):
if wait >= max_wait:
raise Exception(f"[{node}] hog workload pod: {pod_name} namespace: {config.namespace} "
f"didn't finish after {max_wait}")
time.sleep(1)
wait += 1
continue
logging.info(f"[{node}] deleting pod: {pod_name} namespace: {config.namespace}")
lib_k8s.delete_pod(pod_name, config.namespace)
for resource in samples:
avg_node_resources.cpu += resource.cpu
avg_node_resources.memory += resource.memory
avg_node_resources.disk_space += resource.disk_space
avg_node_resources.cpu = avg_node_resources.cpu / len(samples)
avg_node_resources.memory = avg_node_resources.memory / len(samples)
avg_node_resources.disk_space = avg_node_resources.disk_space / len(samples)
if config.type == HogType.cpu:
logging.info(f"[{node}] detected cpu consumption: "
f"{(avg_node_resources.cpu / (config.workers * 1000000000)) * 100} %")
if config.type == HogType.memory:
logging.info(f"[{node}] detected memory increase: "
f"{avg_node_resources.memory / node_resources_start.memory * 100} %")
if config.type == HogType.io:
logging.info(f"[{node}] detected disk space allocated: "
f"{(avg_node_resources.disk_space - node_resources_end.disk_space) / 1024 / 1024} MB")
except Exception as e:
exception_queue.put(e)
def run_scenario(self, config: HogConfig,
lib_k8s: KrknKubernetes,
available_nodes: list[str],
exception_queue: queue.Queue):
workers = []
logging.info(f"running {config.type.value} hog scenario")
logging.info(f"targeting nodes: [{','.join(available_nodes)}]")
for node in available_nodes:
config_copy = copy.deepcopy(config)
worker = threading.Thread(target=self.run_scenario_worker,
args=(config_copy, lib_k8s, node, exception_queue))
worker.daemon = True
worker.start()
workers.append(worker)
for worker in workers:
worker.join()
try:
while True:
exception = exception_queue.get_nowait()
raise exception
except queue.Empty:
pass
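For context, HogConfig.from_yaml_dict suggests a scenario file along these lines; the key names below are a best guess from the fields read in this plugin (type, duration, workers, namespace, node_selector, number_of_nodes) and may not match krkn_lib.models.krkn exactly:

# hypothetical hog scenario input, i.e. the dict handed to
# HogConfig.from_yaml_dict after yaml.full_load(f)
scenario = {
    "hog-type": "cpu",          # cpu, memory or io (HogType)
    "duration": 60,             # seconds the hog workload runs
    "workers": None,            # None = autodetect from the node cpu count
    "namespace": "default",
    "node-selector": "node-role.kubernetes.io/worker=",
    "number-of-nodes": 2,       # cap on how many matching nodes are targeted
}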


@@ -49,7 +49,6 @@ class NativeScenarioPlugin(AbstractScenarioPlugin):
return [
"pod_disruption_scenarios",
"pod_network_scenarios",
"vmware_node_scenarios",
"ibmcloud_node_scenarios",
]


@@ -34,7 +34,16 @@ class IbmCloud:
self.service.set_service_url(service_url)
except Exception as e:
logging.error("error authenticating" + str(e))
sys.exit(1)
# Get the instance ID of the node
def get_instance_id(self, node_name):
node_list = self.list_instances()
for node in node_list:
if node_name == node["vpc_name"]:
return node["vpc_id"]
logging.error("Couldn't find node with name " + str(node_name) + ", you could try another region")
sys.exit(1)
def delete_instance(self, instance_id):
"""


@@ -18,9 +18,6 @@ from krkn.scenario_plugins.native.pod_network_outage.pod_network_outage_plugin i
)
from arcaflow_plugin_sdk import schema, serialization, jsonschema
from krkn.scenario_plugins.native.node_scenarios import vmware_plugin
@dataclasses.dataclass
class PluginStep:
schema: schema.StepSchema
@@ -160,10 +157,6 @@ PLUGINS = Plugins(
),
PluginStep(wait_for_pods, ["error"]),
PluginStep(run_python_file, ["error"]),
PluginStep(vmware_plugin.node_start, ["error"]),
PluginStep(vmware_plugin.node_stop, ["error"]),
PluginStep(vmware_plugin.node_reboot, ["error"]),
PluginStep(vmware_plugin.node_terminate, ["error"]),
PluginStep(ibmcloud_plugin.node_start, ["error"]),
PluginStep(ibmcloud_plugin.node_stop, ["error"]),
PluginStep(ibmcloud_plugin.node_reboot, ["error"]),


@@ -42,19 +42,13 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
test_egress = get_yaml_item_value(
test_dict, "egress", {"bandwidth": "100mbit"}
)
 if test_node:
     node_name_list = test_node.split(",")
+    nodelst = common_node_functions.get_node_by_name(node_name_list, lib_telemetry.get_lib_kubernetes())
 else:
-    node_name_list = [test_node]
-nodelst = []
-for single_node_name in node_name_list:
-    nodelst.extend(
-        common_node_functions.get_node(
-            single_node_name,
-            test_node_label,
-            test_instance_count,
-            lib_telemetry.get_lib_kubernetes(),
-        )
-    )
+    nodelst = common_node_functions.get_node(
+        test_node_label, test_instance_count, lib_telemetry.get_lib_kubernetes()
+    )
file_loader = FileSystemLoader(
os.path.abspath(os.path.dirname(__file__))
@@ -149,7 +143,10 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
finally:
logging.info("Deleting jobs")
self.delete_job(joblst[:], lib_telemetry.get_lib_kubernetes())
-except (RuntimeError, Exception):
+except (RuntimeError, Exception) as e:
+    logging.error(
+        "NetworkChaosScenarioPlugin exiting due to Exception %s" % e
+    )
scenario_telemetry.exit_status = 1
return 1
else:


@@ -4,14 +4,16 @@ import time
import krkn.invoke.command as runcommand
import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
# krkn_lib
class abstract_node_scenarios:
kubecli: KrknKubernetes
affected_nodes_status: AffectedNodeStatus
def __init__(self, kubecli: KrknKubernetes):
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
self.kubecli = kubecli
self.affected_nodes_status = affected_nodes_status
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
@@ -28,6 +30,7 @@ class abstract_node_scenarios:
logging.info("Waiting for %s seconds before starting the node" % (duration))
time.sleep(duration)
self.node_start_scenario(instance_kill_count, node, timeout)
self.affected_nodes_status.merge_affected_nodes()
logging.info("node_stop_start_scenario has been successfully injected!")
def helper_node_stop_start_scenario(self, instance_kill_count, node, timeout):
@@ -36,6 +39,20 @@ class abstract_node_scenarios:
self.helper_node_start_scenario(instance_kill_count, node, timeout)
logging.info("helper_node_stop_start_scenario has been successfully injected!")
# Node scenario to detach and attach the disk
def node_disk_detach_attach_scenario(self, instance_kill_count, node, timeout, duration):
logging.info("Starting disk_detach_attach_scenario injection")
disk_attachment_details = self.get_disk_attachment_info(instance_kill_count, node)
if disk_attachment_details:
self.disk_detach_scenario(instance_kill_count, node, timeout)
logging.info("Waiting for %s seconds before attaching the disk" % (duration))
time.sleep(duration)
self.disk_attach_scenario(instance_kill_count, disk_attachment_details, timeout)
logging.info("node_disk_detach_attach_scenario has been successfully injected!")
else:
logging.error("Node %s has only root disk attached" % (node))
logging.error("node_disk_detach_attach_scenario failed!")
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
pass
@@ -47,13 +64,15 @@ class abstract_node_scenarios:
# Node scenario to stop the kubelet
def stop_kubelet_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting stop_kubelet_scenario injection")
logging.info("Stopping the kubelet of the node %s" % (node))
runcommand.run(
"oc debug node/" + node + " -- chroot /host systemctl stop kubelet"
)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
logging.info("The kubelet of the node %s has been stopped" % (node))
logging.info("stop_kubelet_scenario has been successfuly injected!")
except Exception as e:
@@ -63,17 +82,20 @@ class abstract_node_scenarios:
)
logging.error("stop_kubelet_scenario injection failed!")
raise e
self.add_affected_node(affected_node)
# Node scenario to stop and start the kubelet
def stop_start_kubelet_scenario(self, instance_kill_count, node, timeout):
logging.info("Starting stop_start_kubelet_scenario injection")
self.stop_kubelet_scenario(instance_kill_count, node, timeout)
self.node_reboot_scenario(instance_kill_count, node, timeout)
self.affected_nodes_status.merge_affected_nodes()
logging.info("stop_start_kubelet_scenario has been successfully injected!")
# Node scenario to restart the kubelet
def restart_kubelet_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting restart_kubelet_scenario injection")
logging.info("Restarting the kubelet of the node %s" % (node))
@@ -82,8 +104,8 @@ class abstract_node_scenarios:
+ node
+ " -- chroot /host systemctl restart kubelet &"
)
nodeaction.wait_for_not_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_not_ready_status(node, timeout, self.kubecli, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info("The kubelet of the node %s has been restarted" % (node))
logging.info("restart_kubelet_scenario has been successfuly injected!")
except Exception as e:
@@ -93,6 +115,7 @@ class abstract_node_scenarios:
)
logging.error("restart_kubelet_scenario injection failed!")
raise e
self.add_affected_node(affected_node)
# Node scenario to crash the node
def node_crash_scenario(self, instance_kill_count, node, timeout):


@@ -18,7 +18,7 @@ from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class Alibaba:
def __init__(self):
@@ -161,8 +161,9 @@ class Alibaba:
return None
# Wait until the node instance is running
def wait_until_running(self, instance_id, timeout):
def wait_until_running(self, instance_id, timeout, affected_node):
time_counter = 0
start_time = time.time()
status = self.get_vm_status(instance_id)
while status != "Running":
status = self.get_vm_status(instance_id)
@@ -174,11 +175,15 @@ class Alibaba:
if time_counter >= timeout:
logging.info("ECS %s is still not ready in allotted time" % instance_id)
return False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("running", end_time - start_time)
return True
# Wait until the node instance is stopped
def wait_until_stopped(self, instance_id, timeout):
def wait_until_stopped(self, instance_id, timeout, affected_node):
time_counter = 0
start_time = time.time()
status = self.get_vm_status(instance_id)
while status != "Stopped":
status = self.get_vm_status(instance_id)
@@ -192,10 +197,14 @@ class Alibaba:
"Vm %s is still not stopped in allotted time" % instance_id
)
return False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("stopped", end_time - start_time)
return True
# Wait until the node instance is terminated
def wait_until_released(self, instance_id, timeout):
def wait_until_released(self, instance_id, timeout, affected_node):
start_time = time.time()
statuses = self.get_vm_status(instance_id)
time_counter = 0
while statuses and statuses != "Released":
@@ -210,17 +219,23 @@ class Alibaba:
return False
logging.info("ECS %s is released" % instance_id)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("terminated", end_time - start_time)
return True
# krkn_lib
class alibaba_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.alibaba = Alibaba()
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_start_scenario injection")
vm_id = self.alibaba.get_instance_id(node)
@@ -228,8 +243,8 @@ class alibaba_node_scenarios(abstract_node_scenarios):
"Starting the node %s with instance ID: %s " % (node, vm_id)
)
self.alibaba.start_instances(vm_id)
self.alibaba.wait_until_running(vm_id, timeout)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
self.alibaba.wait_until_running(vm_id, timeout, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info("Node with instance ID: %s is in running state" % node)
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
@@ -239,10 +254,12 @@ class alibaba_node_scenarios(abstract_node_scenarios):
)
logging.error("node_start_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
vm_id = self.alibaba.get_instance_id(node)
@@ -250,9 +267,9 @@ class alibaba_node_scenarios(abstract_node_scenarios):
"Stopping the node %s with instance ID: %s " % (node, vm_id)
)
self.alibaba.stop_instances(vm_id)
self.alibaba.wait_until_stopped(vm_id, timeout)
self.alibaba.wait_until_stopped(vm_id, timeout, affected_node)
logging.info("Node with instance ID: %s is in stopped state" % vm_id)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -260,23 +277,25 @@ class alibaba_node_scenarios(abstract_node_scenarios):
)
logging.error("node_stop_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)
# Might need to stop and then release the instance
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info(
"Starting node_termination_scenario injection by first stopping instance"
)
vm_id = self.alibaba.get_instance_id(node)
self.alibaba.stop_instances(vm_id)
self.alibaba.wait_until_stopped(vm_id, timeout)
self.alibaba.wait_until_stopped(vm_id, timeout, affected_node)
logging.info(
"Releasing the node %s with instance ID: %s " % (node, vm_id)
)
self.alibaba.release_instance(vm_id)
self.alibaba.wait_until_released(vm_id, timeout)
self.alibaba.wait_until_released(vm_id, timeout, affected_node)
logging.info("Node with instance ID: %s has been released" % node)
logging.info(
"node_termination_scenario has been successfully injected!"
@@ -288,17 +307,19 @@ class alibaba_node_scenarios(abstract_node_scenarios):
)
logging.error("node_termination_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection")
instance_id = self.alibaba.get_instance_id(node)
logging.info("Rebooting the node with instance ID: %s " % (instance_id))
self.alibaba.reboot_instances(instance_id)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with instance ID: %s has been rebooted" % (instance_id)
)
@@ -310,3 +331,4 @@ class alibaba_node_scenarios(abstract_node_scenarios):
)
logging.error("node_reboot_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)


@@ -7,12 +7,13 @@ from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class AWS:
def __init__(self):
self.boto_client = boto3.client("ec2")
self.boto_instance = boto3.resource("ec2").Instance("id")
self.boto_resource = boto3.resource("ec2")
self.boto_instance = self.boto_resource.Instance("id")
# Get the instance ID of the node
def get_instance_id(self, node):
@@ -76,9 +77,13 @@ class AWS:
# until a successful state is reached. An error is returned after 40 failed checks
# Setting timeout for consistency with other cloud functions
# Wait until the node instance is running
def wait_until_running(self, instance_id, timeout=600):
def wait_until_running(self, instance_id, timeout=600, affected_node=None):
try:
start_time = time.time()
self.boto_instance.wait_until_running(InstanceIds=[instance_id])
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("running", end_time - start_time)
return True
except Exception as e:
logging.error(
@@ -88,9 +93,13 @@ class AWS:
return False
# Wait until the node instance is stopped
def wait_until_stopped(self, instance_id, timeout=600):
def wait_until_stopped(self, instance_id, timeout=600, affected_node=None):
try:
start_time = time.time()
self.boto_instance.wait_until_stopped(InstanceIds=[instance_id])
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("stopped", end_time - start_time)
return True
except Exception as e:
logging.error(
@@ -100,9 +109,13 @@ class AWS:
return False
# Wait until the node instance is terminated
def wait_until_terminated(self, instance_id, timeout=600):
def wait_until_terminated(self, instance_id, timeout=600, affected_node=None):
try:
start_time = time.time()
self.boto_instance.wait_until_terminated(InstanceIds=[instance_id])
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("terminated", end_time - start_time)
return True
except Exception as e:
logging.error(
@@ -179,16 +192,83 @@ class AWS:
raise RuntimeError()
# Detach volume
def detach_volumes(self, volumes_ids: list):
for volume in volumes_ids:
try:
self.boto_client.detach_volume(VolumeId=volume, Force=True)
except Exception as e:
logging.error(
"Detaching volume %s failed with exception: %s"
% (volume, e)
)
# Attach volume
def attach_volume(self, attachment: dict):
try:
if self.get_volume_state(attachment["VolumeId"]) == "in-use":
logging.info(
"Volume %s is already in use." % attachment["VolumeId"]
)
return
logging.info(
"Attaching the %s volumes to instance %s."
% (attachment["VolumeId"], attachment["InstanceId"])
)
self.boto_client.attach_volume(
InstanceId=attachment["InstanceId"],
Device=attachment["Device"],
VolumeId=attachment["VolumeId"]
)
except Exception as e:
logging.error(
"Failed attaching disk %s to the %s instance. "
"Encountered following exception: %s"
% (attachment['VolumeId'], attachment['InstanceId'], e)
)
raise RuntimeError()
# Get IDs of node volumes
def get_volumes_ids(self, instance_id: list):
response = self.boto_client.describe_instances(InstanceIds=instance_id)
instance_attachment_details = response["Reservations"][0]["Instances"][0]["BlockDeviceMappings"]
root_volume_device_name = self.get_root_volume_id(instance_id)
volume_ids = []
for device in instance_attachment_details:
if device["DeviceName"] != root_volume_device_name:
volume_id = device["Ebs"]["VolumeId"]
volume_ids.append(volume_id)
return volume_ids
# Get volumes attachment details
def get_volume_attachment_details(self, volume_ids: list):
response = self.boto_client.describe_volumes(VolumeIds=volume_ids)
volumes_details = response["Volumes"]
return volumes_details
# Get the root volume device name
def get_root_volume_id(self, instance_id):
    instance_id = instance_id[0]
    instance = self.boto_resource.Instance(instance_id)
    return instance.root_device_name
# Get volume state
def get_volume_state(self, volume_id: str):
volume = self.boto_resource.Volume(volume_id)
state = volume.state
return state
# krkn_lib
class aws_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
super().__init__(kubecli)
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.aws = AWS()
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_start_scenario injection")
instance_id = self.aws.get_instance_id(node)
@@ -196,8 +276,8 @@ class aws_node_scenarios(abstract_node_scenarios):
"Starting the node %s with instance ID: %s " % (node, instance_id)
)
self.aws.start_instances(instance_id)
self.aws.wait_until_running(instance_id)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
self.aws.wait_until_running(instance_id, affected_node=affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with instance ID: %s is in running state" % (instance_id)
)
@@ -210,10 +290,12 @@ class aws_node_scenarios(abstract_node_scenarios):
logging.error("node_start_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
instance_id = self.aws.get_instance_id(node)
@@ -221,11 +303,11 @@ class aws_node_scenarios(abstract_node_scenarios):
"Stopping the node %s with instance ID: %s " % (node, instance_id)
)
self.aws.stop_instances(instance_id)
self.aws.wait_until_stopped(instance_id)
self.aws.wait_until_stopped(instance_id, affected_node=affected_node)
logging.info(
"Node with instance ID: %s is in stopped state" % (instance_id)
)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node=affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -234,10 +316,12 @@ class aws_node_scenarios(abstract_node_scenarios):
logging.error("node_stop_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_termination_scenario injection")
instance_id = self.aws.get_instance_id(node)
@@ -246,7 +330,7 @@ class aws_node_scenarios(abstract_node_scenarios):
% (node, instance_id)
)
self.aws.terminate_instances(instance_id)
self.aws.wait_until_terminated(instance_id)
self.aws.wait_until_terminated(instance_id, affected_node=affected_node)
for _ in range(timeout):
if node not in self.kubecli.list_nodes():
break
@@ -265,10 +349,12 @@ class aws_node_scenarios(abstract_node_scenarios):
logging.error("node_termination_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection" + str(node))
instance_id = self.aws.get_instance_id(node)
@@ -276,8 +362,8 @@ class aws_node_scenarios(abstract_node_scenarios):
"Rebooting the node %s with instance ID: %s " % (node, instance_id)
)
self.aws.reboot_instances(instance_id)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with instance ID: %s has been rebooted" % (instance_id)
)
@@ -290,3 +376,50 @@ class aws_node_scenarios(abstract_node_scenarios):
logging.error("node_reboot_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Get volume attachment info
def get_disk_attachment_info(self, instance_kill_count, node):
for _ in range(instance_kill_count):
try:
logging.info("Obtaining disk attachment information")
instance_id = (self.aws.get_instance_id(node)).split()
volumes_ids = self.aws.get_volumes_ids(instance_id)
if volumes_ids:
vol_attachment_details = self.aws.get_volume_attachment_details(
volumes_ids
)
return vol_attachment_details
return
except Exception as e:
logging.error(
"Failed to obtain disk attachment information of %s node. "
"Encounteres following exception: %s." % (node, e)
)
raise RuntimeError()
# Node scenario to detach the volume
def disk_detach_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
try:
logging.info("Starting disk_detach_scenario injection")
instance_id = (self.aws.get_instance_id(node)).split()
volumes_ids = self.aws.get_volumes_ids(instance_id)
logging.info(
"Detaching the %s volumes from instance %s "
% (volumes_ids, node)
)
self.aws.detach_volumes(volumes_ids)
except Exception as e:
logging.error(
"Failed to detach disk from %s node. Encountered following"
"exception: %s." % (node, e)
)
logging.debug("")
raise RuntimeError()
# Node scenario to attach the volume
def disk_attach_scenario(self, instance_kill_count, attachment_details, timeout):
for _ in range(instance_kill_count):
for attachment in attachment_details:
self.aws.attach_volume(attachment["Attachments"][0])
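Taken together with node_disk_detach_attach_scenario in the abstract class above, the flow is: record the attachment details, detach every non-root volume, wait out the chaos window, then re-attach. Reduced to the bare boto3 calls used above (the volume/instance ids and device name are illustrative):

import boto3

ec2 = boto3.client("ec2")
attachment = {"VolumeId": "vol-0abc", "InstanceId": "i-0abc", "Device": "/dev/xvdb"}
ec2.detach_volume(VolumeId=attachment["VolumeId"], Force=True)
# ... chaos window elapses ...
ec2.attach_volume(
    InstanceId=attachment["InstanceId"],
    Device=attachment["Device"],
    VolumeId=attachment["VolumeId"],
)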


@@ -8,7 +8,7 @@ from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
from azure.mgmt.compute import ComputeManagementClient
from azure.identity import DefaultAzureCredential
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class Azure:
def __init__(self):
@@ -18,8 +18,11 @@ class Azure:
logging.info("credential " + str(credentials))
# az_account = runcommand.invoke("az account list -o yaml")
# az_account_yaml = yaml.safe_load(az_account, Loader=yaml.FullLoader)
logger = logging.getLogger("azure")
logger.setLevel(logging.WARNING)
subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
self.compute_client = ComputeManagementClient(credentials, subscription_id)
self.compute_client = ComputeManagementClient(credentials, subscription_id, logging=logger)
# Get the instance ID of the node
def get_instance_id(self, node_name):
@@ -90,8 +93,9 @@ class Azure:
return status
# Wait until the node instance is running
def wait_until_running(self, resource_group, vm_name, timeout):
def wait_until_running(self, resource_group, vm_name, timeout, affected_node):
time_counter = 0
start_time = time.time()
status = self.get_vm_status(resource_group, vm_name)
while status and status.code != "PowerState/running":
status = self.get_vm_status(resource_group, vm_name)
@@ -101,11 +105,15 @@ class Azure:
if time_counter >= timeout:
logging.info("Vm %s is still not ready in allotted time" % vm_name)
return False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("running", end_time - start_time)
return True
# Wait until the node instance is stopped
def wait_until_stopped(self, resource_group, vm_name, timeout):
def wait_until_stopped(self, resource_group, vm_name, timeout, affected_node):
time_counter = 0
start_time = time.time()
status = self.get_vm_status(resource_group, vm_name)
while status and status.code != "PowerState/stopped":
status = self.get_vm_status(resource_group, vm_name)
@@ -115,10 +123,14 @@ class Azure:
if time_counter >= timeout:
logging.info("Vm %s is still not stopped in allotted time" % vm_name)
return False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("stopped", end_time - start_time)
return True
# Wait until the node instance is terminated
def wait_until_terminated(self, resource_group, vm_name, timeout):
def wait_until_terminated(self, resource_group, vm_name, timeout, affected_node):
start_time = time.time()
statuses = self.compute_client.virtual_machines.instance_view(
resource_group, vm_name
).statuses[0]
@@ -137,29 +149,35 @@ class Azure:
return False
except Exception:
logging.info("Vm %s is terminated" % vm_name)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("terminated", end_time - start_time)
return True
# krkn_lib
class azure_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
super().__init__(kubecli)
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
logging.info("init in azure")
self.azure = Azure()
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_start_scenario injection")
vm_name, resource_group = self.azure.get_instance_id(node)
logging.info(
"Starting the node %s with instance ID: %s "
% (vm_name, resource_group)
)
self.azure.start_instances(resource_group, vm_name)
self.azure.wait_until_running(resource_group, vm_name, timeout)
nodeaction.wait_for_ready_status(vm_name, timeout, self.kubecli)
self.azure.wait_until_running(resource_group, vm_name, timeout, affected_node=affected_node)
nodeaction.wait_for_ready_status(vm_name, timeout, self.kubecli, affected_node)
logging.info("Node with instance ID: %s is in running state" % node)
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
@@ -170,10 +188,12 @@ class azure_node_scenarios(abstract_node_scenarios):
logging.error("node_start_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
vm_name, resource_group = self.azure.get_instance_id(node)
@@ -182,9 +202,9 @@ class azure_node_scenarios(abstract_node_scenarios):
% (vm_name, resource_group)
)
self.azure.stop_instances(resource_group, vm_name)
self.azure.wait_until_stopped(resource_group, vm_name, timeout)
self.azure.wait_until_stopped(resource_group, vm_name, timeout, affected_node=affected_node)
logging.info("Node with instance ID: %s is in stopped state" % vm_name)
nodeaction.wait_for_unknown_status(vm_name, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(vm_name, timeout, self.kubecli, affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -193,19 +213,22 @@ class azure_node_scenarios(abstract_node_scenarios):
logging.error("node_stop_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_termination_scenario injection")
vm_name, resource_group = self.azure.get_instance_id(node)
logging.info(
"Terminating the node %s with instance ID: %s "
% (vm_name, resource_group)
)
self.azure.terminate_instances(resource_group, vm_name)
self.azure.wait_until_terminated(resource_group, vm_name, timeout)
self.azure.wait_until_terminated(resource_group, vm_name, timeout, affected_node)
for _ in range(timeout):
if vm_name not in self.kubecli.list_nodes():
break
@@ -224,10 +247,13 @@ class azure_node_scenarios(abstract_node_scenarios):
logging.error("node_termination_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection")
vm_name, resource_group = self.azure.get_instance_id(node)
@@ -235,9 +261,11 @@ class azure_node_scenarios(abstract_node_scenarios):
"Rebooting the node %s with instance ID: %s "
% (vm_name, resource_group)
)
self.azure.reboot_instances(resource_group, vm_name)
nodeaction.wait_for_unknown_status(vm_name, timeout, self.kubecli)
nodeaction.wait_for_ready_status(vm_name, timeout, self.kubecli)
nodeaction.wait_for_ready_status(vm_name, timeout, self.kubecli, affected_node)
logging.info("Node with instance ID: %s has been rebooted" % (vm_name))
logging.info("node_reboot_scenario has been successfully injected!")
except Exception as e:
@@ -248,3 +276,4 @@ class azure_node_scenarios(abstract_node_scenarios):
logging.error("node_reboot_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)


@@ -9,7 +9,7 @@ import pyipmi.interfaces
import time
import traceback
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class BM:
def __init__(self, bm_info, user, passwd):
@@ -127,8 +127,8 @@ class BM:
# krkn_lib
class bm_node_scenarios(abstract_node_scenarios):
def __init__(self, bm_info, user, passwd, kubecli: KrknKubernetes):
super().__init__(kubecli)
def __init__(self, bm_info, user, passwd, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.bm = BM(bm_info, user, passwd)
# Node scenario to start the node
@@ -159,6 +159,7 @@ class bm_node_scenarios(abstract_node_scenarios):
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
bmc_addr = self.bm.get_bmc_addr(node)
@@ -166,11 +167,11 @@ class bm_node_scenarios(abstract_node_scenarios):
"Stopping the node %s with bmc address: %s " % (node, bmc_addr)
)
self.bm.stop_instances(bmc_addr, node)
self.bm.wait_until_stopped(bmc_addr, node)
self.bm.wait_until_stopped(bmc_addr, node, affected_node)
logging.info(
"Node with bmc address: %s is in stopped state" % (bmc_addr)
)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -179,6 +180,7 @@ class bm_node_scenarios(abstract_node_scenarios):
)
logging.error("node_stop_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
@@ -187,6 +189,7 @@ class bm_node_scenarios(abstract_node_scenarios):
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection")
bmc_addr = self.bm.get_bmc_addr(node)
@@ -195,8 +198,8 @@ class bm_node_scenarios(abstract_node_scenarios):
"Rebooting the node %s with bmc address: %s " % (node, bmc_addr)
)
self.bm.reboot_instances(bmc_addr, node)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info("Node with bmc address: %s has been rebooted" % (bmc_addr))
logging.info("node_reboot_scenario has been successfuly injected!")
except Exception as e:
@@ -208,3 +211,4 @@ class bm_node_scenarios(abstract_node_scenarios):
traceback.print_exc()
logging.error("node_reboot_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)


@@ -1,26 +1,39 @@
import datetime
import time
import random
import logging
import paramiko
import krkn.invoke.command as runcommand
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
node_general = False
def get_node_by_name(node_name_list, kubecli: KrknKubernetes):
killable_nodes = kubecli.list_killable_nodes()
for node_name in node_name_list:
if node_name not in killable_nodes:
logging.info(
f"Node with provided ${node_name} does not exist or the node might "
"be in NotReady state."
)
return
return node_name_list
# Pick a random node with specified label selector
-def get_node(node_name, label_selector, instance_kill_count, kubecli: KrknKubernetes):
-    if node_name in kubecli.list_killable_nodes():
-        return [node_name]
-    elif node_name:
-        logging.info(
-            "Node with provided node_name does not exist or the node might "
-            "be in NotReady state."
-        )
-    nodes = kubecli.list_killable_nodes(label_selector)
+def get_node(label_selector, instance_kill_count, kubecli: KrknKubernetes):
+    label_selector_list = label_selector.split(",")
+    nodes = []
+    for label_selector in label_selector_list:
+        nodes.extend(kubecli.list_killable_nodes(label_selector))
     if not nodes:
         raise Exception("Ready nodes with the provided label selector do not exist")
-    logging.info("Ready nodes with the label selector %s: %s" % (label_selector, nodes))
+    logging.info("Ready nodes with the label selector %s: %s" % (label_selector_list, nodes))
number_of_nodes = len(nodes)
if instance_kill_count == number_of_nodes:
return nodes
@@ -31,26 +44,25 @@ def get_node(node_name, label_selector, instance_kill_count, kubecli: KrknKubern
nodes.remove(node_to_add)
return nodes_to_return
# krkn_lib
# Wait until the node status becomes Ready
def wait_for_ready_status(node, timeout, kubecli: KrknKubernetes):
resource_version = kubecli.get_node_resource_version(node)
kubecli.watch_node_status(node, "True", timeout, resource_version)
def wait_for_ready_status(node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None):
affected_node = kubecli.watch_node_status(node, "True", timeout, affected_node)
return affected_node
# krkn_lib
# Wait until the node status becomes Not Ready
def wait_for_not_ready_status(node, timeout, kubecli: KrknKubernetes):
resource_version = kubecli.get_node_resource_version(node)
kubecli.watch_node_status(node, "False", timeout, resource_version)
def wait_for_not_ready_status(node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None):
affected_node = kubecli.watch_node_status(node, "False", timeout, affected_node)
return affected_node
# krkn_lib
# Wait until the node status becomes Unknown
def wait_for_unknown_status(node, timeout, kubecli: KrknKubernetes):
resource_version = kubecli.get_node_resource_version(node)
kubecli.watch_node_status(node, "Unknown", timeout, resource_version)
def wait_for_unknown_status(node, timeout, kubecli: KrknKubernetes, affected_node: AffectedNode = None):
affected_node = kubecli.watch_node_status(node, "Unknown", timeout, affected_node)
return affected_node
# Get the ip of the cluster node
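For reference, a hedged sketch of how the reworked helpers above compose: get_node splits a comma-separated label-selector string and queries killable nodes per selector, while each wait_for_* helper threads an optional AffectedNode through krkn_lib's watch_node_status and returns it with timing data attached. Selector values and the timeout below are illustrative:

from krkn_lib.models.k8s import AffectedNode

# get_node / wait_for_ready_status are the helpers defined above;
# kubecli is assumed to be a connected KrknKubernetes client.
nodes = get_node("node-role.kubernetes.io/worker=,topology.kubernetes.io/zone=a", 2, kubecli)
for node_name in nodes:
    affected = AffectedNode(node_name)
    # watch_node_status updates the record in place and returns it
    affected = wait_for_ready_status(node_name, 300, kubecli, affected)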

View File

@@ -5,7 +5,7 @@ from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
import logging
import docker
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class Docker:
def __init__(self):
@@ -38,13 +38,14 @@ class Docker:
class docker_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
super().__init__(kubecli)
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.docker = Docker()
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_start_scenario injection")
container_id = self.docker.get_container_id(node)
@@ -52,7 +53,7 @@ class docker_node_scenarios(abstract_node_scenarios):
"Starting the node %s with container ID: %s " % (node, container_id)
)
self.docker.start_instances(node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with container ID: %s is in running state" % (container_id)
)
@@ -64,10 +65,12 @@ class docker_node_scenarios(abstract_node_scenarios):
)
logging.error("node_start_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
container_id = self.docker.get_container_id(node)
@@ -78,7 +81,7 @@ class docker_node_scenarios(abstract_node_scenarios):
logging.info(
"Node with container ID: %s is in stopped state" % (container_id)
)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -86,6 +89,7 @@ class docker_node_scenarios(abstract_node_scenarios):
)
logging.error("node_stop_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
@@ -113,6 +117,7 @@ class docker_node_scenarios(abstract_node_scenarios):
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection")
container_id = self.docker.get_container_id(node)
@@ -121,8 +126,8 @@ class docker_node_scenarios(abstract_node_scenarios):
% (node, container_id)
)
self.docker.reboot_instances(node)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with container ID: %s has been rebooted" % (container_id)
)
@@ -134,3 +139,4 @@ class docker_node_scenarios(abstract_node_scenarios):
)
logging.error("node_reboot_scenario injection failed!")
raise e
self.affected_nodes_status.affected_nodes.append(affected_node)

View File

@@ -1,66 +1,78 @@
import os
import sys
import time
import logging
import json
import google.auth
import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
from google.cloud import compute_v1
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class GCP:
def __init__(self):
try:
gapp_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
with open(gapp_creds, "r") as f:
f_str = f.read()
self.project = json.loads(f_str)["project_id"]
# self.project = runcommand.invoke("gcloud config get-value project").split("/n")[0].strip()
logging.info("project " + str(self.project) + "!")
credentials = GoogleCredentials.get_application_default()
self.client = discovery.build(
"compute", "v1", credentials=credentials, cache_discovery=False
)
_, self.project_id = google.auth.default()
self.instance_client = compute_v1.InstancesClient()
except Exception as e:
logging.error("Error on setting up GCP connection: " + str(e))
raise e
# Get the instance ID of the node
def get_instance_id(self, node):
zone_request = self.client.zones().list(project=self.project)
while zone_request is not None:
zone_response = zone_request.execute()
for zone in zone_response["items"]:
instances_request = self.client.instances().list(
project=self.project, zone=zone["name"]
)
while instances_request is not None:
instance_response = instances_request.execute()
if "items" in instance_response.keys():
for instance in instance_response["items"]:
if instance["name"] in node:
return instance["name"], zone["name"]
instances_request = self.client.zones().list_next(
previous_request=instances_request,
previous_response=instance_response,
)
zone_request = self.client.zones().list_next(
previous_request=zone_request, previous_response=zone_response
)
# Get the instance of the node
def get_node_instance(self, node):
try:
request = compute_v1.AggregatedListInstancesRequest(
project=self.project_id
)
logging.info("no instances ")
agg_list = self.instance_client.aggregated_list(request=request)
for _, response in agg_list:
if response.instances:
for instance in response.instances:
if instance.name in node:
return instance
logging.info("no instances ")
except Exception as e:
logging.error("Error getting the instance of the node: " + str(e))
raise e
# Get the instance name
def get_instance_name(self, instance):
if instance.name:
return instance.name
# Get the instance zone
def get_instance_zone(self, instance):
if instance.zone:
return instance.zone.split("/")[-1]
# Get the instance zone of the node
def get_node_instance_zone(self, node):
instance = self.get_node_instance(node)
if instance:
return self.get_instance_zone(instance)
# Get the instance name of the node
def get_node_instance_name(self, node):
instance = self.get_node_instance(node)
if instance:
return self.get_instance_name(instance)
# Alias of get_node_instance_name kept for callers that expect get_instance_id
def get_instance_id(self, node):
return self.get_node_instance_name(node)
# Start the node instance
def start_instances(self, zone, instance_id):
def start_instances(self, instance_id):
try:
self.client.instances().start(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " started")
request = compute_v1.StartInstanceRequest(
instance=instance_id,
project=self.project_id,
zone=self.get_node_instance_zone(instance_id),
)
self.instance_client.start(request=request)
logging.info("Instance: " + str(instance_id) + " started")
except Exception as e:
logging.error(
"Failed to start node instance %s. Encountered following "
@@ -70,12 +82,15 @@ class GCP:
raise RuntimeError()
# Stop the node instance
def stop_instances(self, zone, instance_id):
def stop_instances(self, instance_id):
try:
self.client.instances().stop(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " stopped")
request = compute_v1.StopInstanceRequest(
instance=instance_id,
project=self.project_id,
zone=self.get_node_instance_zone(instance_id),
)
self.instance_client.stop(request=request)
logging.info("Instance: " + str(instance_id) + " stopped")
except Exception as e:
logging.error(
"Failed to stop node instance %s. Encountered following "
@@ -84,13 +99,16 @@ class GCP:
raise RuntimeError()
# Start the node instance
def suspend_instances(self, zone, instance_id):
# Suspend the node instance
def suspend_instances(self, instance_id):
try:
self.client.instances().suspend(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " suspended")
request = compute_v1.SuspendInstanceRequest(
instance=instance_id,
project=self.project_id,
zone=self.get_node_instance_zone(instance_id),
)
self.instance_client.suspend(request=request)
logging.info("Instance: " + str(instance_id) + " suspended")
except Exception as e:
logging.error(
"Failed to suspend node instance %s. Encountered following "
@@ -100,49 +118,65 @@ class GCP:
raise RuntimeError()
# Terminate the node instance
def terminate_instances(self, zone, instance_id):
def terminate_instances(self, instance_id):
try:
self.client.instances().delete(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " terminated")
request = compute_v1.DeleteInstanceRequest(
instance=instance_id,
project=self.project_id,
zone=self.get_node_instance_zone(instance_id),
)
self.instance_client.delete(request=request)
logging.info("Instance: " + str(instance_id) + " terminated")
except Exception as e:
logging.error(
"Failed to start node instance %s. Encountered following "
"Failed to terminate node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
)
raise RuntimeError()
# Reboot the node instance
def reboot_instances(self, zone, instance_id):
def reboot_instances(self, instance_id):
try:
self.client.instances().reset(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " rebooted")
request = compute_v1.ResetInstanceRequest(
instance=instance_id,
project=self.project_id,
zone=self.get_node_instance_zone(instance_id),
)
self.instance_client.reset(request=request)
logging.info("Instance: " + str(instance_id) + " rebooted")
except Exception as e:
logging.error(
"Failed to start node instance %s. Encountered following "
"Failed to reboot node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
)
raise RuntimeError()
# Get instance status
def get_instance_status(self, zone, instance_id, expected_status, timeout):
# statuses: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING,
def get_instance_status(self, instance_id, expected_status, timeout):
# states: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING,
# and TERMINATED.
i = 0
sleeper = 5
while i <= timeout:
instStatus = (
self.client.instances()
.get(project=self.project, zone=zone, instance=instance_id)
.execute()
)
logging.info("Status of vm " + str(instStatus["status"]))
if instStatus["status"] == expected_status:
try:
request = compute_v1.GetInstanceRequest(
instance=instance_id,
project=self.project_id,
zone=self.get_node_instance_zone(instance_id),
)
instance_status = self.instance_client.get(request=request).status
logging.info("Status of instance " + str(instance_id) + ": " + instance_status)
except Exception as e:
logging.error(
"Failed to get status of instance %s. Encountered following "
"exception: %s." % (instance_id, e)
)
raise RuntimeError()
if instance_status == expected_status:
logging.info("Instance status matches the expected status: " + str(expected_status))
return True
time.sleep(sleeper)
i += sleeper
@@ -153,53 +187,59 @@ class GCP:
return False
# Wait until the node instance is suspended
def wait_until_suspended(self, zone, instance_id, timeout):
return self.get_instance_status(zone, instance_id, "SUSPENDED", timeout)
def wait_until_suspended(self, instance_id, timeout):
return self.get_instance_status(instance_id, "SUSPENDED", timeout)
# Wait until the node instance is running
def wait_until_running(self, zone, instance_id, timeout):
return self.get_instance_status(zone, instance_id, "RUNNING", timeout)
def wait_until_running(self, instance_id, timeout, affected_node):
start_time = time.time()
instance_status = self.get_instance_status(instance_id, "RUNNING", timeout)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("running", end_time - start_time)
return instance_status
# Wait until the node instance is stopped
def wait_until_stopped(self, zone, instance_id, timeout):
return self.get_instance_status(zone, instance_id, "TERMINATED", timeout)
def wait_until_stopped(self, instance_id, timeout, affected_node):
# In GCP, the next state after STOPPING is TERMINATED
start_time = time.time()
instance_status = self.get_instance_status(instance_id, "TERMINATED", timeout)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("stopped", end_time - start_time)
return instance_status
# Wait until the node instance is terminated
def wait_until_terminated(self, zone, instance_id, timeout):
try:
i = 0
sleeper = 5
while i <= timeout:
instStatus = (
self.client.instances()
.get(project=self.project, zone=zone, instance=instance_id)
.execute()
)
logging.info("Status of vm " + str(instStatus["status"]))
time.sleep(sleeper)
except Exception as e:
logging.info("here " + str(e))
return True
def wait_until_terminated(self, instance_id, timeout, affected_node):
start_time = time.time()
instance_status = self.get_instance_status(instance_id, "TERMINATED", timeout)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("terminated", end_time - start_time)
return instance_status
# krkn_lib
class gcp_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
super().__init__(kubecli)
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.gcp = GCP()
print("selfkeys" + str(vars(self)))
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_start_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance = self.gcp.get_node_instance(node)
instance_id = self.gcp.get_instance_name(instance)
logging.info(
"Starting the node %s with instance ID: %s " % (node, instance_id)
)
self.gcp.start_instances(zone, instance_id)
self.gcp.wait_until_running(zone, instance_id, timeout)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
self.gcp.start_instances(instance_id)
self.gcp.wait_until_running(instance_id, timeout, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with instance ID: %s is in running state" % instance_id
)
@@ -212,23 +252,26 @@ class gcp_node_scenarios(abstract_node_scenarios):
logging.error("node_start_scenario injection failed!")
raise RuntimeError()
logging.info("started affected node" + str(affected_node.to_json()))
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
logging.info("stop scenario")
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance = self.gcp.get_node_instance(node)
instance_id = self.gcp.get_instance_name(instance)
logging.info(
"Stopping the node %s with instance ID: %s " % (node, instance_id)
)
self.gcp.stop_instances(zone, instance_id)
self.gcp.wait_until_stopped(zone, instance_id, timeout)
self.gcp.stop_instances(instance_id)
self.gcp.wait_until_stopped(instance_id, timeout, affected_node=affected_node)
logging.info(
"Node with instance ID: %s is in stopped state" % instance_id
)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -237,19 +280,23 @@ class gcp_node_scenarios(abstract_node_scenarios):
logging.error("node_stop_scenario injection failed!")
raise RuntimeError()
logging.info("stopedd affected node" + str(affected_node.to_json()))
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to terminate the node
def node_termination_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_termination_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance = self.gcp.get_node_instance(node)
instance_id = self.gcp.get_instance_name(instance)
logging.info(
"Terminating the node %s with instance ID: %s "
% (node, instance_id)
)
self.gcp.terminate_instances(zone, instance_id)
self.gcp.wait_until_terminated(zone, instance_id, timeout)
self.gcp.terminate_instances(instance_id)
self.gcp.wait_until_terminated(instance_id, timeout, affected_node=affected_node)
for _ in range(timeout):
if node not in self.kubecli.list_nodes():
break
@@ -267,20 +314,24 @@ class gcp_node_scenarios(abstract_node_scenarios):
)
logging.error("node_termination_scenario injection failed!")
raise e
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance = self.gcp.get_node_instance(node)
instance_id = self.gcp.get_instance_name(instance)
logging.info(
"Rebooting the node %s with instance ID: %s " % (node, instance_id)
)
self.gcp.reboot_instances(zone, instance_id)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
self.gcp.reboot_instances(instance_id)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
self.gcp.wait_until_running(instance_id, timeout, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(
"Node with instance ID: %s has been rebooted" % instance_id
)
@@ -293,3 +344,4 @@ class gcp_node_scenarios(abstract_node_scenarios):
logging.error("node_reboot_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
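The migration above swaps the discovery-based zone walk for google-cloud-compute's aggregated instance list, which returns instances grouped by zone in a single call. A standalone sketch of that lookup pattern (the node name is a placeholder):

import google.auth
from google.cloud import compute_v1

_, project_id = google.auth.default()
client = compute_v1.InstancesClient()
request = compute_v1.AggregatedListInstancesRequest(project=project_id)
node = "example-node-name"  # placeholder
for _zone, response in client.aggregated_list(request=request):
    if response.instances:
        for instance in response.instances:
            if instance.name in node:  # same substring match as get_node_instance
                print(instance.name, instance.zone.split("/")[-1])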

View File

@@ -3,7 +3,7 @@ from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNodeStatus
class GENERAL:
def __init__(self):
@@ -12,8 +12,8 @@ class GENERAL:
# krkn_lib
class general_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
super().__init__(kubecli)
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.general = GENERAL()
# Node scenario to start the node

View File

@@ -1,9 +1,12 @@
import logging
import time
from multiprocessing.pool import ThreadPool
from itertools import repeat
import yaml
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.models.k8s import AffectedNodeStatus
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
from krkn_lib.utils import get_yaml_item_value, log_exception
@@ -19,6 +22,7 @@ from krkn.scenario_plugins.node_actions.gcp_node_scenarios import gcp_node_scena
from krkn.scenario_plugins.node_actions.general_cloud_node_scenarios import (
general_node_scenarios,
)
from krkn.scenario_plugins.node_actions.vmware_node_scenarios import vmware_node_scenarios
node_general = False
@@ -47,6 +51,7 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
node_scenario,
node_scenario_object,
lib_telemetry.get_lib_kubernetes(),
scenario_telemetry,
)
end_time = int(time.time())
cerberus.get_status(krkn_config, start_time, end_time)
@@ -57,38 +62,39 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
return 0
def get_node_scenario_object(self, node_scenario, kubecli: KrknKubernetes):
affected_nodes_status = AffectedNodeStatus()
if (
"cloud_type" not in node_scenario.keys()
or node_scenario["cloud_type"] == "generic"
):
global node_general
node_general = True
return general_node_scenarios(kubecli)
if node_scenario["cloud_type"] == "aws":
return aws_node_scenarios(kubecli)
elif node_scenario["cloud_type"] == "gcp":
return gcp_node_scenarios(kubecli)
elif node_scenario["cloud_type"] == "openstack":
return general_node_scenarios(kubecli, affected_nodes_status)
if node_scenario["cloud_type"].lower() == "aws":
return aws_node_scenarios(kubecli, affected_nodes_status)
elif node_scenario["cloud_type"].lower() == "gcp":
return gcp_node_scenarios(kubecli, affected_nodes_status)
elif node_scenario["cloud_type"].lower() == "openstack":
from krkn.scenario_plugins.node_actions.openstack_node_scenarios import (
openstack_node_scenarios,
)
return openstack_node_scenarios(kubecli)
return openstack_node_scenarios(kubecli, affected_nodes_status)
elif (
node_scenario["cloud_type"] == "azure"
or node_scenario["cloud_type"] == "az"
node_scenario["cloud_type"].lower() == "azure"
or node_scenario["cloud_type"].lower() == "az"
):
return azure_node_scenarios(kubecli)
return azure_node_scenarios(kubecli, affected_nodes_status)
elif (
node_scenario["cloud_type"] == "alibaba"
or node_scenario["cloud_type"] == "alicloud"
node_scenario["cloud_type"].lower() == "alibaba"
or node_scenario["cloud_type"].lower() == "alicloud"
):
from krkn.scenario_plugins.node_actions.alibaba_node_scenarios import (
alibaba_node_scenarios,
)
return alibaba_node_scenarios(kubecli)
elif node_scenario["cloud_type"] == "bm":
return alibaba_node_scenarios(kubecli, affected_nodes_status)
elif node_scenario["cloud_type"].lower() == "bm":
from krkn.scenario_plugins.node_actions.bm_node_scenarios import (
bm_node_scenarios,
)
@@ -98,9 +104,15 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
node_scenario.get("bmc_user", None),
node_scenario.get("bmc_password", None),
kubecli,
affected_nodes_status
)
elif node_scenario["cloud_type"] == "docker":
elif node_scenario["cloud_type"].lower() == "docker":
return docker_node_scenarios(kubecli, affected_nodes_status)
elif (
node_scenario["cloud_type"].lower() == "vsphere"
or node_scenario["cloud_type"].lower() == "vmware"
):
return vmware_node_scenarios(kubecli, affected_nodes_status)
else:
logging.error(
"Cloud type "
@@ -118,102 +130,134 @@ class NodeActionsScenarioPlugin(AbstractScenarioPlugin):
)
def inject_node_scenario(
self, action, node_scenario, node_scenario_object, kubecli: KrknKubernetes
self, action, node_scenario, node_scenario_object, kubecli: KrknKubernetes, scenario_telemetry: ScenarioTelemetry
):
generic_cloud_scenarios = ("stop_kubelet_scenario", "node_crash_scenario")
# Get the node scenario configurations
run_kill_count = get_yaml_item_value(node_scenario, "runs", 1)
# Get the node scenario configurations for setting nodes
instance_kill_count = get_yaml_item_value(node_scenario, "instance_count", 1)
node_name = get_yaml_item_value(node_scenario, "node_name", "")
label_selector = get_yaml_item_value(node_scenario, "label_selector", "")
if action == "node_stop_start_scenario":
parallel_nodes = get_yaml_item_value(node_scenario, "parallel", False)
# Get the node to apply the scenario
if node_name:
node_name_list = node_name.split(",")
nodes = common_node_functions.get_node_by_name(node_name_list, kubecli)
else:
nodes = common_node_functions.get_node(
label_selector, instance_kill_count, kubecli
)
# The GCP API doesn't support multiprocessing calls; parallel mode will effectively process only one node at a time
if parallel_nodes:
self.multiprocess_nodes(nodes, node_scenario_object, action, node_scenario)
else:
for single_node in nodes:
self.run_node(single_node, node_scenario_object, action, node_scenario)
affected_nodes_status = node_scenario_object.affected_nodes_status
scenario_telemetry.affected_nodes.extend(affected_nodes_status.affected_nodes)
def multiprocess_nodes(self, nodes, node_scenario_object, action, node_scenario):
try:
# pool object with number of element
pool = ThreadPool(processes=len(nodes))
pool.starmap(self.run_node,zip(nodes, repeat(node_scenario_object), repeat(action), repeat(node_scenario)))
pool.close()
except Exception as e:
logging.info("Error on pool multiprocessing: " + str(e))
def run_node(self, single_node, node_scenario_object, action, node_scenario):
# Get the scenario specifics for running action nodes
run_kill_count = get_yaml_item_value(node_scenario, "runs", 1)
if action in ("node_stop_start_scenario", "node_disk_detach_attach_scenario"):
duration = get_yaml_item_value(node_scenario, "duration", 120)
timeout = get_yaml_item_value(node_scenario, "timeout", 120)
service = get_yaml_item_value(node_scenario, "service", "")
ssh_private_key = get_yaml_item_value(
node_scenario, "ssh_private_key", "~/.ssh/id_rsa"
)
# Get the node to apply the scenario
if node_name:
node_name_list = node_name.split(",")
else:
node_name_list = [node_name]
for single_node_name in node_name_list:
nodes = common_node_functions.get_node(
single_node_name, label_selector, instance_kill_count, kubecli
generic_cloud_scenarios = ("stop_kubelet_scenario", "node_crash_scenario")
if node_general and action not in generic_cloud_scenarios:
logging.info(
"Scenario: "
+ action
+ " is not set up for generic cloud type, skipping action"
)
for single_node in nodes:
if node_general and action not in generic_cloud_scenarios:
logging.info(
"Scenario: "
+ action
+ " is not set up for generic cloud type, skipping action"
else:
if action == "node_start_scenario":
node_scenario_object.node_start_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_stop_scenario":
node_scenario_object.node_stop_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_stop_start_scenario":
node_scenario_object.node_stop_start_scenario(
run_kill_count, single_node, timeout, duration
)
elif action == "node_termination_scenario":
node_scenario_object.node_termination_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_reboot_scenario":
node_scenario_object.node_reboot_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_disk_detach_attach_scenario":
node_scenario_object.node_disk_detach_attach_scenario(
run_kill_count, single_node, timeout, duration)
elif action == "stop_start_kubelet_scenario":
node_scenario_object.stop_start_kubelet_scenario(
run_kill_count, single_node, timeout
)
elif action == "restart_kubelet_scenario":
node_scenario_object.restart_kubelet_scenario(
run_kill_count, single_node, timeout
)
elif action == "stop_kubelet_scenario":
node_scenario_object.stop_kubelet_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_crash_scenario":
node_scenario_object.node_crash_scenario(
run_kill_count, single_node, timeout
)
elif action == "stop_start_helper_node_scenario":
if node_scenario["cloud_type"] != "openstack":
logging.error(
"Scenario: " + action + " is not supported for "
"cloud type "
+ node_scenario["cloud_type"]
+ ", skipping action"
)
else:
if action == "node_start_scenario":
node_scenario_object.node_start_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_stop_scenario":
node_scenario_object.node_stop_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_stop_start_scenario":
node_scenario_object.node_stop_start_scenario(
run_kill_count, single_node, timeout, duration
)
elif action == "node_termination_scenario":
node_scenario_object.node_termination_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_reboot_scenario":
node_scenario_object.node_reboot_scenario(
run_kill_count, single_node, timeout
)
elif action == "stop_start_kubelet_scenario":
node_scenario_object.stop_start_kubelet_scenario(
run_kill_count, single_node, timeout
)
elif action == "restart_kubelet_scenario":
node_scenario_object.restart_kubelet_scenario(
run_kill_count, single_node, timeout
)
elif action == "stop_kubelet_scenario":
node_scenario_object.stop_kubelet_scenario(
run_kill_count, single_node, timeout
)
elif action == "node_crash_scenario":
node_scenario_object.node_crash_scenario(
run_kill_count, single_node, timeout
)
elif action == "stop_start_helper_node_scenario":
if node_scenario["cloud_type"] != "openstack":
logging.error(
"Scenario: " + action + " is not supported for "
"cloud type "
+ node_scenario["cloud_type"]
+ ", skipping action"
)
else:
if not node_scenario["helper_node_ip"]:
logging.error("Helper node IP address is not provided")
raise Exception(
"Helper node IP address is not provided"
)
node_scenario_object.helper_node_stop_start_scenario(
run_kill_count, node_scenario["helper_node_ip"], timeout
)
node_scenario_object.helper_node_service_status(
node_scenario["helper_node_ip"],
service,
ssh_private_key,
timeout,
)
else:
logging.info(
"There is no node action that matches %s, skipping scenario"
% action
if not node_scenario["helper_node_ip"]:
logging.error("Helper node IP address is not provided")
raise Exception(
"Helper node IP address is not provided"
)
node_scenario_object.helper_node_stop_start_scenario(
run_kill_count, node_scenario["helper_node_ip"], timeout
)
node_scenario_object.helper_node_service_status(
node_scenario["helper_node_ip"],
service,
ssh_private_key,
timeout,
)
else:
logging.info(
"There is no node action that matches %s, skipping scenario"
% action
)
def get_scenario_types(self) -> list[str]:
return ["node_scenarios"]
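The parallel path above fans run_node out with a thread pool sized to the node list, pairing each node with the shared scenario object via repeat. A condensed sketch of that fan-out (names and arguments are placeholders; pool.join() is added here to make the wait explicit):

from itertools import repeat
from multiprocessing.pool import ThreadPool

def fan_out(run_fn, nodes, scenario_object, action, scenario_config):
    pool = ThreadPool(processes=len(nodes))
    pool.starmap(
        run_fn,
        zip(nodes, repeat(scenario_object), repeat(action), repeat(scenario_config)),
    )
    pool.close()
    pool.join()  # wait for every injection before reading affected-node data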

View File

@@ -7,7 +7,7 @@ from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class OPENSTACKCLOUD:
def __init__(self):
@@ -56,12 +56,22 @@ class OPENSTACKCLOUD:
raise RuntimeError()
# Wait until the node instance is running
def wait_until_running(self, node, timeout):
return self.get_instance_status(node, "ACTIVE", timeout)
def wait_until_running(self, node, timeout, affected_node):
start_time = time.time()
instance_status = self.get_instance_status(node, "ACTIVE", timeout)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("running", end_time - start_time)
return instance_status
# Wait until the node instance is stopped
def wait_until_stopped(self, node, timeout):
return self.get_instance_status(node, "SHUTOFF", timeout)
def wait_until_stopped(self, node, timeout, affected_node):
start_time = time.time()
instance_status = self.get_instance_status(node, "SHUTOFF", timeout)
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("stopped", end_time - start_time)
return instance_status
# Get instance status
def get_instance_status(self, node, expected_status, timeout):
@@ -107,19 +117,21 @@ class OPENSTACKCLOUD:
# krkn_lib
class openstack_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes):
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.openstackcloud = OPENSTACKCLOUD()
# Node scenario to start the node
def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_start_scenario injection")
logging.info("Starting the node %s" % (node))
openstack_node_name = self.openstackcloud.get_instance_id(node)
self.openstackcloud.start_instances(openstack_node_name)
self.openstackcloud.wait_until_running(openstack_node_name, timeout)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
self.openstackcloud.wait_until_running(openstack_node_name, timeout, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info("Node with instance ID: %s is in running state" % (node))
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
@@ -130,18 +142,20 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.error("node_start_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_stop_scenario injection")
logging.info("Stopping the node %s " % (node))
openstack_node_name = self.openstackcloud.get_instance_id(node)
self.openstackcloud.stop_instances(openstack_node_name)
self.openstackcloud.wait_until_stopped(openstack_node_name, timeout)
self.openstackcloud.wait_until_stopped(openstack_node_name, timeout, affected_node)
logging.info("Node with instance name: %s is in stopped state" % (node))
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_not_ready_status(node, timeout, self.kubecli, affected_node)
except Exception as e:
logging.error(
"Failed to stop node instance. Encountered following exception: %s. "
@@ -150,17 +164,19 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.error("node_stop_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
try:
logging.info("Starting node_reboot_scenario injection")
logging.info("Rebooting the node %s" % (node))
openstack_node_name = self.openstackcloud.get_instance_id(node)
self.openstackcloud.reboot_instances(openstack_node_name)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info("Node with instance name: %s has been rebooted" % (node))
logging.info("node_reboot_scenario has been successfuly injected!")
except Exception as e:
@@ -171,10 +187,12 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.error("node_reboot_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to start the node
def helper_node_start_scenario(self, instance_kill_count, node_ip, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node_ip)
try:
logging.info("Starting helper_node_start_scenario injection")
openstack_node_name = self.openstackcloud.get_openstack_nodename(
@@ -182,7 +200,7 @@ class openstack_node_scenarios(abstract_node_scenarios):
)
logging.info("Starting the helper node %s" % (openstack_node_name))
self.openstackcloud.start_instances(openstack_node_name)
self.openstackcloud.wait_until_running(openstack_node_name, timeout)
self.openstackcloud.wait_until_running(openstack_node_name, timeout, affected_node)
logging.info("Helper node with IP: %s is in running state" % (node_ip))
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
@@ -193,10 +211,12 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.error("helper_node_start_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
# Node scenario to stop the node
def helper_node_stop_scenario(self, instance_kill_count, node_ip, timeout):
for _ in range(instance_kill_count):
affected_node = AffectedNode(node_ip)
try:
logging.info("Starting helper_node_stop_scenario injection")
openstack_node_name = self.openstackcloud.get_openstack_nodename(
@@ -204,7 +224,7 @@ class openstack_node_scenarios(abstract_node_scenarios):
)
logging.info("Stopping the helper node %s " % (openstack_node_name))
self.openstackcloud.stop_instances(openstack_node_name)
self.openstackcloud.wait_until_stopped(openstack_node_name, timeout)
self.openstackcloud.wait_until_stopped(openstack_node_name, timeout, affected_node)
logging.info("Helper node with IP: %s is in stopped state" % (node_ip))
except Exception as e:
logging.error(
@@ -214,6 +234,7 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.error("helper_node_stop_scenario injection failed!")
raise RuntimeError()
self.affected_nodes_status.affected_nodes.append(affected_node)
def helper_node_service_status(self, node_ip, service, ssh_private_key, timeout):
try:

View File

@@ -3,25 +3,25 @@ import logging
import random
import sys
import time
import typing
from dataclasses import dataclass, field
import urllib3
from krkn_lib.k8s import KrknKubernetes
import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from dataclasses import dataclass
from os import environ
from traceback import format_exc
import requests
from arcaflow_plugin_sdk import plugin, validation
from com.vmware.vapi.std.errors_client import (
AlreadyInDesiredState,
NotAllowedInCurrentState,
)
from com.vmware.vcenter.vm_client import Power
from com.vmware.vcenter_client import VM, ResourcePool
from kubernetes import client, watch
from vmware.vapi.vsphere.client import create_vsphere_client
from krkn.scenario_plugins.native.node_scenarios import (
kubernetes_functions as kube_helper,
)
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
class vSphere:
def __init__(self, verify=True):
@@ -32,7 +32,7 @@ class vSphere:
self.server = environ.get("VSPHERE_IP")
self.username = environ.get("VSPHERE_USERNAME")
self.password = environ.get("VSPHERE_PASSWORD")
session = self.get_unverified_session() if not verify else None
session = self.get_unverified_session()
self.credentials_present = (
True if self.server and self.username and self.password else False
)
@@ -42,6 +42,7 @@ class vSphere:
"'VSPHERE_IP', 'VSPHERE_USERNAME', "
"'VSPHERE_PASSWORD' are not set"
)
self.client = create_vsphere_client(
server=self.server,
username=self.username,
@@ -53,10 +54,13 @@ class vSphere:
"""
Returns an unverified session object
"""
session = requests.session()
# Disable TLS certificate verification for the session
session.verify = False
requests.packages.urllib3.disable_warnings()
urllib3.disable_warnings()
return session
def get_vm(self, instance_id):
@@ -297,14 +301,16 @@ class vSphere:
)
return None
def wait_until_released(self, instance_id, timeout):
def wait_until_released(self, instance_id, timeout, affected_node):
"""
Waits until the VM is deleted or until the timeout. Returns True if
the VM is successfully deleted, else returns False
"""
time_counter = 0
start_time = time.time()
vm = self.get_vm(instance_id)
exit_status = True
while vm is not None:
vm = self.get_vm(instance_id)
logging.info(
@@ -314,16 +320,22 @@ class vSphere:
time_counter += 5
if time_counter >= timeout:
logging.info(f"VM {instance_id} is still not deleted in allotted time")
return False
return True
exit_status = False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("terminated", end_time - start_time)
return exit_status
def wait_until_running(self, instance_id, timeout):
def wait_until_running(self, instance_id, timeout, affected_node):
"""
Waits until the VM switches to POWERED_ON state or until the timeout.
Returns True if the VM switches to POWERED_ON, else returns False
"""
time_counter = 0
start_time = time.time()
exit_status = True
status = self.get_vm_status(instance_id)
while status != Power.State.POWERED_ON:
status = self.get_vm_status(instance_id)
@@ -334,16 +346,23 @@ class vSphere:
time_counter += 5
if time_counter >= timeout:
logging.info(f"VM {instance_id} is still not ready in allotted time")
return False
return True
exit_status = False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("running", end_time - start_time)
def wait_until_stopped(self, instance_id, timeout):
return exit_status
def wait_until_stopped(self, instance_id, timeout, affected_node):
"""
Waits until the VM switches to POWERED_OFF state or until the timeout.
Returns True if the VM switches to POWERED_OFF, else returns False
"""
time_counter = 0
start_time = time.time()
exit_status = True
status = self.get_vm_status(instance_id)
while status != Power.State.POWERED_OFF:
status = self.get_vm_status(instance_id)
@@ -354,322 +373,106 @@ class vSphere:
time_counter += 5
if time_counter >= timeout:
logging.info(f"VM {instance_id} is still not ready in allotted time")
return False
return True
exit_status = False
end_time = time.time()
if affected_node:
affected_node.set_affected_node_status("stopped", end_time - start_time)
return exit_status
@dataclass
class Node:
name: str
class vmware_node_scenarios(abstract_node_scenarios):
def __init__(self, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
super().__init__(kubecli, affected_nodes_status)
self.vsphere = vSphere()
def node_start_scenario(self, instance_kill_count, node, timeout):
try:
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
logging.info("Starting node_start_scenario injection")
logging.info(f"Starting the node {node} ")
vm_started = self.vsphere.start_instances(node)
if vm_started:
self.vsphere.wait_until_running(node, timeout, affected_node)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli, affected_node)
logging.info(f"Node with instance ID: {node} is in running state")
logging.info("node_start_scenario has been successfully injected!")
self.affected_nodes_status.affected_nodes.append(affected_node)
except Exception as e:
logging.error("Failed to start node instance. Test Failed")
logging.error(
f"node_start_scenario injection failed! " f"Error was: {str(e)}"
)
@dataclass
class NodeScenarioSuccessOutput:
nodes: typing.Dict[int, Node] = field(
metadata={
"name": "Nodes started/stopped/terminated/rebooted",
"description": "Map between timestamps and the pods "
"started/stopped/terminated/rebooted. "
"The timestamp is provided in nanoseconds",
}
)
action: kube_helper.Actions = field(
metadata={
"name": "The action performed on the node",
"description": "The action performed or attempted to be "
"performed on the node. Possible values"
"are : Start, Stop, Terminate, Reboot",
}
)
@dataclass
class NodeScenarioErrorOutput:
error: str
action: kube_helper.Actions = field(
metadata={
"name": "The action performed on the node",
"description": "The action attempted to be performed on the node. "
"Possible values are : Start Stop, Terminate, Reboot",
}
)
@dataclass
class NodeScenarioConfig:
name: typing.Annotated[
typing.Optional[str],
validation.required_if_not("label_selector"),
validation.required_if("skip_openshift_checks"),
] = field(
default=None,
metadata={
"name": "Name",
"description": "Name(s) for target nodes. "
"Required if label_selector is not set.",
},
)
runs: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
default=1,
metadata={
"name": "Number of runs per node",
"description": "Number of times to inject each scenario under "
"actions (will perform on same node each time)",
},
)
label_selector: typing.Annotated[
typing.Optional[str], validation.min(1), validation.required_if_not("name")
] = field(
default=None,
metadata={
"name": "Label selector",
"description": "Kubernetes label selector for the target nodes. "
"Required if name is not set.\n"
"See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ " # noqa
"for details.",
},
)
timeout: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
default=180,
metadata={
"name": "Timeout",
"description": "Timeout to wait for the target pod(s) "
"to be removed in seconds.",
},
)
instance_count: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
default=1,
metadata={
"name": "Instance Count",
"description": "Number of nodes to perform action/select "
"that match the label selector.",
},
)
skip_openshift_checks: typing.Optional[bool] = field(
default=False,
metadata={
"name": "Skip Openshift Checks",
"description": "Skip checking the status of the openshift nodes.",
},
)
verify_session: bool = field(
default=True,
metadata={
"name": "Verify API Session",
"description": "Verifies the vSphere client session. "
"It is enabled by default",
},
)
kubeconfig_path: typing.Optional[str] = field(
default=None,
metadata={
"name": "Kubeconfig path",
"description": "Path to your Kubeconfig file. "
"Defaults to ~/.kube/config.\n"
"See https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ " # noqa
"for details.",
},
)
@plugin.step(
id="vmware-node-start",
name="Start the node",
description="Start the node(s) by starting the VMware VM "
"on which the node is configured",
outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
)
def node_start(
cfg: NodeScenarioConfig,
) -> typing.Tuple[
str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
]:
with kube_helper.setup_kubernetes(None) as cli:
vsphere = vSphere(verify=cfg.verify_session)
core_v1 = client.CoreV1Api(cli)
watch_resource = watch.Watch()
node_list = kube_helper.get_node_list(cfg, kube_helper.Actions.START, core_v1)
nodes_started = {}
for name in node_list:
try:
for _ in range(cfg.runs):
logging.info("Starting node_start_scenario injection")
logging.info(f"Starting the node {name} ")
vm_started = vsphere.start_instances(name)
if vm_started:
vsphere.wait_until_running(name, cfg.timeout)
if not cfg.skip_openshift_checks:
kube_helper.wait_for_ready_status(
name, cfg.timeout, watch_resource, core_v1
)
nodes_started[int(time.time_ns())] = Node(name=name)
logging.info(f"Node with instance ID: {name} is in running state")
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
logging.error("Failed to start node instance. Test Failed")
logging.error(
f"node_start_scenario injection failed! " f"Error was: {str(e)}"
)
return "error", NodeScenarioErrorOutput(
format_exc(), kube_helper.Actions.START
)
return "success", NodeScenarioSuccessOutput(
nodes_started, kube_helper.Actions.START
)
@plugin.step(
id="vmware-node-stop",
name="Stop the node",
description="Stop the node(s) by starting the VMware VM "
"on which the node is configured",
outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
)
def node_stop(
cfg: NodeScenarioConfig,
) -> typing.Tuple[
str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
]:
with kube_helper.setup_kubernetes(None) as cli:
vsphere = vSphere(verify=cfg.verify_session)
core_v1 = client.CoreV1Api(cli)
watch_resource = watch.Watch()
node_list = kube_helper.get_node_list(cfg, kube_helper.Actions.STOP, core_v1)
nodes_stopped = {}
for name in node_list:
try:
for _ in range(cfg.runs):
logging.info("Starting node_stop_scenario injection")
logging.info(f"Stopping the node {name} ")
vm_stopped = vsphere.stop_instances(name)
if vm_stopped:
vsphere.wait_until_stopped(name, cfg.timeout)
if not cfg.skip_openshift_checks:
kube_helper.wait_for_ready_status(
name, cfg.timeout, watch_resource, core_v1
)
nodes_stopped[int(time.time_ns())] = Node(name=name)
logging.info(f"Node with instance ID: {name} is in stopped state")
logging.info("node_stop_scenario has been successfully injected!")
except Exception as e:
logging.error("Failed to stop node instance. Test Failed")
logging.error(
f"node_stop_scenario injection failed! " f"Error was: {str(e)}"
)
return "error", NodeScenarioErrorOutput(
format_exc(), kube_helper.Actions.STOP
)
return "success", NodeScenarioSuccessOutput(
nodes_stopped, kube_helper.Actions.STOP
)
@plugin.step(
id="vmware-node-reboot",
name="Reboot VMware VM",
description="Reboot the node(s) by starting the VMware VM "
"on which the node is configured",
outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
)
def node_reboot(
cfg: NodeScenarioConfig,
) -> typing.Tuple[
str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
]:
with kube_helper.setup_kubernetes(None) as cli:
vsphere = vSphere(verify=cfg.verify_session)
core_v1 = client.CoreV1Api(cli)
watch_resource = watch.Watch()
node_list = kube_helper.get_node_list(cfg, kube_helper.Actions.REBOOT, core_v1)
nodes_rebooted = {}
for name in node_list:
try:
for _ in range(cfg.runs):
logging.info("Starting node_reboot_scenario injection")
logging.info(f"Rebooting the node {name} ")
vsphere.reboot_instances(name)
if not cfg.skip_openshift_checks:
kube_helper.wait_for_unknown_status(
name, cfg.timeout, watch_resource, core_v1
)
kube_helper.wait_for_ready_status(
name, cfg.timeout, watch_resource, core_v1
)
nodes_rebooted[int(time.time_ns())] = Node(name=name)
logging.info(
f"Node with instance ID: {name} has rebooted " "successfully"
def node_stop_scenario(self, instance_kill_count, node, timeout):
try:
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
logging.info("Starting node_stop_scenario injection")
logging.info(f"Stopping the node {node} ")
vm_stopped = self.vsphere.stop_instances(node)
if vm_stopped:
self.vsphere.wait_until_stopped(node, timeout, affected_node)
nodeaction.wait_for_unknown_status(
node, timeout, self.kubecli, affected_node
)
logging.info("node_reboot_scenario has been successfully injected!")
except Exception as e:
logging.error("Failed to reboot node instance. Test Failed")
logging.error(
f"node_reboot_scenario injection failed! " f"Error was: {str(e)}"
)
return "error", NodeScenarioErrorOutput(
format_exc(), kube_helper.Actions.REBOOT
)
logging.info(f"Node with instance ID: {node} is in stopped state")
logging.info("node_stop_scenario has been successfully injected!")
self.affected_nodes_status.affected_nodes.append(affected_node)
except Exception as e:
logging.error("Failed to stop node instance. Test Failed")
logging.error(
f"node_stop_scenario injection failed! " f"Error was: {str(e)}"
)
return "success", NodeScenarioSuccessOutput(
nodes_rebooted, kube_helper.Actions.REBOOT
)
def node_reboot_scenario(self, instance_kill_count, node, timeout):
try:
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
logging.info("Starting node_reboot_scenario injection")
logging.info(f"Rebooting the node {node} ")
self.vsphere.reboot_instances(node)
nodeaction.wait_for_unknown_status(
    node, timeout, self.kubecli, affected_node
)
nodeaction.wait_for_ready_status(
    node, timeout, self.kubecli, affected_node
)
logging.info(
f"Node with instance ID: {node} has rebooted " "successfully"
)
logging.info("node_reboot_scenario has been successfully injected!")
self.affected_nodes_status.affected_nodes.append(affected_node)
except Exception as e:
logging.error("Failed to reboot node instance. Test Failed")
logging.error(
f"node_reboot_scenario injection failed! " f"Error was: {str(e)}"
)
@plugin.step(
id="vmware-node-terminate",
name="Reboot VMware VM",
description="Wait for the node to be terminated",
outputs={"success": NodeScenarioSuccessOutput, "error": NodeScenarioErrorOutput},
)
def node_terminate(
cfg: NodeScenarioConfig,
) -> typing.Tuple[
str, typing.Union[NodeScenarioSuccessOutput, NodeScenarioErrorOutput]
]:
with kube_helper.setup_kubernetes(None) as cli:
vsphere = vSphere(verify=cfg.verify_session)
core_v1 = client.CoreV1Api(cli)
node_list = kube_helper.get_node_list(
cfg, kube_helper.Actions.TERMINATE, core_v1
)
nodes_terminated = {}
for name in node_list:
try:
for _ in range(cfg.runs):
logging.info(
"Starting node_termination_scenario injection "
"by first stopping the node"
)
vsphere.stop_instances(name)
vsphere.wait_until_stopped(name, cfg.timeout)
logging.info(f"Releasing the node with instance ID: {name} ")
vsphere.release_instances(name)
vsphere.wait_until_released(name, cfg.timeout)
nodes_terminated[int(time.time_ns())] = Node(name=name)
logging.info(f"Node with instance ID: {name} has been released")
logging.info(
"node_terminate_scenario has been " "successfully injected!"
)
except Exception as e:
logging.error("Failed to terminate node instance. Test Failed")
logging.error(
f"node_terminate_scenario injection failed! " f"Error was: {str(e)}"
def node_termination_scenario(self, instance_kill_count, node, timeout):
try:
for _ in range(instance_kill_count):
affected_node = AffectedNode(node)
logging.info(
"Starting node_termination_scenario injection "
"by first stopping the node"
)
return "error", NodeScenarioErrorOutput(
format_exc(), kube_helper.Actions.TERMINATE
self.vsphere.stop_instances(node)
self.vsphere.wait_until_stopped(node, timeout, affected_node)
logging.info(f"Releasing the node with instance ID: {node} ")
self.vsphere.release_instances(node)
self.vsphere.wait_until_released(node, timeout, affected_node)
logging.info(f"Node with instance ID: {node} has been released")
logging.info(
"node_terminate_scenario has been " "successfully injected!"
)
return "success", NodeScenarioSuccessOutput(
nodes_terminated, kube_helper.Actions.TERMINATE
)
self.affected_nodes_status.affected_nodes.append(affected_node)
except Exception as e:
logging.error("Failed to terminate node instance. Test Failed")
logging.error(
f"node_terminate_scenario injection failed! " f"Error was: {str(e)}"
)
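All of the wait_until_* rewrites in this file share one shape: record a start time, poll the VM power state until it matches or the timeout elapses, then stamp the elapsed duration onto the AffectedNode. A generic sketch of that wrapper (timed_wait is hypothetical; set_affected_node_status(status, duration) is krkn_lib's):

import time

def timed_wait(poll_fn, status_label, timeout, affected_node=None, interval=5):
    # poll_fn returns True once the target state is reached
    start = time.time()
    reached = poll_fn()
    while not reached and time.time() - start < timeout:
        time.sleep(interval)
        reached = poll_fn()
    if affected_node:
        affected_node.set_affected_node_status(status_label, time.time() - start)
    return reached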

View File

@@ -29,6 +29,9 @@ class PvcScenarioPlugin(AbstractScenarioPlugin):
pvc_name = get_yaml_item_value(scenario_config, "pvc_name", "")
pod_name = get_yaml_item_value(scenario_config, "pod_name", "")
namespace = get_yaml_item_value(scenario_config, "namespace", "")
block_size = get_yaml_item_value(
scenario_config, "block_size", "102400"
)
target_fill_percentage = get_yaml_item_value(
scenario_config, "fill_percentage", "50"
)
@@ -197,10 +200,12 @@ class PvcScenarioPlugin(AbstractScenarioPlugin):
str(full_path),
)
elif dd is not None:
block_size = int(block_size)
blocks = int(file_size_kb / int(block_size / 1024))
logging.warning(
"fallocate not found, using dd, it may take longer based on the amount of data, please wait..."
)
command = f"dd if=/dev/urandom of={str(full_path)} bs=1024 count={str(file_size_kb)} oflag=direct"
command = f"dd if=/dev/urandom of={str(full_path)} bs={str(block_size)} count={str(blocks)} oflag=direct"
else:
logging.error(
"failed to locate required binaries fallocate or dd to execute the scenario"
@@ -241,45 +246,6 @@ class PvcScenarioPlugin(AbstractScenarioPlugin):
)
return 1
# Calculate file size
file_size_kb = int(
(float(target_fill_percentage / 100) * float(pvc_capacity_kb))
- float(pvc_used_kb)
)
logging.debug("File size: %s KB" % file_size_kb)
file_name = "kraken.tmp"
logging.info(
"Creating %s file, %s KB size, in pod %s at %s (ns %s)"
% (
str(file_name),
str(file_size_kb),
str(pod_name),
str(mount_path),
str(namespace),
)
)
start_time = int(time.time())
# Create temp file in the PVC
full_path = "%s/%s" % (str(mount_path), str(file_name))
command = "fallocate -l $((%s*1024)) %s" % (
str(file_size_kb),
str(full_path),
)
logging.debug("Create temp file in the PVC command:\n %s" % command)
lib_telemetry.get_lib_kubernetes().exec_cmd_in_pod(
[command], pod_name, namespace, container_name
)
# Check if file is created
command = "ls -lh %s" % (str(mount_path))
logging.debug("Check file is created command:\n %s" % command)
response = lib_telemetry.get_lib_kubernetes().exec_cmd_in_pod(
[command], pod_name, namespace, container_name
)
logging.info("\n" + str(response))
if str(file_name).lower() in str(response).lower():
logging.info(
"Waiting for the specified duration in the config: %ss" % duration
)
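A worked example of the fill computation above, with assumed capacities: a 10 GiB PVC that is already 20% full, targeted at fill_percentage=50.

    pvc_capacity_kb = 10 * 1024 * 1024              # 10 GiB volume
    pvc_used_kb = pvc_capacity_kb * 20 // 100       # 2 GiB already in use
    target_fill_percentage = 50
    file_size_kb = int(
        (float(target_fill_percentage / 100) * float(pvc_capacity_kb))
        - float(pvc_used_kb)
    )
    print(file_size_kb)                             # 3145728 KB, i.e. ~3 GiB of kraken.tmp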

View File

@@ -13,7 +13,9 @@ from krkn.scenario_plugins.node_actions.aws_node_scenarios import AWS
from krkn.scenario_plugins.node_actions.az_node_scenarios import Azure
from krkn.scenario_plugins.node_actions.gcp_node_scenarios import GCP
from krkn.scenario_plugins.node_actions.openstack_node_scenarios import OPENSTACKCLOUD
from krkn.scenario_plugins.native.node_scenarios.ibmcloud_plugin import IbmCloud
from krkn_lib.models.k8s import AffectedNodeStatus, AffectedNode
class ShutDownScenarioPlugin(AbstractScenarioPlugin):
def run(
@@ -31,9 +33,12 @@ class ShutDownScenarioPlugin(AbstractScenarioPlugin):
"cluster_shut_down_scenario"
]
start_time = int(time.time())
affected_nodes_status = AffectedNodeStatus()
self.cluster_shut_down(
shut_down_config_scenario, lib_telemetry.get_lib_kubernetes()
shut_down_config_scenario, lib_telemetry.get_lib_kubernetes(), affected_nodes_status
)
scenario_telemetry.affected_nodes = affected_nodes_status
end_time = int(time.time())
cerberus.publish_kraken_status(krkn_config, [], start_time, end_time)
return 0
@@ -71,7 +76,7 @@ class ShutDownScenarioPlugin(AbstractScenarioPlugin):
# Inject the cluster shut down scenario
# krkn_lib
def cluster_shut_down(self, shut_down_config, kubecli: KrknKubernetes):
def cluster_shut_down(self, shut_down_config, kubecli: KrknKubernetes, affected_nodes_status: AffectedNodeStatus):
runs = shut_down_config["runs"]
shut_down_duration = shut_down_config["shut_down_duration"]
cloud_type = shut_down_config["cloud_type"]
@@ -86,6 +91,8 @@ class ShutDownScenarioPlugin(AbstractScenarioPlugin):
cloud_object = OPENSTACKCLOUD()
elif cloud_type.lower() in ["azure", "az"]:
cloud_object = Azure()
elif cloud_type.lower() in ["ibm", "ibmcloud"]:
cloud_object = IbmCloud()
else:
logging.error(
"Cloud type %s is not currently supported for cluster shut down"
@@ -98,6 +105,7 @@ class ShutDownScenarioPlugin(AbstractScenarioPlugin):
node_id = []
for node in nodes:
instance_id = cloud_object.get_instance_id(node)
affected_nodes_status.affected_nodes.append(AffectedNode(node))
node_id.append(instance_id)
logging.info("node id list " + str(node_id))
for _ in range(runs):
@@ -105,14 +113,18 @@ class ShutDownScenarioPlugin(AbstractScenarioPlugin):
stopping_nodes = set(node_id)
self.multiprocess_nodes(cloud_object.stop_instances, node_id, processes)
stopped_nodes = stopping_nodes.copy()
start_time = time.time()
while len(stopping_nodes) > 0:
for node in stopping_nodes:
affected_node = affected_nodes_status.get_affected_node_index(node)
# need to add in time that is passing while waiting for other nodes to be stopped
affected_node.set_cloud_stopping_time(time.time() - start_time)
if type(node) is tuple:
node_status = cloud_object.wait_until_stopped(
node[1], node[0], timeout
node[1], node[0], timeout, affected_node
)
else:
node_status = cloud_object.wait_until_stopped(node, timeout)
node_status = cloud_object.wait_until_stopped(node, timeout, affected_node)
# Only want to remove node from stopping list
# when fully stopped/no error
@@ -129,16 +141,20 @@ class ShutDownScenarioPlugin(AbstractScenarioPlugin):
logging.info("Restarting the nodes")
restarted_nodes = set(node_id)
self.multiprocess_nodes(cloud_object.start_instances, node_id, processes)
start_time = time.time()
logging.info("Wait for each node to be running again")
not_running_nodes = restarted_nodes.copy()
while len(not_running_nodes) > 0:
for node in not_running_nodes:
affected_node = affected_nodes_status.get_affected_node_index(node)
# need to add in time that is passing while waiting for other nodes to be running
affected_node.set_cloud_running_time(time.time() - start_time)
if type(node) is tuple:
node_status = cloud_object.wait_until_running(
node[1], node[0], timeout
node[1], node[0], timeout, affected_node
)
else:
node_status = cloud_object.wait_until_running(node, timeout)
node_status = cloud_object.wait_until_running(node, timeout, affected_node)
if node_status:
restarted_nodes.remove(node)
not_running_nodes = restarted_nodes.copy()
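The AffectedNode bookkeeping added here can be exercised standalone; a sketch assuming krkn-lib 4.0.7 exposes the accessors exactly as used in the diff (get_affected_node_index, set_cloud_stopping_time):

    import time
    from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus

    status = AffectedNodeStatus()
    for name in ("worker-0", "worker-1"):
        status.affected_nodes.append(AffectedNode(name))

    start_time = time.time()
    # ... stop_instances runs for all nodes in parallel here ...
    for name in ("worker-0", "worker-1"):
        affected = status.get_affected_node_index(name)
        # includes time spent waiting on the other nodes, as the diff comment notes
        affected.set_cloud_stopping_time(time.time() - start_time)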

View File

@@ -29,6 +29,8 @@ class ZoneOutageScenarioPlugin(AbstractScenarioPlugin):
subnet_ids = scenario_config["subnet_id"]
duration = scenario_config["duration"]
cloud_type = scenario_config["cloud_type"]
# Add support for user-provided default network ACL
default_acl_id = scenario_config.get("default_acl_id")
ids = {}
acl_ids_created = []
@@ -58,7 +60,20 @@ class ZoneOutageScenarioPlugin(AbstractScenarioPlugin):
"Network association ids associated with "
"the subnet %s: %s" % (subnet_id, network_association_ids)
)
acl_id = cloud_object.create_default_network_acl(vpc_id)
# Use provided default ACL if available, otherwise create a new one
if default_acl_id:
acl_id = default_acl_id
logging.info(
"Using provided default ACL ID %s - this ACL will not be deleted after the scenario",
default_acl_id
)
# Don't add to acl_ids_created since we don't want to delete user-provided ACLs at cleanup
else:
acl_id = cloud_object.create_default_network_acl(vpc_id)
logging.info("Created new default ACL %s", acl_id)
acl_ids_created.append(acl_id)
new_association_id = cloud_object.replace_network_acl_association(
network_association_ids[0], acl_id
)
@@ -66,7 +81,6 @@ class ZoneOutageScenarioPlugin(AbstractScenarioPlugin):
# capture the original_acl_id, created_acl_id and
# new association_id to use during the recovery
ids[new_association_id] = original_acl_id
acl_ids_created.append(acl_id)
# wait for the specified duration
logging.info(
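For context on what the swap amounts to underneath, a hedged boto3 sketch of the replace-and-restore pattern the plugin follows; the subnet, ACL IDs, and region are placeholders, and krkn's cloud_object wraps calls like these:

    import boto3

    ec2 = boto3.client("ec2", region_name="us-west-2")  # region is an assumption

    # Look up the current ACL association for the target subnet.
    resp = ec2.describe_network_acls(
        Filters=[{"Name": "association.subnet-id", "Values": ["subnet-0abc"]}]
    )
    assoc = resp["NetworkAcls"][0]["Associations"][0]
    original_acl_id = assoc["NetworkAclId"]

    # Swap in the deny-all ACL (user-provided default_acl_id or a freshly created one).
    swap = ec2.replace_network_acl_association(
        AssociationId=assoc["NetworkAclAssociationId"], NetworkAclId="acl-deadbeef"
    )

    # Recovery: point the new association back at the original ACL.
    ec2.replace_network_acl_association(
        AssociationId=swap["NewAssociationId"], NetworkAclId=original_acl_id
    )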

View File

@@ -1,7 +1,6 @@
aliyun-python-sdk-core==2.13.36
aliyun-python-sdk-ecs==4.24.25
arcaflow-plugin-sdk==0.14.0
arcaflow==0.19.1
boto3==1.28.61
azure-identity==1.16.1
azure-keyvault==4.2.0
@@ -11,15 +10,15 @@ coverage==7.4.1
datetime==5.4
docker==7.0.0
gitpython==3.1.41
google-api-python-client==2.116.0
google-auth==2.37.0
google-cloud-compute==1.22.0
ibm_cloud_sdk_core==3.18.0
ibm_vpc==0.20.0
jinja2==3.1.4
krkn-lib==4.0.3
jinja2==3.1.5
krkn-lib==4.0.7
lxml==5.1.0
kubernetes==28.1.0
numpy==1.26.4
oauth2client==4.1.3
pandas==2.2.0
openshift-client==1.0.21
paramiko==3.4.0
@@ -32,7 +31,7 @@ requests==2.32.2
service_identity==24.1.0
PyYAML==6.0.1
setuptools==70.0.0
werkzeug==3.0.3
werkzeug==3.0.6
wheel==0.42.0
zope.interface==5.4.0

View File

@@ -31,6 +31,10 @@ from krkn.scenario_plugins.scenario_plugin_factory import (
ScenarioPluginNotFound,
)
# removes TripleDES warning
import warnings
warnings.filterwarnings(action='ignore', module='.*paramiko.*')
report_file = ""
@@ -627,7 +631,7 @@ if __name__ == "__main__":
junit_testcase_xml = get_junit_test_case(
success=True if retval == 0 else False,
time=int(junit_endtime - junit_start_time),
test_suite_name="krkn-test-suite",
test_suite_name="chaos-krkn",
test_case_description=options.junit_testcase,
test_stdout=tee_handler.get_output(),
test_version=options.junit_testcase_version,

View File

@@ -0,0 +1,9 @@
duration: 60
workers: '' # leave it empty ('') for node CPU auto-detection
hog-type: cpu
image: quay.io/krkn-chaos/krkn-hog
namespace: default
cpu-load-percentage: 90
cpu-method: all
node-selector: "node-role.kubernetes.io/worker="
number-of-nodes: 2
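How the empty workers value resolves, sketched with assumed numbers; the real plugin detects the core count from the target node at runtime:

    import yaml  # PyYAML

    with open("scenarios/kube/cpu-hog.yml") as f:   # the file added above
        cfg = yaml.safe_load(f)
    detected_cpus = 8                           # assumed: discovered from the node
    workers = cfg["workers"] or detected_cpus   # '' is falsy -> one worker per core
    print(workers, cfg["cpu-load-percentage"])  # 8 90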

View File

@@ -1,12 +0,0 @@
---
deployers:
image:
connection: {}
deployer_name: kubernetes
log:
level: error
logged_outputs:
error:
level: error
success:
level: debug

View File

@@ -1,13 +0,0 @@
input_list:
- cpu_count: 1
cpu_load_percentage: 80
cpu_method: all
duration: 30
kubeconfig: ''
namespace: default
# set the node selector as a key-value pair eg.
# node_selector:
# kubernetes.io/hostname: kind-worker2
node_selector: {}

View File

@@ -1,98 +0,0 @@
version: v0.2.0
input:
root: SubRootObject
objects:
SubRootObject:
id: SubRootObject
properties:
kubeconfig:
display:
description: The complete kubeconfig file as a string
name: Kubeconfig file contents
type:
type_id: string
required: true
namespace:
display:
description: The namespace where the container will be deployed
name: Namespace
type:
type_id: string
required: true
node_selector:
display:
description: kubernetes node name where the plugin must be deployed
type:
type_id: map
values:
type_id: string
keys:
type_id: string
required: true
duration:
display:
name: duration the scenario expressed in seconds
description: stop stress test after T seconds. One can also specify the units of time in
seconds, minutes, hours, days or years with the suffix s, m, h, d or y
type:
type_id: integer
required: true
cpu_count:
display:
description: Number of CPU cores to be used (0 means all)
name: number of CPUs
type:
type_id: integer
required: true
cpu_method:
display:
description: CPU stress method
name: fine grained control of which cpu stressors to use (ackermann, cfloat etc.)
type:
type_id: string
required: true
cpu_load_percentage:
display:
description: load CPU by percentage
name: CPU load
type:
type_id: integer
required: true
steps:
kubeconfig:
plugin:
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
deployment_type: image
input:
kubeconfig: !expr $.input.kubeconfig
stressng:
plugin:
src: quay.io/arcalot/arcaflow-plugin-stressng:0.6.0
deployment_type: image
step: workload
input:
cleanup: "true"
timeout: !expr $.input.duration
stressors:
- stressor: cpu
workers: !expr $.input.cpu_count
cpu-method: "all"
cpu-load: !expr $.input.cpu_load_percentage
deploy:
deployer_name: kubernetes
connection: !expr $.steps.kubeconfig.outputs.success.connection
pod:
metadata:
namespace: !expr $.input.namespace
labels:
arcaflow: stressng
spec:
nodeSelector: !expr $.input.node_selector
pluginContainer:
imagePullPolicy: Always
outputs:
success:
stressng: !expr $.steps.stressng.outputs.success

View File

@@ -1,25 +0,0 @@
version: v0.2.0
input:
root: RootObject
objects:
RootObject:
id: RootObject
properties:
input_list:
type:
type_id: list
items:
id: SubRootObject
type_id: ref
namespace: $.steps.workload_loop.execute.inputs.items
steps:
workload_loop:
kind: foreach
items: !expr $.input.input_list
workflow: sub-workflow.yaml
parallelism: 1000
outputs:
success:
workloads: !expr $.steps.workload_loop.outputs.success.data

scenarios/kube/io-hog.yml Normal file
View File

@@ -0,0 +1,14 @@
duration: 30
workers: '' # leave it empty ('') for node CPU auto-detection
hog-type: io
image: quay.io/krkn-chaos/krkn-hog
namespace: default
io-block-size: 1m
io-write-bytes: 1g
io-target-pod-folder: /hog-data
io-target-pod-volume:
name: node-volume
hostPath:
path: /root # a path writable by kubelet in the root filesystem of the node
node-selector: "node-role.kubernetes.io/worker="
number-of-nodes: ''
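The io sizes are stress-ng style strings with b/k/m/g suffixes; an illustrative parser with assumed semantics, just to show the write volume implied by the defaults (stress-ng itself parses these strings inside the pod):

    UNITS = {"b": 1, "k": 1024, "m": 1024**2, "g": 1024**3}

    def to_bytes(value: str) -> int:
        # assumed suffix handling, mirroring stress-ng's units
        return int(value[:-1]) * UNITS[value[-1].lower()]

    assert to_bytes("1m") == 1048576                   # io-block-size: 1 MiB per write
    assert to_bytes("1g") // to_bytes("1m") == 1024    # ~1024 writes per worker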

View File

@@ -1,11 +0,0 @@
deployers:
image:
connection: {}
deployer_name: kubernetes
log:
level: error
logged_outputs:
error:
level: error
success:
level: debug

View File

@@ -1,16 +0,0 @@
input_list:
- duration: 30
io_block_size: 1m
io_workers: 1
io_write_bytes: 10m
kubeconfig: ''
namespace: default
# set the node selector as a key-value pair eg.
# node_selector:
# kubernetes.io/hostname: kind-worker2
node_selector: {}
target_pod_folder: /hog-data
target_pod_volume:
hostPath:
path: /tmp
name: node-volume

View File

@@ -1,141 +0,0 @@
version: v0.2.0
input:
root: SubRootObject
objects:
hostPath:
id: HostPathVolumeSource
properties:
path:
type:
type_id: string
Volume:
id: Volume
properties:
name:
type:
type_id: string
hostPath:
type:
id: hostPath
type_id: ref
SubRootObject:
id: SubRootObject
properties:
kubeconfig:
display:
description: The complete kubeconfig file as a string
name: Kubeconfig file contents
type:
type_id: string
required: true
namespace:
display:
description: The namespace where the container will be deployed
name: Namespace
type:
type_id: string
required: true
node_selector:
display:
description: kubernetes node name where the plugin must be deployed
type:
type_id: map
values:
type_id: string
keys:
type_id: string
required: true
duration:
display:
name: duration the scenario expressed in seconds
description: stop stress test after T seconds. One can also specify the units of time in
seconds, minutes, hours, days or years with the suffix s, m, h, d or y
type:
type_id: integer
required: true
io_workers:
display:
description: number of workers
name: start N workers continually writing, reading and removing temporary files
type:
type_id: integer
required: true
io_block_size:
display:
description: single write size
name: specify size of each write in bytes. Size can be from 1 byte to 4MB.
type:
type_id: string
required: true
io_write_bytes:
display:
description: Total number of bytes written
name: write N bytes for each hdd process, the default is 1 GB. One can specify the size
as % of free space on the file system or in units of Bytes, KBytes, MBytes and
GBytes using the suffix b, k, m or g
type:
type_id: string
required: true
target_pod_folder:
display:
description: Target Folder
name: Folder in the pod where the test will be executed and the test files will be written
type:
type_id: string
required: true
target_pod_volume:
display:
name: kubernetes volume definition
description: the volume that will be attached to the pod. In order to stress
the node storage only hostPath mode is currently supported
type:
type_id: ref
id: Volume
required: true
steps:
kubeconfig:
plugin:
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
deployment_type: image
input:
kubeconfig: !expr $.input.kubeconfig
stressng:
plugin:
src: quay.io/arcalot/arcaflow-plugin-stressng:0.6.0
deployment_type: image
step: workload
input:
cleanup: "true"
timeout: !expr $.input.duration
workdir: !expr $.input.target_pod_folder
stressors:
- stressor: hdd
workers: !expr $.input.io_workers
hdd-bytes: !expr $.input.io_write_bytes
hdd-write-size: !expr $.input.io_block_size
deploy:
deployer_name: kubernetes
connection: !expr $.steps.kubeconfig.outputs.success.connection
pod:
metadata:
namespace: !expr $.input.namespace
labels:
arcaflow: stressng
spec:
nodeSelector: !expr $.input.node_selector
pluginContainer:
imagePullPolicy: Always
securityContext:
privileged: true
volumeMounts:
- mountPath: /hog-data
name: node-volume
volumes:
- !expr $.input.target_pod_volume
outputs:
success:
stressng: !expr $.steps.stressng.outputs.success

View File

@@ -1,26 +0,0 @@
version: v0.2.0
input:
root: RootObject
objects:
RootObject:
id: RootObject
properties:
input_list:
type:
type_id: list
items:
id: SubRootObject
type_id: ref
namespace: $.steps.workload_loop.execute.inputs.items
steps:
workload_loop:
kind: foreach
items: !expr $.input.input_list
workflow: sub-workflow.yaml
parallelism: 1000
outputs:
success:
workloads: !expr $.steps.workload_loop.outputs.success.data

View File

@@ -0,0 +1,8 @@
duration: 60
workers: '' # leave it empty ('') for node CPU auto-detection
hog-type: memory
image: quay.io/krkn-chaos/krkn-hog
namespace: default
memory-vm-bytes: 90%
node-selector: "node-role.kubernetes.io/worker="
number-of-nodes: ''
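memory-vm-bytes takes an absolute size or, as here, a percentage of node memory; a sketch of the percentage case on an assumed 16 GiB node:

    node_mem_bytes = 16 * 1024**3          # assumed node memory
    setting = "90%"
    if setting.endswith("%"):
        vm_bytes = node_mem_bytes * int(setting[:-1]) // 100
    print(vm_bytes)                        # 15461882265 bytes (~14.4 GiB) for the vm stressor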

View File

@@ -1,12 +0,0 @@
---
deployers:
image:
connection: {}
deployer_name: kubernetes
log:
level: error
logged_outputs:
error:
level: error
success:
level: debug

View File

@@ -1,13 +0,0 @@
input_list:
- duration: 30
vm_bytes: 10%
vm_workers: 2
# set the node selector as a key-value pair eg.
# node_selector:
# kubernetes.io/hostname: kind-worker2
node_selector: { }
kubeconfig: ""
namespace: default
# duplicate this section to run simultaneous stressors in the same run

View File

@@ -1,89 +0,0 @@
version: v0.2.0
input:
root: SubRootObject
objects:
SubRootObject:
id: SubRootObject
properties:
kubeconfig:
display:
description: The complete kubeconfig file as a string
name: Kubeconfig file contents
type:
type_id: string
required: true
namespace:
display:
description: The namespace where the container will be deployed
name: Namespace
type:
type_id: string
required: true
node_selector:
display:
description: kubernetes node name where the plugin must be deployed
type:
type_id: map
values:
type_id: string
keys:
type_id: string
required: true
duration:
display:
name: duration the scenario expressed in seconds
description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
type:
type_id: integer
required: true
vm_workers:
display:
description: Number of VM stressors to be run (0 means 1 stressor per CPU)
name: Number of VM stressors
type:
type_id: integer
required: true
vm_bytes:
display:
description: N bytes per vm process, the default is 256MB. The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
name: VM bytes
type:
type_id: string
required: true
steps:
kubeconfig:
plugin:
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
deployment_type: image
input:
kubeconfig: !expr $.input.kubeconfig
stressng:
plugin:
src: quay.io/arcalot/arcaflow-plugin-stressng:0.6.0
deployment_type: image
step: workload
input:
cleanup: "true"
timeout: !expr $.input.duration
stressors:
- stressor: vm
workers: !expr $.input.vm_workers
vm-bytes: !expr $.input.vm_bytes
deploy:
deployer_name: kubernetes
connection: !expr $.steps.kubeconfig.outputs.success.connection
pod:
metadata:
namespace: !expr $.input.namespace
labels:
arcaflow: stressng
spec:
nodeSelector: !expr $.input.node_selector
pluginContainer:
imagePullPolicy: Always
outputs:
success:
stressng: !expr $.steps.stressng.outputs.success

View File

@@ -1,29 +0,0 @@
version: v0.2.0
input:
root: RootObject
objects:
RootObject:
id: RootObject
properties:
input_list:
type:
type_id: list
items:
id: SubRootObject
type_id: ref
namespace: $.steps.workload_loop.execute.inputs.items
steps:
workload_loop:
kind: foreach
items: !expr $.input.input_list
workflow: sub-workflow.yaml
parallelism: 1000
outputs:
success:
workloads: !expr $.steps.workload_loop.outputs.success.data

View File

@@ -1,13 +1,14 @@
node_scenarios:
- actions: # node chaos scenarios to be injected
- actions: # node chaos scenarios to be injected
- node_stop_start_scenario
node_name: # node on which scenario has to be injected; can set multiple names separated by comma
label_selector: node-role.kubernetes.io/worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
instance_count: 1 # Number of nodes to perform action/select that match the label selector
runs: 1 # number of times to inject each scenario under actions (will perform on same node each time)
timeout: 360 # duration to wait for completion of node scenario injection
duration: 120 # duration to stop the node before running the start action
cloud_type: aws # cloud type on which Kubernetes/OpenShift runs
node_name: # node on which scenario has to be injected; can set multiple names separated by comma
label_selector: node-role.kubernetes.io/worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection; can specify multiple by a comma separated list
instance_count: 2 # Number of nodes to perform action/select that match the label selector
runs: 1 # number of times to inject each scenario under actions (will perform on same node each time)
timeout: 360 # duration to wait for completion of node scenario injection
duration: 20 # duration to stop the node before running the start action
cloud_type: aws # cloud type on which Kubernetes/OpenShift runs
parallel: true # Run action on label or node name in parallel or sequential, defaults to sequential
- actions:
- node_reboot_scenario
node_name:
@@ -15,3 +16,10 @@ node_scenarios:
instance_count: 1
timeout: 120
cloud_type: aws
- actions:
- node_disk_detach_attach_scenario
node_name:
label_selector:
instance_count: 1
timeout: 120
cloud_type: aws
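The new node_disk_detach_attach_scenario cycles a non-root volume off and back onto the instance; on AWS that reduces to calls like the following boto3 sketch, where the volume ID, instance ID, and device name are placeholders and the real scenario also verifies node status in between:

    import boto3

    ec2 = boto3.client("ec2")   # region/credentials from the environment

    ec2.detach_volume(VolumeId="vol-0123", InstanceId="i-0456")
    waiter = ec2.get_waiter("volume_available")
    waiter.wait(VolumeIds=["vol-0123"])      # let the volume state settle
    ec2.attach_volume(VolumeId="vol-0123", InstanceId="i-0456", Device="/dev/sdf")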

View File

@@ -4,3 +4,4 @@ pvc_scenario:
namespace: <namespace_name> # Namespace where the PVC is
fill_percentage: 50 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
duration: 60 # Duration in seconds for the fault
block_size: 102400 # used only by dd if fallocate not present in the container

View File

@@ -1,10 +1,17 @@
# yaml-language-server: $schema=../plugin.schema.json
- id: <vmware-node-stop/vmware-node-start/vmware-node-reboot/vmware-node-terminate>
config:
name: <node_name> # Node on which scenario has to be injected; can set multiple names separated by comma
label_selector: <label_selector> # When node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
runs: 1 # Number of times to inject each scenario under actions (will perform on same node each time)
instance_count: 1 # Number of nodes to perform action/select that match the label selector
timeout: 300 # Duration to wait for completion of node scenario injection
verify_session: True # Set to True if you want to verify the vSphere client session using certificates; else False
skip_openshift_checks: False # Set to True if you don't want to wait for the status of the nodes to change on OpenShift before passing the scenario
node_scenarios:
- actions:
- node_reboot_scenario
node_name:
label_selector: node-role.kubernetes.io/worker
instance_count: 1
timeout: 120
cloud_type: vmware
- actions:
- node_stop_start_scenario
node_name:
label_selector: node-role.kubernetes.io/worker
instance_count: 1
timeout: 360
duration: 10
cloud_type: vmware
parallel: false

View File

@@ -3,3 +3,4 @@ zone_outage: # Scenario to create an out
duration: 600 # duration in seconds after which the zone will be back online
vpc_id: # cluster virtual private network to target
subnet_id: [subnet1, subnet2] # List of subnet-id's to deny both ingress and egress traffic
default_acl_id: acl-xxxxxxxx # (Optional) ID of an existing network ACL to use instead of creating a new one. If provided, this ACL will not be deleted after the scenario.

View File

@@ -1,121 +0,0 @@
import unittest
import os
import logging
from arcaflow_plugin_sdk import plugin
from krkn.scenario_plugins.native.node_scenarios import vmware_plugin
from krkn.scenario_plugins.native.node_scenarios.kubernetes_functions import Actions
class NodeScenariosTest(unittest.TestCase):
def setUp(self):
vsphere_env_vars = ["VSPHERE_IP", "VSPHERE_USERNAME", "VSPHERE_PASSWORD"]
self.credentials_present = all(
env_var in os.environ for env_var in vsphere_env_vars
)
def test_serialization(self):
plugin.test_object_serialization(
vmware_plugin.NodeScenarioConfig(name="test", skip_openshift_checks=True),
self.fail,
)
plugin.test_object_serialization(
vmware_plugin.NodeScenarioSuccessOutput(nodes={}, action=Actions.START),
self.fail,
)
plugin.test_object_serialization(
vmware_plugin.NodeScenarioErrorOutput(
error="Hello World", action=Actions.START
),
self.fail,
)
def test_node_start(self):
if not self.credentials_present:
self.skipTest(
"Check if the environmental variables 'VSPHERE_IP', "
"'VSPHERE_USERNAME', 'VSPHERE_PASSWORD' are set"
)
vsphere = vmware_plugin.vSphere(verify=False)
vm_id, vm_name = vsphere.create_default_vm()
if vm_id is None:
self.fail("Could not create test VM")
output_id, output_data = vmware_plugin.node_start(
vmware_plugin.NodeScenarioConfig(
name=vm_name, skip_openshift_checks=True, verify_session=False
)
)
if output_id == "error":
logging.error(output_data.error)
self.fail("The VMware VM did not start because an error occurred")
vsphere.release_instances(vm_name)
def test_node_stop(self):
if not self.credentials_present:
self.skipTest(
"Check if the environmental variables 'VSPHERE_IP', "
"'VSPHERE_USERNAME', 'VSPHERE_PASSWORD' are set"
)
vsphere = vmware_plugin.vSphere(verify=False)
vm_id, vm_name = vsphere.create_default_vm()
if vm_id is None:
self.fail("Could not create test VM")
vsphere.start_instances(vm_name)
output_id, output_data = vmware_plugin.node_stop(
vmware_plugin.NodeScenarioConfig(
name=vm_name, skip_openshift_checks=True, verify_session=False
)
)
if output_id == "error":
logging.error(output_data.error)
self.fail("The VMware VM did not stop because an error occurred")
vsphere.release_instances(vm_name)
def test_node_reboot(self):
if not self.credentials_present:
self.skipTest(
"Check if the environmental variables 'VSPHERE_IP', "
"'VSPHERE_USERNAME', 'VSPHERE_PASSWORD' are set"
)
vsphere = vmware_plugin.vSphere(verify=False)
vm_id, vm_name = vsphere.create_default_vm()
if vm_id is None:
self.fail("Could not create test VM")
vsphere.start_instances(vm_name)
output_id, output_data = vmware_plugin.node_reboot(
vmware_plugin.NodeScenarioConfig(
name=vm_name, skip_openshift_checks=True, verify_session=False
)
)
if output_id == "error":
logging.error(output_data.error)
self.fail("The VMware VM did not reboot because an error occurred")
vsphere.release_instances(vm_name)
def test_node_terminate(self):
if not self.credentials_present:
self.skipTest(
"Check if the environmental variables 'VSPHERE_IP', "
"'VSPHERE_USERNAME', 'VSPHERE_PASSWORD' are set"
)
vsphere = vmware_plugin.vSphere(verify=False)
vm_id, vm_name = vsphere.create_default_vm()
if vm_id is None:
self.fail("Could not create test VM")
vsphere.start_instances(vm_name)
output_id, output_data = vmware_plugin.node_terminate(
vmware_plugin.NodeScenarioConfig(
name=vm_name, skip_openshift_checks=True, verify_session=False
)
)
if output_id == "error":
logging.error(output_data.error)
self.fail("The VMware VM did not reboot because an error occurred")
if __name__ == "__main__":
unittest.main()

View File

@@ -112,12 +112,12 @@ def parse_arguments(parser):
default=[],
help="Memory related chaos tests (space separated list)",
)
parser.add_argument("--threshold", action="store", default="", help="Threshold")
parser.add_argument("--threshold", action="store", help="Threshold")
parser.add_argument(
"--cpu-threshold", action="store", default="", help="CPU threshold"
"--cpu-threshold", action="store", help="CPU threshold"
)
parser.add_argument(
"--mem-threshold", action="store", default="", help="Memory threshold"
"--mem-threshold", action="store", help="Memory threshold"
)
return parser.parse_args()
@@ -141,9 +141,9 @@ def read_configuration(config_file_path):
prometheus_endpoint = config.get("prometheus_endpoint")
auth_token = config.get("auth_token")
scrape_duration = get_yaml_item_value(config, "scrape_duration", "10m")
threshold = get_yaml_item_value(config, "threshold", ".7")
heatmap_cpu_threshold = get_yaml_item_value(config, "cpu_threshold", ".5")
heatmap_mem_threshold = get_yaml_item_value(config, "mem_threshold", ".3")
threshold = get_yaml_item_value(config, "threshold")
heatmap_cpu_threshold = get_yaml_item_value(config, "cpu_threshold")
heatmap_mem_threshold = get_yaml_item_value(config, "mem_threshold")
output_file = config.get("json_output_file", False)
if output_file is True:
output_path = config.get("json_output_folder_path")
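The dropped defaults make the thresholds mandatory, and they must be coerced to float before being compared with pandas float64 columns (a str comparison raised TypeError). A minimal sketch of the stricter handling, using a hypothetical helper name:

    def require_float(config: dict, key: str) -> float:
        # hypothetical helper: fail fast when a threshold is missing,
        # and always hand a float to the pandas comparisons downstream
        value = config.get(key)
        if value is None:
            raise ValueError(f"'{key}' must be set; outlier detection depends on it")
        return float(value)

    cfg = {"threshold": "0.7", "cpu_threshold": 0.5, "mem_threshold": 0.3}
    threshold = require_float(cfg, "threshold")                  # 0.7
    heatmap_cpu_threshold = require_float(cfg, "cpu_threshold")  # 0.5
    heatmap_mem_threshold = require_float(cfg, "mem_threshold")  # 0.3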