Mirror of https://github.com/krkn-chaos/krkn.git (synced 2026-02-19 20:40:33 +00:00)

Compare commits (28 commits)
| SHA1 |
|---|
| 7d18487228 |
| 08de42c91a |
| dc7d5bb01b |
| ea3444d375 |
| 7b660a0878 |
| 5fe0655f22 |
| 5df343c183 |
| f364e9f283 |
| 86a7427606 |
| 31266fbc3e |
| 57de3769e7 |
| 42fc8eea40 |
| 22d56e2cdc |
| a259b68221 |
| 052f83e7d9 |
| fb3bbe4e26 |
| 96ba9be4b8 |
| 58d5d1d8dc |
| 3fe22a0d8f |
| 21b89a32a7 |
| dbe3ea9718 |
| a142f6e7a4 |
| 2610a7af67 |
| f827f65132 |
| aa6cbbc11a |
| e17354e54d |
| 2dfa5cb0cd |
| 0799008cd5 |
**.github/workflows/tests.yml** (vendored, 70 changed lines)
```diff
@@ -61,6 +61,8 @@ jobs:
 kubectl create namespace namespace-scenario
 kubectl apply -f CI/templates/time_pod.yaml
 kubectl wait --for=condition=ready pod -l scenario=time-skew --timeout=300s
+kubectl apply -f CI/templates/service_hijacking.yaml
+kubectl wait --for=condition=ready pod -l "app.kubernetes.io/name=proxy" --timeout=300s
 - name: Get Kind nodes
 run: |
 kubectl get nodes --show-labels=true
@@ -70,12 +72,14 @@ jobs:
 run: python -m coverage run -a -m unittest discover -s tests -v

 - name: Setup Pull Request Functional Tests
-if: github.event_name == 'pull_request'
+if: |
+  github.event_name == 'pull_request'
 run: |
 yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml
 yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml
 yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml
-echo "test_app_outages" > ./CI/tests/functional_tests
+echo "test_service_hijacking" > ./CI/tests/functional_tests
+echo "test_app_outages" >> ./CI/tests/functional_tests
 echo "test_container" >> ./CI/tests/functional_tests
 echo "test_namespace" >> ./CI/tests/functional_tests
 echo "test_net_chaos" >> ./CI/tests/functional_tests
@@ -84,7 +88,9 @@ jobs:
 echo "test_arca_memory_hog" >> ./CI/tests/functional_tests
 echo "test_arca_io_hog" >> ./CI/tests/functional_tests

-# Push on main only steps
+# Push on main only steps + all other functional to collect coverage
+# for the badge
 - name: Configure AWS Credentials
 if: github.ref == 'refs/heads/main' && github.event_name == 'push'
 uses: aws-actions/configure-aws-credentials@v4
@@ -101,6 +107,15 @@ jobs:
 yq -i '.telemetry.username="${{secrets.TELEMETRY_USERNAME}}"' CI/config/common_test_config.yaml
 yq -i '.telemetry.password="${{secrets.TELEMETRY_PASSWORD}}"' CI/config/common_test_config.yaml
 echo "test_telemetry" > ./CI/tests/functional_tests
+echo "test_service_hijacking" >> ./CI/tests/functional_tests
+echo "test_app_outages" >> ./CI/tests/functional_tests
+echo "test_container" >> ./CI/tests/functional_tests
+echo "test_namespace" >> ./CI/tests/functional_tests
+echo "test_net_chaos" >> ./CI/tests/functional_tests
+echo "test_time" >> ./CI/tests/functional_tests
+echo "test_arca_cpu_hog" >> ./CI/tests/functional_tests
+echo "test_arca_memory_hog" >> ./CI/tests/functional_tests
+echo "test_arca_io_hog" >> ./CI/tests/functional_tests

 # Final common steps
 - name: Run Functional tests
@@ -119,6 +134,7 @@ jobs:
 - name: Collect coverage report
 run: |
 python -m coverage html
+python -m coverage json
 - name: Publish coverage report to job summary
 run: |
 pip install html2text
@@ -129,6 +145,54 @@ jobs:
 name: coverage
 path: htmlcov
 if-no-files-found: error
+- name: Upload json coverage
+  uses: actions/upload-artifact@v3
+  with:
+    name: coverage.json
+    path: coverage.json
+    if-no-files-found: error
 - name: Check CI results
 run: grep Fail CI/results.markdown && false || true
+badge:
+  permissions:
+    contents: write
+  name: Generate Coverage Badge
+  runs-on: ubuntu-latest
+  needs:
+    - tests
+  if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+  steps:
+    - name: Check out doc repo
+      uses: actions/checkout@master
+      with:
+        repository: krkn-chaos/krkn-lib-docs
+        path: krkn-lib-docs
+        ssh-key: ${{ secrets.KRKN_LIB_DOCS_PRIV_KEY }}
+    - name: Download json coverage
+      uses: actions/download-artifact@v3
+      with:
+        name: coverage.json
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: 3.9
+    - name: Copy badge on GitHub Page Repo
+      env:
+        COLOR: yellow
+      run: |
+        # generate coverage badge on previously calculated total coverage
+        # and copy in the docs page
+        export TOTAL=$(python -c "import json;print(json.load(open('coverage.json'))['totals']['percent_covered_display'])")
+        [[ $TOTAL > 40 ]] && COLOR=green
+        echo "TOTAL: $TOTAL"
+        echo "COLOR: $COLOR"
+        curl "https://img.shields.io/badge/coverage-$TOTAL%25-$COLOR" > ./krkn-lib-docs/coverage_badge_krkn.svg
+    - name: Push updated Coverage Badge
+      run: |
+        cd krkn-lib-docs
+        git add .
+        git config user.name "krkn-chaos"
+        git config user.email "<>"
+        git commit -m "[KRKN] Coverage Badge ${GITHUB_REF##*/}" || echo "no changes to commit"
+        git push
```
**CI/templates/service_hijacking.yaml** (new file, 29 lines)
@@ -0,0 +1,29 @@
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx
  labels:
    app.kubernetes.io/name: proxy
spec:
  containers:
  - name: nginx
    image: nginx:stable
    ports:
    - containerPort: 80
      name: http-web-svc

---
apiVersion: v1
kind: Service
metadata:
  name: nginx-service
spec:
  selector:
    app.kubernetes.io/name: proxy
  type: NodePort
  ports:
  - name: name-of-service-port
    protocol: TCP
    port: 80
    targetPort: http-web-svc
    nodePort: 30036
```
**CI/tests/test_service_hijacking.sh** (new file, 107 lines)
@@ -0,0 +1,107 @@
```bash
set -xeEo pipefail

source CI/tests/common.sh

trap error ERR
trap finish EXIT
# port mapping has been configured in kind-config.yml
SERVICE_URL=http://localhost:8888
PAYLOAD_GET_1="{ \
\"status\":\"internal server error\" \
}"
STATUS_CODE_GET_1=500

PAYLOAD_PATCH_1="resource patched"
STATUS_CODE_PATCH_1=201

PAYLOAD_POST_1="{ \
\"status\": \"unauthorized\" \
}"
STATUS_CODE_POST_1=401

PAYLOAD_GET_2="{ \
\"status\":\"resource created\" \
}"
STATUS_CODE_GET_2=201

PAYLOAD_PATCH_2="bad request"
STATUS_CODE_PATCH_2=400

PAYLOAD_POST_2="not found"
STATUS_CODE_POST_2=404

JSON_MIME="application/json"
TEXT_MIME="text/plain; charset=utf-8"

function functional_test_service_hijacking {

export scenario_type="service_hijacking"
export scenario_file="scenarios/kube/service_hijacking.yaml"
export post_config=""
envsubst < CI/config/common_test_config.yaml > CI/config/service_hijacking.yaml
python3 -m coverage run -a run_kraken.py -c CI/config/service_hijacking.yaml > /dev/null 2>&1 &
PID=$!
#Waiting the hijacking to have effect
while [ `curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php` == 404 ]; do echo "waiting scenario to kick in."; sleep 1; done;

#Checking Step 1 GET on /list/index.php
OUT_GET="`curl -X GET -s $SERVICE_URL/list/index.php`"
OUT_CONTENT=`curl -X GET -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php`
OUT_STATUS_CODE=`curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php`
[ "${PAYLOAD_GET_1//[$'\t\r\n ']}" == "${OUT_GET//[$'\t\r\n ']}" ] && echo "Step 1 GET Payload OK" || (echo "Payload did not match. Test failed." && exit 1)
[ "$OUT_STATUS_CODE" == "$STATUS_CODE_GET_1" ] && echo "Step 1 GET Status Code OK" || (echo " Step 1 GET status code did not match. Test failed." && exit 1)
[ "$OUT_CONTENT" == "$JSON_MIME" ] && echo "Step 1 GET MIME OK" || (echo " Step 1 GET MIME did not match. Test failed." && exit 1)

#Checking Step 1 POST on /list/index.php
OUT_POST="`curl -s -X POST $SERVICE_URL/list/index.php`"
OUT_STATUS_CODE=`curl -X POST -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php`
OUT_CONTENT=`curl -X POST -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php`
[ "${PAYLOAD_POST_1//[$'\t\r\n ']}" == "${OUT_POST//[$'\t\r\n ']}" ] && echo "Step 1 POST Payload OK" || (echo "Payload did not match. Test failed." && exit 1)
[ "$OUT_STATUS_CODE" == "$STATUS_CODE_POST_1" ] && echo "Step 1 POST Status Code OK" || (echo "Step 1 POST status code did not match. Test failed." && exit 1)
[ "$OUT_CONTENT" == "$JSON_MIME" ] && echo "Step 1 POST MIME OK" || (echo " Step 1 POST MIME did not match. Test failed." && exit 1)

#Checking Step 1 PATCH on /patch
OUT_PATCH="`curl -s -X PATCH $SERVICE_URL/patch`"
OUT_STATUS_CODE=`curl -X PATCH -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/patch`
OUT_CONTENT=`curl -X PATCH -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/patch`
[ "${PAYLOAD_PATCH_1//[$'\t\r\n ']}" == "${OUT_PATCH//[$'\t\r\n ']}" ] && echo "Step 1 PATCH Payload OK" || (echo "Payload did not match. Test failed." && exit 1)
[ "$OUT_STATUS_CODE" == "$STATUS_CODE_PATCH_1" ] && echo "Step 1 PATCH Status Code OK" || (echo "Step 1 PATCH status code did not match. Test failed." && exit 1)
[ "$OUT_CONTENT" == "$TEXT_MIME" ] && echo "Step 1 PATCH MIME OK" || (echo " Step 1 PATCH MIME did not match. Test failed." && exit 1)
# wait for the next step
sleep 16

#Checking Step 2 GET on /list/index.php
OUT_GET="`curl -X GET -s $SERVICE_URL/list/index.php`"
OUT_CONTENT=`curl -X GET -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php`
OUT_STATUS_CODE=`curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php`
[ "${PAYLOAD_GET_2//[$'\t\r\n ']}" == "${OUT_GET//[$'\t\r\n ']}" ] && echo "Step 2 GET Payload OK" || (echo "Step 2 GET Payload did not match. Test failed." && exit 1)
[ "$OUT_STATUS_CODE" == "$STATUS_CODE_GET_2" ] && echo "Step 2 GET Status Code OK" || (echo "Step 2 GET status code did not match. Test failed." && exit 1)
[ "$OUT_CONTENT" == "$JSON_MIME" ] && echo "Step 2 GET MIME OK" || (echo " Step 2 GET MIME did not match. Test failed." && exit 1)

#Checking Step 2 POST on /list/index.php
OUT_POST="`curl -s -X POST $SERVICE_URL/list/index.php`"
OUT_CONTENT=`curl -X POST -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php`
OUT_STATUS_CODE=`curl -X POST -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php`
[ "${PAYLOAD_POST_2//[$'\t\r\n ']}" == "${OUT_POST//[$'\t\r\n ']}" ] && echo "Step 2 POST Payload OK" || (echo "Step 2 POST Payload did not match. Test failed." && exit 1)
[ "$OUT_STATUS_CODE" == "$STATUS_CODE_POST_2" ] && echo "Step 2 POST Status Code OK" || (echo "Step 2 POST status code did not match. Test failed." && exit 1)
[ "$OUT_CONTENT" == "$TEXT_MIME" ] && echo "Step 2 POST MIME OK" || (echo " Step 2 POST MIME did not match. Test failed." && exit 1)

#Checking Step 2 PATCH on /patch
OUT_PATCH="`curl -s -X PATCH $SERVICE_URL/patch`"
OUT_CONTENT=`curl -X PATCH -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/patch`
OUT_STATUS_CODE=`curl -X PATCH -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/patch`
[ "${PAYLOAD_PATCH_2//[$'\t\r\n ']}" == "${OUT_PATCH//[$'\t\r\n ']}" ] && echo "Step 2 PATCH Payload OK" || (echo "Step 2 PATCH Payload did not match. Test failed." && exit 1)
[ "$OUT_STATUS_CODE" == "$STATUS_CODE_PATCH_2" ] && echo "Step 2 PATCH Status Code OK" || (echo "Step 2 PATCH status code did not match. Test failed." && exit 1)
[ "$OUT_CONTENT" == "$TEXT_MIME" ] && echo "Step 2 PATCH MIME OK" || (echo " Step 2 PATCH MIME did not match. Test failed." && exit 1)
wait $PID

# now checking if service has been restore correctly and nginx responds correctly
curl -s $SERVICE_URL | grep nginx! && echo "BODY: Service restored!" || (echo "BODY: failed to restore service" && exit 1)
OUT_STATUS_CODE=`curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL`
[ "$OUT_STATUS_CODE" == "200" ] && echo "STATUS_CODE: Service restored!" || (echo "STATUS_CODE: failed to restore service" && exit 1)

echo "Service Hijacking Chaos test: Success"
}


functional_test_service_hijacking
```
```diff
@@ -1,5 +1,7 @@
 # Krkn aka Kraken
 [badge image]
 [badge image]
 [badge image]
+
+[badge image]
```
```diff
@@ -73,6 +75,7 @@ Scenario type | Kubernetes
 [PVC scenario](docs/pvc_scenario.md) | :heavy_check_mark: |
 [Network_Chaos](docs/network_chaos.md) | :heavy_check_mark: |
 [ManagedCluster Scenarios](docs/managedcluster_scenarios.md) | :heavy_check_mark: |
+[Service Hijacking Scenarios](docs/service_hijacking_scenarios.md) | :heavy_check_mark: |


 ### Kraken scenario pass/fail criteria and report
```
```diff
@@ -42,6 +42,8 @@ kraken:
 - scenarios/openshift/pvc_scenario.yaml
 - network_chaos:
   - scenarios/openshift/network_chaos.yaml
+- service_hijacking:
+  - scenarios/kube/service_hijacking.yaml

 cerberus:
 cerberus_enabled: False # Enable it when cerberus is previously installed
```
```diff
@@ -1,28 +1,50 @@
 # Dockerfile for kraken

 # azure-client
 FROM mcr.microsoft.com/azure-cli:latest as azure-cli

-FROM registry.access.redhat.com/ubi8/ubi:latest
+# oc build
+FROM golang:1.22.4 AS oc-build
+RUN apt-get update && apt-get install -y libkrb5-dev
+WORKDIR /tmp
+RUN git clone --branch release-4.18 https://github.com/openshift/oc.git
+WORKDIR /tmp/oc
+RUN go mod edit -go 1.22.3 &&\
+    go get github.com/moby/buildkit@v0.12.5 &&\
+    go get github.com/containerd/containerd@v1.7.11&&\
+    go get github.com/docker/docker@v25.0.5&&\
+    go mod tidy && go mod vendor
+RUN make GO_REQUIRED_MIN_VERSION:= oc

-ENV KUBECONFIG /root/.kube/config
+FROM fedora:40
+RUN groupadd -g 1001 krkn && useradd -m -u 1001 -g krkn krkn
+RUN dnf update -y

-# Copy azure client binary from azure-cli image
+# krkn version that will be built
+ENV KRKN_VERSION v1.6.1
+
+ENV KUBECONFIG /home/krkn/.kube/config
+
+# install kubectl
+RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" &&\
+    cp kubectl /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl &&\
+    cp kubectl /usr/bin/kubectl && chmod +x /usr/bin/kubectl
+
+# This overwrites any existing configuration in /etc/yum.repos.d/kubernetes.repo
+RUN dnf update && dnf install -y git python39 jq yq gettext wget which
+# copy azure client binary from azure-cli image
 COPY --from=azure-cli /usr/local/bin/az /usr/bin/az

-# Install dependencies
-RUN yum install -y git python39 python3-pip jq gettext wget && \
-    python3.9 -m pip install -U pip && \
-    git clone https://github.com/krkn-chaos/krkn.git --branch v1.5.13 /root/kraken && \
-    mkdir -p /root/.kube && cd /root/kraken && \
-    pip3.9 install -r requirements.txt && \
-    pip3.9 install virtualenv && \
-    wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && chmod +x /usr/bin/yq
+# copy oc client binary from oc-build image
+COPY --from=oc-build /tmp/oc/oc /usr/bin/oc

-# Get Kubernetes and OpenShift clients from stable releases
-WORKDIR /tmp
-RUN wget https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable/openshift-client-linux.tar.gz && tar -xvf openshift-client-linux.tar.gz && cp oc /usr/local/bin/oc && cp oc /usr/bin/oc && cp kubectl /usr/local/bin/kubectl && cp kubectl /usr/bin/kubectl

-WORKDIR /root/kraken
+# krkn build
+RUN git clone https://github.com/krkn-chaos/krkn.git --branch $KRKN_VERSION /home/krkn/kraken && \
+    mkdir -p /home/krkn/.kube
+WORKDIR /home/krkn/kraken
+RUN python3.9 -m ensurepip
+RUN pip3.9 install -r requirements.txt
+RUN pip3.9 install jsonschema
+
+RUN chown -R krkn:krkn /home/krkn
+USER krkn
 ENTRYPOINT ["python3.9", "run_kraken.py"]
-CMD ["--config=config/config.yaml"]
+CMD ["--config=config/config.yaml"]
```
```diff
@@ -14,7 +14,7 @@ COPY --from=azure-cli /usr/local/bin/az /usr/bin/az
 # Install dependencies
 RUN yum install -y git python39 python3-pip jq gettext wget && \
     python3.9 -m pip install -U pip && \
-    git clone https://github.com/redhat-chaos/krkn.git --branch v1.5.13 /root/kraken && \
+    git clone https://github.com/redhat-chaos/krkn.git --branch v1.5.14 /root/kraken && \
     mkdir -p /root/.kube && cd /root/kraken && \
     pip3.9 install -r requirements.txt && \
     pip3.9 install virtualenv && \
```
**docs/service_hijacking_scenarios.md** (new file, 80 lines)
@@ -0,0 +1,80 @@

### Service Hijacking Scenarios

Service Hijacking Scenarios aim to simulate fake HTTP responses from a workload targeted by a `Service` already deployed in the cluster. This scenario is executed by deploying a custom-made web service and modifying the target `Service` selector to direct traffic to this web service for a specified duration.

The web service's source code is available [here](https://github.com/krkn-chaos/krkn-service-hijacking). It employs a time-based test plan from the scenario configuration file, which specifies the behavior of resources during the chaos scenario as follows:

```yaml
service_target_port: http-web-svc # The port of the service to be hijacked (can be named or numeric, based on the workload and service configuration).
service_name: nginx-service # The name of the service that will be hijacked.
service_namespace: default # The namespace where the target service is located.
image: quay.io/krkn-chaos/krkn-service-hijacking:v0.1.3 # Image of the krkn web service to be deployed to receive traffic.
chaos_duration: 30 # Total duration of the chaos scenario in seconds.
plan:
  - resource: "/list/index.php" # Specifies the resource or path to respond to in the scenario. For paths, both the path and query parameters are captured but ignored. For resources, only query parameters are captured.
    steps: # A time-based plan consisting of steps can be defined for each resource.
      GET: # One or more HTTP methods can be specified for each step. Note: Non-standard methods are supported for fully custom web services (e.g., using NONEXISTENT instead of POST).
        - duration: 15 # Duration in seconds for this step before moving to the next one, if defined. Otherwise, this step will continue until the chaos scenario ends.
          status: 500 # HTTP status code to be returned in this step.
          mime_type: "application/json" # MIME type of the response for this step.
          payload: | # The response payload for this step.
            {
              "status":"internal server error"
            }
        - duration: 15
          status: 201
          mime_type: "application/json"
          payload: |
            {
              "status":"resource created"
            }
      POST:
        - duration: 15
          status: 401
          mime_type: "application/json"
          payload: |
            {
              "status": "unauthorized"
            }
        - duration: 15
          status: 404
          mime_type: "text/plain"
          payload: "not found"
```
The scenario will focus on the `service_name` within the `service_namespace`, substituting the selector with a randomly generated one, which is added as a label in the mock service manifest. This allows multiple scenarios to be executed in the same namespace, each targeting different services without causing conflicts; a sketch of the selector swap follows.
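To make the mechanism concrete, here is a minimal sketch of such a selector swap using the official `kubernetes` Python client (already in requirements.txt). This is not krkn's actual implementation, which goes through krkn_lib's service-hijacking helpers; the `swap_selector` function name and the `krkn-hijack` label are illustrative only.

```python
# Minimal sketch of the selector swap, assuming the official `kubernetes`
# Python client; krkn itself delegates this to krkn_lib, so the function
# name and the "krkn-hijack" label below are illustrative only.
from kubernetes import client, config


def swap_selector(name: str, namespace: str, new_selector: dict) -> dict:
    """Replace a Service's selector and return the original one."""
    config.load_kube_config()
    v1 = client.CoreV1Api()
    svc = v1.read_namespaced_service(name, namespace)
    original = dict(svc.spec.selector or {})
    svc.spec.selector = new_selector  # full replacement, not a merge
    v1.replace_namespaced_service(name, namespace, svc)
    return original  # kept so the service can be restored after chaos_duration


# Example: point nginx-service at a mock pod labeled with a random value.
# original = swap_selector("nginx-service", "default", {"krkn-hijack": "x7f3a9c"})
```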
The newly deployed mock web service will expose a `service_target_port`, which can be either a named or numeric port based on the service configuration. This ensures that the Service correctly routes HTTP traffic to the mock web service during the chaos run.
Each step will last for `duration` seconds from the deployment of the mock web service in the cluster. For each HTTP resource, defined as a top-level YAML property of the plan (it could be a specific resource, e.g., `/list/index.php`, or a path-based resource typical in MVC frameworks), one or more HTTP request methods can be specified. Both standard and custom request methods are supported.

During this time frame, the web service will respond with:

- `status`: The [HTTP status code](https://datatracker.ietf.org/doc/html/rfc7231#section-6) (can be standard or custom).
- `mime_type`: The [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) (can be standard or custom).
- `payload`: The response body to be returned to the client.

At the end of the step `duration`, the web service will proceed to the next step (if available) until the global `chaos_duration` concludes. At this point, the original service will be restored, and the custom web service and its resources will be undeployed. A minimal sketch of this step resolution follows.
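The sketch below shows one way such a time-based plan can be resolved, assuming the plan structure from the example above. It is not the actual krkn-service-hijacking code; the `resolve_step` name is illustrative only.

```python
# Minimal sketch of time-based step resolution: given the seconds elapsed
# since the mock web service was deployed, pick the step whose time window
# contains the current instant. Not the actual krkn-service-hijacking code.
import time


def resolve_step(steps: list[dict], deployed_at: float) -> dict:
    """Return the plan step that should answer a request arriving now.

    `steps` is the list under a method key, e.g. plan[0]["steps"]["GET"];
    each step carries `duration`, `status`, `mime_type` and `payload`.
    """
    elapsed = time.time() - deployed_at
    for step in steps:
        if elapsed < step["duration"]:
            return step
        elapsed -= step["duration"]
    # the last step keeps answering until chaos_duration ends
    return steps[-1]


# 20 seconds into the GET plan above (two 15-second steps) -> second step.
get_steps = [
    {"duration": 15, "status": 500, "mime_type": "application/json",
     "payload": '{"status":"internal server error"}'},
    {"duration": 15, "status": 201, "mime_type": "application/json",
     "payload": '{"status":"resource created"}'},
]
print(resolve_step(get_steps, time.time() - 20)["status"])  # prints 201
```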
__NOTE__: Some clients (e.g., cURL, jQuery) may optimize queries using lightweight methods (like HEAD or OPTIONS) to probe API behavior. If these methods are not defined in the test plan, the web service may respond with a `405` or `404` status code. If you encounter unexpected behavior, consider this use case; the probe sketch below always sends explicit methods.
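For a quick external check while the scenario runs, here is a hedged probe sketch using `requests`, which sends exactly the method you ask for. The base URL assumes the CI kind port mapping (nodePort 30036 exposed as localhost:8888), and the expected codes follow step 1 of the example plan above.

```python
# Hedged probe sketch: exercise the hijacked endpoints with explicit HTTP
# methods so no client-side HEAD/OPTIONS optimization kicks in. The base URL
# assumes the CI kind-config port mapping (nodePort 30036 -> localhost:8888),
# and the expected codes match step 1 of the example plan above.
import requests

BASE = "http://localhost:8888"

for method, path, expected in [
    ("GET", "/list/index.php", 500),   # step 1 of the GET plan
    ("POST", "/list/index.php", 401),  # step 1 of the POST plan
]:
    resp = requests.request(method, f"{BASE}{path}")
    print(method, path, resp.status_code, resp.headers.get("Content-Type"))
    assert resp.status_code == expected
```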
```diff
@@ -2,6 +2,9 @@ kind: Cluster
 apiVersion: kind.x-k8s.io/v1alpha4
 nodes:
 - role: control-plane
+  extraPortMappings:
+  - containerPort: 30036
+    hostPort: 8888
 - role: control-plane
 - role: control-plane
 - role: worker
```
```diff
@@ -19,7 +19,7 @@ def run(scenarios_list, config, wait_duration,kubecli: KrknKubernetes, telemetry
 for app_outage_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = app_outage_config
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, app_outage_config)
     if len(app_outage_config) > 1:
         try:
@@ -73,12 +73,12 @@ spec:
 end_time = int(time.time())
 cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time)
 except Exception as e :
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     failed_scenarios.append(app_outage_config)
     log_exception(app_outage_config)
 else:
-    scenario_telemetry.exitStatus = 0
-    scenario_telemetry.endTimeStamp = time.time()
+    scenario_telemetry.exit_status = 0
+    scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)
 return failed_scenarios, scenario_telemetries
```
```diff
@@ -16,12 +16,12 @@ def run(scenarios_list: List[str], kubeconfig_path: str, telemetry: KrknTelemetry
 for scenario in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = scenario
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry,scenario)
     engine_args = build_args(scenario)
     status_code = run_workflow(engine_args, kubeconfig_path)
-    scenario_telemetry.endTimeStamp = time.time()
-    scenario_telemetry.exitStatus = status_code
+    scenario_telemetry.end_timestamp = time.time()
+    scenario_telemetry.exit_status = status_code
     scenario_telemetries.append(scenario_telemetry)
     if status_code != 0:
         failed_post_scenarios.append(scenario)
@@ -36,9 +36,10 @@ def run_workflow(engine_args: arcaflow.EngineArgs, kubeconfig_path: str) -> int:

 def build_args(input_file: str) -> arcaflow.EngineArgs:
     """sets the kubeconfig parsed by setArcaKubeConfig as an input to the arcaflow workflow"""
-    context = Path(input_file).parent
-    workflow = "{}/workflow.yaml".format(context)
-    config = "{}/config.yaml".format(context)
+    current_path = Path().resolve()
+    context = f"{current_path}/{Path(input_file).parent}"
+    workflow = f"{context}/workflow.yaml"
+    config = f"{context}/config.yaml"
     if not os.path.exists(context):
         raise Exception(
             "context folder for arcaflow workflow not found: {}".format(
@@ -61,7 +62,8 @@ def build_args(input_file: str) -> arcaflow.EngineArgs:
 engine_args = arcaflow.EngineArgs()
 engine_args.context = context
 engine_args.config = config
-engine_args.input = input_file
 engine_args.workflow = workflow
+engine_args.input = f"{current_path}/{input_file}"
 return engine_args
```
```diff
@@ -17,16 +17,41 @@ def convert_data_to_dataframe(data, label):

 def convert_data(data, service):
     result = {}
     for entry in data:
         pod_name = entry['metric']['pod']
         value = entry['value'][1]
         result[pod_name] = value
-    return result.get(service, '100000000000') # for those pods whose limits are not defined they can take as much resources, there assigning a very high value
+    return result.get(service)


-def save_utilization_to_file(utilization, filename):
+def convert_data_limits(data, node_data, service, prometheus):
+    result = {}
+    for entry in data:
+        pod_name = entry['metric']['pod']
+        value = entry['value'][1]
+        result[pod_name] = value
+    return result.get(service, get_node_capacity(node_data, service, prometheus)) # for those pods whose limits are not defined they can take as much resources, there assigning a very high value
+
+def get_node_capacity(node_data, pod_name, prometheus):
+    # Get the node name on which the pod is running
+    query = f'kube_pod_info{{pod="{pod_name}"}}'
+    result = prometheus.custom_query(query)
+    if not result:
+        return None
+
+    node_name = result[0]['metric']['node']
+
+    for item in node_data:
+        if item['metric']['node'] == node_name:
+            return item['value'][1]
+
+    return '1000000000'
+
+def save_utilization_to_file(utilization, filename, prometheus):
 merged_df = pd.DataFrame(columns=['namespace', 'service', 'CPU', 'CPU_LIMITS', 'MEM', 'MEM_LIMITS', 'NETWORK'])
 for namespace in utilization:
     # Loading utilization_data[] for namespace
@@ -41,9 +66,9 @@ def save_utilization_to_file(utilization, filename):
 new_row_df = pd.DataFrame({
     "namespace": namespace, "service": s,
     "CPU": convert_data(utilization_data[0], s),
-    "CPU_LIMITS": convert_data(utilization_data[1], s),
+    "CPU_LIMITS": convert_data_limits(utilization_data[1],utilization_data[5], s, prometheus),
     "MEM": convert_data(utilization_data[2], s),
-    "MEM_LIMITS": convert_data(utilization_data[3], s),
+    "MEM_LIMITS": convert_data_limits(utilization_data[3], utilization_data[6], s, prometheus),
     "NETWORK": convert_data(utilization_data[4], s)}, index=[0])
 merged_df = pd.concat([merged_df, new_row_df], ignore_index=True)
@@ -55,11 +80,11 @@ def save_utilization_to_file(utilization, filename):
 merged_df['NETWORK'] = merged_df['NETWORK'].astype(str)

 # Extract integer part before the decimal point
-merged_df['CPU'] = merged_df['CPU'].str.split('.').str[0]
-merged_df['MEM'] = merged_df['MEM'].str.split('.').str[0]
-merged_df['CPU_LIMITS'] = merged_df['CPU_LIMITS'].str.split('.').str[0]
-merged_df['MEM_LIMITS'] = merged_df['MEM_LIMITS'].str.split('.').str[0]
-merged_df['NETWORK'] = merged_df['NETWORK'].str.split('.').str[0]
+#merged_df['CPU'] = merged_df['CPU'].str.split('.').str[0]
+#merged_df['MEM'] = merged_df['MEM'].str.split('.').str[0]
+#merged_df['CPU_LIMITS'] = merged_df['CPU_LIMITS'].str.split('.').str[0]
+#merged_df['MEM_LIMITS'] = merged_df['MEM_LIMITS'].str.split('.').str[0]
+#merged_df['NETWORK'] = merged_df['NETWORK'].str.split('.').str[0]

 merged_df.to_csv(filename, sep='\t', index=False)
@@ -84,20 +109,27 @@ def fetch_utilization_from_prometheus(prometheus_endpoint, auth_token,
 cpu_limits_query = '(sum by (pod) (kube_pod_container_resource_limits{resource="cpu", namespace="%s"}))*1000' %(namespace)
 cpu_limits_result = prometheus.custom_query(cpu_limits_query)

+node_cpu_limits_query = 'kube_node_status_capacity{resource="cpu", unit="core"}*1000'
+node_cpu_limits_result = prometheus.custom_query(node_cpu_limits_query)
+
 mem_query = 'sum by (pod) (avg_over_time(container_memory_usage_bytes{image!="", namespace="%s"}[%s]))' % (namespace, scrape_duration)
 mem_result = prometheus.custom_query(mem_query)

 mem_limits_query = 'sum by (pod) (kube_pod_container_resource_limits{resource="memory", namespace="%s"}) ' %(namespace)
 mem_limits_result = prometheus.custom_query(mem_limits_query)

+node_mem_limits_query = 'kube_node_status_capacity{resource="memory", unit="byte"}'
+node_mem_limits_result = prometheus.custom_query(node_mem_limits_query)
+
 network_query = 'sum by (pod) ((avg_over_time(container_network_transmit_bytes_total{namespace="%s"}[%s])) + \
     (avg_over_time(container_network_receive_bytes_total{namespace="%s"}[%s])))' % (namespace, scrape_duration, namespace, scrape_duration)
 network_result = prometheus.custom_query(network_query)

-utilization[namespace] = [cpu_result, cpu_limits_result, mem_result, mem_limits_result, network_result]
+utilization[namespace] = [cpu_result, cpu_limits_result, mem_result, mem_limits_result, network_result, node_cpu_limits_result, node_mem_limits_result ]
 queries[namespace] = json_queries(cpu_query, cpu_limits_query, mem_query, mem_limits_query, network_query)

-save_utilization_to_file(utilization, saved_metrics_path)
+save_utilization_to_file(utilization, saved_metrics_path, prometheus)

 return saved_metrics_path, queries
```
```diff
@@ -23,7 +23,7 @@ def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry
 for net_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = net_config
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, net_config)
     try:
         with open(net_config, "r") as file:
@@ -114,11 +114,11 @@ def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry
 logging.info("Deleting jobs")
 delete_job(joblst[:], kubecli)
 except (RuntimeError, Exception):
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     failed_scenarios.append(net_config)
     log_exception(net_config)
 else:
-    scenario_telemetry.exitStatus = 0
+    scenario_telemetry.exit_status = 0
 scenario_telemetries.append(scenario_telemetry)
 return failed_scenarios, scenario_telemetries
```
```diff
@@ -15,7 +15,7 @@ import kraken.cerberus.setup as cerberus
 from krkn_lib.k8s import KrknKubernetes
 from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes
 from krkn_lib.models.telemetry import ScenarioTelemetry
-from krkn_lib.utils.functions import get_yaml_item_value
+from krkn_lib.utils.functions import get_yaml_item_value, log_exception

 node_general = False

@@ -61,7 +61,7 @@ def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry
 for node_scenario_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = node_scenario_config
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, node_scenario_config)
     with open(node_scenario_config, "r") as f:
         node_scenario_config = yaml.full_load(f)
@@ -78,13 +78,13 @@ def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry
 cerberus.get_status(config, start_time, end_time)
 logging.info("")
 except (RuntimeError, Exception) as e:
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     failed_scenarios.append(node_scenario_config)
     log_exception(node_scenario_config)
 else:
-    scenario_telemetry.exitStatus = 0
+    scenario_telemetry.exit_status = 0

-scenario_telemetry.endTimeStamp = time.time()
+scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)

 return failed_scenarios, scenario_telemetries
```
```diff
@@ -260,7 +260,7 @@ def run(scenarios: List[str],
 for scenario in scenarios:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = scenario
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, scenario)
     logging.info('scenario ' + str(scenario))
     pool = PodsMonitorPool(kubecli)
@@ -276,16 +276,16 @@ def run(scenarios: List[str],

 except Exception as e:
     logging.error(f"scenario exception: {str(e)}")
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     pool.cancel()
     failed_post_scenarios.append(scenario)
     log_exception(scenario)
 else:
-    scenario_telemetry.exitStatus = 0
+    scenario_telemetry.exit_status = 0
     logging.info("Waiting for the specified duration: %s" % (wait_duration))
     time.sleep(wait_duration)
     scenario_telemetries.append(scenario_telemetry)
-    scenario_telemetry.endTimeStamp = time.time()
+    scenario_telemetry.end_timestamp = time.time()

 return failed_post_scenarios, scenario_telemetries
```
```diff
@@ -119,11 +119,11 @@ class vSphere:
 vm = self.get_vm(instance_id)
 try:
     self.client.vcenter.vm.Power.stop(vm)
-    logging.info("Stopped VM -- '{}-({})'", instance_id, vm)
+    logging.info(f"Stopped VM -- '{instance_id}-({vm})'")
     return True
 except AlreadyInDesiredState:
     logging.info(
-        "VM '{}'-'({})' is already Powered Off", instance_id, vm
+        f"VM '{instance_id}'-'({vm})' is already Powered Off"
     )
     return False
@@ -136,11 +136,11 @@ class vSphere:
 vm = self.get_vm(instance_id)
 try:
     self.client.vcenter.vm.Power.start(vm)
-    logging.info("Started VM -- '{}-({})'", instance_id, vm)
+    logging.info(f"Started VM -- '{instance_id}-({vm})'")
     return True
 except AlreadyInDesiredState:
     logging.info(
-        "VM '{}'-'({})' is already Powered On", instance_id, vm
+        f"VM '{instance_id}'-'({vm})' is already Powered On"
     )
     return False
@@ -318,12 +318,12 @@ class vSphere:
 try:
     vm = self.get_vm(instance_id)
     state = self.client.vcenter.vm.Power.get(vm).state
-    logging.info("Check instance %s status", instance_id)
+    logging.info(f"Check instance {instance_id} status")
     return state
 except Exception as e:
     logging.error(
-        "Failed to get node instance status %s. Encountered following "
-        "exception: %s.", instance_id, e
+        f"Failed to get node instance status {instance_id}. Encountered following "
+        f"exception: {str(e)}. "
     )
     return None
@@ -338,16 +338,14 @@ class vSphere:
 while vm is not None:
     vm = self.get_vm(instance_id)
     logging.info(
-        "VM %s is still being deleted, "
-        "sleeping for 5 seconds",
-        instance_id
+        f"VM {instance_id} is still being deleted, "
+        f"sleeping for 5 seconds"
     )
     time.sleep(5)
     time_counter += 5
     if time_counter >= timeout:
         logging.info(
-            "VM %s is still not deleted in allotted time",
-            instance_id
+            f"VM {instance_id} is still not deleted in allotted time"
         )
         return False
 return True
@@ -371,8 +369,7 @@ class vSphere:
 time_counter += 5
 if time_counter >= timeout:
     logging.info(
-        "VM %s is still not ready in allotted time",
-        instance_id
+        f"VM {instance_id} is still not ready in allotted time"
     )
     return False
 return True
@@ -388,16 +385,14 @@ class vSphere:
 while status != Power.State.POWERED_OFF:
     status = self.get_vm_status(instance_id)
     logging.info(
-        "VM %s is still not running, "
-        "sleeping for 5 seconds",
-        instance_id
+        f"VM {instance_id} is still not running, "
+        f"sleeping for 5 seconds"
     )
     time.sleep(5)
     time_counter += 5
     if time_counter >= timeout:
         logging.info(
-            "VM %s is still not ready in allotted time",
-            instance_id
+            f"VM {instance_id} is still not ready in allotted time"
         )
         return False
 return True
@@ -561,7 +556,7 @@ def node_start(
 try:
     for _ in range(cfg.runs):
         logging.info("Starting node_start_scenario injection")
-        logging.info("Starting the node %s ", name)
+        logging.info(f"Starting the node {name} ")
         vm_started = vsphere.start_instances(name)
         if vm_started:
             vsphere.wait_until_running(name, cfg.timeout)
@@ -571,7 +566,7 @@ def node_start(
 )
 nodes_started[int(time.time_ns())] = Node(name=name)
 logging.info(
-    "Node with instance ID: %s is in running state", name
+    f"Node with instance ID: {name} is in running state"
 )
 logging.info(
     "node_start_scenario has been successfully injected!"
@@ -579,8 +574,8 @@ def node_start(
 except Exception as e:
     logging.error("Failed to start node instance. Test Failed")
     logging.error(
-        "node_start_scenario injection failed! "
-        "Error was: %s", str(e)
+        f"node_start_scenario injection failed! "
+        f"Error was: {str(e)}"
     )
     return "error", NodeScenarioErrorOutput(
         format_exc(), kube_helper.Actions.START
@@ -620,7 +615,7 @@ def node_stop(
 try:
     for _ in range(cfg.runs):
         logging.info("Starting node_stop_scenario injection")
-        logging.info("Stopping the node %s ", name)
+        logging.info(f"Stopping the node {name} ")
         vm_stopped = vsphere.stop_instances(name)
         if vm_stopped:
             vsphere.wait_until_stopped(name, cfg.timeout)
@@ -630,7 +625,7 @@ def node_stop(
 )
 nodes_stopped[int(time.time_ns())] = Node(name=name)
 logging.info(
-    "Node with instance ID: %s is in stopped state", name
+    f"Node with instance ID: {name} is in stopped state"
 )
 logging.info(
     "node_stop_scenario has been successfully injected!"
@@ -638,8 +633,8 @@ def node_stop(
 except Exception as e:
     logging.error("Failed to stop node instance. Test Failed")
     logging.error(
-        "node_stop_scenario injection failed! "
-        "Error was: %s", str(e)
+        f"node_stop_scenario injection failed! "
+        f"Error was: {str(e)}"
     )
     return "error", NodeScenarioErrorOutput(
         format_exc(), kube_helper.Actions.STOP
@@ -679,7 +674,7 @@ def node_reboot(
 try:
     for _ in range(cfg.runs):
         logging.info("Starting node_reboot_scenario injection")
-        logging.info("Rebooting the node %s ", name)
+        logging.info(f"Rebooting the node {name} ")
         vsphere.reboot_instances(name)
         if not cfg.skip_openshift_checks:
             kube_helper.wait_for_unknown_status(
@@ -690,8 +685,8 @@ def node_reboot(
 )
 nodes_rebooted[int(time.time_ns())] = Node(name=name)
 logging.info(
-    "Node with instance ID: %s has rebooted "
-    "successfully", name
+    f"Node with instance ID: {name} has rebooted "
+    "successfully"
 )
 logging.info(
     "node_reboot_scenario has been successfully injected!"
@@ -699,8 +694,8 @@ def node_reboot(
 except Exception as e:
     logging.error("Failed to reboot node instance. Test Failed")
     logging.error(
-        "node_reboot_scenario injection failed! "
-        "Error was: %s", str(e)
+        f"node_reboot_scenario injection failed! "
+        f"Error was: {str(e)}"
     )
     return "error", NodeScenarioErrorOutput(
         format_exc(), kube_helper.Actions.REBOOT
@@ -739,13 +734,13 @@ def node_terminate(
 vsphere.stop_instances(name)
 vsphere.wait_until_stopped(name, cfg.timeout)
 logging.info(
-    "Releasing the node with instance ID: %s ", name
+    f"Releasing the node with instance ID: {name} "
 )
 vsphere.release_instances(name)
 vsphere.wait_until_released(name, cfg.timeout)
 nodes_terminated[int(time.time_ns())] = Node(name=name)
 logging.info(
-    "Node with instance ID: %s has been released", name
+    f"Node with instance ID: {name} has been released"
 )
 logging.info(
     "node_terminate_scenario has been "
@@ -754,8 +749,8 @@ def node_terminate(
 except Exception as e:
     logging.error("Failed to terminate node instance. Test Failed")
     logging.error(
-        "node_terminate_scenario injection failed! "
-        "Error was: %s", str(e)
+        f"node_terminate_scenario injection failed! "
+        f"Error was: {str(e)}"
     )
     return "error", NodeScenarioErrorOutput(
         format_exc(), kube_helper.Actions.TERMINATE
```
```diff
@@ -88,7 +88,7 @@ def container_run(kubeconfig_path,
 for container_scenario_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = container_scenario_config[0]
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, container_scenario_config[0])
     if len(container_scenario_config) > 1:
         pre_action_output = post_actions.run(kubeconfig_path, container_scenario_config[1])
@@ -119,12 +119,12 @@ def container_run(kubeconfig_path,
 pool.cancel()
 failed_scenarios.append(container_scenario_config[0])
 log_exception(container_scenario_config[0])
-scenario_telemetry.exitStatus = 1
+scenario_telemetry.exit_status = 1
 # removed_exit
 # sys.exit(1)
 else:
-    scenario_telemetry.exitStatus = 0
-    scenario_telemetry.endTimeStamp = time.time()
+    scenario_telemetry.exit_status = 0
+    scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)

 return failed_scenarios, scenario_telemetries
```
```diff
@@ -11,7 +11,7 @@ from krkn_lib.utils.functions import get_yaml_item_value, log_exception


 # krkn_lib
-def run(scenarios_list, config, kubecli: KrknKubernetes, telemetry: KrknTelemetryKubernetes) -> (list[str], list[ScenarioTelemetry]):
+def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry: KrknTelemetryKubernetes) -> (list[str], list[ScenarioTelemetry]):
     """
     Reads the scenario config and creates a temp file to fill up the PVC
     """
@@ -21,7 +21,7 @@ def run(scenarios_list, config, kubecli: KrknKubernetes, telemetry: KrknTelemetr
 for app_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = app_config
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, app_config)
     try:
         if len(app_config) > 1:
@@ -305,7 +305,9 @@ def run(scenarios_list, config, kubecli: KrknKubernetes, telemetry: KrknTelemetr
 file_size_kb,
 kubecli
 )
+
+logging.info("End of scenario. Waiting for the specified duration: %s" % (wait_duration))
+time.sleep(wait_duration)

 end_time = int(time.time())
 cerberus.publish_kraken_status(
     config,
@@ -314,11 +316,11 @@ def run(scenarios_list, config, kubecli: KrknKubernetes, telemetry: KrknTelemetr
 end_time
 )
 except (RuntimeError, Exception):
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     failed_scenarios.append(app_config)
     log_exception(app_config)
 else:
-    scenario_telemetry.exitStatus = 0
+    scenario_telemetry.exit_status = 0
 scenario_telemetries.append(scenario_telemetry)

 return failed_scenarios, scenario_telemetries
```
```diff
@@ -165,7 +165,7 @@ def run(
 for scenario_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = scenario_config[0]
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, scenario_config[0])
     try:
         if len(scenario_config) > 1:
@@ -249,12 +249,12 @@ def run(
 end_time = int(time.time())
 cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time)
 except (Exception, RuntimeError):
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     failed_scenarios.append(scenario_config[0])
     log_exception(scenario_config[0])
 else:
-    scenario_telemetry.exitStatus = 0
-    scenario_telemetry.endTimeStamp = time.time()
+    scenario_telemetry.exit_status = 0
+    scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)
 return failed_scenarios, scenario_telemetries
```
**kraken/service_hijacking/__init__.py** (new file, empty)
**kraken/service_hijacking/service_hijacking.py** (new file, 90 lines)
@@ -0,0 +1,90 @@
```python
import logging
import time

import yaml
from krkn_lib.k8s import KrknKubernetes
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes


def run(scenarios_list: list[str], wait_duration: int, krkn_lib: KrknKubernetes, telemetry: KrknTelemetryKubernetes) -> (list[str], list[ScenarioTelemetry]):
    scenario_telemetries = list[ScenarioTelemetry]()
    failed_post_scenarios = []
    for scenario in scenarios_list:
        scenario_telemetry = ScenarioTelemetry()
        scenario_telemetry.scenario = scenario
        scenario_telemetry.start_timestamp = time.time()
        telemetry.set_parameters_base64(scenario_telemetry, scenario)
        with open(scenario) as stream:
            scenario_config = yaml.safe_load(stream)

        service_name = scenario_config['service_name']
        service_namespace = scenario_config['service_namespace']
        plan = scenario_config["plan"]
        image = scenario_config["image"]
        target_port = scenario_config["service_target_port"]
        chaos_duration = scenario_config["chaos_duration"]

        logging.info(f"checking service {service_name} in namespace: {service_namespace}")
        if not krkn_lib.service_exists(service_name, service_namespace):
            logging.error(f"service: {service_name} not found in namespace: {service_namespace}, failed to run scenario.")
            fail(scenario_telemetry, scenario_telemetries)
            failed_post_scenarios.append(scenario)
            break
        try:
            logging.info(f"service: {service_name} found in namespace: {service_namespace}")
            logging.info(f"creating webservice and initializing test plan...")
            # both named ports and port numbers can be used
            if isinstance(target_port, int):
                logging.info(f"webservice will listen on port {target_port}")
                webservice = krkn_lib.deploy_service_hijacking(service_namespace, plan, image, port_number=target_port)
            else:
                logging.info(f"traffic will be redirected to named port: {target_port}")
                webservice = krkn_lib.deploy_service_hijacking(service_namespace, plan, image, port_name=target_port)
            logging.info(f"successfully deployed pod: {webservice.pod_name} "
                         f"in namespace:{service_namespace} with selector {webservice.selector}!"
                         )
            logging.info(f"patching service: {service_name} to hijack traffic towards: {webservice.pod_name}")
            original_service = krkn_lib.replace_service_selector([webservice.selector], service_name, service_namespace)
            if original_service is None:
                logging.error(f"failed to patch service: {service_name}, namespace: {service_namespace} with selector {webservice.selector}")
                fail(scenario_telemetry, scenario_telemetries)
                failed_post_scenarios.append(scenario)
                break

            logging.info(f"service: {service_name} successfully patched!")
            logging.info(f"original service manifest:\n\n{yaml.dump(original_service)}")
            logging.info(f"waiting {chaos_duration} before restoring the service")
            time.sleep(chaos_duration)
            selectors = ["=".join([key, original_service["spec"]["selector"][key]]) for key in original_service["spec"]["selector"].keys()]
            logging.info(f"restoring the service selectors {selectors}")
            original_service = krkn_lib.replace_service_selector(selectors, service_name, service_namespace)
            if original_service is None:
                logging.error(f"failed to restore original service: {service_name}, namespace: {service_namespace} with selectors: {selectors}")
                fail(scenario_telemetry, scenario_telemetries)
                failed_post_scenarios.append(scenario)
                break
            logging.info("selectors successfully restored")
            logging.info("undeploying service-hijacking resources...")
            krkn_lib.undeploy_service_hijacking(webservice)

            logging.info("End of scenario. Waiting for the specified duration: %s" % (wait_duration))
            time.sleep(wait_duration)

            scenario_telemetry.exit_status = 0
            scenario_telemetry.end_timestamp = time.time()
            scenario_telemetries.append(scenario_telemetry)
            logging.info("success")
        except Exception as e:
            logging.error(f"scenario {scenario} failed with exception: {e}")
            fail(scenario_telemetry, scenario_telemetries)
            failed_post_scenarios.append(scenario)

    return failed_post_scenarios, scenario_telemetries


def fail(scenario_telemetry: ScenarioTelemetry, scenario_telemetries: list[ScenarioTelemetry]):
    scenario_telemetry.exit_status = 1
    scenario_telemetry.end_timestamp = time.time()
    scenario_telemetries.append(scenario_telemetry)
```
```diff
@@ -147,7 +147,7 @@ def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry
 scenario_telemetry = ScenarioTelemetry()
 scenario_telemetry.scenario = config_path
-scenario_telemetry.startTimeStamp = time.time()
+scenario_telemetry.start_timestamp = time.time()
 telemetry.set_parameters_base64(scenario_telemetry, config_path)

 with open(config_path, "r") as f:
@@ -175,11 +175,11 @@ def run(scenarios_list, config, wait_duration, kubecli: KrknKubernetes, telemetry
 except (RuntimeError, Exception):
     log_exception(config_path)
     failed_scenarios.append(config_path)
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
 else:
-    scenario_telemetry.exitStatus = 0
+    scenario_telemetry.exit_status = 0

-scenario_telemetry.endTimeStamp = time.time()
+scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)

 return failed_scenarios, scenario_telemetries
```
```diff
@@ -354,7 +354,7 @@ def run(scenarios_list, config, wait_duration, kubecli:KrknKubernetes, telemetry
 for time_scenario_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = time_scenario_config
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, time_scenario_config)
     try:
         with open(time_scenario_config, "r") as f:
@@ -377,12 +377,12 @@ def run(scenarios_list, config, wait_duration, kubecli:KrknKubernetes, telemetry
 end_time
 )
 except (RuntimeError, Exception):
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     log_exception(time_scenario_config)
     failed_scenarios.append(time_scenario_config)
 else:
-    scenario_telemetry.exitStatus = 0
-    scenario_telemetry.endTimeStamp = time.time()
+    scenario_telemetry.exit_status = 0
+    scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)

 return failed_scenarios, scenario_telemetries
```
```diff
@@ -19,7 +19,7 @@ def run(scenarios_list, config, wait_duration, telemetry: KrknTelemetryKubernetes
 for zone_outage_config in scenarios_list:
     scenario_telemetry = ScenarioTelemetry()
     scenario_telemetry.scenario = zone_outage_config
-    scenario_telemetry.startTimeStamp = time.time()
+    scenario_telemetry.start_timestamp = time.time()
     telemetry.set_parameters_base64(scenario_telemetry, zone_outage_config)
     try:
         if len(zone_outage_config) > 1:
@@ -110,12 +110,12 @@ def run(scenarios_list, config, wait_duration, telemetry: KrknTelemetryKubernetes
 end_time
 )
 except (RuntimeError, Exception):
-    scenario_telemetry.exitStatus = 1
+    scenario_telemetry.exit_status = 1
     failed_scenarios.append(zone_outage_config)
     log_exception(zone_outage_config)
 else:
-    scenario_telemetry.exitStatus = 0
-    scenario_telemetry.endTimeStamp = time.time()
+    scenario_telemetry.exit_status = 0
+    scenario_telemetry.end_timestamp = time.time()
 scenario_telemetries.append(scenario_telemetry)
 return failed_scenarios, scenario_telemetries
```
@@ -1,9 +1,9 @@
|
||||
aliyun-python-sdk-core==2.13.36
|
||||
aliyun-python-sdk-ecs==4.24.25
|
||||
arcaflow==0.9.0
|
||||
arcaflow==0.17.2
|
||||
arcaflow-plugin-sdk==0.10.0
|
||||
boto3==1.28.61
|
||||
azure-identity==1.15.0
|
||||
azure-identity==1.16.1
|
||||
azure-keyvault==4.2.0
|
||||
azure-mgmt-compute==30.5.0
|
||||
itsdangerous==2.0.1
|
||||
@@ -14,8 +14,8 @@ gitpython==3.1.41
|
||||
google-api-python-client==2.116.0
|
||||
ibm_cloud_sdk_core==3.18.0
|
||||
ibm_vpc==0.20.0
|
||||
jinja2==3.1.3
|
||||
krkn-lib==2.1.2
|
||||
jinja2==3.1.4
|
||||
krkn-lib==2.1.3
|
||||
lxml==5.1.0
|
||||
kubernetes==26.1.0
|
||||
oauth2client==4.1.3
|
||||
@@ -28,11 +28,11 @@ pyfiglet==1.0.2
|
||||
pytest==8.0.0
|
||||
python-ipmi==0.5.4
|
||||
python-openstackclient==6.5.0
|
||||
requests==2.31.0
|
||||
requests==2.32.0
|
||||
service_identity==24.1.0
|
||||
PyYAML==6.0
|
||||
setuptools==65.5.1
|
||||
werkzeug==3.0.1
|
||||
werkzeug==3.0.3
|
||||
wheel==0.42.0
|
||||
zope.interface==5.4.0
@@ -25,6 +25,7 @@ import kraken.pvc.pvc_scenario as pvc_scenario
 import kraken.network_chaos.actions as network_chaos
 import kraken.arcaflow_plugin as arcaflow_plugin
 import kraken.prometheus as prometheus_plugin
+import kraken.service_hijacking.service_hijacking as service_hijacking_plugin
 import server as server
 from kraken import plugins
 from krkn_lib.k8s import KrknKubernetes
@@ -340,7 +341,7 @@ def main(cfg):
             # krkn_lib
             elif scenario_type == "pvc_scenarios":
                 logging.info("Running PVC scenario")
-                failed_post_scenarios, scenario_telemetries = pvc_scenario.run(scenarios_list, config, kubecli, telemetry_k8s)
+                failed_post_scenarios, scenario_telemetries = pvc_scenario.run(scenarios_list, config, wait_duration, kubecli, telemetry_k8s)
                 chaos_telemetry.scenarios.extend(scenario_telemetries)

             # Network scenarios
@@ -348,6 +349,10 @@ def main(cfg):
             elif scenario_type == "network_chaos":
                 logging.info("Running Network Chaos")
                 failed_post_scenarios, scenario_telemetries = network_chaos.run(scenarios_list, config, wait_duration, kubecli, telemetry_k8s)
+            elif scenario_type == "service_hijacking":
+                logging.info("Running Service Hijacking Chaos")
+                failed_post_scenarios, scenario_telemetries = service_hijacking_plugin.run(scenarios_list, wait_duration, kubecli, telemetry_k8s)
                 chaos_telemetry.scenarios.extend(scenario_telemetries)

         # Check for critical alerts when enabled
         post_critical_alerts = 0
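With the new `service_hijacking` branch in place, a config can list that scenario type alongside the existing ones. A minimal sketch of parsing such a config fragment (the `kraken.chaos_scenarios` key layout mirrors krkn's existing scenario lists; treat the exact keys as an assumption):

```python
import yaml  # pip install pyyaml

# Hypothetical krkn config fragment enabling the new scenario type;
# the chaos_scenarios layout follows krkn's existing convention.
cfg = yaml.safe_load("""
kraken:
  chaos_scenarios:
    - service_hijacking:
        - scenarios/kube/service_hijacking.yaml
""")

for entry in cfg["kraken"]["chaos_scenarios"]:
    for scenario_type, scenarios_list in entry.items():
        print(scenario_type, "->", scenarios_list)
```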
@@ -2,7 +2,7 @@ input_list:
 - cpu_count: 1
   cpu_load_percentage: 80
   cpu_method: all
-  duration: 1s
+  duration: 30
   kubeconfig: ''
   namespace: default
   # set the node selector as a key-value pair eg.
@@ -1,9 +1,9 @@
 version: v0.2.0
 input:
-  root: RootObject
+  root: SubRootObject
   objects:
-    RootObject:
-      id: input_item
+    SubRootObject:
+      id: SubRootObject
       properties:
         kubeconfig:
           display:
@@ -35,7 +35,7 @@ input:
           description: stop stress test after T seconds. One can also specify the units of time in
             seconds, minutes, hours, days or years with the suffix s, m, h, d or y
         type:
-          type_id: string
+          type_id: integer
         required: true
       cpu_count:
         display:
@@ -68,18 +68,18 @@ steps:
       kubeconfig: !expr $.input.kubeconfig
   stressng:
     plugin:
-      src: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
+      src: quay.io/arcalot/arcaflow-plugin-stressng:0.6.0
       deployment_type: image
     step: workload
     input:
       cleanup: "true"
-      StressNGParams:
-        timeout: !expr $.input.duration
-        stressors:
-          - stressor: cpu
-            cpu_count: !expr $.input.cpu_count
-            cpu_method: !expr $.input.cpu_method
-            cpu_load: !expr $.input.cpu_load_percentage
+      timeout: !expr $.input.duration
+      stressors:
+        - stressor: cpu
+          workers: !expr $.input.cpu_count
+          cpu-method: "all"
+          cpu-load: !expr $.input.cpu_load_percentage
     deploy:
       deployer_name: kubernetes
       connection: !expr $.steps.kubeconfig.outputs.success.connection
@@ -9,62 +9,10 @@ input:
     type:
       type_id: list
       items:
-        id: input_item
-        type_id: object
-        properties:
-          kubeconfig:
-            display:
-              description: The complete kubeconfig file as a string
-              name: Kubeconfig file contents
-            type:
-              type_id: string
-            required: true
-          namespace:
-            display:
-              description: The namespace where the container will be deployed
-              name: Namespace
-            type:
-              type_id: string
-            required: true
-          node_selector:
-            display:
-              description: kubernetes node name where the plugin must be deployed
-            type:
-              type_id: map
-              values:
-                type_id: string
-              keys:
-                type_id: string
-            required: true
-          duration:
-            display:
-              name: duration the scenario expressed in seconds
-              description: stop stress test after T seconds. One can also specify the units of time in
-                seconds, minutes, hours, days or years with the suffix s, m, h, d or y
-            type:
-              type_id: string
-            required: true
-          cpu_count:
-            display:
-              description: Number of CPU cores to be used (0 means all)
-              name: number of CPUs
-            type:
-              type_id: integer
-            required: true
-          cpu_method:
-            display:
-              description: CPU stress method
-              name: fine grained control of which cpu stressors to use (ackermann, cfloat etc.)
-            type:
-              type_id: string
-            required: true
-          cpu_load_percentage:
-            display:
-              description: load CPU by percentage
-              name: CPU load
-            type:
-              type_id: integer
-            required: true
+        id: SubRootObject
+        type_id: ref
+        namespace: $.steps.workload_loop.execute.inputs.items

 steps:
   workload_loop:
     kind: foreach
@@ -1,5 +1,5 @@
 input_list:
-- duration: 30s
+- duration: 30
   io_block_size: 1m
   io_workers: 1
   io_write_bytes: 10m
@@ -1,6 +1,6 @@
 version: v0.2.0
 input:
-  root: RootObject
+  root: SubRootObject
   objects:
     hostPath:
       id: HostPathVolumeSource
@@ -18,8 +18,8 @@ input:
         type:
           id: hostPath
           type_id: ref
-    RootObject:
-      id: input_item
+    SubRootObject:
+      id: SubRootObject
       properties:
         kubeconfig:
           display:
@@ -51,7 +51,7 @@ input:
           description: stop stress test after T seconds. One can also specify the units of time in
             seconds, minutes, hours, days or years with the suffix s, m, h, d or y
         type:
-          type_id: string
+          type_id: integer
         required: true
       io_workers:
         display:
@@ -102,19 +102,18 @@ steps:
       kubeconfig: !expr $.input.kubeconfig
   stressng:
     plugin:
-      src: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
+      src: quay.io/arcalot/arcaflow-plugin-stressng:0.6.0
       deployment_type: image
     step: workload
     input:
       cleanup: "true"
-      StressNGParams:
-        timeout: !expr $.input.duration
-        workdir: !expr $.input.target_pod_folder
-        stressors:
-          - stressor: hdd
-            hdd: !expr $.input.io_workers
-            hdd_bytes: !expr $.input.io_write_bytes
-            hdd_write_size: !expr $.input.io_block_size
+      timeout: !expr $.input.duration
+      workdir: !expr $.input.target_pod_folder
+      stressors:
+        - stressor: hdd
+          workers: !expr $.input.io_workers
+          hdd-bytes: !expr $.input.io_write_bytes
+          hdd-write-size: !expr $.input.io_block_size

     deploy:
       deployer_name: kubernetes
@@ -2,22 +2,6 @@ version: v0.2.0
 input:
   root: RootObject
   objects:
-    hostPath:
-      id: HostPathVolumeSource
-      properties:
-        path:
-          type:
-            type_id: string
-    Volume:
-      id: Volume
-      properties:
-        name:
-          type:
-            type_id: string
-        hostPath:
-          type:
-            id: hostPath
-            type_id: ref
     RootObject:
       id: RootObject
       properties:
@@ -25,80 +9,9 @@ input:
     type:
       type_id: list
       items:
-        id: input_item
-        type_id: object
-        properties:
-          kubeconfig:
-            display:
-              description: The complete kubeconfig file as a string
-              name: Kubeconfig file contents
-            type:
-              type_id: string
-            required: true
-          namespace:
-            display:
-              description: The namespace where the container will be deployed
-              name: Namespace
-            type:
-              type_id: string
-            required: true
-          node_selector:
-            display:
-              description: kubernetes node name where the plugin must be deployed
-            type:
-              type_id: map
-              values:
-                type_id: string
-              keys:
-                type_id: string
-            required: true
-          duration:
-            display:
-              name: duration the scenario expressed in seconds
-              description: stop stress test after T seconds. One can also specify the units of time in
-                seconds, minutes, hours, days or years with the suffix s, m, h, d or y
-            type:
-              type_id: string
-            required: true
-          io_workers:
-            display:
-              description: number of workers
-              name: start N workers continually writing, reading and removing temporary files
-            type:
-              type_id: integer
-            required: true
-          io_block_size:
-            display:
-              description: single write size
-              name: specify size of each write in bytes. Size can be from 1 byte to 4MB.
-            type:
-              type_id: string
-            required: true
-          io_write_bytes:
-            display:
-              description: Total number of bytes written
-              name: write N bytes for each hdd process, the default is 1 GB. One can specify the size
-                as % of free space on the file system or in units of Bytes, KBytes, MBytes and
-                GBytes using the suffix b, k, m or g
-            type:
-              type_id: string
-            required: true
-          target_pod_folder:
-            display:
-              description: Target Folder
-              name: Folder in the pod where the test will be executed and the test files will be written
-            type:
-              type_id: string
-            required: true
-          target_pod_volume:
-            display:
-              name: kubernetes volume definition
-              description: the volume that will be attached to the pod. In order to stress
-                the node storage only hosPath mode is currently supported
-            type:
-              type_id: ref
-              id: Volume
-            required: true
+        id: SubRootObject
+        type_id: ref
+        namespace: $.steps.workload_loop.execute.inputs.items
 steps:
   workload_loop:
     kind: foreach
@@ -1,5 +1,5 @@
 input_list:
-- duration: 30s
+- duration: 30
   vm_bytes: 10%
   vm_workers: 2
   # set the node selector as a key-value pair eg.
@@ -1,9 +1,9 @@
 version: v0.2.0
 input:
-  root: RootObject
+  root: SubRootObject
   objects:
-    RootObject:
-      id: input_item
+    SubRootObject:
+      id: SubRootObject
       properties:
         kubeconfig:
           display:
@@ -34,7 +34,7 @@ input:
           name: duration the scenario expressed in seconds
           description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
         type:
-          type_id: string
+          type_id: integer
         required: true
       vm_workers:
         display:
@@ -60,17 +60,16 @@ steps:
       kubeconfig: !expr $.input.kubeconfig
   stressng:
     plugin:
-      src: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
+      src: quay.io/arcalot/arcaflow-plugin-stressng:0.6.0
       deployment_type: image
     step: workload
     input:
       cleanup: "true"
-      StressNGParams:
-        timeout: !expr $.input.duration
-        stressors:
-          - stressor: vm
-            vm: !expr $.input.vm_workers
-            vm_bytes: !expr $.input.vm_bytes
+      timeout: !expr $.input.duration
+      stressors:
+        - stressor: vm
+          workers: !expr $.input.vm_workers
+          vm-bytes: !expr $.input.vm_bytes
     deploy:
       deployer_name: kubernetes
       connection: !expr $.steps.kubeconfig.outputs.success.connection
@@ -9,54 +9,10 @@ input:
     type:
       type_id: list
       items:
-        id: input_item
-        type_id: object
-        properties:
-          kubeconfig:
-            display:
-              description: The complete kubeconfig file as a string
-              name: Kubeconfig file contents
-            type:
-              type_id: string
-            required: true
-          namespace:
-            display:
-              description: The namespace where the container will be deployed
-              name: Namespace
-            type:
-              type_id: string
-            required: true
-          node_selector:
-            display:
-              description: kubernetes node name where the plugin must be deployed
-            type:
-              type_id: map
-              values:
-                type_id: string
-              keys:
-                type_id: string
-            required: true
-          duration:
-            display:
-              name: duration the scenario expressed in seconds
-              description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
-            type:
-              type_id: string
-            required: true
-          vm_workers:
-            display:
-              description: Number of VM stressors to be run (0 means 1 stressor per CPU)
-              name: Number of VM stressors
-            type:
-              type_id: integer
-            required: true
-          vm_bytes:
-            display:
-              description: N bytes per vm process, the default is 256MB. The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
-              name: Kubeconfig file contents
-            type:
-              type_id: string
-            required: true
+        id: SubRootObject
+        type_id: ref
+        namespace: $.steps.workload_loop.execute.inputs.items

 steps:
   workload_loop:
     kind: foreach
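Taken together, the three workflow diffs above show the same migration to arcaflow-plugin-stressng 0.6.0: the `StressNGParams` wrapper is dropped (timeout and stressors now sit directly under the step input), `duration` becomes a plain integer, the per-input schemas are deduplicated behind a `SubRootObject` ref into the foreach loop, and the per-stressor fields are renamed. Summarized as a mapping (derived from the hunks above, not from the plugin's docs):

```python
# Old 0.5.0 field -> new 0.6.0 field, per stressor, as seen in the diffs above.
STRESSNG_FIELD_RENAMES = {
    "cpu": {"cpu_count": "workers", "cpu_method": "cpu-method", "cpu_load": "cpu-load"},
    "hdd": {"hdd": "workers", "hdd_bytes": "hdd-bytes", "hdd_write_size": "hdd-write-size"},
    "vm": {"vm": "workers", "vm_bytes": "vm-bytes"},
}
```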
scenarios/kube/service_hijacking.yaml (new file, 56 lines)
@@ -0,0 +1,56 @@
+# refer to the documentation for further info: https://github.com/krkn-chaos/krkn/blob/main/docs/service_hijacking.md
+
+service_target_port: http-web-svc # The port of the service to be hijacked (can be named or numeric, based on the workload and service configuration).
+service_name: nginx-service # The name of the service to be hijacked.
+service_namespace: default # The namespace where the target service is located.
+image: quay.io/krkn-chaos/krkn-service-hijacking:v0.1.3 # Image of the krkn web service deployed to receive the traffic.
+chaos_duration: 30 # Total duration of the chaos scenario in seconds.
+plan:
+  - resource: "/list/index.php" # The resource or path to respond to in the scenario. For paths, both the path and query parameters are captured but ignored.
+                                # For resources, only query parameters are captured.
+    steps: # A time-based plan consisting of steps can be defined for each resource.
+      GET: # One or more HTTP methods can be specified for each step.
+           # Note: non-standard methods are supported
+           # for fully custom web services (e.g., using NONEXISTENT instead of POST).
+        - duration: 15 # Duration in seconds for this step before moving to the next one, if defined. Otherwise,
+                       # this step will continue until the chaos scenario ends.
+          status: 500 # HTTP status code to be returned in this step.
+          mime_type: "application/json" # MIME type of the response for this step.
+          payload: | # The response payload for this step.
+            {
+              "status":"internal server error"
+            }
+        - duration: 15
+          status: 201
+          mime_type: "application/json"
+          payload: |
+            {
+              "status":"resource created"
+            }
+      POST:
+        - duration: 15
+          status: 401
+          mime_type: "application/json"
+          payload: |
+            {
+              "status": "unauthorized"
+            }
+        - duration: 15
+          status: 404
+          mime_type: "text/plain"
+          payload: "not found"
+
+  - resource: "/patch"
+    steps:
+      PATCH:
+        - duration: 15
+          status: 201
+          mime_type: "text/plain"
+          payload: "resource patched"
+        - duration: 15
+          status: 400
+          mime_type: "text/plain"
+          payload: "bad request"
@@ -3,23 +3,24 @@ Enhancing Chaos Engineering with AI-assisted fault injection for better resilien

 ## Generate python package wheel file
 ```
-python3.9 generate_wheel_package.py sdist bdist_wheel
+$ python3.9 generate_wheel_package.py sdist bdist_wheel
+$ cp dist/aichaos-0.0.1-py3-none-any.whl docker/
 ```
 This creates a python package file aichaos-0.0.1-py3-none-any.whl in the dist folder.

 ## Build Image
 ```
-cd docker
-podman build -t aichaos:1.0 .
+$ cd docker
+$ podman build -t aichaos:1.0 .
 OR
-docker build -t aichaos:1.0 .
+$ docker build -t aichaos:1.0 .
 ```

 ## Run Chaos AI
 ```
-podman run -v aichaos-config.json:/config/aichaos-config.json --privileged=true --name aichaos -p 5001:5001 aichaos:1.0
+$ podman run -v aichaos-config.json:/config/aichaos-config.json --privileged=true --name aichaos -p 5001:5001 aichaos:1.0
 OR
-docker run -v aichaos-config.json:/config/aichaos-config.json --privileged -v /var/run/docker.sock:/var/run/docker.sock --name aichaos -p 5001:5001 aichaos:1.0
+$ docker run -v aichaos-config.json:/config/aichaos-config.json --privileged -v /var/run/docker.sock:/var/run/docker.sock --name aichaos -p 5001:5001 aichaos:1.0
 ```

 The output should look like:
@@ -1,6 +1,6 @@
 numpy
 pandas
 requests
-Flask==2.1.0
-Werkzeug==2.3.8
+Flask==2.2.5
+Werkzeug==3.0.3
 flasgger==0.9.5
@@ -7,8 +7,8 @@ This tool profiles an application and gathers telemetry data such as CPU, Memory

 ## Pre-requisites

 - Openshift Or Kubernetes Environment where the application is hosted
-- Access to the telemetry data via the exposed Prometheus endpoint
-- Python3
+- Access to the metrics via the exposed Prometheus endpoint
+- Python3.9

 ## Usage

@@ -22,14 +22,14 @@ This tool profiles an application and gathers telemetry data such as CPU, Memory
 $ pip3 install -r requirements.txt
 Edit configuration file:
 $ vi config/recommender_config.yaml
-$ python3.9 utils/chaos_recommender/chaos_recommender.py
+$ python3.9 utils/chaos_recommender/chaos_recommender.py -c utils/chaos_recommender/recommender_config.yaml
 ```

 2. Follow the prompts to provide the required information.

 ## Configuration
+To run the recommender with a config file specify the config file path with the `-c` argument.
-You can customize the default values by editing the `krkn/config/recommender_config.yaml` file. The configuration file contains the following options:
+You can customize the default values by editing the `recommender_config.yaml` file. The configuration file contains the following options:

 - `application`: Specify the application name.
 - `namespaces`: Specify the namespaces names (separated by coma or space). If you want to profile
@@ -115,6 +115,6 @@ You can customize the thresholds and options used for data analysis and identify

 ## Additional Files

-- `config/recommender_config.yaml`: The configuration file containing default values for application, namespace, labels, and kubeconfig.
+- `recommender_config.yaml`: The configuration file containing default values for application, namespace, labels, and kubeconfig.

 Happy Chaos!
utils/chaos_recommender/recommender_config.yaml (new file, 35 lines)
@@ -0,0 +1,35 @@
+application: openshift-etcd
+namespaces: openshift-etcd
+labels: app=openshift-etcd
+kubeconfig: ~/.kube/config.yaml
+prometheus_endpoint: <Prometheus_Endpoint>
+auth_token: <Auth_Token>
+scrape_duration: 10m
+chaos_library: "kraken"
+log_level: INFO
+json_output_file: False
+json_output_folder_path:
+
+# for output purpose only do not change if not needed
+chaos_tests:
+  GENERIC:
+    - pod_failure
+    - container_failure
+    - node_failure
+    - zone_outage
+    - time_skew
+    - namespace_failure
+    - power_outage
+  CPU:
+    - node_cpu_hog
+  NETWORK:
+    - application_outage
+    - node_network_chaos
+    - pod_network_chaos
+  MEM:
+    - node_memory_hog
+    - pvc_disk_fill
+
+threshold: .7
+cpu_threshold: .5
+mem_threshold: .5
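Read loosely, the trailing values act as cutoffs on the profiled utilization: metrics whose normalized usage clears `cpu_threshold` or `mem_threshold` pull in the CPU or MEM test lists, with `threshold` as the general cutoff. A toy illustration of that reading (assumed behavior for illustration only, not the recommender's actual code):

```python
# Toy mapping from profiled utilization to the test categories above;
# the gating logic is an assumption, not the recommender's implementation.
CHAOS_TESTS = {
    "CPU": ["node_cpu_hog"],
    "MEM": ["node_memory_hog", "pvc_disk_fill"],
    "NETWORK": ["application_outage", "node_network_chaos", "pod_network_chaos"],
}

def recommend(cpu_util, mem_util, net_util,
              cpu_threshold=0.5, mem_threshold=0.5, threshold=0.7):
    tests = []
    if cpu_util >= cpu_threshold:
        tests += CHAOS_TESTS["CPU"]
    if mem_util >= mem_threshold:
        tests += CHAOS_TESTS["MEM"]
    if net_util >= threshold:  # assumption: the general threshold gates network tests
        tests += CHAOS_TESTS["NETWORK"]
    return tests

print(recommend(cpu_util=0.8, mem_util=0.3, net_util=0.9))
```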