From a142f6e7a4b38cda84be2443ba244cdc8b98e5b1 Mon Sep 17 00:00:00 2001 From: Tullio Sebastiani Date: Mon, 13 May 2024 10:04:06 +0200 Subject: [PATCH] Service hijacking scenario (#617) * WIP: service hijacking scenario Signed-off-by: Tullio Sebastiani * wip Signed-off-by: Tullio Sebastiani * error handling Signed-off-by: Tullio Sebastiani adapted run_raken.py Signed-off-by: Tullio Sebastiani * restored config.yaml Signed-off-by: Tullio Sebastiani * added funtest Signed-off-by: Tullio Sebastiani test fix Signed-off-by: Tullio Sebastiani fix Signed-off-by: Tullio Sebastiani fixed test Signed-off-by: Tullio Sebastiani fix Signed-off-by: Tullio Sebastiani fix test Signed-off-by: Tullio Sebastiani fixed funtest Signed-off-by: Tullio Sebastiani funtest fix Signed-off-by: Tullio Sebastiani minor nit Signed-off-by: Tullio Sebastiani added explicit curl method Signed-off-by: Tullio Sebastiani push Signed-off-by: Tullio Sebastiani fix Signed-off-by: Tullio Sebastiani restored all funtests Signed-off-by: Tullio Sebastiani added mime type test Signed-off-by: Tullio Sebastiani fixed pipeline Signed-off-by: Tullio Sebastiani commented unit Signed-off-by: Tullio Sebastiani utf-8 Signed-off-by: Tullio Sebastiani test restored Signed-off-by: Tullio Sebastiani fix test pipeline Signed-off-by: Tullio Sebastiani * documentation Signed-off-by: Tullio Sebastiani * krkn-lib 2.1.3 Signed-off-by: Tullio Sebastiani * added other funtests to main merge to collect coverage Signed-off-by: Tullio Sebastiani --------- Signed-off-by: Tullio Sebastiani --- .github/workflows/tests.yml | 21 +++- CI/templates/service_hijacking.yaml | 29 +++++ CI/tests/test_service_hijacking.sh | 107 ++++++++++++++++++ README.md | 1 + config/config.yaml | 2 + docs/service_hijacking_scenarios.md | 80 +++++++++++++ kind-config.yml | 3 + kraken/service_hijacking/__init__.py | 0 kraken/service_hijacking/service_hijacking.py | 86 ++++++++++++++ requirements.txt | 2 +- run_kraken.py | 5 + 
scenarios/kube/service_hijacking.yaml | 56 +++++++++ 12 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 CI/templates/service_hijacking.yaml create mode 100644 CI/tests/test_service_hijacking.sh create mode 100644 docs/service_hijacking_scenarios.md create mode 100644 kraken/service_hijacking/__init__.py create mode 100644 kraken/service_hijacking/service_hijacking.py create mode 100644 scenarios/kube/service_hijacking.yaml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8a24df2d..79bd3b2b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -61,6 +61,8 @@ jobs: kubectl create namespace namespace-scenario kubectl apply -f CI/templates/time_pod.yaml kubectl wait --for=condition=ready pod -l scenario=time-skew --timeout=300s + kubectl apply -f CI/templates/service_hijacking.yaml + kubectl wait --for=condition=ready pod -l "app.kubernetes.io/name=proxy" --timeout=300s - name: Get Kind nodes run: | kubectl get nodes --show-labels=true @@ -70,12 +72,14 @@ jobs: run: python -m coverage run -a -m unittest discover -s tests -v - name: Setup Pull Request Functional Tests - if: github.event_name == 'pull_request' + if: | + github.event_name == 'pull_request' run: | yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml - echo "test_app_outages" > ./CI/tests/functional_tests + echo "test_service_hijacking" > ./CI/tests/functional_tests + echo "test_app_outages" >> ./CI/tests/functional_tests echo "test_container" >> ./CI/tests/functional_tests echo "test_namespace" >> ./CI/tests/functional_tests echo "test_net_chaos" >> ./CI/tests/functional_tests @@ -84,7 +88,9 @@ jobs: echo "test_arca_memory_hog" >> ./CI/tests/functional_tests echo "test_arca_io_hog" >> ./CI/tests/functional_tests - # Push on main only steps + + # Push on main only 
steps + all other functional to collect coverage + # for the badge - name: Configure AWS Credentials if: github.ref == 'refs/heads/main' && github.event_name == 'push' uses: aws-actions/configure-aws-credentials@v4 @@ -101,6 +107,15 @@ jobs: yq -i '.telemetry.username="${{secrets.TELEMETRY_USERNAME}}"' CI/config/common_test_config.yaml yq -i '.telemetry.password="${{secrets.TELEMETRY_PASSWORD}}"' CI/config/common_test_config.yaml echo "test_telemetry" > ./CI/tests/functional_tests + echo "test_service_hijacking" >> ./CI/tests/functional_tests + echo "test_app_outages" >> ./CI/tests/functional_tests + echo "test_container" >> ./CI/tests/functional_tests + echo "test_namespace" >> ./CI/tests/functional_tests + echo "test_net_chaos" >> ./CI/tests/functional_tests + echo "test_time" >> ./CI/tests/functional_tests + echo "test_arca_cpu_hog" >> ./CI/tests/functional_tests + echo "test_arca_memory_hog" >> ./CI/tests/functional_tests + echo "test_arca_io_hog" >> ./CI/tests/functional_tests # Final common steps - name: Run Functional tests diff --git a/CI/templates/service_hijacking.yaml b/CI/templates/service_hijacking.yaml new file mode 100644 index 00000000..439da4f1 --- /dev/null +++ b/CI/templates/service_hijacking.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: Pod +metadata: + name: nginx + labels: + app.kubernetes.io/name: proxy +spec: + containers: + - name: nginx + image: nginx:stable + ports: + - containerPort: 80 + name: http-web-svc + +--- +apiVersion: v1 +kind: Service +metadata: + name: nginx-service +spec: + selector: + app.kubernetes.io/name: proxy + type: NodePort + ports: + - name: name-of-service-port + protocol: TCP + port: 80 + targetPort: http-web-svc + nodePort: 30036 \ No newline at end of file diff --git a/CI/tests/test_service_hijacking.sh b/CI/tests/test_service_hijacking.sh new file mode 100644 index 00000000..8b779418 --- /dev/null +++ b/CI/tests/test_service_hijacking.sh @@ -0,0 +1,107 @@ +set -xeEo pipefail + +source CI/tests/common.sh + +trap 
error ERR +trap finish EXIT +# port mapping has been configured in kind-config.yml +SERVICE_URL=http://localhost:8888 +PAYLOAD_GET_1="{ \ + \"status\":\"internal server error\" \ +}" +STATUS_CODE_GET_1=500 + +PAYLOAD_PATCH_1="resource patched" +STATUS_CODE_PATCH_1=201 + +PAYLOAD_POST_1="{ \ + \"status\": \"unauthorized\" \ +}" +STATUS_CODE_POST_1=401 + +PAYLOAD_GET_2="{ \ + \"status\":\"resource created\" \ +}" +STATUS_CODE_GET_2=201 + +PAYLOAD_PATCH_2="bad request" +STATUS_CODE_PATCH_2=400 + +PAYLOAD_POST_2="not found" +STATUS_CODE_POST_2=404 + +JSON_MIME="application/json" +TEXT_MIME="text/plain; charset=utf-8" + +function functional_test_service_hijacking { + + export scenario_type="service_hijacking" + export scenario_file="scenarios/kube/service_hijacking.yaml" + export post_config="" + envsubst < CI/config/common_test_config.yaml > CI/config/service_hijacking.yaml + python3 -m coverage run -a run_kraken.py -c CI/config/service_hijacking.yaml > /dev/null 2>&1 & + PID=$! + #Waiting the hijacking to have effect + while [ `curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php` == 404 ]; do echo "waiting scenario to kick in."; sleep 1; done; + + #Checking Step 1 GET on /list/index.php + OUT_GET="`curl -X GET -s $SERVICE_URL/list/index.php`" + OUT_CONTENT=`curl -X GET -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php` + OUT_STATUS_CODE=`curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php` + [ "${PAYLOAD_GET_1//[$'\t\r\n ']}" == "${OUT_GET//[$'\t\r\n ']}" ] && echo "Step 1 GET Payload OK" || (echo "Payload did not match. Test failed." && exit 1) + [ "$OUT_STATUS_CODE" == "$STATUS_CODE_GET_1" ] && echo "Step 1 GET Status Code OK" || (echo " Step 1 GET status code did not match. Test failed." && exit 1) + [ "$OUT_CONTENT" == "$JSON_MIME" ] && echo "Step 1 GET MIME OK" || (echo " Step 1 GET MIME did not match. Test failed." 
&& exit 1) + + #Checking Step 1 POST on /list/index.php + OUT_POST="`curl -s -X POST $SERVICE_URL/list/index.php`" + OUT_STATUS_CODE=`curl -X POST -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php` + OUT_CONTENT=`curl -X POST -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php` + [ "${PAYLOAD_POST_1//[$'\t\r\n ']}" == "${OUT_POST//[$'\t\r\n ']}" ] && echo "Step 1 POST Payload OK" || (echo "Payload did not match. Test failed." && exit 1) + [ "$OUT_STATUS_CODE" == "$STATUS_CODE_POST_1" ] && echo "Step 1 POST Status Code OK" || (echo "Step 1 POST status code did not match. Test failed." && exit 1) + [ "$OUT_CONTENT" == "$JSON_MIME" ] && echo "Step 1 POST MIME OK" || (echo " Step 1 POST MIME did not match. Test failed." && exit 1) + + #Checking Step 1 PATCH on /patch + OUT_PATCH="`curl -s -X PATCH $SERVICE_URL/patch`" + OUT_STATUS_CODE=`curl -X PATCH -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/patch` + OUT_CONTENT=`curl -X PATCH -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/patch` + [ "${PAYLOAD_PATCH_1//[$'\t\r\n ']}" == "${OUT_PATCH//[$'\t\r\n ']}" ] && echo "Step 1 PATCH Payload OK" || (echo "Payload did not match. Test failed." && exit 1) + [ "$OUT_STATUS_CODE" == "$STATUS_CODE_PATCH_1" ] && echo "Step 1 PATCH Status Code OK" || (echo "Step 1 PATCH status code did not match. Test failed." && exit 1) + [ "$OUT_CONTENT" == "$TEXT_MIME" ] && echo "Step 1 PATCH MIME OK" || (echo " Step 1 PATCH MIME did not match. Test failed." && exit 1) + # wait for the next step + sleep 16 + + #Checking Step 2 GET on /list/index.php + OUT_GET="`curl -X GET -s $SERVICE_URL/list/index.php`" + OUT_CONTENT=`curl -X GET -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php` + OUT_STATUS_CODE=`curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php` + [ "${PAYLOAD_GET_2//[$'\t\r\n ']}" == "${OUT_GET//[$'\t\r\n ']}" ] && echo "Step 2 GET Payload OK" || (echo "Step 2 GET Payload did not match. Test failed." 
&& exit 1) + [ "$OUT_STATUS_CODE" == "$STATUS_CODE_GET_2" ] && echo "Step 2 GET Status Code OK" || (echo "Step 2 GET status code did not match. Test failed." && exit 1) + [ "$OUT_CONTENT" == "$JSON_MIME" ] && echo "Step 2 GET MIME OK" || (echo " Step 2 GET MIME did not match. Test failed." && exit 1) + + #Checking Step 2 POST on /list/index.php + OUT_POST="`curl -s -X POST $SERVICE_URL/list/index.php`" + OUT_CONTENT=`curl -X POST -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/list/index.php` + OUT_STATUS_CODE=`curl -X POST -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/list/index.php` + [ "${PAYLOAD_POST_2//[$'\t\r\n ']}" == "${OUT_POST//[$'\t\r\n ']}" ] && echo "Step 2 POST Payload OK" || (echo "Step 2 POST Payload did not match. Test failed." && exit 1) + [ "$OUT_STATUS_CODE" == "$STATUS_CODE_POST_2" ] && echo "Step 2 POST Status Code OK" || (echo "Step 2 POST status code did not match. Test failed." && exit 1) + [ "$OUT_CONTENT" == "$TEXT_MIME" ] && echo "Step 2 POST MIME OK" || (echo " Step 2 POST MIME did not match. Test failed." && exit 1) + + #Checking Step 2 PATCH on /patch + OUT_PATCH="`curl -s -X PATCH $SERVICE_URL/patch`" + OUT_CONTENT=`curl -X PATCH -s -o /dev/null -I -w "%{content_type}" $SERVICE_URL/patch` + OUT_STATUS_CODE=`curl -X PATCH -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL/patch` + [ "${PAYLOAD_PATCH_2//[$'\t\r\n ']}" == "${OUT_PATCH//[$'\t\r\n ']}" ] && echo "Step 2 PATCH Payload OK" || (echo "Step 2 PATCH Payload did not match. Test failed." && exit 1) + [ "$OUT_STATUS_CODE" == "$STATUS_CODE_PATCH_2" ] && echo "Step 2 PATCH Status Code OK" || (echo "Step 2 PATCH status code did not match. Test failed." && exit 1) + [ "$OUT_CONTENT" == "$TEXT_MIME" ] && echo "Step 2 PATCH MIME OK" || (echo " Step 2 PATCH MIME did not match. Test failed." && exit 1) + wait $PID + + # now checking if service has been restore correctly and nginx responds correctly + curl -s $SERVICE_URL | grep nginx! && echo "BODY: Service restored!" 
|| (echo "BODY: failed to restore service" && exit 1) + OUT_STATUS_CODE=`curl -X GET -s -o /dev/null -I -w "%{http_code}" $SERVICE_URL` + [ "$OUT_STATUS_CODE" == "200" ] && echo "STATUS_CODE: Service restored!" || (echo "STATUS_CODE: failed to restore service" && exit 1) + + echo "Service Hijacking Chaos test: Success" +} + + +functional_test_service_hijacking diff --git a/README.md b/README.md index 19199bc6..cc9f3ced 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ Scenario type | Kubernetes [PVC scenario](docs/pvc_scenario.md) | :heavy_check_mark: | [Network_Chaos](docs/network_chaos.md) | :heavy_check_mark: | [ManagedCluster Scenarios](docs/managedcluster_scenarios.md) | :heavy_check_mark: | +[Service Hijacking Scenarios](docs/service_hijacking_scenarios.md) | :heavy_check_mark: | ### Kraken scenario pass/fail criteria and report diff --git a/config/config.yaml b/config/config.yaml index c82cc89b..cedb1aee 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -42,6 +42,8 @@ kraken: - scenarios/openshift/pvc_scenario.yaml - network_chaos: - scenarios/openshift/network_chaos.yaml + - service_hijacking: + - scenarios/kube/service_hijacking.yaml cerberus: cerberus_enabled: False # Enable it when cerberus is previously installed diff --git a/docs/service_hijacking_scenarios.md b/docs/service_hijacking_scenarios.md new file mode 100644 index 00000000..9d9d851e --- /dev/null +++ b/docs/service_hijacking_scenarios.md @@ -0,0 +1,80 @@ +### Service Hijacking Scenarios + +Service Hijacking Scenarios aim to simulate fake HTTP responses from a workload targeted by a +`Service` already deployed in the cluster. +This scenario is executed by deploying a custom-made web service and modifying the target `Service` +selector to direct traffic to this web service for a specified duration. 
+ +The web service will utilize a time-based test plan loaded from the scenario configuration file, +which outlines the behavior of resources during the chaos scenario, defined as follows: + +```yaml +service_target_port: http-web-svc # The port of the service to be hijacked (can be named or numeric, based on the workload and service configuration). +service_name: nginx-service # The name of the service that will be hijacked. +service_namespace: default # The namespace where the target service is located. +image: quay.io/krkn-chaos/krkn-service-hijacking:v0.1.3 # Image of the krkn web service to be deployed to receive traffic. +chaos_duration: 30 # Total duration of the chaos scenario in seconds. +plan: + - resource: "/list/index.php" # Specifies the resource or path to respond to in the scenario. For paths, both the path and query parameters are captured but ignored. For resources, only query parameters are captured. + + steps: # A time-based plan consisting of steps can be defined for each resource. + GET: # One or more HTTP methods can be specified for each step. Note: Non-standard methods are supported for fully custom web services (e.g., using NONEXISTENT instead of POST). + + - duration: 15 # Duration in seconds for this step before moving to the next one, if defined. Otherwise, this step will continue until the chaos scenario ends. + + status: 500 # HTTP status code to be returned in this step. + mime_type: "application/json" # MIME type of the response for this step. + payload: | # The response payload for this step. 
+ { + "status":"internal server error" + } + - duration: 15 + status: 201 + mime_type: "application/json" + payload: | + { + "status":"resource created" + } + POST: + - duration: 15 + status: 401 + mime_type: "application/json" + payload: | + { + "status": "unauthorized" + } + - duration: 15 + status: 404 + mime_type: "text/plain" + payload: "not found" + + +``` +The scenario will focus on the `service_name` within the `service_namespace`, +substituting the selector with a randomly generated one, which is added as a label in the mock service manifest. +This allows multiple scenarios to be executed in the same namespace, each targeting different services without +causing conflicts. + +The newly deployed mock web service will expose a `service_target_port`, +which can be either a named or numeric port based on the service configuration. +This ensures that the Service correctly routes HTTP traffic to the mock web service during the chaos run. + +Each step will last for `duration` seconds from the deployment of the mock web service in the cluster. +For each HTTP resource, defined as a top-level YAML property of the plan +(it could be a specific resource, e.g., /list/index.php, or a path-based resource typical in MVC frameworks), +one or more HTTP request methods can be specified. Both standard and custom request methods are supported. + +During this time frame, the web service will respond with: + +- `status`: The [HTTP status code](https://datatracker.ietf.org/doc/html/rfc7231#section-6) (can be standard or custom). +- `mime_type`: The [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) (can be standard or custom). +- `payload`: The response body to be returned to the client. + +At the end of the step `duration`, the web service will proceed to the next step (if available) until +the global `chaos_duration` concludes. 
At this point, the original service will be restored, +and the custom web service and its resources will be undeployed. + +__NOTE__: Some clients (e.g., cURL, jQuery) may optimize queries using lightweight methods (like HEAD or OPTIONS) +to probe API behavior. If these methods are not defined in the test plan, the web service may respond with +a `405` or `404` status code. If you encounter unexpected behavior, consider this use case. + diff --git a/kind-config.yml b/kind-config.yml index ca84a46f..2fb7e1ac 100644 --- a/kind-config.yml +++ b/kind-config.yml @@ -2,6 +2,9 @@ kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane + extraPortMappings: + - containerPort: 30036 + hostPort: 8888 - role: control-plane - role: control-plane - role: worker diff --git a/kraken/service_hijacking/__init__.py b/kraken/service_hijacking/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/kraken/service_hijacking/service_hijacking.py b/kraken/service_hijacking/service_hijacking.py new file mode 100644 index 00000000..e885606e --- /dev/null +++ b/kraken/service_hijacking/service_hijacking.py @@ -0,0 +1,86 @@ +import logging +import time + +import yaml +from krkn_lib.k8s import KrknKubernetes +from krkn_lib.models.telemetry import ScenarioTelemetry +from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes + + +def run(scenarios_list: list[str], krkn_lib: KrknKubernetes, telemetry: KrknTelemetryKubernetes) -> (list[str], list[ScenarioTelemetry]): + scenario_telemetries= list[ScenarioTelemetry]() + failed_post_scenarios = [] + for scenario in scenarios_list: + scenario_telemetry = ScenarioTelemetry() + scenario_telemetry.scenario = scenario + scenario_telemetry.start_timestamp = time.time() + telemetry.set_parameters_base64(scenario_telemetry, scenario) + with open(scenario) as stream: + scenario_config = yaml.safe_load(stream) + + service_name = scenario_config['service_name'] + service_namespace = scenario_config['service_namespace'] + plan 
= scenario_config["plan"] + image = scenario_config["image"] + target_port = scenario_config["service_target_port"] + chaos_duration = scenario_config["chaos_duration"] + + logging.info(f"checking service {service_name} in namespace: {service_namespace}") + if not krkn_lib.service_exists(service_name, service_namespace): + logging.error(f"service: {service_name} not found in namespace: {service_namespace}, failed to run scenario.") + fail(scenario_telemetry, scenario_telemetries) + failed_post_scenarios.append(scenario) + break + try: + logging.info(f"service: {service_name} found in namespace: {service_namespace}") + logging.info(f"creating webservice and initializing test plan...") + # both named ports and port numbers can be used + if isinstance(target_port, int): + logging.info(f"webservice will listen on port {target_port}") + webservice = krkn_lib.deploy_service_hijacking(service_namespace, plan, image, port_number=target_port) + else: + logging.info(f"traffic will be redirected to named port: {target_port}") + webservice = krkn_lib.deploy_service_hijacking(service_namespace, plan, image, port_name=target_port) + logging.info(f"successfully deployed pod: {webservice.pod_name} " + f"in namespace:{service_namespace} with selector {webservice.selector}!" 
+ ) + logging.info(f"patching service: {service_name} to hijack traffic towards: {webservice.pod_name}") + original_service = krkn_lib.replace_service_selector([webservice.selector], service_name, service_namespace) + if original_service is None: + logging.error(f"failed to patch service: {service_name}, namespace: {service_namespace} with selector {webservice.selector}") + fail(scenario_telemetry, scenario_telemetries) + failed_post_scenarios.append(scenario) + break + + logging.info(f"service: {service_name} successfully patched!") + logging.info(f"original service manifest:\n\n{yaml.dump(original_service)}") + logging.info(f"waiting {chaos_duration} before restoring the service") + time.sleep(chaos_duration) + selectors = ["=".join([key, original_service["spec"]["selector"][key]]) for key in original_service["spec"]["selector"].keys()] + logging.info(f"restoring the service selectors {selectors}") + original_service = krkn_lib.replace_service_selector(selectors, service_name, service_namespace) + if original_service is None: + logging.error(f"failed to restore original service: {service_name}, namespace: {service_namespace} with selectors: {selectors}") + fail(scenario_telemetry, scenario_telemetries) + failed_post_scenarios.append(scenario) + break + logging.info("selectors successfully restored") + logging.info("undeploying service-hijacking resources...") + krkn_lib.undeploy_service_hijacking(webservice) + scenario_telemetry.exit_status = 0 + scenario_telemetry.end_timestamp = time.time() + scenario_telemetries.append(scenario_telemetry) + logging.info("success") + except Exception as e: + logging.error(f"scenario {scenario} failed with exception: {e}") + fail(scenario_telemetry, scenario_telemetries) + failed_post_scenarios.append(scenario) + + return failed_post_scenarios, scenario_telemetries + + +def fail(scenario_telemetry: ScenarioTelemetry, scenario_telemetries: list[ScenarioTelemetry]): + scenario_telemetry.exit_status = 1 + 
scenario_telemetry.end_timestamp = time.time() + scenario_telemetries.append(scenario_telemetry) + diff --git a/requirements.txt b/requirements.txt index ed6d11f6..d09ce017 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ google-api-python-client==2.116.0 ibm_cloud_sdk_core==3.18.0 ibm_vpc==0.20.0 jinja2==3.1.4 -krkn-lib==2.1.2 +krkn-lib==2.1.3 lxml==5.1.0 kubernetes==26.1.0 oauth2client==4.1.3 diff --git a/run_kraken.py b/run_kraken.py index c9eb58f8..4e9f355b 100644 --- a/run_kraken.py +++ b/run_kraken.py @@ -25,6 +25,7 @@ import kraken.pvc.pvc_scenario as pvc_scenario import kraken.network_chaos.actions as network_chaos import kraken.arcaflow_plugin as arcaflow_plugin import kraken.prometheus as prometheus_plugin +import kraken.service_hijacking.service_hijacking as service_hijacking_plugin import server as server from kraken import plugins from krkn_lib.k8s import KrknKubernetes @@ -348,6 +349,10 @@ def main(cfg): elif scenario_type == "network_chaos": logging.info("Running Network Chaos") failed_post_scenarios, scenario_telemetries = network_chaos.run(scenarios_list, config, wait_duration, kubecli, telemetry_k8s) + elif scenario_type == "service_hijacking": + logging.info("Running Service Hijacking Chaos") + failed_post_scenarios, scenario_telemetries = service_hijacking_plugin.run(scenarios_list, kubecli, telemetry_k8s) + chaos_telemetry.scenarios.extend(scenario_telemetries) # Check for critical alerts when enabled post_critical_alerts = 0 diff --git a/scenarios/kube/service_hijacking.yaml b/scenarios/kube/service_hijacking.yaml new file mode 100644 index 00000000..870ecf9d --- /dev/null +++ b/scenarios/kube/service_hijacking.yaml @@ -0,0 +1,56 @@ +# refer to the documentation for further information https://github.com/krkn-chaos/krkn/blob/main/docs/service_hijacking_scenarios.md + +service_target_port: http-web-svc # The port of the service to be hijacked (can be named or numeric, based on the workload and service configuration).
+service_name: nginx-service # name of the service to be hijacked +service_namespace: default # The namespace where the target service is located +image: quay.io/krkn-chaos/krkn-service-hijacking:v0.1.3 # Image of the krkn web service to be deployed to receive traffic. +chaos_duration: 30 # Total duration of the chaos scenario in seconds. +plan: + - resource: "/list/index.php" # Specifies the resource or path to respond to in the scenario. For paths, both the path and query parameters are captured but ignored. + # For resources, only query parameters are captured. + + steps: # A time-based plan consisting of steps can be defined for each resource. + GET: # One or more HTTP methods can be specified for each step. + # Note: Non-standard methods are supported + # for fully custom web services (e.g., using NONEXISTENT instead of POST). + + - duration: 15 # Duration in seconds for this step before moving to the next one, if defined. Otherwise, + # this step will continue until the chaos scenario ends. + + status: 500 # HTTP status code to be returned in this step. + mime_type: "application/json" # MIME type of the response for this step. + payload: | # The response payload for this step. + { + "status":"internal server error" + } + - duration: 15 + status: 201 + mime_type: "application/json" + payload: | + { + "status":"resource created" + } + POST: + - duration: 15 + status: 401 + mime_type: "application/json" + payload: | + { + "status": "unauthorized" + } + - duration: 15 + status: 404 + mime_type: "text/plain" + payload: "not found" + + - resource: "/patch" + steps: + PATCH: + - duration: 15 + status: 201 + mime_type: "text/plain" + payload: "resource patched" + - duration: 15 + status: 400 + mime_type: "text/plain" + payload: "bad request" \ No newline at end of file