From cebc60f5a8395c28041f2767a764f569de483bcd Mon Sep 17 00:00:00 2001 From: Tullio Sebastiani Date: Wed, 18 Feb 2026 16:20:16 +0100 Subject: [PATCH] Network chaos NG porting - `pod network chaos` `node network chaos` (#991) * fix ibm Signed-off-by: Paige Patton * type hint fix Signed-off-by: Tullio Sebastiani * pod network chaos plugin structure + utils method refactoring Signed-off-by: Tullio Sebastiani * Pod network chaos plugin Signed-off-by: Tullio Sebastiani * Node network chaos plugin Signed-off-by: Tullio Sebastiani * default config files Signed-off-by: Tullio Sebastiani * config.yaml Signed-off-by: Tullio Sebastiani * all field optional Signed-off-by: Tullio Sebastiani * minor fixes Signed-off-by: Tullio Sebastiani * minor nit on config Signed-off-by: Tullio Sebastiani * utils unit tests Signed-off-by: Tullio Sebastiani * PodNetworkChaos unit tests Signed-off-by: Tullio Sebastiani * NodeNetworkChaos unit test Signed-off-by: Tullio Sebastiani * PodNetworkChaos functional test Signed-off-by: Tullio Sebastiani * NodeNetworkChaso functional test Signed-off-by: Tullio Sebastiani * added funtests to the gh action Signed-off-by: Tullio Sebastiani * unit test fix Signed-off-by: Tullio Sebastiani * changed test order + resource rename * functional tests fix smallchange Signed-off-by: Tullio Sebastiani fix requirements Signed-off-by: Tullio Sebastiani * changed pod test target Signed-off-by: Tullio Sebastiani * added kind port mapping and removed portforwarding Signed-off-by: Tullio Sebastiani fix Signed-off-by: Tullio Sebastiani test fixes Signed-off-by: Tullio Sebastiani test fixes Signed-off-by: Tullio Sebastiani --------- Signed-off-by: Paige Patton Signed-off-by: Tullio Sebastiani Co-authored-by: Paige Patton --- .github/workflows/tests.yml | 20 +- CI/tests/test_node_network_chaos.sh | 165 +++++ CI/tests/test_pod.sh | 5 +- CI/tests/test_pod_error.sh | 3 + CI/tests/test_pod_network_chaos.sh | 143 +++++ config/config.yaml | 2 + kind-config.yml | 6 + 
krkn/__init__.py | 0 .../network_chaos_ng/models.py | 45 +- .../modules/abstract_network_chaos_module.py | 39 +- .../modules/node_network_chaos.py | 156 +++++ .../modules/node_network_filter.py | 30 +- .../modules/pod_network_chaos.py | 159 +++++ .../modules/pod_network_filter.py | 49 +- .../network_chaos_ng/modules/utils.py | 109 ++++ .../modules/utils_network_chaos.py | 263 ++++++++ .../modules/utils_network_filter.py | 95 +-- .../network_chaos_ng/network_chaos_factory.py | 44 +- .../scenario_plugin_factory.py | 4 +- scenarios/kind/pod_path_provisioner.yml | 6 + scenarios/kube/node-network-chaos.yml | 18 + scenarios/kube/node-network-filter.yml | 2 +- scenarios/kube/pod-network-chaos.yml | 17 + scenarios/kube/pod-network-filter.yml | 2 +- tests/test_node_network_chaos.py | 492 ++++++++++++++ tests/test_pod_network_chaos.py | 451 +++++++++++++ tests/test_utils_network_chaos.py | 599 ++++++++++++++++++ 27 files changed, 2764 insertions(+), 160 deletions(-) create mode 100755 CI/tests/test_node_network_chaos.sh create mode 100755 CI/tests/test_pod_network_chaos.sh create mode 100644 krkn/__init__.py create mode 100644 krkn/scenario_plugins/network_chaos_ng/modules/node_network_chaos.py create mode 100644 krkn/scenario_plugins/network_chaos_ng/modules/pod_network_chaos.py create mode 100644 krkn/scenario_plugins/network_chaos_ng/modules/utils_network_chaos.py create mode 100755 scenarios/kind/pod_path_provisioner.yml create mode 100644 scenarios/kube/node-network-chaos.yml create mode 100644 scenarios/kube/pod-network-chaos.yml create mode 100644 tests/test_node_network_chaos.py create mode 100644 tests/test_pod_network_chaos.py create mode 100644 tests/test_utils_network_chaos.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 167b6575..5e9581b8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -43,11 +43,11 @@ jobs: - name: Deploy test workloads run: | - es_pod_name=$(kubectl get pods -l 
"app=elasticsearch-master" -o name) - echo "POD_NAME: $es_pod_name" - kubectl --namespace default port-forward $es_pod_name 9200 & - prom_name=$(kubectl get pods -n monitoring -l "app.kubernetes.io/name=prometheus" -o name) - kubectl --namespace monitoring port-forward $prom_name 9090 & + # es_pod_name=$(kubectl get pods -l "app=elasticsearch-master" -o name) + # echo "POD_NAME: $es_pod_name" + # kubectl --namespace default port-forward $es_pod_name 9200 & + # prom_name=$(kubectl get pods -n monitoring -l "app.kubernetes.io/name=prometheus" -o name) + # kubectl --namespace monitoring port-forward $prom_name 9090 & # Wait for Elasticsearch to be ready echo "Waiting for Elasticsearch to be ready..." @@ -85,7 +85,7 @@ jobs: yq -i '.elastic.enable_elastic=False' CI/config/common_test_config.yaml yq -i '.elastic.password="${{env.ELASTIC_PASSWORD}}"' CI/config/common_test_config.yaml yq -i '.performance_monitoring.prometheus_url="http://localhost:9090"' CI/config/common_test_config.yaml - echo "test_app_outages" >> ./CI/tests/functional_tests + echo "test_app_outages" > ./CI/tests/functional_tests echo "test_container" >> ./CI/tests/functional_tests echo "test_cpu_hog" >> ./CI/tests/functional_tests echo "test_customapp_pod" >> ./CI/tests/functional_tests @@ -94,13 +94,17 @@ jobs: echo "test_namespace" >> ./CI/tests/functional_tests echo "test_net_chaos" >> ./CI/tests/functional_tests echo "test_node" >> ./CI/tests/functional_tests - echo "test_pod" >> ./CI/tests/functional_tests - echo "test_pod_error" >> ./CI/tests/functional_tests + echo "test_service_hijacking" >> ./CI/tests/functional_tests echo "test_pod_network_filter" >> ./CI/tests/functional_tests echo "test_pod_server" >> ./CI/tests/functional_tests echo "test_time" >> ./CI/tests/functional_tests + echo "test_node_network_chaos" >> ./CI/tests/functional_tests + echo "test_pod_network_chaos" >> ./CI/tests/functional_tests + echo "test_pod_error" >> ./CI/tests/functional_tests + echo "test_pod" >> 
./CI/tests/functional_tests # echo "test_pvc" >> ./CI/tests/functional_tests + # Push on main only steps + all other functional to collect coverage # for the badge diff --git a/CI/tests/test_node_network_chaos.sh b/CI/tests/test_node_network_chaos.sh new file mode 100755 index 00000000..606c7ecf --- /dev/null +++ b/CI/tests/test_node_network_chaos.sh @@ -0,0 +1,165 @@ +set -xeEo pipefail + +source CI/tests/common.sh + +trap error ERR +trap finish EXIT + +function functional_test_node_network_chaos { + echo "Starting node network chaos functional test" + + # Get a worker node + get_node + export TARGET_NODE=$(echo $WORKER_NODE | awk '{print $1}') + echo "Target node: $TARGET_NODE" + + # Deploy nginx workload on the target node + echo "Deploying nginx workload on $TARGET_NODE..." + kubectl create deployment nginx-node-net-chaos --image=nginx:latest + + # Add node selector to ensure pod runs on target node + kubectl patch deployment nginx-node-net-chaos -p '{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"'$TARGET_NODE'"}}}}}' + + # Expose service + kubectl expose deployment nginx-node-net-chaos --port=80 --target-port=80 --name=nginx-node-net-chaos-svc + + # Wait for nginx to be ready + echo "Waiting for nginx pod to be ready on $TARGET_NODE..." + kubectl wait --for=condition=ready pod -l app=nginx-node-net-chaos --timeout=120s + + # Verify pod is on correct node + export POD_NAME=$(kubectl get pods -l app=nginx-node-net-chaos -o jsonpath='{.items[0].metadata.name}') + export POD_NODE=$(kubectl get pod $POD_NAME -o jsonpath='{.spec.nodeName}') + echo "Pod $POD_NAME is running on node $POD_NODE" + + if [ "$POD_NODE" != "$TARGET_NODE" ]; then + echo "ERROR: Pod is not on target node (expected $TARGET_NODE, got $POD_NODE)" + kubectl get pods -l app=nginx-node-net-chaos -o wide + exit 1 + fi + + # Setup port-forward to access nginx + echo "Setting up port-forward to nginx service..." 
+ kubectl port-forward service/nginx-node-net-chaos-svc 8091:80 & + PORT_FORWARD_PID=$! + sleep 3 # Give port-forward time to start + + # Test baseline connectivity + echo "Testing baseline connectivity..." + response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:8091 || echo "000") + if [ "$response" != "200" ]; then + echo "ERROR: Nginx not responding correctly (got $response, expected 200)" + kubectl get pods -l app=nginx-node-net-chaos + kubectl describe pod $POD_NAME + exit 1 + fi + echo "Baseline test passed: nginx responding with 200" + + # Measure baseline latency + echo "Measuring baseline latency..." + baseline_start=$(date +%s%3N) + curl -s http://localhost:8091 > /dev/null || true + baseline_end=$(date +%s%3N) + baseline_latency=$((baseline_end - baseline_start)) + echo "Baseline latency: ${baseline_latency}ms" + + # Configure node network chaos scenario + echo "Configuring node network chaos scenario..." + yq -i '.[0].config.target="'$TARGET_NODE'"' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.namespace="default"' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.test_duration=20' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.latency="200ms"' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.loss=15' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.bandwidth="10mbit"' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.ingress=true' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.egress=true' scenarios/kube/node-network-chaos.yml + yq -i '.[0].config.force=false' scenarios/kube/node-network-chaos.yml + yq -i 'del(.[0].config.interfaces)' scenarios/kube/node-network-chaos.yml + + # Prepare krkn config + export scenario_type="network_chaos_ng_scenarios" + export scenario_file="scenarios/kube/node-network-chaos.yml" + export post_config="" + envsubst < CI/config/common_test_config.yaml > CI/config/node_network_chaos_config.yaml + + # Run krkn in background + 
echo "Starting krkn with node network chaos scenario..." + python3 -m coverage run -a run_kraken.py -c CI/config/node_network_chaos_config.yaml & + KRKN_PID=$! + echo "Krkn started with PID: $KRKN_PID" + + # Wait for chaos to start (give it time to inject chaos) + echo "Waiting for chaos injection to begin..." + sleep 10 + + # Test during chaos - check for increased latency or packet loss effects + echo "Testing network behavior during chaos..." + chaos_test_count=0 + chaos_success=0 + + for i in {1..5}; do + chaos_test_count=$((chaos_test_count + 1)) + chaos_start=$(date +%s%3N) + response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 http://localhost:8091 || echo "000") + chaos_end=$(date +%s%3N) + chaos_latency=$((chaos_end - chaos_start)) + + echo "Attempt $i: HTTP $response, latency: ${chaos_latency}ms" + + # We expect either increased latency or some failures due to packet loss + if [ "$response" == "200" ] || [ "$response" == "000" ]; then + chaos_success=$((chaos_success + 1)) + fi + + sleep 2 + done + + echo "Chaos test results: $chaos_success/$chaos_test_count requests processed" + + # Verify node-level chaos affects pod + echo "Verifying node-level chaos affects pod on $TARGET_NODE..." + # The node chaos should affect all pods on the node + + # Wait for krkn to complete + echo "Waiting for krkn to complete..." + wait $KRKN_PID || true + echo "Krkn completed" + + # Wait a bit for cleanup + sleep 5 + + # Verify recovery - nginx should respond normally again + echo "Verifying service recovery..." 
+ recovery_attempts=0 + max_recovery_attempts=10 + + while [ $recovery_attempts -lt $max_recovery_attempts ]; do + recovery_attempts=$((recovery_attempts + 1)) + response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:8091 || echo "000") + + if [ "$response" == "200" ]; then + echo "Recovery verified: nginx responding normally (attempt $recovery_attempts)" + break + fi + + echo "Recovery attempt $recovery_attempts/$max_recovery_attempts: got $response, retrying..." + sleep 3 + done + + if [ "$response" != "200" ]; then + echo "ERROR: Service did not recover after chaos (got $response)" + kubectl get pods -l app=nginx-node-net-chaos + kubectl describe pod $POD_NAME + exit 1 + fi + + # Cleanup + echo "Cleaning up test resources..." + kill $PORT_FORWARD_PID 2>/dev/null || true + kubectl delete deployment nginx-node-net-chaos --ignore-not-found=true + kubectl delete service nginx-node-net-chaos-svc --ignore-not-found=true + + echo "Node network chaos test: Success" +} + +functional_test_node_network_chaos \ No newline at end of file diff --git a/CI/tests/test_pod.sh b/CI/tests/test_pod.sh index e09356ef..cd44c21b 100755 --- a/CI/tests/test_pod.sh +++ b/CI/tests/test_pod.sh @@ -7,14 +7,15 @@ trap finish EXIT function functional_test_pod_crash { export scenario_type="pod_disruption_scenarios" - export scenario_file="scenarios/kind/pod_etcd.yml" + export scenario_file="scenarios/kind/pod_path_provisioner.yml" + export post_config="" envsubst < CI/config/common_test_config.yaml > CI/config/pod_config.yaml python3 -m coverage run -a run_kraken.py -c CI/config/pod_config.yaml echo "Pod disruption scenario test: Success" date - kubectl get pods -n kube-system -l component=etcd -o yaml + kubectl get pods -n local-path-storage -l app=local-path-provisioner -o yaml } functional_test_pod_crash diff --git a/CI/tests/test_pod_error.sh b/CI/tests/test_pod_error.sh index 256dc27a..10ea0c74 100755 --- a/CI/tests/test_pod_error.sh +++ 
b/CI/tests/test_pod_error.sh @@ -1,4 +1,5 @@ + source CI/tests/common.sh trap error ERR @@ -8,7 +9,9 @@ function functional_test_pod_error { export scenario_type="pod_disruption_scenarios" export scenario_file="scenarios/kind/pod_etcd.yml" export post_config="" + # this test will check if krkn exits with an error when too many pods are targeted yq -i '.[0].config.kill=5' scenarios/kind/pod_etcd.yml + yq -i '.[0].config.krkn_pod_recovery_time=1' scenarios/kind/pod_etcd.yml envsubst < CI/config/common_test_config.yaml > CI/config/pod_config.yaml cat CI/config/pod_config.yaml diff --git a/CI/tests/test_pod_network_chaos.sh b/CI/tests/test_pod_network_chaos.sh new file mode 100755 index 00000000..7c77e484 --- /dev/null +++ b/CI/tests/test_pod_network_chaos.sh @@ -0,0 +1,143 @@ +set -xeEo pipefail + +source CI/tests/common.sh + +trap error ERR +trap finish EXIT + +function functional_test_pod_network_chaos { + echo "Starting pod network chaos functional test" + + # Deploy nginx workload + echo "Deploying nginx workload..." + kubectl create deployment nginx-pod-net-chaos --image=nginx:latest + kubectl expose deployment nginx-pod-net-chaos --port=80 --target-port=80 --name=nginx-pod-net-chaos-svc + + # Wait for nginx to be ready + echo "Waiting for nginx pod to be ready..." + kubectl wait --for=condition=ready pod -l app=nginx-pod-net-chaos --timeout=120s + + # Get pod name + export POD_NAME=$(kubectl get pods -l app=nginx-pod-net-chaos -o jsonpath='{.items[0].metadata.name}') + echo "Target pod: $POD_NAME" + + # Setup port-forward to access nginx + echo "Setting up port-forward to nginx service..." + kubectl port-forward service/nginx-pod-net-chaos-svc 8090:80 & + PORT_FORWARD_PID=$! + sleep 3 # Give port-forward time to start + + # Test baseline connectivity + echo "Testing baseline connectivity..." 
+ response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:8090 || echo "000") + if [ "$response" != "200" ]; then + echo "ERROR: Nginx not responding correctly (got $response, expected 200)" + kubectl get pods -l app=nginx-pod-net-chaos + kubectl describe pod $POD_NAME + exit 1 + fi + echo "Baseline test passed: nginx responding with 200" + + # Measure baseline latency + echo "Measuring baseline latency..." + baseline_start=$(date +%s%3N) + curl -s http://localhost:8090 > /dev/null || true + baseline_end=$(date +%s%3N) + baseline_latency=$((baseline_end - baseline_start)) + echo "Baseline latency: ${baseline_latency}ms" + + # Configure pod network chaos scenario + echo "Configuring pod network chaos scenario..." + yq -i '.[0].config.target="'$POD_NAME'"' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.namespace="default"' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.test_duration=20' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.latency="200ms"' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.loss=15' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.bandwidth="10mbit"' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.ingress=true' scenarios/kube/pod-network-chaos.yml + yq -i '.[0].config.egress=true' scenarios/kube/pod-network-chaos.yml + yq -i 'del(.[0].config.interfaces)' scenarios/kube/pod-network-chaos.yml + + # Prepare krkn config + export scenario_type="network_chaos_ng_scenarios" + export scenario_file="scenarios/kube/pod-network-chaos.yml" + export post_config="" + envsubst < CI/config/common_test_config.yaml > CI/config/pod_network_chaos_config.yaml + + # Run krkn in background + echo "Starting krkn with pod network chaos scenario..." + python3 -m coverage run -a run_kraken.py -c CI/config/pod_network_chaos_config.yaml & + KRKN_PID=$! 
+ echo "Krkn started with PID: $KRKN_PID" + + # Wait for chaos to start (give it time to inject chaos) + echo "Waiting for chaos injection to begin..." + sleep 10 + + # Test during chaos - check for increased latency or packet loss effects + echo "Testing network behavior during chaos..." + chaos_test_count=0 + chaos_success=0 + + for i in {1..5}; do + chaos_test_count=$((chaos_test_count + 1)) + chaos_start=$(date +%s%3N) + response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 http://localhost:8090 || echo "000") + chaos_end=$(date +%s%3N) + chaos_latency=$((chaos_end - chaos_start)) + + echo "Attempt $i: HTTP $response, latency: ${chaos_latency}ms" + + # We expect either increased latency or some failures due to packet loss + if [ "$response" == "200" ] || [ "$response" == "000" ]; then + chaos_success=$((chaos_success + 1)) + fi + + sleep 2 + done + + echo "Chaos test results: $chaos_success/$chaos_test_count requests processed" + + # Wait for krkn to complete + echo "Waiting for krkn to complete..." + wait $KRKN_PID || true + echo "Krkn completed" + + # Wait a bit for cleanup + sleep 5 + + # Verify recovery - nginx should respond normally again + echo "Verifying service recovery..." + recovery_attempts=0 + max_recovery_attempts=10 + + while [ $recovery_attempts -lt $max_recovery_attempts ]; do + recovery_attempts=$((recovery_attempts + 1)) + response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:8090 || echo "000") + + if [ "$response" == "200" ]; then + echo "Recovery verified: nginx responding normally (attempt $recovery_attempts)" + break + fi + + echo "Recovery attempt $recovery_attempts/$max_recovery_attempts: got $response, retrying..." + sleep 3 + done + + if [ "$response" != "200" ]; then + echo "ERROR: Service did not recover after chaos (got $response)" + kubectl get pods -l app=nginx-pod-net-chaos + kubectl describe pod $POD_NAME + exit 1 + fi + + # Cleanup + echo "Cleaning up test resources..." 
+ kill $PORT_FORWARD_PID 2>/dev/null || true + kubectl delete deployment nginx-pod-net-chaos --ignore-not-found=true + kubectl delete service nginx-pod-net-chaos-svc --ignore-not-found=true + + echo "Pod network chaos test: Success" +} + +functional_test_pod_network_chaos \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 1ddd9ac0..6e3b5ea3 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -50,6 +50,8 @@ kraken: - network_chaos_ng_scenarios: - scenarios/kube/pod-network-filter.yml - scenarios/kube/node-network-filter.yml + - scenarios/kube/node-network-chaos.yml + - scenarios/kube/pod-network-chaos.yml - kubevirt_vm_outage: - scenarios/kubevirt/kubevirt-vm-outage.yaml diff --git a/kind-config.yml b/kind-config.yml index 8363cc1d..d78661e9 100644 --- a/kind-config.yml +++ b/kind-config.yml @@ -3,10 +3,16 @@ apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane extraPortMappings: + - containerPort: 30000 + hostPort: 9090 + - containerPort: 32766 + hostPort: 9200 - containerPort: 30036 hostPort: 8888 - containerPort: 30037 hostPort: 8889 + - containerPort: 30080 + hostPort: 30080 - role: control-plane - role: control-plane - role: worker diff --git a/krkn/__init__.py b/krkn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/krkn/scenario_plugins/network_chaos_ng/models.py b/krkn/scenario_plugins/network_chaos_ng/models.py index a0f5e2d1..41734143 100644 --- a/krkn/scenario_plugins/network_chaos_ng/models.py +++ b/krkn/scenario_plugins/network_chaos_ng/models.py @@ -1,5 +1,7 @@ +import re from dataclasses import dataclass from enum import Enum +from typing import TypeVar, Optional class NetworkChaosScenarioType(Enum): @@ -9,16 +11,21 @@ class NetworkChaosScenarioType(Enum): @dataclass class BaseNetworkChaosConfig: - supported_execution = ["serial", "parallel"] id: str + image: str wait_duration: int test_duration: int label_selector: str service_account: str + taints: list[str] + namespace: str 
instance_count: int execution: str - namespace: str - taints: list[str] + supported_execution = ["serial", "parallel"] + interfaces: list[str] + target: str + ingress: bool + egress: bool def validate(self) -> list[str]: errors = [] @@ -41,12 +48,7 @@ class BaseNetworkChaosConfig: @dataclass class NetworkFilterConfig(BaseNetworkChaosConfig): - ingress: bool - egress: bool - interfaces: list[str] - target: str ports: list[int] - image: str protocols: list[str] def validate(self) -> list[str]: @@ -58,3 +60,30 @@ class NetworkFilterConfig(BaseNetworkChaosConfig): f"{self.protocols} contains not allowed protocols only tcp and udp is allowed" ) return errors + + +@dataclass +class NetworkChaosConfig(BaseNetworkChaosConfig): + latency: Optional[str] = None + loss: Optional[str] = None + bandwidth: Optional[str] = None + force: Optional[bool] = None + + def validate(self) -> list[str]: + errors = super().validate() + latency_regex = re.compile(r"^(\d+)(us|ms|s)$") + bandwidth_regex = re.compile(r"^(\d+)(bit|kbit|mbit|gbit|tbit)$") + if self.latency: + if not (latency_regex.match(self.latency)): + errors.append( + "latency must be a number followed by `us` (microseconds) or `ms` (milliseconds), or `s` (seconds)" + ) + if self.bandwidth: + if not (bandwidth_regex.match(self.bandwidth)): + errors.append( + "bandwidth must be a number followed by `bit` `kbit` or `mbit` or `tbit`" + ) + if self.loss: + if "%" in self.loss or not self.loss.isdigit(): + errors.append("loss must be a number followed without the `%` symbol") + return errors diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/abstract_network_chaos_module.py b/krkn/scenario_plugins/network_chaos_ng/modules/abstract_network_chaos_module.py index fd55cace..8241b0a8 100644 --- a/krkn/scenario_plugins/network_chaos_ng/modules/abstract_network_chaos_module.py +++ b/krkn/scenario_plugins/network_chaos_ng/modules/abstract_network_chaos_module.py @@ -1,6 +1,7 @@ import abc import logging import queue +from typing 
import Tuple from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift from krkn.scenario_plugins.network_chaos_ng.models import ( @@ -27,7 +28,7 @@ class AbstractNetworkChaosModule(abc.ABC): pass @abc.abstractmethod - def get_config(self) -> (NetworkChaosScenarioType, BaseNetworkChaosConfig): + def get_config(self) -> Tuple[NetworkChaosScenarioType, BaseNetworkChaosConfig]: """ returns the common subset of settings shared by all the scenarios `BaseNetworkChaosConfig` and the type of Network Chaos Scenario that is running (Pod Scenario or Node Scenario) @@ -41,6 +42,42 @@ class AbstractNetworkChaosModule(abc.ABC): pass + def get_node_targets(self, config: BaseNetworkChaosConfig): + if self.base_network_config.label_selector: + return self.kubecli.get_lib_kubernetes().list_nodes( + self.base_network_config.label_selector + ) + else: + if not config.target: + raise Exception( + "neither node selector nor node_name (target) specified, aborting." + ) + node_info = self.kubecli.get_lib_kubernetes().list_nodes() + if config.target not in node_info: + raise Exception(f"node {config.target} not found, aborting") + + return [config.target] + + def get_pod_targets(self, config: BaseNetworkChaosConfig): + if not config.namespace: + raise Exception("namespace not specified, aborting") + if self.base_network_config.label_selector: + return self.kubecli.get_lib_kubernetes().list_pods( + config.namespace, config.label_selector + ) + else: + if not config.target: + raise Exception( + "neither node selector nor node_name (target) specified, aborting." 
+ ) + if not self.kubecli.get_lib_kubernetes().check_if_pod_exists( + config.target, config.namespace + ): + raise Exception( + f"pod {config.target} not found in namespace {config.namespace}" + ) + return [config.target] + def __init__( self, base_network_config: BaseNetworkChaosConfig, diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/node_network_chaos.py b/krkn/scenario_plugins/network_chaos_ng/modules/node_network_chaos.py new file mode 100644 index 00000000..0a73417c --- /dev/null +++ b/krkn/scenario_plugins/network_chaos_ng/modules/node_network_chaos.py @@ -0,0 +1,156 @@ +import queue +import time +from typing import Tuple + +from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift +from krkn_lib.utils import get_random_string + +from krkn.scenario_plugins.network_chaos_ng.models import ( + NetworkChaosScenarioType, + BaseNetworkChaosConfig, + NetworkChaosConfig, +) +from krkn.scenario_plugins.network_chaos_ng.modules.abstract_network_chaos_module import ( + AbstractNetworkChaosModule, +) +from krkn.scenario_plugins.network_chaos_ng.modules.utils import ( + log_info, + setup_network_chaos_ng_scenario, + log_error, + log_warning, +) +from krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos import ( + common_set_limit_rules, + common_delete_limit_rules, + node_qdisc_is_simple, +) + + +class NodeNetworkChaosModule(AbstractNetworkChaosModule): + + def __init__(self, config: NetworkChaosConfig, kubecli: KrknTelemetryOpenshift): + super().__init__(config, kubecli) + self.config = config + + def run(self, target: str, error_queue: queue.Queue = None): + parallel = False + if error_queue: + parallel = True + try: + network_chaos_pod_name = f"node-network-chaos-{get_random_string(5)}" + container_name = f"fedora-container-{get_random_string(5)}" + + log_info( + f"creating workload to inject network chaos in node {target} network" + f"latency:{str(self.config.latency) if self.config.latency else '0'}, " + f"packet drop:{str(self.config.loss) 
if self.config.loss else '0'} " + f"bandwidth restriction:{str(self.config.bandwidth) if self.config.bandwidth else '0'} ", + parallel, + network_chaos_pod_name, + ) + + _, interfaces = setup_network_chaos_ng_scenario( + self.config, + target, + network_chaos_pod_name, + container_name, + self.kubecli.get_lib_kubernetes(), + target, + parallel, + True, + ) + + if len(self.config.interfaces) == 0: + if len(interfaces) == 0: + log_error( + "no network interface found in pod, impossible to execute the network chaos scenario", + parallel, + network_chaos_pod_name, + ) + return + log_info( + f"detected network interfaces: {','.join(interfaces)}", + parallel, + network_chaos_pod_name, + ) + else: + interfaces = self.config.interfaces + + log_info( + f"targeting node {target}", + parallel, + network_chaos_pod_name, + ) + + complex_config_interfaces = [] + for interface in interfaces: + is_simple = node_qdisc_is_simple( + self.kubecli.get_lib_kubernetes(), + network_chaos_pod_name, + self.config.namespace, + interface, + ) + if not is_simple: + complex_config_interfaces.append(interface) + + if len(complex_config_interfaces) > 0 and not self.config.force: + log_warning( + f"node already has tc rules set for {','.join(complex_config_interfaces)}, this action might damage the cluster," + "if you want to continue set `force` to True in the node network " + "chaos scenario config file and try again" + ) + else: + if len(complex_config_interfaces) > 0 and self.config.force: + log_warning( + f"you are forcing node network configuration override for {','.join(complex_config_interfaces)}," + "this action might lead to unpredictable node behaviour, " + "you're doing it in your own responsibility" + "waiting 10 seconds before continuing" + ) + time.sleep(10) + common_set_limit_rules( + self.config.egress, + self.config.ingress, + interfaces, + self.config.bandwidth, + self.config.latency, + self.config.loss, + parallel, + network_chaos_pod_name, + self.kubecli.get_lib_kubernetes(), 
+ network_chaos_pod_name, + self.config.namespace, + None, + ) + + time.sleep(self.config.test_duration) + + log_info("removing tc rules", parallel, network_chaos_pod_name) + + common_delete_limit_rules( + self.config.egress, + self.config.ingress, + interfaces, + network_chaos_pod_name, + self.config.namespace, + self.kubecli.get_lib_kubernetes(), + None, + parallel, + network_chaos_pod_name, + ) + + self.kubecli.get_lib_kubernetes().delete_pod( + network_chaos_pod_name, self.config.namespace + ) + + except Exception as e: + if error_queue is None: + raise e + else: + error_queue.put(str(e)) + + def get_config(self) -> Tuple[NetworkChaosScenarioType, BaseNetworkChaosConfig]: + return NetworkChaosScenarioType.Node, self.config + + def get_targets(self) -> list[str]: + return self.get_node_targets(self.config) diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/node_network_filter.py b/krkn/scenario_plugins/network_chaos_ng/modules/node_network_filter.py index 6dd01c79..4e79a060 100644 --- a/krkn/scenario_plugins/network_chaos_ng/modules/node_network_filter.py +++ b/krkn/scenario_plugins/network_chaos_ng/modules/node_network_filter.py @@ -1,5 +1,6 @@ import queue import time +from typing import Tuple from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift from krkn_lib.utils import get_random_string @@ -11,14 +12,16 @@ from krkn.scenario_plugins.network_chaos_ng.models import ( from krkn.scenario_plugins.network_chaos_ng.modules.abstract_network_chaos_module import ( AbstractNetworkChaosModule, ) -from krkn.scenario_plugins.network_chaos_ng.modules.utils import log_info +from krkn.scenario_plugins.network_chaos_ng.modules.utils import ( + log_info, + deploy_network_chaos_ng_pod, + get_pod_default_interface, +) from krkn.scenario_plugins.network_chaos_ng.modules.utils_network_filter import ( - deploy_network_filter_pod, apply_network_rules, clean_network_rules, generate_rules, - get_default_interface, ) @@ -41,7 +44,7 @@ class 
NodeNetworkFilterModule(AbstractNetworkChaosModule): ) pod_name = f"node-filter-{get_random_string(5)}" - deploy_network_filter_pod( + deploy_network_chaos_ng_pod( self.config, target, pod_name, @@ -50,7 +53,7 @@ class NodeNetworkFilterModule(AbstractNetworkChaosModule): if len(self.config.interfaces) == 0: interfaces = [ - get_default_interface( + get_pod_default_interface( pod_name, self.config.namespace, self.kubecli.get_lib_kubernetes(), @@ -108,21 +111,8 @@ class NodeNetworkFilterModule(AbstractNetworkChaosModule): super().__init__(config, kubecli) self.config = config - def get_config(self) -> (NetworkChaosScenarioType, BaseNetworkChaosConfig): + def get_config(self) -> Tuple[NetworkChaosScenarioType, BaseNetworkChaosConfig]: return NetworkChaosScenarioType.Node, self.config def get_targets(self) -> list[str]: - if self.base_network_config.label_selector: - return self.kubecli.get_lib_kubernetes().list_nodes( - self.base_network_config.label_selector - ) - else: - if not self.config.target: - raise Exception( - "neither node selector nor node_name (target) specified, aborting." 
- ) - node_info = self.kubecli.get_lib_kubernetes().list_nodes() - if self.config.target not in node_info: - raise Exception(f"node {self.config.target} not found, aborting") - - return [self.config.target] + return self.get_node_targets(self.config) diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_chaos.py b/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_chaos.py new file mode 100644 index 00000000..caf8db12 --- /dev/null +++ b/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_chaos.py @@ -0,0 +1,159 @@ +import queue +import time +from typing import Tuple + +from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift +from krkn_lib.utils import get_random_string + +from krkn.scenario_plugins.network_chaos_ng.models import ( + NetworkChaosScenarioType, + BaseNetworkChaosConfig, + NetworkChaosConfig, +) +from krkn.scenario_plugins.network_chaos_ng.modules.abstract_network_chaos_module import ( + AbstractNetworkChaosModule, +) +from krkn.scenario_plugins.network_chaos_ng.modules.utils import ( + log_info, + setup_network_chaos_ng_scenario, + log_error, +) +from krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos import ( + common_set_limit_rules, + common_delete_limit_rules, +) + + +class PodNetworkChaosModule(AbstractNetworkChaosModule): + + def __init__(self, config: NetworkChaosConfig, kubecli: KrknTelemetryOpenshift): + super().__init__(config, kubecli) + self.config = config + + def run(self, target: str, error_queue: queue.Queue = None): + parallel = False + if error_queue: + parallel = True + try: + network_chaos_pod_name = f"pod-network-chaos-{get_random_string(5)}" + container_name = f"fedora-container-{get_random_string(5)}" + pod_info = self.kubecli.get_lib_kubernetes().get_pod_info( + target, self.config.namespace + ) + + log_info( + f"creating workload to inject network chaos in pod {target} network" + f"latency:{str(self.config.latency) if self.config.latency else '0'}, " + f"packet 
drop:{str(self.config.loss) if self.config.loss else '0'} " + f"bandwidth restriction:{str(self.config.bandwidth) if self.config.bandwidth else '0'} ", + parallel, + network_chaos_pod_name, + ) + + if not pod_info: + raise Exception( + f"impossible to retrieve infos for pod {target} namespace {self.config.namespace}" + ) + + container_ids, interfaces = setup_network_chaos_ng_scenario( + self.config, + pod_info.nodeName, + network_chaos_pod_name, + container_name, + self.kubecli.get_lib_kubernetes(), + target, + parallel, + False, + ) + + if len(self.config.interfaces) == 0: + if len(interfaces) == 0: + log_error( + "no network interface found in pod, impossible to execute the network chaos scenario", + parallel, + network_chaos_pod_name, + ) + return + log_info( + f"detected network interfaces: {','.join(interfaces)}", + parallel, + network_chaos_pod_name, + ) + else: + interfaces = self.config.interfaces + + if len(container_ids) == 0: + raise Exception( + f"impossible to resolve container id for pod {target} namespace {self.config.namespace}" + ) + + log_info( + f"targeting container {container_ids[0]}", + parallel, + network_chaos_pod_name, + ) + + pids = self.kubecli.get_lib_kubernetes().get_pod_pids( + base_pod_name=network_chaos_pod_name, + base_pod_namespace=self.config.namespace, + base_pod_container_name=container_name, + pod_name=target, + pod_namespace=self.config.namespace, + pod_container_id=container_ids[0], + ) + + if not pids: + raise Exception(f"impossible to resolve pid for pod {target}") + + log_info( + f"resolved pids {pids} in node {pod_info.nodeName} for pod {target}", + parallel, + network_chaos_pod_name, + ) + + common_set_limit_rules( + self.config.egress, + self.config.ingress, + interfaces, + self.config.bandwidth, + self.config.latency, + self.config.loss, + parallel, + network_chaos_pod_name, + self.kubecli.get_lib_kubernetes(), + network_chaos_pod_name, + self.config.namespace, + pids, + ) + + time.sleep(self.config.test_duration) + + 
log_info("removing tc rules", parallel, network_chaos_pod_name) + + common_delete_limit_rules( + self.config.egress, + self.config.ingress, + interfaces, + network_chaos_pod_name, + self.config.namespace, + self.kubecli.get_lib_kubernetes(), + pids, + parallel, + network_chaos_pod_name, + ) + + self.kubecli.get_lib_kubernetes().delete_pod( + network_chaos_pod_name, self.config.namespace + ) + + except Exception as e: + if error_queue is None: + raise e + else: + error_queue.put(str(e)) + + def get_config(self) -> Tuple[NetworkChaosScenarioType, BaseNetworkChaosConfig]: + return NetworkChaosScenarioType.Pod, self.config + + def get_targets(self) -> list[str]: + return self.get_pod_targets(self.config) diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_filter.py b/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_filter.py index 8b1933b0..8ecf8ddb 100644 --- a/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_filter.py +++ b/krkn/scenario_plugins/network_chaos_ng/modules/pod_network_filter.py @@ -1,6 +1,6 @@ -import logging import queue import time +from typing import Tuple from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift from krkn_lib.utils import get_random_string @@ -13,12 +13,17 @@ from krkn.scenario_plugins.network_chaos_ng.models import ( from krkn.scenario_plugins.network_chaos_ng.modules.abstract_network_chaos_module import ( AbstractNetworkChaosModule, ) -from krkn.scenario_plugins.network_chaos_ng.modules.utils import log_info, log_error +from krkn.scenario_plugins.network_chaos_ng.modules.utils import ( + log_info, + log_error, + deploy_network_chaos_ng_pod, + get_pod_default_interface, + setup_network_chaos_ng_scenario, +) from krkn.scenario_plugins.network_chaos_ng.modules.utils_network_filter import ( - deploy_network_filter_pod, - generate_namespaced_rules, apply_network_rules, clean_network_rules_namespaced, + generate_namespaced_rules, ) @@ -50,22 +55,18 @@ class 
PodNetworkFilterModule(AbstractNetworkChaosModule): f"impossible to retrieve infos for pod {self.config.target} namespace {self.config.namespace}" ) - deploy_network_filter_pod( + container_ids, interfaces = setup_network_chaos_ng_scenario( self.config, pod_info.nodeName, pod_name, - self.kubecli.get_lib_kubernetes(), container_name, - host_network=False, + self.kubecli.get_lib_kubernetes(), + target, + parallel, + False, ) if len(self.config.interfaces) == 0: - interfaces = ( - self.kubecli.get_lib_kubernetes().list_pod_network_interfaces( - target, self.config.namespace - ) - ) - if len(interfaces) == 0: log_error( "no network interface found in pod, impossible to execute the network filter scenario", @@ -157,26 +158,8 @@ class PodNetworkFilterModule(AbstractNetworkChaosModule): super().__init__(config, kubecli) self.config = config - def get_config(self) -> (NetworkChaosScenarioType, BaseNetworkChaosConfig): + def get_config(self) -> Tuple[NetworkChaosScenarioType, BaseNetworkChaosConfig]: return NetworkChaosScenarioType.Pod, self.config def get_targets(self) -> list[str]: - if not self.config.namespace: - raise Exception("namespace not specified, aborting") - if self.base_network_config.label_selector: - return self.kubecli.get_lib_kubernetes().list_pods( - self.config.namespace, self.config.label_selector - ) - else: - if not self.config.target: - raise Exception( - "neither node selector nor node_name (target) specified, aborting." 
- ) - if not self.kubecli.get_lib_kubernetes().check_if_pod_exists( - self.config.target, self.config.namespace - ): - raise Exception( - f"pod {self.config.target} not found in namespace {self.config.namespace}" - ) - - return [self.config.target] + return self.get_pod_targets(self.config) diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/utils.py b/krkn/scenario_plugins/network_chaos_ng/modules/utils.py index 7e6ab2a8..c92e9df4 100644 --- a/krkn/scenario_plugins/network_chaos_ng/modules/utils.py +++ b/krkn/scenario_plugins/network_chaos_ng/modules/utils.py @@ -1,4 +1,15 @@ import logging +import os +from typing import Tuple + +import yaml +from jinja2 import FileSystemLoader, Environment +from krkn_lib.k8s import KrknKubernetes +from krkn_lib.models.k8s import Pod + +from krkn.scenario_plugins.network_chaos_ng.models import ( + BaseNetworkChaosConfig, +) def log_info(message: str, parallel: bool = False, node_name: str = ""): @@ -29,3 +40,101 @@ def log_warning(message: str, parallel: bool = False, node_name: str = ""): logging.warning(f"[{node_name}]: {message}") else: logging.warning(message) + + +def deploy_network_chaos_ng_pod( + config: BaseNetworkChaosConfig, + target_node: str, + pod_name: str, + kubecli: KrknKubernetes, + container_name: str = "fedora", + host_network: bool = True, +): + file_loader = FileSystemLoader(os.path.abspath(os.path.dirname(__file__))) + env = Environment(loader=file_loader, autoescape=True) + pod_template = env.get_template("templates/network-chaos.j2") + tolerations = [] + + for taint in config.taints: + key_value_part, effect = taint.split(":", 1) + if "=" in key_value_part: + key, value = key_value_part.split("=", 1) + operator = "Equal" + else: + key = key_value_part + value = None + operator = "Exists" + toleration = { + "key": key, + "operator": operator, + "effect": effect, + } + if value is not None: + toleration["value"] = value + tolerations.append(toleration) + + pod_body = yaml.safe_load( + 
pod_template.render( + pod_name=pod_name, + namespace=config.namespace, + host_network=host_network, + target=target_node, + container_name=container_name, + workload_image=config.image, + taints=tolerations, + service_account=config.service_account, + ) + ) + + kubecli.create_pod(pod_body, config.namespace, 300) + + +def get_pod_default_interface( + pod_name: str, namespace: str, kubecli: KrknKubernetes +) -> str: + cmd = "ip r | grep default | awk '/default/ {print $5}'" + output = kubecli.exec_cmd_in_pod([cmd], pod_name, namespace) + return output.replace("\n", "") + + +def setup_network_chaos_ng_scenario( + config: BaseNetworkChaosConfig, + node_name: str, + pod_name: str, + container_name: str, + kubecli: KrknKubernetes, + target: str, + parallel: bool, + host_network: bool, +) -> Tuple[list[str], list[str]]: + + deploy_network_chaos_ng_pod( + config, + node_name, + pod_name, + kubecli, + container_name, + host_network=host_network, + ) + + if len(config.interfaces) == 0: + interfaces = [ + get_pod_default_interface( + pod_name, + config.namespace, + kubecli, + ) + ] + + log_info(f"detected default interface {interfaces[0]}", parallel, target) + + else: + interfaces = config.interfaces + # if not host_network means that the target is a pod so container_ids need to be resolved + # otherwise it's not needed + if not host_network: + container_ids = kubecli.get_container_ids(target, config.namespace) + else: + container_ids = [] + + return container_ids, interfaces diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_chaos.py b/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_chaos.py new file mode 100644 index 00000000..689ccb28 --- /dev/null +++ b/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_chaos.py @@ -0,0 +1,263 @@ +import subprocess +import logging +from typing import Optional + +from krkn_lib.k8s import KrknKubernetes + +from krkn.scenario_plugins.network_chaos_ng.modules.utils import ( + log_info, + 
log_warning, + log_error, +) + +ROOT_HANDLE = "100:" +CLASS_ID = "100:1" +NETEM_HANDLE = "101:" + + +def run(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess: + return subprocess.run(cmd, check=check, text=True, capture_output=True) + + +def tc_node(args: list[str]) -> subprocess.CompletedProcess: + return run(["tc"] + args) + + +def get_build_tc_tree_commands(devs: list[str]) -> list[str]: + tree = [] + for dev in devs: + tree.append(f"tc qdisc add dev {dev} root handle {ROOT_HANDLE} htb default 1") + tree.append( + f"tc class add dev {dev} parent {ROOT_HANDLE} classid {CLASS_ID} htb rate 1gbit", + ) + tree.append( + f"tc qdisc add dev {dev} parent {CLASS_ID} handle {NETEM_HANDLE} netem delay 0ms loss 0%", + ) + + return tree + + +def namespaced_tc_commands(pids: list[str], commands: list[str]) -> list[str]: + return [ + f"nsenter --target {pid} --net -- {rule}" for pid in pids for rule in commands + ] + + +def get_egress_shaping_comand( + devices: list[str], + rate_mbit: Optional[str], + delay_ms: Optional[str], + loss_pct: Optional[str], +) -> list[str]: + + rate_commands = [] + rate = f"{rate_mbit}mbit" if rate_mbit is not None else "1gbit" + d = delay_ms if delay_ms is not None else 0 + l = loss_pct if loss_pct is not None else 0 + for dev in devices: + rate_commands.append( + f"tc class change dev {dev} parent {ROOT_HANDLE} classid {CLASS_ID} htb rate {rate}" + ) + rate_commands.append( + f"tc qdisc change dev {dev} parent {CLASS_ID} handle {NETEM_HANDLE} netem delay {d}ms loss {l}%" + ) + return rate_commands + + +def get_clear_egress_shaping_commands(devices: list[str]) -> list[str]: + return [f"tc qdisc del dev {dev} root handle {ROOT_HANDLE}" for dev in devices] + + +def get_ingress_shaping_commands( + devs: list[str], + rate_mbit: Optional[str], + delay_ms: Optional[str], + loss_pct: Optional[str], + ifb_dev: str = "ifb0", +) -> list[str]: + + rate_commands = [ + f"modprobe ifb || true", + f"ip link add {ifb_dev} type ifb || true", + 
f"ip link set {ifb_dev} up || true", + ] + + for dev in devs: + rate_commands.append(f"tc qdisc add dev {dev} handle ffff: ingress || true") + + rate_commands.append( + f"tc filter add dev {dev} parent ffff: protocol all prio 1 " + f"matchall action mirred egress redirect dev {ifb_dev} || true" + ) + + rate_commands.append( + f"tc qdisc add dev {ifb_dev} root handle {ROOT_HANDLE} htb default 1 || true" + ) + rate_commands.append( + f"tc class add dev {ifb_dev} parent {ROOT_HANDLE} classid {CLASS_ID} " + f"htb rate {rate_mbit if rate_mbit else '1gbit'} || true" + ) + rate_commands.append( + f"tc qdisc add dev {ifb_dev} parent {CLASS_ID} handle {NETEM_HANDLE} " + f"netem delay {delay_ms if delay_ms else '0ms'} " + f"loss {loss_pct if loss_pct else '0'}% || true" + ) + + return rate_commands + + +def get_clear_ingress_shaping_commands( + devs: list[str], + ifb_dev: str = "ifb0", +) -> list[str]: + + cmds: list[str] = [] + for dev in devs: + cmds.append(f"tc qdisc del dev {dev} ingress || true") + + cmds.append(f"tc qdisc del dev {ifb_dev} root handle {ROOT_HANDLE} || true") + + cmds.append(f"ip link set {ifb_dev} down || true") + cmds.append(f"ip link del {ifb_dev} || true") + + return cmds + + +def node_qdisc_is_simple( + kubecli: KrknKubernetes, pod_name, namespace: str, interface: str +) -> bool: + + result = kubecli.exec_cmd_in_pod( + [f"tc qdisc show dev {interface}"], pod_name, namespace + ) + lines = [l for l in result.splitlines() if l.strip()] + if len(lines) != 1: + return False + + line = lines[0].lower() + if "htb" in line or "netem" in line or "clsact" in line: + return False + + return True + + +def common_set_limit_rules( + egress: bool, + ingress: bool, + interfaces: list[str], + bandwidth: str, + latency: str, + loss: str, + parallel: bool, + target: str, + kubecli: KrknKubernetes, + network_chaos_pod_name: str, + namespace: str, + pids: Optional[list[str]] = None, +): + if egress: + build_tree_commands = get_build_tc_tree_commands(interfaces) + if 
pids: + build_tree_commands = namespaced_tc_commands(pids, build_tree_commands) + egress_shaping_commands = get_egress_shaping_comand( + interfaces, + bandwidth, + latency, + loss, + ) + if pids: + egress_shaping_commands = namespaced_tc_commands( + pids, egress_shaping_commands + ) + error_counter = 0 + for rule in build_tree_commands: + result = kubecli.exec_cmd_in_pod([rule], network_chaos_pod_name, namespace) + if not result: + log_info(f"created tc tree in pod: {rule}", parallel, target) + else: + error_counter += 1 + if len(build_tree_commands) == error_counter: + log_error( + "failed to apply egress shaping rules on cluster", parallel, target + ) + + for rule in egress_shaping_commands: + result = kubecli.exec_cmd_in_pod([rule], network_chaos_pod_name, namespace) + if not result: + log_info(f"applied egress shaping rules: {rule}", parallel, target) + if ingress: + ingress_shaping_commands = get_ingress_shaping_commands( + interfaces, + bandwidth, + latency, + loss, + ) + if pids: + ingress_shaping_commands = namespaced_tc_commands( + pids, ingress_shaping_commands + ) + error_counter = 0 + for rule in ingress_shaping_commands: + + result = kubecli.exec_cmd_in_pod([rule], network_chaos_pod_name, namespace) + if not result: + log_info( + f"applied ingress shaping rule: {rule}", + parallel, + network_chaos_pod_name, + ) + else: + error_counter += 1 + + if len(ingress_shaping_commands) == error_counter: + log_error( + "failed to apply ingress shaping rules on cluster", parallel, target + ) + + +def common_delete_limit_rules( + egress: bool, + ingress: bool, + interfaces: list[str], + network_chaos_pod_name: str, + network_chaos_namespace: str, + kubecli: KrknKubernetes, + pids: Optional[list[str]], + parallel: bool, + target: str, +): + if egress: + clear_commands = get_clear_egress_shaping_commands(interfaces) + if pids: + clear_commands = namespaced_tc_commands(pids, clear_commands) + error_counter = 0 + for rule in clear_commands: + result = 
kubecli.exec_cmd_in_pod( + [rule], network_chaos_pod_name, network_chaos_namespace + ) + if not result: + log_info(f"removed egress shaping rule : {rule}", parallel, target) + else: + error_counter += 1 + if len(clear_commands) == error_counter: + log_error( + "failed to remove egress shaping rules on cluster", parallel, target + ) + + if ingress: + clear_commands = get_clear_ingress_shaping_commands(interfaces) + if pids: + clear_commands = namespaced_tc_commands(pids, clear_commands) + error_counter = 0 + for rule in clear_commands: + result = kubecli.exec_cmd_in_pod( + [rule], network_chaos_pod_name, network_chaos_namespace + ) + if not result: + log_info(f"removed ingress shaping rule: {rule}", parallel, target) + else: + error_counter += 1 + if len(clear_commands) == error_counter: + log_error( + "failed to remove ingress shaping rules on cluster", parallel, target + ) diff --git a/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_filter.py b/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_filter.py index 136c2321..7677e086 100644 --- a/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_filter.py +++ b/krkn/scenario_plugins/network_chaos_ng/modules/utils_network_filter.py @@ -1,7 +1,5 @@ -import os +from typing import Tuple -import yaml -from jinja2 import FileSystemLoader, Environment from krkn_lib.k8s import KrknKubernetes from krkn.scenario_plugins.network_chaos_ng.models import NetworkFilterConfig @@ -10,7 +8,7 @@ from krkn.scenario_plugins.network_chaos_ng.modules.utils import log_info def generate_rules( interfaces: list[str], config: NetworkFilterConfig -) -> (list[str], list[str]): +) -> Tuple[list[str], list[str]]: input_rules = [] output_rules = [] for interface in interfaces: @@ -29,72 +27,6 @@ def generate_rules( return input_rules, output_rules -def generate_namespaced_rules( - interfaces: list[str], config: NetworkFilterConfig, pids: list[str] -) -> (list[str], list[str]): - namespaced_input_rules: list[str] = [] 
- namespaced_output_rules: list[str] = [] - input_rules, output_rules = generate_rules(interfaces, config) - for pid in pids: - ns_input_rules = [ - f"nsenter --target {pid} --net -- {rule}" for rule in input_rules - ] - ns_output_rules = [ - f"nsenter --target {pid} --net -- {rule}" for rule in output_rules - ] - namespaced_input_rules.extend(ns_input_rules) - namespaced_output_rules.extend(ns_output_rules) - - return namespaced_input_rules, namespaced_output_rules - - -def deploy_network_filter_pod( - config: NetworkFilterConfig, - target_node: str, - pod_name: str, - kubecli: KrknKubernetes, - container_name: str = "fedora", - host_network: bool = True, -): - file_loader = FileSystemLoader(os.path.abspath(os.path.dirname(__file__))) - env = Environment(loader=file_loader, autoescape=True) - pod_template = env.get_template("templates/network-chaos.j2") - tolerations = [] - - for taint in config.taints: - key_value_part, effect = taint.split(":", 1) - if "=" in key_value_part: - key, value = key_value_part.split("=", 1) - operator = "Equal" - else: - key = key_value_part - value = None - operator = "Exists" - toleration = { - "key": key, - "operator": operator, - "effect": effect, - } - if value is not None: - toleration["value"] = value - tolerations.append(toleration) - - pod_body = yaml.safe_load( - pod_template.render( - pod_name=pod_name, - namespace=config.namespace, - host_network=host_network, - target=target_node, - container_name=container_name, - workload_image=config.image, - taints=tolerations, - service_account=config.service_account, - ) - ) - - kubecli.create_pod(pod_body, config.namespace, 300) - - def apply_network_rules( kubecli: KrknKubernetes, input_rules: list[str], @@ -153,9 +85,20 @@ def clean_network_rules_namespaced( ) -def get_default_interface( - pod_name: str, namespace: str, kubecli: KrknKubernetes -) -> str: - cmd = "ip r | grep default | awk '/default/ {print $5}'" - output = kubecli.exec_cmd_in_pod([cmd], pod_name, namespace) - 
return output.replace("\n", "") +def generate_namespaced_rules( + interfaces: list[str], config: NetworkFilterConfig, pids: list[str] +) -> Tuple[list[str], list[str]]: + namespaced_input_rules: list[str] = [] + namespaced_output_rules: list[str] = [] + input_rules, output_rules = generate_rules(interfaces, config) + for pid in pids: + ns_input_rules = [ + f"nsenter --target {pid} --net -- {rule}" for rule in input_rules + ] + ns_output_rules = [ + f"nsenter --target {pid} --net -- {rule}" for rule in output_rules + ] + namespaced_input_rules.extend(ns_input_rules) + namespaced_output_rules.extend(ns_output_rules) + + return namespaced_input_rules, namespaced_output_rules diff --git a/krkn/scenario_plugins/network_chaos_ng/network_chaos_factory.py b/krkn/scenario_plugins/network_chaos_ng/network_chaos_factory.py index 901ac5af..3dd04326 100644 --- a/krkn/scenario_plugins/network_chaos_ng/network_chaos_factory.py +++ b/krkn/scenario_plugins/network_chaos_ng/network_chaos_factory.py @@ -1,17 +1,31 @@ from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift -from krkn.scenario_plugins.network_chaos_ng.models import NetworkFilterConfig +from krkn.scenario_plugins.network_chaos_ng.models import ( + NetworkFilterConfig, + NetworkChaosConfig, +) from krkn.scenario_plugins.network_chaos_ng.modules.abstract_network_chaos_module import ( AbstractNetworkChaosModule, ) +from krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos import ( + NodeNetworkChaosModule, +) from krkn.scenario_plugins.network_chaos_ng.modules.node_network_filter import ( NodeNetworkFilterModule, ) +from krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos import ( + PodNetworkChaosModule, +) from krkn.scenario_plugins.network_chaos_ng.modules.pod_network_filter import ( PodNetworkFilterModule, ) -supported_modules = ["node_network_filter", "pod_network_filter"] +supported_modules = [ + "node_network_filter", + "pod_network_filter", + "pod_network_chaos", + "node_network_chaos", 
+] class NetworkChaosFactory: @@ -26,14 +40,28 @@ class NetworkChaosFactory: raise Exception(f"{config['id']} is not a supported network chaos module") if config["id"] == "node_network_filter": - config = NetworkFilterConfig(**config) - errors = config.validate() + scenario_config = NetworkFilterConfig(**config) + errors = scenario_config.validate() if len(errors) > 0: raise Exception(f"config validation errors: [{';'.join(errors)}]") - return NodeNetworkFilterModule(config, kubecli) + return NodeNetworkFilterModule(scenario_config, kubecli) if config["id"] == "pod_network_filter": - config = NetworkFilterConfig(**config) - errors = config.validate() + scenario_config = NetworkFilterConfig(**config) + errors = scenario_config.validate() if len(errors) > 0: raise Exception(f"config validation errors: [{';'.join(errors)}]") - return PodNetworkFilterModule(config, kubecli) + return PodNetworkFilterModule(scenario_config, kubecli) + if config["id"] == "pod_network_chaos": + scenario_config = NetworkChaosConfig(**config) + errors = scenario_config.validate() + if len(errors) > 0: + raise Exception(f"config validation errors: [{';'.join(errors)}]") + return PodNetworkChaosModule(scenario_config, kubecli) + if config["id"] == "node_network_chaos": + scenario_config = NetworkChaosConfig(**config) + errors = scenario_config.validate() + if len(errors) > 0: + raise Exception(f"config validation errors: [{';'.join(errors)}]") + return NodeNetworkChaosModule(scenario_config, kubecli) + else: + raise Exception(f"invalid network chaos id {config['id']}") diff --git a/krkn/scenario_plugins/scenario_plugin_factory.py b/krkn/scenario_plugins/scenario_plugin_factory.py index 28dede5c..ec791df5 100644 --- a/krkn/scenario_plugins/scenario_plugin_factory.py +++ b/krkn/scenario_plugins/scenario_plugin_factory.py @@ -1,7 +1,7 @@ import importlib import inspect import pkgutil -from typing import Type, Tuple, Optional +from typing import Type, Tuple, Optional, Any from 
krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin @@ -11,7 +11,7 @@ class ScenarioPluginNotFound(Exception): class ScenarioPluginFactory: - loaded_plugins: dict[str, any] = {} + loaded_plugins: dict[str, Any] = {} failed_plugins: list[Tuple[str, str, str]] = [] package_name = None diff --git a/scenarios/kind/pod_path_provisioner.yml b/scenarios/kind/pod_path_provisioner.yml new file mode 100755 index 00000000..b8be2c10 --- /dev/null +++ b/scenarios/kind/pod_path_provisioner.yml @@ -0,0 +1,6 @@ +- id: kill-pods + config: + namespace_pattern: "local-path-storage" + label_selector: "app=local-path-provisioner" + krkn_pod_recovery_time: 20 + kill: 1 \ No newline at end of file diff --git a/scenarios/kube/node-network-chaos.yml b/scenarios/kube/node-network-chaos.yml new file mode 100644 index 00000000..d99ae6cc --- /dev/null +++ b/scenarios/kube/node-network-chaos.yml @@ -0,0 +1,18 @@ +- id: node_network_chaos + image: "quay.io/krkn-chaos/krkn-network-chaos:latest" + wait_duration: 1 + test_duration: 60 + label_selector: "" + service_account: "" + taints: [] + namespace: 'default' + instance_count: 1 + target: "" + execution: parallel + interfaces: [] + ingress: true + egress: true + latency: 0s # supported units are us (microseconds), ms, s + loss: 10 # percentage + bandwidth: 1gbit #supported units are bit kbit mbit gbit tbit + force: false \ No newline at end of file diff --git a/scenarios/kube/node-network-filter.yml b/scenarios/kube/node-network-filter.yml index 86458e86..2f6ca73a 100644 --- a/scenarios/kube/node-network-filter.yml +++ b/scenarios/kube/node-network-filter.yml @@ -4,7 +4,7 @@ test_duration: 10 label_selector: "" service_account: "" - taints: [] # example ["node-role.kubernetes.io/master:NoSchedule"] + taints: [] namespace: 'default' instance_count: 1 execution: parallel diff --git a/scenarios/kube/pod-network-chaos.yml b/scenarios/kube/pod-network-chaos.yml new file mode 100644 index 00000000..214ad3d8 --- /dev/null +++ 
b/scenarios/kube/pod-network-chaos.yml @@ -0,0 +1,17 @@ +- id: pod_network_chaos + image: "quay.io/krkn-chaos/krkn-network-chaos:latest" + wait_duration: 1 + test_duration: 60 + label_selector: "" + service_account: "" + taints: [] + namespace: 'default' + instance_count: 1 + target: "" + execution: parallel + interfaces: [] + ingress: true + egress: true + latency: 0s # supported units are us (microseconds), ms, s + loss: 10 # percentage + bandwidth: 1gbit #supported units are bit kbit mbit gbit tbit \ No newline at end of file diff --git a/scenarios/kube/pod-network-filter.yml b/scenarios/kube/pod-network-filter.yml index 1d6ca246..bc3f9711 100644 --- a/scenarios/kube/pod-network-filter.yml +++ b/scenarios/kube/pod-network-filter.yml @@ -4,7 +4,7 @@ test_duration: 60 label_selector: "" service_account: "" - taints: [] # example ["node-role.kubernetes.io/master:NoSchedule"] + taints: [] namespace: 'default' instance_count: 1 execution: parallel diff --git a/tests/test_node_network_chaos.py b/tests/test_node_network_chaos.py new file mode 100644 index 00000000..51045303 --- /dev/null +++ b/tests/test_node_network_chaos.py @@ -0,0 +1,492 @@ +#!/usr/bin/env python3 + +""" +Test suite for NodeNetworkChaosModule class + +Usage: + python -m coverage run -a -m unittest tests/test_node_network_chaos.py -v + +Assisted By: Claude Code +""" + +import unittest +import queue +from unittest.mock import MagicMock, patch, call + +from krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos import ( + NodeNetworkChaosModule, +) +from krkn.scenario_plugins.network_chaos_ng.models import ( + NetworkChaosConfig, + NetworkChaosScenarioType, +) + + +class TestNodeNetworkChaosModule(unittest.TestCase): + + def setUp(self): + """ + Set up test fixtures for NodeNetworkChaosModule + """ + self.mock_kubecli = MagicMock() + self.mock_kubernetes = MagicMock() + self.mock_kubecli.get_lib_kubernetes.return_value = self.mock_kubernetes + + self.config = NetworkChaosConfig( + 
id="test-node-network-chaos", + image="test-image", + wait_duration=1, + test_duration=30, + label_selector="", + service_account="", + taints=[], + namespace="default", + instance_count=1, + target="worker-1", + execution="parallel", + interfaces=["eth0"], + ingress=True, + egress=True, + latency="100ms", + loss="10", + bandwidth="100mbit", + force=False, + ) + + self.module = NodeNetworkChaosModule(self.config, self.mock_kubecli) + + def test_initialization(self): + """ + Test NodeNetworkChaosModule initialization + """ + self.assertEqual(self.module.config, self.config) + self.assertEqual(self.module.kubecli, self.mock_kubecli) + self.assertEqual(self.module.base_network_config, self.config) + + def test_get_config(self): + """ + Test get_config returns correct scenario type and config + """ + scenario_type, config = self.module.get_config() + + self.assertEqual(scenario_type, NetworkChaosScenarioType.Node) + self.assertEqual(config, self.config) + + def test_get_targets_with_target_name(self): + """ + Test get_targets with specific node target name + """ + self.config.label_selector = "" + self.config.target = "worker-1" + self.mock_kubernetes.list_nodes.return_value = ["worker-1", "worker-2"] + + targets = self.module.get_targets() + + self.assertEqual(targets, ["worker-1"]) + + def test_get_targets_with_label_selector(self): + """ + Test get_targets with label selector + """ + self.config.label_selector = "node-role.kubernetes.io/worker=" + self.mock_kubernetes.list_nodes.return_value = ["worker-1", "worker-2"] + + targets = self.module.get_targets() + + self.assertEqual(targets, ["worker-1", "worker-2"]) + self.mock_kubernetes.list_nodes.assert_called_once_with( + "node-role.kubernetes.io/worker=" + ) + + def test_get_targets_node_not_found(self): + """ + Test get_targets raises exception when node doesn't exist + """ + self.config.label_selector = "" + self.config.target = "non-existent-node" + self.mock_kubernetes.list_nodes.return_value = ["worker-1", 
"worker-2"] + + with self.assertRaises(Exception) as context: + self.module.get_targets() + + self.assertIn("not found", str(context.exception)) + + def test_get_targets_no_target_or_selector(self): + """ + Test get_targets raises exception when neither target nor selector specified + """ + self.config.label_selector = "" + self.config.target = "" + + with self.assertRaises(Exception) as context: + self.module.get_targets() + + self.assertIn("neither", str(context.exception)) + + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos.node_qdisc_is_simple" + ) + @patch("krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos.time.sleep") + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos.common_delete_limit_rules" + ) + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos.common_set_limit_rules" + ) + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos.setup_network_chaos_ng_scenario" + ) + @patch("krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos.log_info") + def test_run_success( + self, + mock_log_info, + mock_setup, + mock_set_rules, + mock_delete_rules, + mock_sleep, + mock_qdisc_is_simple, + ): + """ + Test successful run of node network chaos + """ + # Mock setup returns container_ids and interfaces + mock_setup.return_value = (["container-123"], ["eth0"]) + + # Mock qdisc check - simple qdisc + mock_qdisc_is_simple.return_value = True + + self.module.run("worker-1") + + # Verify setup was called with node name + mock_setup.assert_called_once() + setup_args = mock_setup.call_args[0] + # Node name should be passed as target and is_node=True (8th arg, index 7) + self.assertEqual(setup_args[7], True) # is_node flag + + # Verify qdisc was checked + mock_qdisc_is_simple.assert_called_once() + + # Verify tc rules were set (with pids=None for node scenario) + mock_set_rules.assert_called_once() + set_call_args = mock_set_rules.call_args + # 
# Shorthand for the module under test; every patch target below hangs off it.
_NODE_MOD = "krkn.scenario_plugins.network_chaos_ng.modules.node_network_chaos"


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_error")
@patch(_NODE_MOD + ".log_info")
def test_run_no_interfaces_detected(
    self, mock_log_info, mock_log_error, mock_setup, mock_qdisc_is_simple
):
    """run() must log an error when interface auto-detection yields nothing."""
    # Setup succeeds but detects no interfaces on the node.
    mock_setup.return_value = (["container-123"], [])
    # An empty list makes the module fall back to auto-detection.
    self.config.interfaces = []

    self.module.run("worker-1")

    mock_log_error.assert_called()
    self.assertIn("no network interface", str(mock_log_error.call_args))


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_warning")
@patch(_NODE_MOD + ".log_info")
def test_run_complex_qdisc_without_force(
    self, mock_log_info, mock_log_warning, mock_setup, mock_qdisc_is_simple
):
    """A pre-existing non-trivial qdisc aborts the chaos when force=False."""
    mock_setup.return_value = (["container-123"], ["eth0"])
    mock_qdisc_is_simple.return_value = False
    self.config.force = False

    self.module.run("worker-1")

    mock_log_warning.assert_called()
    self.assertIn("already has tc rules", str(mock_log_warning.call_args))
    # The helper pod must still be cleaned up even though chaos was skipped.
    self.assertEqual(self.mock_kubernetes.delete_pod.call_count, 1)


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".time.sleep")
@patch(_NODE_MOD + ".common_delete_limit_rules")
@patch(_NODE_MOD + ".common_set_limit_rules")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_warning")
@patch(_NODE_MOD + ".log_info")
def test_run_complex_qdisc_with_force(
    self,
    mock_log_info,
    mock_log_warning,
    mock_setup,
    mock_set_rules,
    mock_delete_rules,
    mock_sleep,
    mock_qdisc_is_simple,
):
    """With force=True chaos proceeds over a complex qdisc after a warning."""
    mock_setup.return_value = (["container-123"], ["eth0"])
    mock_qdisc_is_simple.return_value = False
    self.config.force = True

    self.module.run("worker-1")

    mock_log_warning.assert_called()
    self.assertIn("forcing", str(mock_log_warning.call_args))

    # The 10-second safety pause must appear among the sleep calls.
    slept = [c[0][0] for c in mock_sleep.call_args_list]
    self.assertIn(10, slept)

    mock_set_rules.assert_called_once()
    mock_delete_rules.assert_called_once()


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".time.sleep")
@patch(_NODE_MOD + ".common_delete_limit_rules")
@patch(_NODE_MOD + ".common_set_limit_rules")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_info")
def test_run_uses_configured_interfaces(
    self,
    mock_log_info,
    mock_setup,
    mock_set_rules,
    mock_delete_rules,
    mock_sleep,
    mock_qdisc_is_simple,
):
    """Explicitly configured interfaces win over the auto-detected ones."""
    mock_setup.return_value = (["container-123"], ["eth0", "eth1"])
    mock_qdisc_is_simple.return_value = True
    self.config.interfaces = ["eth2"]

    self.module.run("worker-1")

    # Third positional argument of common_set_limit_rules is the interface list.
    self.assertEqual(mock_set_rules.call_args[0][2], ["eth2"])


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_info")
def test_run_with_error_queue(self, mock_log_info, mock_setup, mock_qdisc_is_simple):
    """In parallel mode failures land on the queue instead of being raised."""
    mock_setup.side_effect = Exception("Test error")

    errors = queue.Queue()
    self.module.run("worker-1", errors)

    self.assertFalse(errors.empty())
    self.assertEqual(errors.get(), "Test error")


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".time.sleep")
@patch(_NODE_MOD + ".common_delete_limit_rules")
@patch(_NODE_MOD + ".common_set_limit_rules")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_info")
def test_run_ingress_egress_flags(
    self,
    mock_log_info,
    mock_setup,
    mock_set_rules,
    mock_delete_rules,
    mock_sleep,
    mock_qdisc_is_simple,
):
    """egress/ingress config flags are forwarded as the first two arguments."""
    mock_setup.return_value = (["container-123"], ["eth0"])
    mock_qdisc_is_simple.return_value = True
    self.config.ingress = False
    self.config.egress = True

    self.module.run("worker-1")

    # Both the set and delete helpers take (egress, ingress, ...) positionally.
    for rules_mock in (mock_set_rules, mock_delete_rules):
        args = rules_mock.call_args[0]
        self.assertEqual(args[0], True)  # egress
        self.assertEqual(args[1], False)  # ingress


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_warning")
@patch(_NODE_MOD + ".log_info")
def test_run_mixed_simple_and_complex_qdisc(
    self, mock_log_info, mock_log_warning, mock_setup, mock_qdisc_is_simple
):
    """One complex qdisc among several interfaces is enough to trigger the warning."""
    mock_setup.return_value = (["container-123"], ["eth0", "eth1"])
    self.config.interfaces = []
    self.config.force = False
    # eth0 reports a simple qdisc, eth1 a complex one.
    mock_qdisc_is_simple.side_effect = [True, False]

    self.module.run("worker-1")

    mock_log_warning.assert_called()
    self.assertIn("already has tc rules", str(mock_log_warning.call_args))


@patch(_NODE_MOD + ".node_qdisc_is_simple")
@patch(_NODE_MOD + ".time.sleep")
@patch(_NODE_MOD + ".common_delete_limit_rules")
@patch(_NODE_MOD + ".common_set_limit_rules")
@patch(_NODE_MOD + ".setup_network_chaos_ng_scenario")
@patch(_NODE_MOD + ".log_info")
def test_run_checks_qdisc_for_all_interfaces(
    self,
    mock_log_info,
    mock_setup,
    mock_set_rules,
    mock_delete_rules,
    mock_sleep,
    mock_qdisc_is_simple,
):
    """Every detected interface must get its own qdisc inspection."""
    mock_setup.return_value = (["container-123"], ["eth0", "eth1", "eth2"])
    self.config.interfaces = []
    mock_qdisc_is_simple.return_value = True

    self.module.run("worker-1")

    self.assertEqual(mock_qdisc_is_simple.call_count, 3)
interfaces + mock_setup.return_value = (["container-123"], ["eth0", "eth1", "eth2"]) + + # Set config to use detected interfaces + self.config.interfaces = [] + + # All interfaces simple + mock_qdisc_is_simple.return_value = True + + self.module.run("worker-1") + + # Verify qdisc was checked for all 3 interfaces + self.assertEqual(mock_qdisc_is_simple.call_count, 3) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_pod_network_chaos.py b/tests/test_pod_network_chaos.py new file mode 100644 index 00000000..d49c7b59 --- /dev/null +++ b/tests/test_pod_network_chaos.py @@ -0,0 +1,451 @@ +#!/usr/bin/env python3 + +""" +Test suite for PodNetworkChaosModule class + +Usage: + python -m coverage run -a -m unittest tests/test_pod_network_chaos.py -v + +Assisted By: Claude Code +""" + +import unittest +import queue +from unittest.mock import MagicMock, patch, call +from dataclasses import dataclass + +from krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos import ( + PodNetworkChaosModule, +) +from krkn.scenario_plugins.network_chaos_ng.models import ( + NetworkChaosConfig, + NetworkChaosScenarioType, +) + + +class TestPodNetworkChaosModule(unittest.TestCase): + + def setUp(self): + """ + Set up test fixtures for PodNetworkChaosModule + """ + self.mock_kubecli = MagicMock() + self.mock_kubernetes = MagicMock() + self.mock_kubecli.get_lib_kubernetes.return_value = self.mock_kubernetes + + self.config = NetworkChaosConfig( + id="test-pod-network-chaos", + image="test-image", + wait_duration=1, + test_duration=30, + label_selector="", + service_account="", + taints=[], + namespace="default", + instance_count=1, + target="test-pod", + execution="parallel", + interfaces=["eth0"], + ingress=True, + egress=True, + latency="100ms", + loss="10", + bandwidth="100mbit", + ) + + self.module = PodNetworkChaosModule(self.config, self.mock_kubecli) + + def test_initialization(self): + """ + Test PodNetworkChaosModule 
def test_initialization(self):
    """Constructor keeps references to the config and the kube client as-is."""
    self.assertEqual(self.module.config, self.config)
    self.assertEqual(self.module.kubecli, self.mock_kubecli)
    self.assertEqual(self.module.base_network_config, self.config)

def test_get_config(self):
    """get_config() reports a Pod scenario plus the original config object."""
    kind, cfg = self.module.get_config()
    self.assertEqual(kind, NetworkChaosScenarioType.Pod)
    self.assertEqual(cfg, self.config)

def test_get_targets_with_target_name(self):
    """A named target is verified for existence and returned as the only hit."""
    self.config.label_selector = ""
    self.config.target = "test-pod"
    self.mock_kubernetes.check_if_pod_exists.return_value = True

    self.assertEqual(self.module.get_targets(), ["test-pod"])
    self.mock_kubernetes.check_if_pod_exists.assert_called_once_with(
        "test-pod", "default"
    )

def test_get_targets_with_label_selector(self):
    """A label selector resolves targets through list_pods."""
    self.config.label_selector = "app=nginx"
    self.mock_kubernetes.list_pods.return_value = ["pod1", "pod2", "pod3"]

    self.assertEqual(self.module.get_targets(), ["pod1", "pod2", "pod3"])
    self.mock_kubernetes.list_pods.assert_called_once_with("default", "app=nginx")

def test_get_targets_pod_not_found(self):
    """A missing named pod raises instead of returning an empty target list."""
    self.config.label_selector = ""
    self.config.target = "non-existent-pod"
    self.mock_kubernetes.check_if_pod_exists.return_value = False

    with self.assertRaises(Exception) as ctx:
        self.module.get_targets()
    self.assertIn("not found", str(ctx.exception))

def test_get_targets_no_namespace(self):
    """Target resolution refuses to run without a namespace."""
    self.config.namespace = None

    with self.assertRaises(Exception) as ctx:
        self.module.get_targets()
    self.assertIn("namespace not specified", str(ctx.exception))

def test_get_targets_no_target_or_selector(self):
    """Neither a target name nor a selector configured -> hard failure."""
    self.config.label_selector = ""
    self.config.target = ""

    with self.assertRaises(Exception) as ctx:
        self.module.get_targets()
    self.assertIn("neither", str(ctx.exception))

@patch("krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.time.sleep")
@patch(
    "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.common_delete_limit_rules"
)
@patch(
    "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.common_set_limit_rules"
)
@patch(
    "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.setup_network_chaos_ng_scenario"
)
@patch("krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.log_info")
def test_run_success(
    self, mock_log_info, mock_setup, mock_set_rules, mock_delete_rules, mock_sleep
):
    """Happy path: rules set, duration waited, rules removed, helper pod deleted."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    # Setup resolves one container and one interface; one pid backs the pod.
    mock_setup.return_value = (["container-123"], ["eth0"])
    self.mock_kubernetes.get_pod_pids.return_value = ["1234"]

    self.module.run("test-pod")

    self.mock_kubernetes.get_pod_info.assert_called_once_with("test-pod", "default")
    mock_setup.assert_called_once()
    self.mock_kubernetes.get_pod_pids.assert_called_once()
    mock_set_rules.assert_called_once()
    # The module sleeps for the configured 30-second test duration.
    mock_sleep.assert_called_once_with(30)
    mock_delete_rules.assert_called_once()
    self.assertEqual(self.mock_kubernetes.delete_pod.call_count, 1)
# Shorthand for the module under test; every patch target below hangs off it.
_POD_MOD = "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos"


@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_info")
def test_run_pod_info_not_found(self, mock_log_info, mock_setup):
    """run() fails fast when the target pod's info cannot be fetched."""
    self.mock_kubernetes.get_pod_info.return_value = None

    with self.assertRaises(Exception) as ctx:
        self.module.run("test-pod")
    self.assertIn("impossible to retrieve infos", str(ctx.exception))


@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_error")
@patch(_POD_MOD + ".log_info")
def test_run_no_interfaces_detected(self, mock_log_info, mock_log_error, mock_setup):
    """With auto-detection enabled, zero detected interfaces logs an error."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    # Setup succeeds but finds no interfaces.
    mock_setup.return_value = (["container-123"], [])
    # Empty list -> the module falls back to auto-detection.
    self.config.interfaces = []

    self.module.run("test-pod")

    mock_log_error.assert_called()
    self.assertIn("no network interface", str(mock_log_error.call_args))


@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_info")
def test_run_no_container_id(self, mock_log_info, mock_setup):
    """An empty container-id list from setup is a fatal error."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    mock_setup.return_value = ([], ["eth0"])

    with self.assertRaises(Exception) as ctx:
        self.module.run("test-pod")
    self.assertIn("impossible to resolve container id", str(ctx.exception))


@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_info")
def test_run_no_pids(self, mock_log_info, mock_setup):
    """A pod whose pids cannot be resolved is a fatal error."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    mock_setup.return_value = (["container-123"], ["eth0"])
    self.mock_kubernetes.get_pod_pids.return_value = []

    with self.assertRaises(Exception) as ctx:
        self.module.run("test-pod")
    self.assertIn("impossible to resolve pid", str(ctx.exception))


@patch(_POD_MOD + ".time.sleep")
@patch(_POD_MOD + ".common_delete_limit_rules")
@patch(_POD_MOD + ".common_set_limit_rules")
@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_info")
def test_run_uses_configured_interfaces(
    self, mock_log_info, mock_setup, mock_set_rules, mock_delete_rules, mock_sleep
):
    """Explicitly configured interfaces win over the auto-detected ones."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    mock_setup.return_value = (["container-123"], ["eth0", "eth1"])
    self.mock_kubernetes.get_pod_pids.return_value = ["1234"]
    self.config.interfaces = ["eth2"]

    self.module.run("test-pod")

    # Third positional argument of common_set_limit_rules is the interface list.
    self.assertEqual(mock_set_rules.call_args[0][2], ["eth2"])


@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_info")
def test_run_with_error_queue(self, mock_log_info, mock_setup):
    """In parallel mode failures land on the queue instead of being raised."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    mock_setup.side_effect = Exception("Test error")

    errors = queue.Queue()
    self.module.run("test-pod", errors)

    self.assertFalse(errors.empty())
    self.assertEqual(errors.get(), "Test error")


@patch(_POD_MOD + ".time.sleep")
@patch(_POD_MOD + ".common_delete_limit_rules")
@patch(_POD_MOD + ".common_set_limit_rules")
@patch(_POD_MOD + ".setup_network_chaos_ng_scenario")
@patch(_POD_MOD + ".log_info")
def test_run_passes_correct_pids(
    self, mock_log_info, mock_setup, mock_set_rules, mock_delete_rules, mock_sleep
):
    """The resolved pod pids must flow into both the set and delete helpers."""
    pod_info = MagicMock()
    pod_info.nodeName = "worker-1"
    self.mock_kubernetes.get_pod_info.return_value = pod_info
    mock_setup.return_value = (["container-123"], ["eth0"])
    pids = ["1234", "5678"]
    self.mock_kubernetes.get_pod_pids.return_value = pids

    self.module.run("test-pod")

    # pids travel as the last positional argument to set, index 6 to delete.
    self.assertEqual(mock_set_rules.call_args[0][-1], pids)
    self.assertEqual(mock_delete_rules.call_args[0][6], pids)
mock_pod_info = MagicMock() + mock_pod_info.nodeName = "worker-1" + self.mock_kubernetes.get_pod_info.return_value = mock_pod_info + + # Mock setup + mock_setup.return_value = (["container-123"], ["eth0"]) + + # Mock get_pod_pids + test_pids = ["1234", "5678"] + self.mock_kubernetes.get_pod_pids.return_value = test_pids + + self.module.run("test-pod") + + # Verify set_rules was called with pids + set_call_args = mock_set_rules.call_args + # pids is the last positional argument + self.assertEqual(set_call_args[0][-1], test_pids) + + # Verify delete_rules was called with pids + delete_call_args = mock_delete_rules.call_args + # pids is argument at index 6 + self.assertEqual(delete_call_args[0][6], test_pids) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.time.sleep") + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.common_delete_limit_rules" + ) + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.common_set_limit_rules" + ) + @patch( + "krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.setup_network_chaos_ng_scenario" + ) + @patch("krkn.scenario_plugins.network_chaos_ng.modules.pod_network_chaos.log_info") + def test_run_ingress_egress_flags( + self, + mock_log_info, + mock_setup, + mock_set_rules, + mock_delete_rules, + mock_sleep, + ): + """ + Test run passes ingress and egress flags correctly + """ + # Mock pod info + mock_pod_info = MagicMock() + mock_pod_info.nodeName = "worker-1" + self.mock_kubernetes.get_pod_info.return_value = mock_pod_info + + # Mock setup + mock_setup.return_value = (["container-123"], ["eth0"]) + + # Mock get_pod_pids + self.mock_kubernetes.get_pod_pids.return_value = ["1234"] + + # Set specific ingress/egress config + self.config.ingress = False + self.config.egress = True + + self.module.run("test-pod") + + # Verify set_rules was called with correct egress/ingress flags + set_call_args = mock_set_rules.call_args + # egress is 1st arg 
(index 0), ingress is 2nd arg (index 1) + self.assertEqual(set_call_args[0][0], True) # egress + self.assertEqual(set_call_args[0][1], False) # ingress + + # Verify delete_rules was called with correct flags + delete_call_args = mock_delete_rules.call_args + self.assertEqual(delete_call_args[0][0], True) # egress + self.assertEqual(delete_call_args[0][1], False) # ingress + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_utils_network_chaos.py b/tests/test_utils_network_chaos.py new file mode 100644 index 00000000..28385fd2 --- /dev/null +++ b/tests/test_utils_network_chaos.py @@ -0,0 +1,599 @@ +#!/usr/bin/env python3 + +""" +Test suite for utils_network_chaos module + +Usage: + python -m coverage run -a -m unittest tests/test_utils_network_chaos.py -v + +Assisted By: Claude Code +""" + +import unittest +from unittest.mock import MagicMock, patch, call + +from krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos import ( + get_build_tc_tree_commands, + namespaced_tc_commands, + get_egress_shaping_comand, + get_clear_egress_shaping_commands, + get_ingress_shaping_commands, + get_clear_ingress_shaping_commands, + node_qdisc_is_simple, + common_set_limit_rules, + common_delete_limit_rules, + ROOT_HANDLE, + CLASS_ID, + NETEM_HANDLE, +) + + +class TestBuildTcTreeCommands(unittest.TestCase): + + def test_build_tc_tree_single_interface(self): + """ + Test building tc tree commands for a single interface + """ + devices = ["eth0"] + result = get_build_tc_tree_commands(devices) + + self.assertEqual(len(result), 3) + self.assertIn("tc qdisc add dev eth0 root handle 100: htb default 1", result) + self.assertIn( + "tc class add dev eth0 parent 100: classid 100:1 htb rate 1gbit", result + ) + self.assertIn( + "tc qdisc add dev eth0 parent 100:1 handle 101: netem delay 0ms loss 0%", + result, + ) + + def test_build_tc_tree_multiple_interfaces(self): + """ + Test building tc tree commands for multiple interfaces + 
""" + devices = ["eth0", "eth1"] + result = get_build_tc_tree_commands(devices) + + self.assertEqual(len(result), 6) + # Verify commands for eth0 + self.assertIn("tc qdisc add dev eth0 root handle 100: htb default 1", result) + # Verify commands for eth1 + self.assertIn("tc qdisc add dev eth1 root handle 100: htb default 1", result) + + def test_build_tc_tree_empty_list(self): + """ + Test building tc tree commands with empty device list + """ + devices = [] + result = get_build_tc_tree_commands(devices) + + self.assertEqual(len(result), 0) + + +class TestNamespacedTcCommands(unittest.TestCase): + + def test_namespaced_commands_single_pid(self): + """ + Test wrapping commands with nsenter for single pid + """ + pids = ["1234"] + commands = ["tc qdisc add dev eth0 root handle 100: htb"] + result = namespaced_tc_commands(pids, commands) + + self.assertEqual(len(result), 1) + self.assertEqual( + result[0], + "nsenter --target 1234 --net -- tc qdisc add dev eth0 root handle 100: htb", + ) + + def test_namespaced_commands_multiple_pids(self): + """ + Test wrapping commands with nsenter for multiple pids + """ + pids = ["1234", "5678"] + commands = ["tc qdisc add dev eth0 root handle 100: htb"] + result = namespaced_tc_commands(pids, commands) + + self.assertEqual(len(result), 2) + self.assertIn( + "nsenter --target 1234 --net -- tc qdisc add dev eth0 root handle 100: htb", + result, + ) + self.assertIn( + "nsenter --target 5678 --net -- tc qdisc add dev eth0 root handle 100: htb", + result, + ) + + def test_namespaced_commands_multiple_pids_and_commands(self): + """ + Test wrapping multiple commands for multiple pids + """ + pids = ["1234", "5678"] + commands = ["tc qdisc add dev eth0 root", "tc class add dev eth0"] + result = namespaced_tc_commands(pids, commands) + + self.assertEqual(len(result), 4) + + +class TestEgressShapingCommands(unittest.TestCase): + + def test_egress_shaping_with_all_params(self): + """ + Test egress shaping commands with bandwidth, latency 
and loss + """ + devices = ["eth0"] + result = get_egress_shaping_comand(devices, "100", "50", "10") + + self.assertEqual(len(result), 2) + self.assertIn( + "tc class change dev eth0 parent 100: classid 100:1 htb rate 100mbit", + result, + ) + self.assertIn( + "tc qdisc change dev eth0 parent 100:1 handle 101: netem delay 50ms loss 10%", + result, + ) + + def test_egress_shaping_with_defaults(self): + """ + Test egress shaping commands with None values defaults to 1gbit, 0ms, 0% + """ + devices = ["eth0"] + result = get_egress_shaping_comand(devices, None, None, None) + + self.assertEqual(len(result), 2) + self.assertIn( + "tc class change dev eth0 parent 100: classid 100:1 htb rate 1gbit", result + ) + self.assertIn( + "tc qdisc change dev eth0 parent 100:1 handle 101: netem delay 0ms loss 0%", + result, + ) + + def test_egress_shaping_multiple_interfaces(self): + """ + Test egress shaping for multiple interfaces + """ + devices = ["eth0", "eth1"] + result = get_egress_shaping_comand(devices, "100", "50", "10") + + self.assertEqual(len(result), 4) + + +class TestClearEgressShapingCommands(unittest.TestCase): + + def test_clear_egress_single_interface(self): + """ + Test clear egress shaping for single interface + """ + devices = ["eth0"] + result = get_clear_egress_shaping_commands(devices) + + self.assertEqual(len(result), 1) + self.assertIn("tc qdisc del dev eth0 root handle 100:", result) + + def test_clear_egress_multiple_interfaces(self): + """ + Test clear egress shaping for multiple interfaces + """ + devices = ["eth0", "eth1"] + result = get_clear_egress_shaping_commands(devices) + + self.assertEqual(len(result), 2) + self.assertIn("tc qdisc del dev eth0 root handle 100:", result) + self.assertIn("tc qdisc del dev eth1 root handle 100:", result) + + +class TestIngressShapingCommands(unittest.TestCase): + + def test_ingress_shaping_with_all_params(self): + """ + Test ingress shaping commands with bandwidth, latency and loss + """ + devices = ["eth0"] + 
result = get_ingress_shaping_commands(devices, "100", "50ms", "10") + + # Should have: modprobe, ip link add, ip link set, tc qdisc add ingress, + # tc filter add, tc qdisc add root, tc class add, tc qdisc add netem + self.assertGreater(len(result), 7) + self.assertIn("modprobe ifb || true", result) + self.assertIn("ip link add ifb0 type ifb || true", result) + self.assertIn("ip link set ifb0 up || true", result) + self.assertIn("tc qdisc add dev eth0 handle ffff: ingress || true", result) + # Check that bandwidth, latency, loss are in commands + self.assertTrue(any("100" in cmd for cmd in result)) + self.assertTrue(any("50ms" in cmd for cmd in result)) + self.assertTrue(any("10" in cmd for cmd in result)) + + def test_ingress_shaping_with_defaults(self): + """ + Test ingress shaping with None values uses defaults + """ + devices = ["eth0"] + result = get_ingress_shaping_commands(devices, None, None, None) + + self.assertGreater(len(result), 7) + # Should use 1gbit, 0ms, 0% as defaults + self.assertTrue(any("1gbit" in cmd for cmd in result)) + self.assertTrue(any("0ms" in cmd for cmd in result)) + self.assertTrue(any("0%" in cmd for cmd in result)) + + def test_ingress_shaping_custom_ifb_device(self): + """ + Test ingress shaping with custom ifb device name + """ + devices = ["eth0"] + result = get_ingress_shaping_commands(devices, "100", "50ms", "10", "ifb1") + + self.assertIn("ip link add ifb1 type ifb || true", result) + self.assertIn("ip link set ifb1 up || true", result) + + +class TestClearIngressShapingCommands(unittest.TestCase): + + def test_clear_ingress_single_interface(self): + """ + Test clear ingress shaping for single interface + """ + devices = ["eth0"] + result = get_clear_ingress_shaping_commands(devices) + + self.assertGreater(len(result), 3) + self.assertIn("tc qdisc del dev eth0 ingress || true", result) + self.assertIn("tc qdisc del dev ifb0 root handle 100: || true", result) + self.assertIn("ip link set ifb0 down || true", result) + 
self.assertIn("ip link del ifb0 || true", result) + + def test_clear_ingress_multiple_interfaces(self): + """ + Test clear ingress shaping for multiple interfaces + """ + devices = ["eth0", "eth1"] + result = get_clear_ingress_shaping_commands(devices) + + self.assertIn("tc qdisc del dev eth0 ingress || true", result) + self.assertIn("tc qdisc del dev eth1 ingress || true", result) + + def test_clear_ingress_custom_ifb_device(self): + """ + Test clear ingress with custom ifb device + """ + devices = ["eth0"] + result = get_clear_ingress_shaping_commands(devices, "ifb1") + + self.assertIn("tc qdisc del dev ifb1 root handle 100: || true", result) + self.assertIn("ip link set ifb1 down || true", result) + self.assertIn("ip link del ifb1 || true", result) + + +class TestNodeQdiscIsSimple(unittest.TestCase): + + def test_node_qdisc_is_simple_with_simple_qdisc(self): + """ + Test node_qdisc_is_simple returns True for simple qdisc (e.g., pfifo_fast) + """ + mock_kubecli = MagicMock() + mock_kubecli.exec_cmd_in_pod.return_value = ( + "qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2" + ) + + result = node_qdisc_is_simple(mock_kubecli, "test-pod", "default", "eth0") + + self.assertTrue(result) + mock_kubecli.exec_cmd_in_pod.assert_called_once_with( + ["tc qdisc show dev eth0"], "test-pod", "default" + ) + + def test_node_qdisc_is_simple_with_htb(self): + """ + Test node_qdisc_is_simple returns False for htb qdisc + """ + mock_kubecli = MagicMock() + mock_kubecli.exec_cmd_in_pod.return_value = ( + "qdisc htb 100: root refcnt 2 r2q 10 default 1" + ) + + result = node_qdisc_is_simple(mock_kubecli, "test-pod", "default", "eth0") + + self.assertFalse(result) + + def test_node_qdisc_is_simple_with_netem(self): + """ + Test node_qdisc_is_simple returns False for netem qdisc + """ + mock_kubecli = MagicMock() + mock_kubecli.exec_cmd_in_pod.return_value = ( + "qdisc netem 101: root refcnt 2 limit 1000 delay 100ms" + ) + + result = node_qdisc_is_simple(mock_kubecli, 
"test-pod", "default", "eth0") + + self.assertFalse(result) + + def test_node_qdisc_is_simple_with_clsact(self): + """ + Test node_qdisc_is_simple returns False for clsact qdisc + """ + mock_kubecli = MagicMock() + mock_kubecli.exec_cmd_in_pod.return_value = "qdisc clsact ffff: parent ffff:fff1" + + result = node_qdisc_is_simple(mock_kubecli, "test-pod", "default", "eth0") + + self.assertFalse(result) + + def test_node_qdisc_is_simple_with_multiple_lines(self): + """ + Test node_qdisc_is_simple returns False when multiple qdisc lines exist + """ + mock_kubecli = MagicMock() + mock_kubecli.exec_cmd_in_pod.return_value = ( + "qdisc pfifo_fast 0: root\nqdisc htb 100: dev eth0" + ) + + result = node_qdisc_is_simple(mock_kubecli, "test-pod", "default", "eth0") + + self.assertFalse(result) + + def test_node_qdisc_is_simple_case_insensitive(self): + """ + Test node_qdisc_is_simple check is case insensitive + """ + mock_kubecli = MagicMock() + mock_kubecli.exec_cmd_in_pod.return_value = "qdisc HTB 100: root" + + result = node_qdisc_is_simple(mock_kubecli, "test-pod", "default", "eth0") + + self.assertFalse(result) + + +class TestCommonSetLimitRules(unittest.TestCase): + + def setUp(self): + """ + Set up mock kubecli for all tests + """ + self.mock_kubecli = MagicMock() + self.mock_kubecli.exec_cmd_in_pod.return_value = "" + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_set_egress_only(self, mock_log_info): + """ + Test setting egress rules only + """ + common_set_limit_rules( + egress=True, + ingress=False, + interfaces=["eth0"], + bandwidth="100", + latency="50", + loss="10", + parallel=False, + target="test-target", + kubecli=self.mock_kubecli, + network_chaos_pod_name="chaos-pod", + namespace="default", + pids=None, + ) + + # Should call exec_cmd_in_pod for egress rules (3 build + 2 shaping) + self.assertGreaterEqual(self.mock_kubecli.exec_cmd_in_pod.call_count, 5) + + 
@patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_set_ingress_only(self, mock_log_info): + """ + Test setting ingress rules only + """ + common_set_limit_rules( + egress=False, + ingress=True, + interfaces=["eth0"], + bandwidth="100", + latency="50", + loss="10", + parallel=False, + target="test-target", + kubecli=self.mock_kubecli, + network_chaos_pod_name="chaos-pod", + namespace="default", + pids=None, + ) + + # Should call exec_cmd_in_pod for ingress rules + self.assertGreater(self.mock_kubecli.exec_cmd_in_pod.call_count, 0) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_set_both_egress_and_ingress(self, mock_log_info): + """ + Test setting both egress and ingress rules + """ + common_set_limit_rules( + egress=True, + ingress=True, + interfaces=["eth0"], + bandwidth="100", + latency="50", + loss="10", + parallel=False, + target="test-target", + kubecli=self.mock_kubecli, + network_chaos_pod_name="chaos-pod", + namespace="default", + pids=None, + ) + + # Should call exec_cmd_in_pod for both egress and ingress + self.assertGreater(self.mock_kubecli.exec_cmd_in_pod.call_count, 10) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_set_with_pids(self, mock_log_info): + """ + Test setting rules with pids (namespace mode) + """ + common_set_limit_rules( + egress=True, + ingress=False, + interfaces=["eth0"], + bandwidth="100", + latency="50", + loss="10", + parallel=False, + target="test-target", + kubecli=self.mock_kubecli, + network_chaos_pod_name="chaos-pod", + namespace="default", + pids=["1234"], + ) + + # Verify that commands include nsenter + calls = self.mock_kubecli.exec_cmd_in_pod.call_args_list + self.assertTrue( + any("nsenter" in str(call) for call in calls), + "Expected nsenter commands when pids are provided", + ) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_error") + 
def test_set_with_command_failure(self, mock_log_error): + """ + Test handling of command failures + """ + # Simulate all commands failing + self.mock_kubecli.exec_cmd_in_pod.return_value = "error" + + common_set_limit_rules( + egress=True, + ingress=False, + interfaces=["eth0"], + bandwidth="100", + latency="50", + loss="10", + parallel=False, + target="test-target", + kubecli=self.mock_kubecli, + network_chaos_pod_name="chaos-pod", + namespace="default", + pids=None, + ) + + # Should log error when all commands fail + mock_log_error.assert_called() + + +class TestCommonDeleteLimitRules(unittest.TestCase): + + def setUp(self): + """ + Set up mock kubecli for all tests + """ + self.mock_kubecli = MagicMock() + self.mock_kubecli.exec_cmd_in_pod.return_value = "" + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_delete_egress_only(self, mock_log_info): + """ + Test deleting egress rules only + """ + common_delete_limit_rules( + egress=True, + ingress=False, + interfaces=["eth0"], + network_chaos_pod_name="chaos-pod", + network_chaos_namespace="default", + kubecli=self.mock_kubecli, + pids=None, + parallel=False, + target="test-target", + ) + + # Should call exec_cmd_in_pod for egress cleanup + self.assertGreater(self.mock_kubecli.exec_cmd_in_pod.call_count, 0) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_delete_ingress_only(self, mock_log_info): + """ + Test deleting ingress rules only + """ + common_delete_limit_rules( + egress=False, + ingress=True, + interfaces=["eth0"], + network_chaos_pod_name="chaos-pod", + network_chaos_namespace="default", + kubecli=self.mock_kubecli, + pids=None, + parallel=False, + target="test-target", + ) + + # Should call exec_cmd_in_pod for ingress cleanup + self.assertGreater(self.mock_kubecli.exec_cmd_in_pod.call_count, 0) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def 
test_delete_both_egress_and_ingress(self, mock_log_info): + """ + Test deleting both egress and ingress rules + """ + common_delete_limit_rules( + egress=True, + ingress=True, + interfaces=["eth0"], + network_chaos_pod_name="chaos-pod", + network_chaos_namespace="default", + kubecli=self.mock_kubecli, + pids=None, + parallel=False, + target="test-target", + ) + + # Should call exec_cmd_in_pod for both egress and ingress + self.assertGreater(self.mock_kubecli.exec_cmd_in_pod.call_count, 3) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_info") + def test_delete_with_pids(self, mock_log_info): + """ + Test deleting rules with pids (namespace mode) + """ + common_delete_limit_rules( + egress=True, + ingress=False, + interfaces=["eth0"], + network_chaos_pod_name="chaos-pod", + network_chaos_namespace="default", + kubecli=self.mock_kubecli, + pids=["1234"], + parallel=False, + target="test-target", + ) + + # Verify that commands include nsenter + calls = self.mock_kubecli.exec_cmd_in_pod.call_args_list + self.assertTrue( + any("nsenter" in str(call) for call in calls), + "Expected nsenter commands when pids are provided", + ) + + @patch("krkn.scenario_plugins.network_chaos_ng.modules.utils_network_chaos.log_error") + def test_delete_with_command_failure(self, mock_log_error): + """ + Test handling of command failures during deletion + """ + # Simulate all commands failing + self.mock_kubecli.exec_cmd_in_pod.return_value = "error" + + common_delete_limit_rules( + egress=True, + ingress=False, + interfaces=["eth0"], + network_chaos_pod_name="chaos-pod", + network_chaos_namespace="default", + kubecli=self.mock_kubecli, + pids=None, + parallel=False, + target="test-target", + ) + + # Should log error when all commands fail + mock_log_error.assert_called() + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file