mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-04-15 06:57:28 +00:00
* fix ibm Signed-off-by: Paige Patton <prubenda@redhat.com> * type hint fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * pod network chaos plugin structure + utils method refactoring Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * Pod network chaos plugin Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * Node network chaos plugin Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * default config files Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * config.yaml Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * all field optional Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * minor fixes Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * minor nit on config Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * utils unit tests Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * PodNetworkChaos unit tests Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * NodeNetworkChaos unit test Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * PodNetworkChaos functional test Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * NodeNetworkChaso functional test Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * added funtests to the gh action Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * unit test fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * changed test order + resource rename * functional tests fix smallchange Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> fix requirements Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * changed pod test target Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> * added kind port mapping and removed portforwarding Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> fix Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> test fixes Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> test fixes Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> --------- Signed-off-by: Paige Patton 
<prubenda@redhat.com> Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com> Co-authored-by: Paige Patton <prubenda@redhat.com>
165 lines
6.0 KiB
Bash
Executable File
165 lines
6.0 KiB
Bash
Executable File
# Functional test for the krkn node network chaos scenario.
#
# Shell options:
#   xtrace   (-x) print every command before it runs
#   errexit  (-e) abort on an unhandled command failure
#   errtrace (-E) let functions and subshells inherit the ERR trap
#   pipefail      a pipeline fails when any of its stages fails
set -o xtrace -o errexit -o errtrace -o pipefail

# Shared CI helpers. `error`, `finish` and `get_node` are not defined in this
# file, so they presumably come from here -- confirm against common.sh.
source CI/tests/common.sh

# `error` runs whenever a command fails; `finish` runs when the script exits.
trap error ERR
trap finish EXIT

#######################################
# Best-effort teardown of the resources the node network chaos test creates.
# Globals:
#   PORT_FORWARD_PID (read) - PID of the background port-forward, may be empty
# Arguments:
#   $1 - deployment name
#   $2 - service name
# Returns:
#   Status of the last kubectl delete.
#######################################
function _node_network_chaos_cleanup {
  if [[ -n "${PORT_FORWARD_PID:-}" ]]; then
    # The port-forward may already have exited; a failed kill is not an error.
    kill "$PORT_FORWARD_PID" 2>/dev/null || true
  fi
  kubectl delete deployment "$1" --ignore-not-found=true
  kubectl delete service "$2" --ignore-not-found=true
}

#######################################
# Probes the port-forwarded nginx endpoint on localhost:8091.
# Arguments:
#   $1 - curl --max-time value in seconds
# Outputs:
#   The HTTP status code, or exactly "000" when curl fails.
# Returns:
#   Always 0, so callers running under errexit are not aborted by a failed
#   probe.
#######################################
function _node_network_chaos_http_code {
  local code
  # curl already prints "000" through --write-out when the transfer fails, so
  # appending a fallback with `|| echo "000"` would produce "000000". Replace
  # the value instead of appending to it.
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time "$1" http://localhost:8091) || code="000"
  printf '%s\n' "$code"
}

#######################################
# Functional test for the node network chaos scenario.
#
# Deploys nginx pinned to one worker node, checks it answers through a local
# port-forward, runs krkn with the node network chaos scenario in the
# background while probing the endpoint, then verifies the service answers
# with HTTP 200 again once krkn has finished.
#
# Globals:
#   WORKER_NODE      (read)    - expected to be set by get_node
#   TARGET_NODE      (exported) - first entry of WORKER_NODE
#   POD_NAME         (exported) - name of the nginx pod
#   POD_NODE         (exported) - node the nginx pod was scheduled on
#   PORT_FORWARD_PID (written) - PID of the background kubectl port-forward
#   KRKN_PID         (written) - PID of the background krkn run
#   scenario_type, scenario_file, post_config (exported) - consumed by
#     envsubst when rendering CI/config/common_test_config.yaml
# Arguments:
#   None
# Outputs:
#   Progress and diagnostics on stdout.
# Returns:
#   Returns 0 on success. Exits the shell with status 1 when no worker node is
#   available, the pod is on the wrong node, the baseline check fails, or the
#   service does not recover. Test resources are removed on those explicit
#   failure paths as well as on success; a command aborted by errexit still
#   skips the teardown.
#######################################
function functional_test_node_network_chaos {
  local -r app="nginx-node-net-chaos"
  local -r service="nginx-node-net-chaos-svc"
  local -r scenario_path="scenarios/kube/node-network-chaos.yml"
  local -r krkn_config="CI/config/node_network_chaos_config.yaml"
  local -r max_baseline_attempts=10
  local -r max_recovery_attempts=10
  local response="000"
  local attempt
  local node_selector_patch
  local expression
  local baseline_start baseline_end baseline_latency
  local chaos_start chaos_end chaos_latency
  local chaos_test_count=0
  local chaos_success=0
  local recovery_attempts=0
  local krkn_status=0

  # Reset so a stale value inherited from the environment is never killed.
  PORT_FORWARD_PID=""

  echo "Starting node network chaos functional test"

  # Get a worker node
  get_node
  # Declaration and assignment are split for every exported variable so that
  # a failing command substitution is not masked by `export`.
  TARGET_NODE=$(awk 'NF { print $1; exit }' <<<"${WORKER_NODE:-}")
  export TARGET_NODE
  if [[ -z "$TARGET_NODE" ]]; then
    echo "ERROR: no worker node available (WORKER_NODE is empty)"
    exit 1
  fi
  echo "Target node: $TARGET_NODE"

  # Deploy nginx workload on the target node
  echo "Deploying nginx workload on $TARGET_NODE..."
  kubectl create deployment "$app" --image=nginx:latest

  # Add node selector to ensure pod runs on target node
  node_selector_patch="{\"spec\":{\"template\":{\"spec\":{\"nodeSelector\":{\"kubernetes.io/hostname\":\"${TARGET_NODE}\"}}}}}"
  kubectl patch deployment "$app" -p "$node_selector_patch"

  # Expose service
  kubectl expose deployment "$app" --port=80 --target-port=80 --name="$service"

  # Wait for nginx to be ready
  echo "Waiting for nginx pod to be ready on $TARGET_NODE..."
  kubectl wait --for=condition=ready pod -l "app=$app" --timeout=120s

  # Verify pod is on correct node
  POD_NAME=$(kubectl get pods -l "app=$app" -o jsonpath='{.items[0].metadata.name}')
  export POD_NAME
  POD_NODE=$(kubectl get pod "$POD_NAME" -o jsonpath='{.spec.nodeName}')
  export POD_NODE
  echo "Pod $POD_NAME is running on node $POD_NODE"

  if [[ "$POD_NODE" != "$TARGET_NODE" ]]; then
    echo "ERROR: Pod is not on target node (expected $TARGET_NODE, got $POD_NODE)"
    # Diagnostics are best-effort so the teardown below is always reached.
    kubectl get pods -l "app=$app" -o wide || true
    _node_network_chaos_cleanup "$app" "$service" || true
    exit 1
  fi

  # Setup port-forward to access nginx
  echo "Setting up port-forward to nginx service..."
  kubectl port-forward "service/$service" 8091:80 &
  PORT_FORWARD_PID=$!

  # Test baseline connectivity. The port-forward needs a moment to start, so
  # poll for a 200 instead of relying on a single fixed sleep.
  echo "Testing baseline connectivity..."
  for ((attempt = 1; attempt <= max_baseline_attempts; attempt++)); do
    response=$(_node_network_chaos_http_code 5)
    if [[ "$response" == "200" ]]; then
      break
    fi
    sleep 1
  done
  if [[ "$response" != "200" ]]; then
    echo "ERROR: Nginx not responding correctly (got $response, expected 200)"
    kubectl get pods -l "app=$app" || true
    kubectl describe pod "$POD_NAME" || true
    _node_network_chaos_cleanup "$app" "$service" || true
    exit 1
  fi
  echo "Baseline test passed: nginx responding with 200"

  # Measure baseline latency (informational only; nothing is asserted on it)
  echo "Measuring baseline latency..."
  baseline_start=$(date +%s%3N)
  curl -s http://localhost:8091 >/dev/null || true
  baseline_end=$(date +%s%3N)
  baseline_latency=$((baseline_end - baseline_start))
  echo "Baseline latency: ${baseline_latency}ms"

  # Configure node network chaos scenario
  echo "Configuring node network chaos scenario..."
  local -a scenario_edits=(
    ".[0].config.target=\"${TARGET_NODE}\""
    '.[0].config.namespace="default"'
    '.[0].config.test_duration=20'
    '.[0].config.latency="200ms"'
    '.[0].config.loss=15'
    '.[0].config.bandwidth="10mbit"'
    '.[0].config.ingress=true'
    '.[0].config.egress=true'
    '.[0].config.force=false'
    'del(.[0].config.interfaces)'
  )
  for expression in "${scenario_edits[@]}"; do
    yq -i "$expression" "$scenario_path"
  done

  # Prepare krkn config
  export scenario_type="network_chaos_ng_scenarios"
  export scenario_file="$scenario_path"
  export post_config=""
  envsubst <CI/config/common_test_config.yaml >"$krkn_config"

  # Run krkn in background
  echo "Starting krkn with node network chaos scenario..."
  python3 -m coverage run -a run_kraken.py -c "$krkn_config" &
  KRKN_PID=$!
  echo "Krkn started with PID: $KRKN_PID"

  # Wait for chaos to start (give it time to inject chaos)
  echo "Waiting for chaos injection to begin..."
  sleep 10

  # Test during chaos - check for increased latency or packet loss effects
  echo "Testing network behavior during chaos..."
  for attempt in {1..5}; do
    chaos_test_count=$((chaos_test_count + 1))
    chaos_start=$(date +%s%3N)
    response=$(_node_network_chaos_http_code 10)
    chaos_end=$(date +%s%3N)
    chaos_latency=$((chaos_end - chaos_start))

    echo "Attempt $attempt: HTTP $response, latency: ${chaos_latency}ms"

    # We expect either increased latency or some failures due to packet loss,
    # so both a 200 and a failed transfer ("000") count as processed.
    if [[ "$response" == "200" || "$response" == "000" ]]; then
      chaos_success=$((chaos_success + 1))
    fi

    sleep 2
  done

  echo "Chaos test results: $chaos_success/$chaos_test_count requests processed"

  # Verify node-level chaos affects pod
  # The node chaos should affect all pods on the node; this step only logs,
  # no assertion is made here.
  echo "Verifying node-level chaos affects pod on $TARGET_NODE..."

  # Wait for krkn to complete. Its exit status is reported but, as before,
  # does not fail the test on its own.
  echo "Waiting for krkn to complete..."
  wait "$KRKN_PID" || krkn_status=$?
  if ((krkn_status != 0)); then
    echo "WARNING: krkn exited with status $krkn_status"
  fi
  echo "Krkn completed"

  # Wait a bit for cleanup
  sleep 5

  # Verify recovery - nginx should respond normally again
  echo "Verifying service recovery..."
  while ((recovery_attempts < max_recovery_attempts)); do
    recovery_attempts=$((recovery_attempts + 1))
    response=$(_node_network_chaos_http_code 5)

    if [[ "$response" == "200" ]]; then
      echo "Recovery verified: nginx responding normally (attempt $recovery_attempts)"
      break
    fi

    echo "Recovery attempt $recovery_attempts/$max_recovery_attempts: got $response, retrying..."
    sleep 3
  done

  if [[ "$response" != "200" ]]; then
    echo "ERROR: Service did not recover after chaos (got $response)"
    kubectl get pods -l "app=$app" || true
    kubectl describe pod "$POD_NAME" || true
    _node_network_chaos_cleanup "$app" "$service" || true
    exit 1
  fi

  # Cleanup
  echo "Cleaning up test resources..."
  _node_network_chaos_cleanup "$app" "$service"

  echo "Node network chaos test: Success"
}

# Run the test when the script is executed. The stray trailing pipe is removed:
# a pipeline with no right-hand command at end of file is a syntax error.
functional_test_node_network_chaos