Mirror of https://github.com/krkn-chaos/krkn.git, synced 2026-02-14 09:59:59 +00:00
checking chunk error in ci tests (#937)
Signed-off-by: Paige Patton <prubenda@redhat.com>
.github/workflows/tests.yml (vendored, 4 lines changed)
@@ -102,7 +102,7 @@ jobs:
 echo "test_pod_network_filter" >> ./CI/tests/functional_tests
 echo "test_pod_server" >> ./CI/tests/functional_tests
 echo "test_node" >> ./CI/tests/functional_tests
-echo "test_pvc" >> ./CI/tests/functional_tests
+# echo "test_pvc" >> ./CI/tests/functional_tests

 # Push on main only steps + all other functional to collect coverage
 # for the badge
@@ -140,7 +140,7 @@ jobs:
 echo "test_pod_network_filter" >> ./CI/tests/functional_tests
 echo "test_pod_server" >> ./CI/tests/functional_tests
 echo "test_node" >> ./CI/tests/functional_tests
-echo "test_pvc" >> ./CI/tests/functional_tests
+# echo "test_pvc" >> ./CI/tests/functional_tests
 # Final common steps
 - name: Run Functional tests
 env:
@@ -16,8 +16,10 @@ function functional_test_container_crash {
 export post_config=""
 envsubst < CI/config/common_test_config.yaml > CI/config/container_config.yaml

-python3 -m coverage run -a run_kraken.py -c CI/config/container_config.yaml
+python3 -m coverage run -a run_kraken.py -c CI/config/container_config.yaml -d True
 echo "Container scenario test: Success"
+
+kubectl get pods -n kube-system -l component=etcd
 }

 functional_test_container_crash
@@ -11,7 +11,7 @@ function functional_test_customapp_pod_node_selector {
 export post_config=""
 envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml

-python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml
+python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml -d True
 echo "Pod disruption with node_label_selector test: Success"
 }
@@ -10,9 +10,11 @@ function functional_test_pod_crash {
 export scenario_file="scenarios/kind/pod_etcd.yml"
 export post_config=""
 envsubst < CI/config/common_test_config.yaml > CI/config/pod_config.yaml
+cat CI/config/pod_config.yaml

 python3 -m coverage run -a run_kraken.py -c CI/config/pod_config.yaml
 echo "Pod disruption scenario test: Success"
+date
+kubectl get pods -n kube-system -l component=etcd -o yaml
 }

 functional_test_pod_crash
@@ -1,6 +1,7 @@
 import logging
 import random
 import time
+import traceback
 from asyncio import Future
 import yaml
 from krkn_lib.k8s import KrknKubernetes
@@ -41,6 +42,7 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
 logging.info("ContainerScenarioPlugin failed with unrecovered containers")
 return 1
 except (RuntimeError, Exception) as e:
+logging.error("Stack trace:\n%s", traceback.format_exc())
 logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e)
 return 1
 else:
@@ -50,7 +52,6 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
 return ["container_scenarios"]

 def start_monitoring(self, kill_scenario: dict, lib_telemetry: KrknTelemetryOpenshift) -> Future:
-
 namespace_pattern = f"^{kill_scenario['namespace']}$"
 label_selector = kill_scenario["label_selector"]
 recovery_time = kill_scenario["expected_recovery_time"]
@@ -232,4 +233,5 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
 timer += 5
 logging.info("Waiting 5 seconds for containers to become ready")
 time.sleep(5)
+
 return killed_container_list
@@ -2,7 +2,7 @@ import logging
 import random
 import time
 from asyncio import Future
-
+import traceback
 import yaml
 from krkn_lib.k8s import KrknKubernetes
 from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label, \
@@ -74,6 +74,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 return 1

 except (RuntimeError, Exception) as e:
+logging.error("Stack trace:\n%s", traceback.format_exc())
 logging.error("PodDisruptionScenariosPlugin exiting due to Exception %s" % e)
 return 1
 else:
@@ -150,7 +151,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 field_selector=combined_field_selector
 )

-def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None, quiet: bool = False):
+def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None):
 if label_selector and name_pattern:
 logging.error('Only, one of name pattern or label pattern can be specified')
 return []
@@ -161,8 +162,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):

 # If specific node names are provided, make multiple calls with field selector
 if node_names:
-if not quiet:
-logging.info(f"Targeting pods on {len(node_names)} specific nodes")
+logging.debug(f"Targeting pods on {len(node_names)} specific nodes")
 all_pods = []
 for node_name in node_names:
 pods = self._select_pods_with_field_selector(
@@ -172,8 +172,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 if pods:
 all_pods.extend(pods)

-if not quiet:
-logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
+logging.debug(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
 return all_pods

 # Node label selector approach - use field selectors
@@ -181,11 +180,10 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 # Get nodes matching the label selector first
 nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector)
 if not nodes_with_label:
-logging.info(f"No nodes found with label selector: {node_label_selector}")
+logging.debug(f"No nodes found with label selector: {node_label_selector}")
 return []

-if not quiet:
-logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
+logging.debug(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
 # Use field selector for each node
 all_pods = []
 for node_name in nodes_with_label:
@@ -196,8 +194,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 if pods:
 all_pods.extend(pods)

-if not quiet:
-logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
+logging.debug(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
 return all_pods

 # Standard pod selection (no node targeting)
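Note on the node-targeting branches above: the plugin narrows pod selection per node with Kubernetes field selectors instead of one cluster-wide query. A minimal illustrative sketch of that idea using the plain kubernetes Python client (not krkn-lib's API; the function name, namespace and selectors below are placeholders):

# Illustrative only: per-node pod selection with a spec.nodeName field selector,
# mirroring the "multiple calls with field selector" comment in the diff above.
from kubernetes import client, config

def pods_on_nodes(namespace, label_selector, node_names):
    config.load_kube_config()  # or load_incluster_config() when running inside a cluster
    v1 = client.CoreV1Api()
    found = []
    for node in node_names:
        # Combine a Running-phase filter with the node name, one API call per node.
        field_selector = f"status.phase=Running,spec.nodeName={node}"
        resp = v1.list_namespaced_pod(
            namespace, label_selector=label_selector, field_selector=field_selector
        )
        found.extend(p.metadata.name for p in resp.items)
    return found

The per-node calls trade a few extra API requests for a server-side filter, so only pods scheduled on the targeted nodes ever cross the wire.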
@@ -207,11 +204,10 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):

 def killing_pods(self, config: InputParams, kubecli: KrknKubernetes):
-# region Select target pods
-
+try:
 namespace = config.namespace_pattern
 if not namespace:
 logging.error('Namespace pattern must be specified')
 return 2

 pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
 exclude_pods = set()
@@ -220,11 +216,12 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 for pod in _exclude_pods:
 exclude_pods.add(pod[0])

+
 pods_count = len(pods)
 if len(pods) < config.kill:
 logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
 config.kill, len(pods)))
-return 2
+return 1

 random.shuffle(pods)
 for i in range(config.kill):
@@ -236,8 +233,11 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 logging.info(f'Deleting pod {pod[0]}')
 kubecli.delete_pod(pod[0], pod[1])

-ret = self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
-return ret
+return_val = self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
+except Exception as e:
+raise(e)
+
+return return_val

 def wait_for_pods(
 self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
@@ -246,7 +246,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 start_time = datetime.now()

 while not timeout:
-pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
+pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names)
 if pod_count == len(pods):
 return 0
@@ -259,5 +259,4 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
 logging.error("timeout while waiting for pods to come up")
 return 1

-# should never get to this return
 return 0
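Note on wait_for_pods: it re-queries the matching pods until the original count is back or the timeout expires, returning 0 on recovery and 1 on timeout. A generic sketch of that polling pattern, independent of krkn (the check callback and timings are assumptions, not values from the plugin):

# Illustrative polling loop: re-check a condition until it holds or a deadline passes.
import time

def wait_until(check, timeout_s=120.0, interval_s=5.0):
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if check():          # e.g. lambda: len(get_running_pods()) == expected_count
            return 0         # recovered
        time.sleep(interval_s)
    return 1                 # timed out, mirroring the error return above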
@@ -16,7 +16,7 @@ google-cloud-compute==1.22.0
 ibm_cloud_sdk_core==3.18.0
 ibm_vpc==0.20.0
 jinja2==3.1.6
-krkn-lib==5.1.12
+krkn-lib==5.1.13
 lxml==5.1.0
 kubernetes==34.1.0
 numpy==1.26.4
@@ -2,6 +2,6 @@ pvc_scenario:
 pvc_name: kraken-test-pvc # Name of the target PVC
 pod_name: kraken-test-pod # Name of the pod where the PVC is mounted, it will be ignored if the pvc_name is defined
 namespace: kraken # Namespace where the PVC is
-fill_percentage: 38 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
+fill_percentage: 98 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
 duration: 10 # Duration in seconds for the fault
 block_size: 102400 # used only by dd if fallocate not present in the container
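Note on fill_percentage: the scenario must write enough data to move the volume from its current usage up to the target percentage, so the write size depends on PVC capacity and current usage. An illustrative arithmetic sketch (not the scenario's implementation; the capacity and usage figures are made up):

# How many bytes must be written to push a volume from its current usage
# to a target fill percentage. Values in the example call are placeholders.
def bytes_to_reach_target(capacity_bytes, used_bytes, fill_percentage):
    target_used = capacity_bytes * fill_percentage // 100
    if target_used <= used_bytes:
        raise ValueError("target percentage must be higher than current usage")
    return target_used - used_bytes

# Example: a 1 GiB PVC that is 40% full needs roughly 594 MiB more to reach 98%.
print(bytes_to_reach_target(1 << 30, (1 << 30) * 40 // 100, 98))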