mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-03-11 22:22:26 +00:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
543729b18a | ||
|
|
a0ea4dc749 | ||
|
|
a5459792ef | ||
|
|
d434bb26fa | ||
|
|
fee41d404e | ||
|
|
8663ee8893 | ||
|
|
a072f0306a | ||
|
|
8221392356 | ||
|
|
671fc581dd | ||
|
|
11508ce017 | ||
|
|
0d78139fb6 | ||
|
|
a3baffe8ee | ||
|
|
438b08fcd5 | ||
|
|
9b930a02a5 | ||
|
|
194e3b87ee | ||
|
|
8c05e44c23 | ||
|
|
88f8cf49f1 |
1
.github/CODEOWNERS
vendored
Normal file
1
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1 @@
|
||||
* @paigerube14 @tsebastiani @chaitanyaenr
|
||||
13
.github/workflows/release.yml
vendored
13
.github/workflows/release.yml
vendored
@@ -16,6 +16,7 @@ jobs:
|
||||
PREVIOUS_TAG=$(git tag --sort=-creatordate | sed -n '2 p')
|
||||
echo $PREVIOUS_TAG
|
||||
echo "PREVIOUS_TAG=$PREVIOUS_TAG" >> "$GITHUB_ENV"
|
||||
|
||||
- name: generate release notes from template
|
||||
id: release-notes
|
||||
env:
|
||||
@@ -45,3 +46,15 @@ jobs:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
gh release create ${{ github.ref_name }} --title "${{ github.ref_name }}" -F release-notes.md
|
||||
|
||||
- name: Install Syft
|
||||
run: |
|
||||
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sudo sh -s -- -b /usr/local/bin
|
||||
|
||||
- name: Generate SBOM
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
syft . --scope all-layers --output cyclonedx-json > sbom.json
|
||||
echo "SBOM generated successfully!"
|
||||
gh release upload ${{ github.ref_name }} sbom.json
|
||||
|
||||
17
.github/workflows/tests.yml
vendored
17
.github/workflows/tests.yml
vendored
@@ -16,14 +16,19 @@ jobs:
|
||||
uses: redhat-chaos/actions/kind@main
|
||||
- name: Deploy prometheus & Port Forwarding
|
||||
uses: redhat-chaos/actions/prometheus@main
|
||||
|
||||
- name: Deploy Elasticsearch
|
||||
with:
|
||||
ELASTIC_URL: ${{ vars.ELASTIC_URL }}
|
||||
ELASTIC_PORT: ${{ vars.ELASTIC_PORT }}
|
||||
ELASTIC_USER: ${{ vars.ELASTIC_USER }}
|
||||
ELASTIC_PASSWORD: ${{ vars.ELASTIC_PASSWORD }}
|
||||
ELASTIC_PORT: ${{ env.ELASTIC_PORT }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
uses: redhat-chaos/actions/elastic@main
|
||||
- name: Download elastic password
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: elastic_password_${{ github.run_id }}
|
||||
- name: Set elastic password on env
|
||||
run: |
|
||||
ELASTIC_PASSWORD=$(cat elastic_password.txt)
|
||||
echo "ELASTIC_PASSWORD=$ELASTIC_PASSWORD" >> "$GITHUB_ENV"
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
@@ -73,6 +78,7 @@ jobs:
|
||||
echo "test_app_outages" >> ./CI/tests/functional_tests
|
||||
echo "test_container" >> ./CI/tests/functional_tests
|
||||
echo "test_pod" >> ./CI/tests/functional_tests
|
||||
echo "test_customapp_pod" >> ./CI/tests/functional_tests
|
||||
echo "test_namespace" >> ./CI/tests/functional_tests
|
||||
echo "test_net_chaos" >> ./CI/tests/functional_tests
|
||||
echo "test_time" >> ./CI/tests/functional_tests
|
||||
@@ -108,6 +114,7 @@ jobs:
|
||||
echo "test_app_outages" >> ./CI/tests/functional_tests
|
||||
echo "test_container" >> ./CI/tests/functional_tests
|
||||
echo "test_pod" >> ./CI/tests/functional_tests
|
||||
echo "test_customapp_pod" >> ./CI/tests/functional_tests
|
||||
echo "test_namespace" >> ./CI/tests/functional_tests
|
||||
echo "test_net_chaos" >> ./CI/tests/functional_tests
|
||||
echo "test_time" >> ./CI/tests/functional_tests
|
||||
|
||||
18
CI/tests/test_customapp_pod.sh
Executable file
18
CI/tests/test_customapp_pod.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
set -xeEo pipefail
|
||||
|
||||
source CI/tests/common.sh
|
||||
|
||||
trap error ERR
|
||||
trap finish EXIT
|
||||
|
||||
function functional_test_customapp_pod_node_selector {
|
||||
export scenario_type="pod_disruption_scenarios"
|
||||
export scenario_file="scenarios/openshift/customapp_pod.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml
|
||||
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml
|
||||
echo "Pod disruption with node_label_selector test: Success"
|
||||
}
|
||||
|
||||
functional_test_customapp_pod_node_selector
|
||||
@@ -11,6 +11,7 @@ function functional_pod_network_filter {
|
||||
yq -i '.[0].target="pod-network-filter-test"' scenarios/kube/pod-network-filter.yml
|
||||
yq -i '.[0].protocols=["tcp"]' scenarios/kube/pod-network-filter.yml
|
||||
yq -i '.[0].ports=[443]' scenarios/kube/pod-network-filter.yml
|
||||
yq -i '.performance_monitoring.check_critical_alerts=False' CI/config/pod_network_filter.yaml
|
||||
|
||||
## Test webservice deployment
|
||||
kubectl apply -f ./CI/templates/pod_network_filter.yaml
|
||||
|
||||
10
README.md
10
README.md
@@ -22,14 +22,8 @@ Kraken injects deliberate failures into Kubernetes clusters to check if it is re
|
||||
Instructions on how to setup, configure and run Kraken can be found in the [documentation](https://krkn-chaos.dev/docs/).
|
||||
|
||||
|
||||
### Blogs and other useful resources
|
||||
- Blog post on introduction to Kraken: https://www.openshift.com/blog/introduction-to-kraken-a-chaos-tool-for-openshift/kubernetes
|
||||
- Discussion and demo on how Kraken can be leveraged to ensure OpenShift is reliable, performant and scalable: https://www.youtube.com/watch?v=s1PvupI5sD0&ab_channel=OpenShift
|
||||
- Blog post emphasizing the importance of making Chaos part of Performance and Scale runs to mimic the production environments: https://www.openshift.com/blog/making-chaos-part-of-kubernetes/openshift-performance-and-scalability-tests
|
||||
- Blog post on findings from Chaos test runs: https://cloud.redhat.com/blog/openshift/kubernetes-chaos-stories
|
||||
- Discussion with CNCF TAG App Delivery on Krkn workflow, features and addition to CNCF sandbox: [Github](https://github.com/cncf/sandbox/issues/44), [Tracker](https://github.com/cncf/tag-app-delivery/issues/465), [recording](https://www.youtube.com/watch?v=nXQkBFK_MWc&t=722s)
|
||||
- Blog post on supercharging chaos testing using AI integration in Krkn: https://www.redhat.com/en/blog/supercharging-chaos-testing-using-ai
|
||||
- Blog post announcing Krkn joining CNCF Sandbox: https://www.redhat.com/en/blog/krknchaos-joining-cncf-sandbox
|
||||
### Blogs, podcasts and interviews
|
||||
Additional resources, including blog posts, podcasts, and community interviews, can be found on the [website](https://krkn-chaos.dev/blog)
|
||||
|
||||
|
||||
### Roadmap
|
||||
|
||||
@@ -28,7 +28,7 @@ ENV KUBECONFIG /home/krkn/.kube/config
|
||||
|
||||
# This overwrites any existing configuration in /etc/yum.repos.d/kubernetes.repo
|
||||
RUN dnf update && dnf install -y --setopt=install_weak_deps=False \
|
||||
git python39 jq yq gettext wget which ipmitool &&\
|
||||
git python39 jq yq gettext wget which ipmitool openssh-server &&\
|
||||
dnf clean all
|
||||
|
||||
# Virtctl
|
||||
|
||||
@@ -444,7 +444,7 @@
|
||||
"required": "false"
|
||||
},
|
||||
{
|
||||
"name": "kubevirt-namespace",
|
||||
"name": "kubevirt-name",
|
||||
"short_description": "KubeVirt regex names to watch",
|
||||
"description": "KubeVirt regex names to check VMs",
|
||||
"variable": "KUBE_VIRT_NAME",
|
||||
|
||||
@@ -75,10 +75,12 @@ def alerts(
|
||||
def critical_alerts(
|
||||
prom_cli: KrknPrometheus,
|
||||
summary: ChaosRunAlertSummary,
|
||||
elastic: KrknElastic,
|
||||
run_id,
|
||||
scenario,
|
||||
start_time,
|
||||
end_time,
|
||||
elastic_alerts_index
|
||||
):
|
||||
summary.scenario = scenario
|
||||
summary.run_id = run_id
|
||||
@@ -113,7 +115,6 @@ def critical_alerts(
|
||||
summary.chaos_alerts.append(alert)
|
||||
|
||||
post_critical_alerts = prom_cli.process_query(query)
|
||||
|
||||
for alert in post_critical_alerts:
|
||||
if "metric" in alert:
|
||||
alertname = (
|
||||
@@ -136,6 +137,21 @@ def critical_alerts(
|
||||
)
|
||||
alert = ChaosRunAlert(alertname, alertstate, namespace, severity)
|
||||
summary.post_chaos_alerts.append(alert)
|
||||
if elastic:
|
||||
elastic_alert = ElasticAlert(
|
||||
run_uuid=run_id,
|
||||
severity=severity,
|
||||
alert=alertname,
|
||||
created_at=end_time,
|
||||
namespace=namespace,
|
||||
alertstate=alertstate,
|
||||
phase="post_chaos"
|
||||
)
|
||||
result = elastic.push_alert(elastic_alert, elastic_alerts_index)
|
||||
if result == -1:
|
||||
logging.error("failed to save alert on ElasticSearch")
|
||||
pass
|
||||
|
||||
|
||||
during_critical_alerts_count = len(during_critical_alerts)
|
||||
post_critical_alerts_count = len(post_critical_alerts)
|
||||
@@ -149,8 +165,8 @@ def critical_alerts(
|
||||
|
||||
if not firing_alerts:
|
||||
logging.info("No critical alerts are firing!!")
|
||||
|
||||
|
||||
|
||||
|
||||
def metrics(
|
||||
prom_cli: KrknPrometheus,
|
||||
elastic: KrknElastic,
|
||||
@@ -252,6 +268,14 @@ def metrics(
|
||||
metric[k] = v
|
||||
metric['timestamp'] = str(datetime.datetime.now())
|
||||
metrics_list.append(metric.copy())
|
||||
if telemetry_json['virt_checks']:
|
||||
for virt_check in telemetry_json["virt_checks"]:
|
||||
metric_name = "virt_check_recovery"
|
||||
metric = {"metricName": metric_name}
|
||||
for k,v in virt_check.items():
|
||||
metric[k] = v
|
||||
metric['timestamp'] = str(datetime.datetime.now())
|
||||
metrics_list.append(metric.copy())
|
||||
|
||||
save_metrics = False
|
||||
if elastic is not None and elastic_metrics_index is not None:
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
from asyncio import Future
|
||||
import yaml
|
||||
from krkn_lib.k8s import KrknKubernetes
|
||||
from krkn_lib.k8s.pods_monitor_pool import PodsMonitorPool
|
||||
from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label
|
||||
from krkn_lib.models.telemetry import ScenarioTelemetry
|
||||
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
|
||||
from krkn_lib.utils import get_yaml_item_value
|
||||
@@ -22,30 +22,26 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
|
||||
lib_telemetry: KrknTelemetryOpenshift,
|
||||
scenario_telemetry: ScenarioTelemetry,
|
||||
) -> int:
|
||||
pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes())
|
||||
try:
|
||||
with open(scenario, "r") as f:
|
||||
cont_scenario_config = yaml.full_load(f)
|
||||
|
||||
for kill_scenario in cont_scenario_config["scenarios"]:
|
||||
self.start_monitoring(
|
||||
kill_scenario, pool
|
||||
future_snapshot = self.start_monitoring(
|
||||
kill_scenario,
|
||||
lib_telemetry
|
||||
)
|
||||
killed_containers = self.container_killing_in_pod(
|
||||
self.container_killing_in_pod(
|
||||
kill_scenario, lib_telemetry.get_lib_kubernetes()
|
||||
)
|
||||
result = pool.join()
|
||||
if result.error:
|
||||
logging.error(
|
||||
logging.error(
|
||||
f"ContainerScenarioPlugin pods failed to recovery: {result.error}"
|
||||
)
|
||||
)
|
||||
return 1
|
||||
scenario_telemetry.affected_pods = result
|
||||
|
||||
except (RuntimeError, Exception):
|
||||
logging.error("ContainerScenarioPlugin exiting due to Exception %s")
|
||||
snapshot = future_snapshot.result()
|
||||
result = snapshot.get_pods_status()
|
||||
scenario_telemetry.affected_pods = result
|
||||
if len(result.unrecovered) > 0:
|
||||
logging.info("ContainerScenarioPlugin failed with unrecovered containers")
|
||||
return 1
|
||||
except (RuntimeError, Exception) as e:
|
||||
logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e)
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
@@ -53,17 +49,18 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
|
||||
def get_scenario_types(self) -> list[str]:
|
||||
return ["container_scenarios"]
|
||||
|
||||
def start_monitoring(self, kill_scenario: dict, pool: PodsMonitorPool):
|
||||
def start_monitoring(self, kill_scenario: dict, lib_telemetry: KrknTelemetryOpenshift) -> Future:
|
||||
|
||||
namespace_pattern = f"^{kill_scenario['namespace']}$"
|
||||
label_selector = kill_scenario["label_selector"]
|
||||
recovery_time = kill_scenario["expected_recovery_time"]
|
||||
pool.select_and_monitor_by_namespace_pattern_and_label(
|
||||
future_snapshot = select_and_monitor_by_namespace_pattern_and_label(
|
||||
namespace_pattern=namespace_pattern,
|
||||
label_selector=label_selector,
|
||||
max_timeout=recovery_time,
|
||||
field_selector="status.phase=Running"
|
||||
v1_client=lib_telemetry.get_lib_kubernetes().cli
|
||||
)
|
||||
return future_snapshot
|
||||
|
||||
def container_killing_in_pod(self, cont_scenario, kubecli: KrknKubernetes):
|
||||
scenario_name = get_yaml_item_value(cont_scenario, "name", "")
|
||||
|
||||
@@ -149,44 +149,48 @@ class KubevirtVmOutageScenarioPlugin(AbstractScenarioPlugin):
|
||||
disable_auto_restart = params.get("disable_auto_restart", False)
|
||||
|
||||
if not vm_name:
|
||||
raise Exception("vm_name parameter is required")
|
||||
logging.error("vm_name parameter is required")
|
||||
return 1
|
||||
self.pods_status = PodsStatus()
|
||||
vmis_list = self.get_vmis(vm_name,namespace)
|
||||
if len(vmis_list) == 0:
|
||||
raise Exception(f"No matching VMs with name {vm_name} in namespace {namespace}")
|
||||
rand_int = random.randint(0, len(vmis_list) - 1)
|
||||
vmi = vmis_list[rand_int]
|
||||
for _ in range(kill_count):
|
||||
|
||||
logging.info(f"Starting KubeVirt VM outage scenario for VM: {vm_name} in namespace: {namespace}")
|
||||
vmi_name = vmi.get("metadata").get("name")
|
||||
if not self.validate_environment(vmi_name, namespace):
|
||||
return self.pods_status
|
||||
|
||||
vmi = self.get_vmi(vmi_name, namespace)
|
||||
self.affected_pod = AffectedPod(
|
||||
pod_name=vmi_name,
|
||||
namespace=namespace,
|
||||
)
|
||||
if not vmi:
|
||||
logging.error(f"VMI {vm_name} not found in namespace {namespace}")
|
||||
return self.pods_status
|
||||
|
||||
self.original_vmi = vmi
|
||||
logging.info(f"Captured initial state of VMI: {vm_name}")
|
||||
result = self.delete_vmi(vmi_name, namespace, disable_auto_restart)
|
||||
if result != 0:
|
||||
return self.pods_status
|
||||
rand_int = random.randint(0, len(vmis_list) - 1)
|
||||
vmi = vmis_list[rand_int]
|
||||
|
||||
logging.info(f"Starting KubeVirt VM outage scenario for VM: {vm_name} in namespace: {namespace}")
|
||||
vmi_name = vmi.get("metadata").get("name")
|
||||
if not self.validate_environment(vmi_name, namespace):
|
||||
return 1
|
||||
|
||||
vmi = self.get_vmi(vmi_name, namespace)
|
||||
self.affected_pod = AffectedPod(
|
||||
pod_name=vmi_name,
|
||||
namespace=namespace,
|
||||
)
|
||||
if not vmi:
|
||||
logging.error(f"VMI {vm_name} not found in namespace {namespace}")
|
||||
return 1
|
||||
|
||||
self.original_vmi = vmi
|
||||
logging.info(f"Captured initial state of VMI: {vm_name}")
|
||||
result = self.delete_vmi(vmi_name, namespace, disable_auto_restart)
|
||||
if result != 0:
|
||||
self.pods_status.unrecovered.append(self.affected_pod)
|
||||
continue
|
||||
|
||||
result = self.wait_for_running(vmi_name,namespace, timeout)
|
||||
if result != 0:
|
||||
return self.pods_status
|
||||
|
||||
self.affected_pod.total_recovery_time = (
|
||||
self.affected_pod.pod_readiness_time
|
||||
+ self.affected_pod.pod_rescheduling_time
|
||||
)
|
||||
result = self.wait_for_running(vmi_name,namespace, timeout)
|
||||
if result != 0:
|
||||
self.pods_status.unrecovered.append(self.affected_pod)
|
||||
continue
|
||||
|
||||
self.affected_pod.total_recovery_time = (
|
||||
self.affected_pod.pod_readiness_time
|
||||
+ self.affected_pod.pod_rescheduling_time
|
||||
)
|
||||
|
||||
self.pods_status.recovered.append(self.affected_pod)
|
||||
logging.info(f"Successfully completed KubeVirt VM outage scenario for VM: {vm_name}")
|
||||
self.pods_status.recovered.append(self.affected_pod)
|
||||
logging.info(f"Successfully completed KubeVirt VM outage scenario for VM: {vm_name}")
|
||||
|
||||
return self.pods_status
|
||||
|
||||
@@ -316,13 +320,13 @@ class KubevirtVmOutageScenarioPlugin(AbstractScenarioPlugin):
|
||||
time.sleep(1)
|
||||
|
||||
logging.error(f"Timed out waiting for VMI {vm_name} to be deleted")
|
||||
self.pods_status.unrecovered = self.affected_pod
|
||||
self.pods_status.unrecovered.append(self.affected_pod)
|
||||
return 1
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error deleting VMI {vm_name}: {e}")
|
||||
log_exception(e)
|
||||
self.pods_status.unrecovered = self.affected_pod
|
||||
self.pods_status.unrecovered.append(self.affected_pod)
|
||||
return 1
|
||||
|
||||
def wait_for_running(self, vm_name: str, namespace: str, timeout: int = 120) -> int:
|
||||
|
||||
@@ -23,8 +23,7 @@ def create_job(batch_cli, body, namespace="default"):
|
||||
"""
|
||||
|
||||
try:
|
||||
api_response = batch_cli.create_namespaced_job(
|
||||
body=body, namespace=namespace)
|
||||
api_response = batch_cli.create_namespaced_job(body=body, namespace=namespace)
|
||||
return api_response
|
||||
except ApiException as api:
|
||||
logging.warning(
|
||||
@@ -71,7 +70,8 @@ def create_pod(cli, body, namespace, timeout=120):
|
||||
end_time = time.time() + timeout
|
||||
while True:
|
||||
pod_stat = cli.read_namespaced_pod(
|
||||
name=body["metadata"]["name"], namespace=namespace)
|
||||
name=body["metadata"]["name"], namespace=namespace
|
||||
)
|
||||
if pod_stat.status.phase == "Running":
|
||||
break
|
||||
if time.time() > end_time:
|
||||
@@ -121,16 +121,18 @@ def exec_cmd_in_pod(cli, command, pod_name, namespace, container=None):
|
||||
return ret
|
||||
|
||||
|
||||
def list_pods(cli, namespace, label_selector=None):
|
||||
def list_pods(cli, namespace, label_selector=None, exclude_label=None):
|
||||
"""
|
||||
Function used to list pods in a given namespace and having a certain label
|
||||
Function used to list pods in a given namespace and having a certain label and excluding pods with exclude_label
|
||||
and excluding pods with exclude_label
|
||||
"""
|
||||
|
||||
pods = []
|
||||
try:
|
||||
if label_selector:
|
||||
ret = cli.list_namespaced_pod(
|
||||
namespace, pretty=True, label_selector=label_selector)
|
||||
namespace, pretty=True, label_selector=label_selector
|
||||
)
|
||||
else:
|
||||
ret = cli.list_namespaced_pod(namespace, pretty=True)
|
||||
except ApiException as e:
|
||||
@@ -140,7 +142,16 @@ def list_pods(cli, namespace, label_selector=None):
|
||||
% e
|
||||
)
|
||||
raise e
|
||||
|
||||
for pod in ret.items:
|
||||
# Skip pods with the exclude label if specified
|
||||
if exclude_label and pod.metadata.labels:
|
||||
exclude_key, exclude_value = exclude_label.split("=", 1)
|
||||
if (
|
||||
exclude_key in pod.metadata.labels
|
||||
and pod.metadata.labels[exclude_key] == exclude_value
|
||||
):
|
||||
continue
|
||||
pods.append(pod.metadata.name)
|
||||
|
||||
return pods
|
||||
@@ -152,8 +163,7 @@ def get_job_status(batch_cli, name, namespace="default"):
|
||||
"""
|
||||
|
||||
try:
|
||||
return batch_cli.read_namespaced_job_status(
|
||||
name=name, namespace=namespace)
|
||||
return batch_cli.read_namespaced_job_status(name=name, namespace=namespace)
|
||||
except Exception as e:
|
||||
logging.error(
|
||||
"Exception when calling \
|
||||
@@ -169,7 +179,10 @@ def get_pod_log(cli, name, namespace="default"):
|
||||
"""
|
||||
|
||||
return cli.read_namespaced_pod_log(
|
||||
name=name, namespace=namespace, _return_http_data_only=True, _preload_content=False
|
||||
name=name,
|
||||
namespace=namespace,
|
||||
_return_http_data_only=True,
|
||||
_preload_content=False,
|
||||
)
|
||||
|
||||
|
||||
@@ -191,7 +204,8 @@ def delete_job(batch_cli, name, namespace="default"):
|
||||
name=name,
|
||||
namespace=namespace,
|
||||
body=client.V1DeleteOptions(
|
||||
propagation_policy="Foreground", grace_period_seconds=0),
|
||||
propagation_policy="Foreground", grace_period_seconds=0
|
||||
),
|
||||
)
|
||||
logging.debug("Job deleted. status='%s'" % str(api_response.status))
|
||||
return api_response
|
||||
@@ -247,11 +261,8 @@ def get_node(node_name, label_selector, instance_kill_count, cli):
|
||||
)
|
||||
nodes = list_ready_nodes(cli, label_selector)
|
||||
if not nodes:
|
||||
raise Exception(
|
||||
"Ready nodes with the provided label selector do not exist")
|
||||
logging.info(
|
||||
"Ready nodes with the label selector %s: %s" % (label_selector, nodes)
|
||||
)
|
||||
raise Exception("Ready nodes with the provided label selector do not exist")
|
||||
logging.info("Ready nodes with the label selector %s: %s" % (label_selector, nodes))
|
||||
number_of_nodes = len(nodes)
|
||||
if instance_kill_count == number_of_nodes:
|
||||
return nodes
|
||||
|
||||
@@ -19,7 +19,11 @@ from . import cerberus
|
||||
|
||||
|
||||
def get_test_pods(
|
||||
pod_name: str, pod_label: str, namespace: str, kubecli: KrknKubernetes
|
||||
pod_name: str,
|
||||
pod_label: str,
|
||||
namespace: str,
|
||||
kubecli: KrknKubernetes,
|
||||
exclude_label: str = None,
|
||||
) -> typing.List[str]:
|
||||
"""
|
||||
Function that returns a list of pods to apply network policy
|
||||
@@ -38,11 +42,16 @@ def get_test_pods(
|
||||
kubecli (KrknKubernetes)
|
||||
- Object to interact with Kubernetes Python client
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
pod names (string) in the namespace
|
||||
"""
|
||||
pods_list = []
|
||||
pods_list = kubecli.list_pods(label_selector=pod_label, namespace=namespace)
|
||||
pods_list = kubecli.list_pods(
|
||||
label_selector=pod_label, namespace=namespace, exclude_label=exclude_label
|
||||
)
|
||||
if pod_name and pod_name not in pods_list:
|
||||
raise Exception("pod name not found in namespace ")
|
||||
elif pod_name and pod_name in pods_list:
|
||||
@@ -226,6 +235,10 @@ def apply_outage_policy(
|
||||
|
||||
image (string)
|
||||
- Image of network chaos tool
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
The name of the job created that executes the commands on a node
|
||||
for ingress chaos scenario
|
||||
@@ -324,6 +337,9 @@ def apply_ingress_policy(
|
||||
test_execution (String)
|
||||
- The order in which the filters are applied
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
The name of the job created that executes the traffic shaping
|
||||
filter
|
||||
@@ -407,6 +423,9 @@ def apply_net_policy(
|
||||
test_execution (String)
|
||||
- The order in which the filters are applied
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
The name of the job created that executes the traffic shaping
|
||||
filter
|
||||
@@ -466,6 +485,9 @@ def get_ingress_cmd(
|
||||
duration (str):
|
||||
- Duration for which the traffic control is to be done
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
str: ingress filter
|
||||
"""
|
||||
@@ -517,6 +539,9 @@ def get_egress_cmd(
|
||||
duration (str):
|
||||
- Duration for which the traffic control is to be done
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
str: egress filter
|
||||
"""
|
||||
@@ -652,6 +677,10 @@ def list_bridges(node: str, pod_template, kubecli: KrknKubernetes, image: str) -
|
||||
|
||||
image (string)
|
||||
- Image of network chaos tool
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
List of bridges on the node.
|
||||
"""
|
||||
@@ -829,6 +858,9 @@ def check_bridge_interface(
|
||||
kubecli (KrknKubernetes)
|
||||
- Object to interact with Kubernetes Python client
|
||||
|
||||
exclude_label (string)
|
||||
- pods matching this label will be excluded from the outage
|
||||
|
||||
Returns:
|
||||
Returns True if the bridge is found in the node.
|
||||
"""
|
||||
@@ -922,6 +954,15 @@ class InputParams:
|
||||
},
|
||||
)
|
||||
|
||||
exclude_label: typing.Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Exclude label",
|
||||
"description": "Kubernetes label selector for pods to exclude from the chaos. "
|
||||
"Pods matching this label will be excluded even if they match the label_selector",
|
||||
},
|
||||
)
|
||||
|
||||
kraken_config: typing.Dict[str, typing.Any] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
@@ -1055,7 +1096,11 @@ def pod_outage(
|
||||
|
||||
br_name = get_bridge_name(api_ext, custom_obj)
|
||||
pods_list = get_test_pods(
|
||||
test_pod_name, test_label_selector, test_namespace, kubecli
|
||||
test_pod_name,
|
||||
test_label_selector,
|
||||
test_namespace,
|
||||
kubecli,
|
||||
params.exclude_label,
|
||||
)
|
||||
|
||||
while not len(pods_list) <= params.instance_count:
|
||||
@@ -1176,6 +1221,15 @@ class EgressParams:
|
||||
},
|
||||
)
|
||||
|
||||
exclude_label: typing.Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Exclude label",
|
||||
"description": "Kubernetes label selector for pods to exclude from the chaos. "
|
||||
"Pods matching this label will be excluded even if they match the label_selector",
|
||||
},
|
||||
)
|
||||
|
||||
kraken_config: typing.Dict[str, typing.Any] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
@@ -1314,7 +1368,11 @@ def pod_egress_shaping(
|
||||
|
||||
br_name = get_bridge_name(api_ext, custom_obj)
|
||||
pods_list = get_test_pods(
|
||||
test_pod_name, test_label_selector, test_namespace, kubecli
|
||||
test_pod_name,
|
||||
test_label_selector,
|
||||
test_namespace,
|
||||
kubecli,
|
||||
params.exclude_label,
|
||||
)
|
||||
|
||||
while not len(pods_list) <= params.instance_count:
|
||||
@@ -1450,6 +1508,15 @@ class IngressParams:
|
||||
},
|
||||
)
|
||||
|
||||
exclude_label: typing.Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Exclude label",
|
||||
"description": "Kubernetes label selector for pods to exclude from the chaos. "
|
||||
"Pods matching this label will be excluded even if they match the label_selector",
|
||||
},
|
||||
)
|
||||
|
||||
kraken_config: typing.Dict[str, typing.Any] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
@@ -1589,7 +1656,11 @@ def pod_ingress_shaping(
|
||||
|
||||
br_name = get_bridge_name(api_ext, custom_obj)
|
||||
pods_list = get_test_pods(
|
||||
test_pod_name, test_label_selector, test_namespace, kubecli
|
||||
test_pod_name,
|
||||
test_label_selector,
|
||||
test_namespace,
|
||||
kubecli,
|
||||
params.exclude_label,
|
||||
)
|
||||
|
||||
while not len(pods_list) <= params.instance_count:
|
||||
|
||||
@@ -11,6 +11,9 @@ class InputParams:
|
||||
self.label_selector = config["label_selector"] if "label_selector" in config else ""
|
||||
self.namespace_pattern = config["namespace_pattern"] if "namespace_pattern" in config else ""
|
||||
self.name_pattern = config["name_pattern"] if "name_pattern" in config else ""
|
||||
self.node_label_selector = config["node_label_selector"] if "node_label_selector" in config else ""
|
||||
self.node_names = config["node_names"] if "node_names" in config else []
|
||||
self.exclude_label = config["exclude_label"] if "exclude_label" in config else ""
|
||||
|
||||
namespace_pattern: str
|
||||
krkn_pod_recovery_time: int
|
||||
@@ -18,4 +21,7 @@ class InputParams:
|
||||
duration: int
|
||||
kill: int
|
||||
label_selector: str
|
||||
name_pattern: str
|
||||
name_pattern: str
|
||||
node_label_selector: str
|
||||
node_names: list
|
||||
exclude_label: str
|
||||
@@ -1,14 +1,16 @@
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from asyncio import Future
|
||||
|
||||
import yaml
|
||||
from krkn_lib.k8s import KrknKubernetes
|
||||
from krkn_lib.k8s.pods_monitor_pool import PodsMonitorPool
|
||||
from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label, \
|
||||
select_and_monitor_by_name_pattern_and_namespace_pattern
|
||||
|
||||
from krkn.scenario_plugins.pod_disruption.models.models import InputParams
|
||||
from krkn_lib.models.telemetry import ScenarioTelemetry
|
||||
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
|
||||
from krkn_lib.utils import get_yaml_item_value
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -29,31 +31,25 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
|
||||
lib_telemetry: KrknTelemetryOpenshift,
|
||||
scenario_telemetry: ScenarioTelemetry,
|
||||
) -> int:
|
||||
pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes())
|
||||
try:
|
||||
with open(scenario, "r") as f:
|
||||
cont_scenario_config = yaml.full_load(f)
|
||||
for kill_scenario in cont_scenario_config:
|
||||
kill_scenario_config = InputParams(kill_scenario["config"])
|
||||
self.start_monitoring(
|
||||
kill_scenario_config, pool
|
||||
future_snapshot=self.start_monitoring(
|
||||
kill_scenario_config,
|
||||
lib_telemetry
|
||||
)
|
||||
return_status = self.killing_pods(
|
||||
self.killing_pods(
|
||||
kill_scenario_config, lib_telemetry.get_lib_kubernetes()
|
||||
)
|
||||
if return_status != 0:
|
||||
result = pool.cancel()
|
||||
else:
|
||||
result = pool.join()
|
||||
if result.error:
|
||||
logging.error(
|
||||
logging.error(
|
||||
f"PodDisruptionScenariosPlugin pods failed to recovery: {result.error}"
|
||||
)
|
||||
)
|
||||
return 1
|
||||
|
||||
scenario_telemetry.affected_pods = result
|
||||
|
||||
snapshot = future_snapshot.result()
|
||||
result = snapshot.get_pods_status()
|
||||
scenario_telemetry.affected_pods = result
|
||||
if len(result.unrecovered) > 0:
|
||||
logging.info("PodDisruptionScenarioPlugin failed with unrecovered pods")
|
||||
return 1
|
||||
|
||||
except (RuntimeError, Exception) as e:
|
||||
logging.error("PodDisruptionScenariosPlugin exiting due to Exception %s" % e)
|
||||
@@ -64,7 +60,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
|
||||
def get_scenario_types(self) -> list[str]:
|
||||
return ["pod_disruption_scenarios"]
|
||||
|
||||
def start_monitoring(self, kill_scenario: InputParams, pool: PodsMonitorPool):
|
||||
def start_monitoring(self, kill_scenario: InputParams, lib_telemetry: KrknTelemetryOpenshift) -> Future:
|
||||
|
||||
recovery_time = kill_scenario.krkn_pod_recovery_time
|
||||
if (
|
||||
@@ -73,16 +69,17 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
|
||||
):
|
||||
namespace_pattern = kill_scenario.namespace_pattern
|
||||
label_selector = kill_scenario.label_selector
|
||||
pool.select_and_monitor_by_namespace_pattern_and_label(
|
||||
future_snapshot = select_and_monitor_by_namespace_pattern_and_label(
|
||||
namespace_pattern=namespace_pattern,
|
||||
label_selector=label_selector,
|
||||
max_timeout=recovery_time,
|
||||
field_selector="status.phase=Running"
|
||||
v1_client=lib_telemetry.get_lib_kubernetes().cli
|
||||
)
|
||||
logging.info(
|
||||
f"waiting up to {recovery_time} seconds for pod recovery, "
|
||||
f"pod label pattern: {label_selector} namespace pattern: {namespace_pattern}"
|
||||
)
|
||||
return future_snapshot
|
||||
|
||||
elif (
|
||||
kill_scenario.namespace_pattern
|
||||
@@ -90,32 +87,101 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
|
||||
):
|
||||
namespace_pattern = kill_scenario.namespace_pattern
|
||||
name_pattern = kill_scenario.name_pattern
|
||||
pool.select_and_monitor_by_name_pattern_and_namespace_pattern(
|
||||
future_snapshot = select_and_monitor_by_name_pattern_and_namespace_pattern(
|
||||
pod_name_pattern=name_pattern,
|
||||
namespace_pattern=namespace_pattern,
|
||||
max_timeout=recovery_time,
|
||||
field_selector="status.phase=Running"
|
||||
v1_client=lib_telemetry.get_lib_kubernetes().cli
|
||||
)
|
||||
logging.info(
|
||||
f"waiting up to {recovery_time} seconds for pod recovery, "
|
||||
f"pod name pattern: {name_pattern} namespace pattern: {namespace_pattern}"
|
||||
)
|
||||
return future_snapshot
|
||||
else:
|
||||
raise Exception(
|
||||
f"impossible to determine monitor parameters, check {kill_scenario} configuration"
|
||||
)
|
||||
|
||||
def _select_pods_with_field_selector(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str, node_name: str = None):
|
||||
"""Helper function to select pods using either label_selector or name_pattern with field_selector, optionally filtered by node"""
|
||||
# Combine field selectors if node targeting is specified
|
||||
if node_name:
|
||||
node_field_selector = f"spec.nodeName={node_name}"
|
||||
if field_selector:
|
||||
combined_field_selector = f"{field_selector},{node_field_selector}"
|
||||
else:
|
||||
combined_field_selector = node_field_selector
|
||||
else:
|
||||
combined_field_selector = field_selector
|
||||
|
||||
if label_selector:
|
||||
return kubecli.select_pods_by_namespace_pattern_and_label(
|
||||
label_selector=label_selector,
|
||||
namespace_pattern=namespace,
|
||||
field_selector=combined_field_selector
|
||||
)
|
||||
else: # name_pattern
|
||||
return kubecli.select_pods_by_name_pattern_and_namespace_pattern(
|
||||
pod_name_pattern=name_pattern,
|
||||
namespace_pattern=namespace,
|
||||
field_selector=combined_field_selector
|
||||
)
|
||||
|
||||
def get_pods(self, name_pattern, label_selector,namespace, kubecli: KrknKubernetes, field_selector: str =None):
|
||||
def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None, quiet: bool = False):
|
||||
if label_selector and name_pattern:
|
||||
logging.error('Only, one of name pattern or label pattern can be specified')
|
||||
elif label_selector:
|
||||
pods = kubecli.select_pods_by_namespace_pattern_and_label(label_selector=label_selector,namespace_pattern=namespace, field_selector=field_selector)
|
||||
elif name_pattern:
|
||||
pods = kubecli.select_pods_by_name_pattern_and_namespace_pattern(pod_name_pattern=name_pattern, namespace_pattern=namespace, field_selector=field_selector)
|
||||
else:
|
||||
return []
|
||||
|
||||
if not label_selector and not name_pattern:
|
||||
logging.error('Name pattern or label pattern must be specified ')
|
||||
return pods
|
||||
return []
|
||||
|
||||
# If specific node names are provided, make multiple calls with field selector
|
||||
if node_names:
|
||||
if not quiet:
|
||||
logging.info(f"Targeting pods on {len(node_names)} specific nodes")
|
||||
all_pods = []
|
||||
for node_name in node_names:
|
||||
pods = self._select_pods_with_field_selector(
|
||||
name_pattern, label_selector, namespace, kubecli, field_selector, node_name
|
||||
)
|
||||
|
||||
if pods:
|
||||
all_pods.extend(pods)
|
||||
|
||||
if not quiet:
|
||||
logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
|
||||
return all_pods
|
||||
|
||||
# Node label selector approach - use field selectors
|
||||
if node_label_selector:
|
||||
# Get nodes matching the label selector first
|
||||
nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector)
|
||||
if not nodes_with_label:
|
||||
logging.info(f"No nodes found with label selector: {node_label_selector}")
|
||||
return []
|
||||
|
||||
if not quiet:
|
||||
logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
|
||||
# Use field selector for each node
|
||||
all_pods = []
|
||||
for node_name in nodes_with_label:
|
||||
pods = self._select_pods_with_field_selector(
|
||||
name_pattern, label_selector, namespace, kubecli, field_selector, node_name
|
||||
)
|
||||
|
||||
if pods:
|
||||
all_pods.extend(pods)
|
||||
|
||||
if not quiet:
|
||||
logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
|
||||
return all_pods
|
||||
|
||||
# Standard pod selection (no node targeting)
|
||||
return self._select_pods_with_field_selector(
|
||||
name_pattern, label_selector, namespace, kubecli, field_selector
|
||||
)
|
||||
|
||||
def killing_pods(self, config: InputParams, kubecli: KrknKubernetes):
|
||||
# region Select target pods
|
||||
@@ -124,7 +190,14 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
|
||||
if not namespace:
|
||||
logging.error('Namespace pattern must be specified')
|
||||
|
||||
pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running")
|
||||
pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
|
||||
exclude_pods = set()
|
||||
if config.exclude_label:
|
||||
_exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
|
||||
for pod in _exclude_pods:
|
||||
exclude_pods.add(pod[0])
|
||||
|
||||
|
||||
pods_count = len(pods)
|
||||
if len(pods) < config.kill:
|
||||
logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
|
||||
@@ -133,23 +206,25 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
|
||||
|
||||
random.shuffle(pods)
|
||||
for i in range(config.kill):
|
||||
|
||||
pod = pods[i]
|
||||
logging.info(pod)
|
||||
logging.info(f'Deleting pod {pod[0]}')
|
||||
kubecli.delete_pod(pod[0], pod[1])
|
||||
if pod[0] in exclude_pods:
|
||||
logging.info(f"Excluding {pod[0]} from chaos")
|
||||
else:
|
||||
logging.info(f'Deleting pod {pod[0]}')
|
||||
kubecli.delete_pod(pod[0], pod[1])
|
||||
|
||||
self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli)
|
||||
self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
|
||||
return 0
|
||||
|
||||
def wait_for_pods(
|
||||
self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes
|
||||
self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
|
||||
):
|
||||
timeout = False
|
||||
start_time = datetime.now()
|
||||
|
||||
while not timeout:
|
||||
pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli)
|
||||
pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
|
||||
if pod_count == len(pods):
|
||||
return
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ class VirtChecker:
|
||||
:param namespace:
|
||||
:return: virtctl_status 'True' if successful, or an error message if it fails.
|
||||
"""
|
||||
virtctl_vm_cmd = f"virtctl ssh --local-ssh-opts='-o BatchMode=yes' --local-ssh-opts='-o PasswordAuthentication=no' --local-ssh-opts='-o ConnectTimeout=2' root@{vm_name} -n {namespace}"
|
||||
virtctl_vm_cmd = f"virtctl ssh --local-ssh-opts='-o BatchMode=yes' --local-ssh-opts='-o PasswordAuthentication=no' --local-ssh-opts='-o ConnectTimeout=2' root@vmi/{vm_name} -n {namespace} 2>&1 |egrep 'denied|verification failed' && echo 'True' || echo 'False'"
|
||||
check_virtctl_vm_cmd = f"virtctl ssh --local-ssh-opts='-o BatchMode=yes' --local-ssh-opts='-o PasswordAuthentication=no' --local-ssh-opts='-o ConnectTimeout=2' root@{vm_name} -n {namespace} 2>&1 |egrep 'denied|verification failed' && echo 'True' || echo 'False'"
|
||||
if 'True' in invoke_no_exit(check_virtctl_vm_cmd):
|
||||
return True
|
||||
|
||||
@@ -16,7 +16,7 @@ google-cloud-compute==1.22.0
|
||||
ibm_cloud_sdk_core==3.18.0
|
||||
ibm_vpc==0.20.0
|
||||
jinja2==3.1.6
|
||||
krkn-lib==5.1.2
|
||||
krkn-lib==5.1.8
|
||||
lxml==5.1.0
|
||||
kubernetes==28.1.0
|
||||
numpy==1.26.4
|
||||
|
||||
@@ -375,10 +375,12 @@ def main(options, command: Optional[str]) -> int:
|
||||
prometheus_plugin.critical_alerts(
|
||||
prometheus,
|
||||
summary,
|
||||
elastic_search,
|
||||
run_uuid,
|
||||
scenario_type,
|
||||
start_time,
|
||||
datetime.datetime.now(),
|
||||
elastic_alerts_index
|
||||
)
|
||||
|
||||
chaos_output.critical_alerts = summary
|
||||
|
||||
@@ -1,6 +1,15 @@
|
||||
# yaml-language-server: $schema=../plugin.schema.json
|
||||
- id: kill-pods
|
||||
config:
|
||||
namespace_pattern: ^acme-air$
|
||||
namespace_pattern: "kube-system"
|
||||
name_pattern: .*
|
||||
krkn_pod_recovery_time: 120
|
||||
krkn_pod_recovery_time: 60
|
||||
kill: 1 # num of pods to kill
|
||||
#Not needed by default, but can be used if you want to target pods on specific nodes
|
||||
# Option 1: Target pods on nodes with specific labels [master/worker nodes]
|
||||
node_label_selector: node-role.kubernetes.io/control-plane= # Target control-plane nodes (works on both k8s and openshift)
|
||||
# Option 2: Target pods of specific nodes (testing mixed node types)
|
||||
# node_names:
|
||||
# - ip-10-0-31-8.us-east-2.compute.internal # Worker node 1
|
||||
# - ip-10-0-48-188.us-east-2.compute.internal # Worker node 2
|
||||
# - ip-10-0-14-59.us-east-2.compute.internal # Master node 1
|
||||
@@ -4,3 +4,4 @@
|
||||
namespace_pattern: ^openshift-etcd$
|
||||
label_selector: k8s-app=etcd
|
||||
krkn_pod_recovery_time: 120
|
||||
exclude_label: "" # excludes pods marked with this label from chaos
|
||||
|
||||
@@ -4,4 +4,5 @@
|
||||
namespace_pattern: ^openshift-apiserver$
|
||||
label_selector: app=openshift-apiserver-a
|
||||
krkn_pod_recovery_time: 120
|
||||
exclude_label: "" # excludes pods marked with this label from chaos
|
||||
|
||||
|
||||
@@ -4,4 +4,5 @@
|
||||
namespace_pattern: ^openshift-kube-apiserver$
|
||||
label_selector: app=openshift-kube-apiserver
|
||||
krkn_pod_recovery_time: 120
|
||||
exclude_label: "" # excludes pods marked with this label from chaos
|
||||
|
||||
|
||||
@@ -2,4 +2,5 @@
|
||||
config:
|
||||
namespace_pattern: ^openshift-monitoring$
|
||||
label_selector: statefulset.kubernetes.io/pod-name=prometheus-k8s-0
|
||||
krkn_pod_recovery_time: 120
|
||||
krkn_pod_recovery_time: 120
|
||||
exclude_label: "" # excludes pods marked with this label from chaos
|
||||
@@ -5,3 +5,4 @@
|
||||
name_pattern: .*
|
||||
kill: 3
|
||||
krkn_pod_recovery_time: 120
|
||||
exclude_label: "" # excludes pods marked with this label from chaos
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
This file is generated by running the "plugins" module in the kraken project:
|
||||
|
||||
```
|
||||
python -m kraken.plugins >scenarios/plugin.schema.json
|
||||
```
|
||||
@@ -1,584 +0,0 @@
|
||||
{
|
||||
"$id": "https://github.com/redhat-chaos/krkn/",
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Kraken Arcaflow scenarios",
|
||||
"description": "Serial execution of Arcaflow Python plugins. See https://github.com/arcaflow for details.",
|
||||
"type": "array",
|
||||
"minContains": 1,
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"const": "run_python"
|
||||
},
|
||||
"config": {
|
||||
"$defs": {
|
||||
"RunPythonFileInput": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"filename": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"filename"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"filename": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"filename"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"config"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"title": "pod_network_outage Arcaflow scenarios",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"const": "pod_network_outage"
|
||||
},
|
||||
"config": {
|
||||
"$defs": {
|
||||
"InputParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"image": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Image",
|
||||
"default": "image: quay.io/krkn-chaos/krkn:tools",
|
||||
"description": "Image of the krkn tools to run network outage."
|
||||
},
|
||||
"direction": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"default": [
|
||||
"ingress",
|
||||
"egress"
|
||||
],
|
||||
"title": "Direction",
|
||||
"description": "List of directions to apply filtersDefault both egress and ingress."
|
||||
},
|
||||
"ingress_ports": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
},
|
||||
"default": [],
|
||||
"title": "Ingress ports",
|
||||
"description": "List of ports to block traffic onDefault [], i.e. all ports"
|
||||
},
|
||||
"egress_ports": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
},
|
||||
"default": [],
|
||||
"title": "Egress ports",
|
||||
"description": "List of ports to block traffic onDefault [], i.e. all ports"
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 120,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"direction": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"default": [
|
||||
"ingress",
|
||||
"egress"
|
||||
],
|
||||
"title": "Direction",
|
||||
"description": "List of directions to apply filtersDefault both egress and ingress."
|
||||
},
|
||||
"ingress_ports": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
},
|
||||
"default": [],
|
||||
"title": "Ingress ports",
|
||||
"description": "List of ports to block traffic onDefault [], i.e. all ports"
|
||||
},
|
||||
"egress_ports": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
},
|
||||
"default": [],
|
||||
"title": "Egress ports",
|
||||
"description": "List of ports to block traffic onDefault [], i.e. all ports"
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 120,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"config"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"title": "pod_egress_shaping Arcaflow scenarios",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"const": "pod_egress_shaping"
|
||||
},
|
||||
"config": {
|
||||
"$defs": {
|
||||
"EgressParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"image": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Image",
|
||||
"default": "image: quay.io/krkn-chaos/krkn:tools",
|
||||
"description": "Image of the krkn tools to run network outage."
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
},
|
||||
"execution_type": {
|
||||
"type": "string",
|
||||
"default": "parallel",
|
||||
"title": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
|
||||
},
|
||||
"network_params": {
|
||||
"type": "object",
|
||||
"propertyNames": {},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
},
|
||||
"execution_type": {
|
||||
"type": "string",
|
||||
"default": "parallel",
|
||||
"title": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
|
||||
},
|
||||
"network_params": {
|
||||
"type": "object",
|
||||
"propertyNames": {},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"config"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"title": "pod_ingress_shaping Arcaflow scenarios",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"const": "pod_ingress_shaping"
|
||||
},
|
||||
"config": {
|
||||
"$defs": {
|
||||
"IngressParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"image": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Image",
|
||||
"default": "image: quay.io/krkn-chaos/krkn:tools",
|
||||
"description": "Image of the krkn tools to run network outage."
|
||||
},
|
||||
"network_params": {
|
||||
"type": "object",
|
||||
"propertyNames": {},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
},
|
||||
"execution_type": {
|
||||
"type": "string",
|
||||
"default": "parallel",
|
||||
"title": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"network_params": {
|
||||
"type": "object",
|
||||
"propertyNames": {},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
},
|
||||
"execution_type": {
|
||||
"type": "string",
|
||||
"default": "parallel",
|
||||
"title": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"config"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
93
tests/test_pod_network_outage.py
Normal file
93
tests/test_pod_network_outage.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
from krkn.scenario_plugins.native.pod_network_outage.kubernetes_functions import (
|
||||
list_pods,
|
||||
)
|
||||
from krkn.scenario_plugins.native.pod_network_outage.pod_network_outage_plugin import (
|
||||
get_test_pods,
|
||||
)
|
||||
|
||||
|
||||
class TestPodNetworkOutage(unittest.TestCase):
    """Unit tests for pod selection in the pod network outage scenario.

    Covers the ``exclude_label`` handling of ``list_pods`` and the way
    ``get_test_pods`` forwards its filters to the Kubernetes client wrapper.
    All Kubernetes interactions are replaced with ``MagicMock`` objects.
    """

    def test_list_pods_with_exclude_label(self):
        """list_pods drops pods whose labels match the exclude_label."""
        # Three pods share app=test; only pod1 carries skip=true.
        pod_specs = (
            ("pod1", {"app": "test", "skip": "true"}),
            ("pod2", {"app": "test"}),
            ("pod3", {"app": "test", "skip": "false"}),
        )
        fake_pods = []
        for pod_name, pod_labels in pod_specs:
            fake_pod = MagicMock()
            fake_pod.metadata.name = pod_name
            fake_pod.metadata.labels = pod_labels
            fake_pods.append(fake_pod)

        # Fake API response returned by the mocked client.
        api_response = MagicMock()
        api_response.items = fake_pods

        api_client = MagicMock()
        api_client.list_namespaced_pod.return_value = api_response

        # Without an exclude label every pod name is returned.
        all_names = list_pods(api_client, "test-namespace", "app=test")
        self.assertEqual(all_names, ["pod1", "pod2", "pod3"])

        # With skip=true excluded, pod1 must be filtered out.
        filtered_names = list_pods(
            api_client, "test-namespace", "app=test", "skip=true"
        )
        self.assertEqual(filtered_names, ["pod2", "pod3"])

    def test_get_test_pods_with_exclude_label(self):
        """get_test_pods forwards exclude_label to the client's list_pods."""
        kube_client = MagicMock()
        kube_client.list_pods.return_value = ["pod2", "pod3"]

        # No explicit pod name: selection relies on the label filters only.
        selected = get_test_pods(
            None, "app=test", "test-namespace", kube_client, "skip=true"
        )

        # The filters must reach list_pods as keyword arguments, exactly once.
        expected_kwargs = {
            "label_selector": "app=test",
            "namespace": "test-namespace",
            "exclude_label": "skip=true",
        }
        kube_client.list_pods.assert_called_once_with(**expected_kwargs)

        self.assertEqual(selected, ["pod2", "pod3"])

    def test_get_test_pods_with_pod_name_and_exclude_label(self):
        """An explicit pod_name wins over the label-based selection."""
        kube_client = MagicMock()
        kube_client.list_pods.return_value = ["pod1", "pod2", "pod3"]

        # Both a pod name and an exclude label are supplied; the result is
        # expected to contain only the named pod.
        selected = get_test_pods(
            "pod1", "app=test", "test-namespace", kube_client, "skip=true"
        )

        # list_pods is still expected to be queried once with the filters.
        expected_kwargs = {
            "label_selector": "app=test",
            "namespace": "test-namespace",
            "exclude_label": "skip=true",
        }
        kube_client.list_pods.assert_called_once_with(**expected_kwargs)

        self.assertEqual(selected, ["pod1"])
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user