Mirror of https://github.com/krkn-chaos/krkn.git (synced 2026-02-18 20:09:55 +00:00)
Compare commits
8 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | 4f250c9601 |  |
|  | 6480adc00a |  |
|  | 5002f210ae |  |
|  | 62c5afa9a2 |  |
|  | c109fc0b17 |  |
|  | fff675f3dd |  |
|  | c125e5acf7 |  |
|  | ca6995a1a1 |  |
.github/workflows/tests.yml (vendored): 25 changes
@@ -14,14 +14,16 @@ jobs:
       uses: actions/checkout@v3
     - name: Create multi-node KinD cluster
       uses: redhat-chaos/actions/kind@main
     - name: Install Helm & add repos
       run: |
         curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
         helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
         helm repo add stable https://charts.helm.sh/stable
         helm repo update
     - name: Deploy prometheus & Port Forwarding
       uses: redhat-chaos/actions/prometheus@main

     - name: Deploy Elasticsearch
       with:
         ELASTIC_URL: ${{ vars.ELASTIC_URL }}
         ELASTIC_PORT: ${{ vars.ELASTIC_PORT }}
         ELASTIC_USER: ${{ vars.ELASTIC_USER }}
         ELASTIC_PASSWORD: ${{ vars.ELASTIC_PASSWORD }}
       uses: redhat-chaos/actions/elastic@main
     - name: Install Python
       uses: actions/setup-python@v4
       with:
@@ -35,6 +37,8 @@ jobs:

     - name: Deploy test workloads
       run: |
         es_pod_name=$(kubectl get pods -l "app.kubernetes.io/instance=elasticsearch" -o name)
         kubectl --namespace default port-forward $es_pod_name 9200 &
         kubectl apply -f CI/templates/outage_pod.yaml
         kubectl wait --for=condition=ready pod -l scenario=outage --timeout=300s
         kubectl apply -f CI/templates/container_scenario_pod.yaml
@@ -59,6 +63,9 @@ jobs:
         yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml
         yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml
         yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml
         yq -i '.elastic.elastic_port=9200' CI/config/common_test_config.yaml
         yq -i '.elastic.elastic_url="https://localhost"' CI/config/common_test_config.yaml
         yq -i '.elastic.enable_elastic=True' CI/config/common_test_config.yaml
         echo "test_service_hijacking" > ./CI/tests/functional_tests
         echo "test_app_outages" >> ./CI/tests/functional_tests
         echo "test_container" >> ./CI/tests/functional_tests
@@ -87,6 +94,8 @@ jobs:
         yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml
         yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml
         yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml
         yq -i '.elastic.elastic_port=9200' CI/config/common_test_config.yaml
         yq -i '.elastic.elastic_url="https://localhost"' CI/config/common_test_config.yaml
         yq -i '.telemetry.username="${{secrets.TELEMETRY_USERNAME}}"' CI/config/common_test_config.yaml
         yq -i '.telemetry.password="${{secrets.TELEMETRY_PASSWORD}}"' CI/config/common_test_config.yaml
         echo "test_telemetry" > ./CI/tests/functional_tests
@@ -111,20 +120,24 @@ jobs:
         cat ./CI/results.markdown >> $GITHUB_STEP_SUMMARY
         echo >> $GITHUB_STEP_SUMMARY
     - name: Upload CI logs
       if: ${{ success() || failure() }}
       uses: actions/upload-artifact@v4
       with:
         name: ci-logs
         path: CI/out
         if-no-files-found: error
     - name: Collect coverage report
       if: ${{ success() || failure() }}
       run: |
         python -m coverage html
         python -m coverage json
     - name: Publish coverage report to job summary
       if: ${{ success() || failure() }}
       run: |
         pip install html2text
         html2text --ignore-images --ignore-links -b 0 htmlcov/index.html >> $GITHUB_STEP_SUMMARY
     - name: Upload coverage data
       if: ${{ success() || failure() }}
       uses: actions/upload-artifact@v4
       with:
         name: coverage
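For reference, after the yq edits above the elastic section of CI/config/common_test_config.yaml would look roughly like this (a sketch reconstructed from the yq paths shown; other keys in the file are omitted):

    elastic:
      enable_elastic: True          # set by .elastic.enable_elastic=True
      elastic_url: "https://localhost"
      elastic_port: 9200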
@@ -10,8 +10,6 @@ cerberus:
     cerberus_url: # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal.

 performance_monitoring:
-    deploy_dashboards: False # Install a mutable grafana and load the performance dashboards. Enable this only when running on OpenShift.
-    repo: "https://github.com/cloud-bulldozer/performance-dashboards.git"
     capture_metrics: False
     metrics_profile_path: config/metrics-aggregated.yaml
     prometheus_url: # The prometheus url/route is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes.
@@ -8,9 +8,9 @@ spec:
   hostNetwork: true
   containers:
     - name: fedtools
-      image: docker.io/fedora/tools
+      image: quay.io/krkn-chaos/krkn:tools
       command:
         - /bin/sh
         - -c
         - |
-          sleep infinity
+          sleep infinity
@@ -8,9 +8,9 @@ spec:
   hostNetwork: true
   containers:
     - name: fedtools
-      image: docker.io/fedora/tools
+      image: quay.io/krkn-chaos/krkn:tools
       command:
         - /bin/sh
         - -c
         - |
-          sleep infinity
+          sleep infinity
@@ -13,6 +13,10 @@ function functional_test_app_outage {
   export scenario_type="application_outages_scenarios"
   export scenario_file="scenarios/openshift/app_outage.yaml"
   export post_config=""
+
+  kubectl get services -A
+
+  kubectl get pods
   envsubst < CI/config/common_test_config.yaml > CI/config/app_outage.yaml
   python3 -m coverage run -a run_kraken.py -c CI/config/app_outage.yaml
   echo "App outage scenario test: Success"
ROADMAP.md: 10 changes
@@ -2,11 +2,11 @@

 Following are a list of enhancements that we are planning to work on adding support in Krkn. Of course any help/contributions are greatly appreciated.

-- [ ] [Ability to run multiple chaos scenarios in parallel under load to mimic real world outages](https://github.com/krkn-chaos/krkn/issues/424)
+- [x] [Ability to run multiple chaos scenarios in parallel under load to mimic real world outages](https://github.com/krkn-chaos/krkn/issues/424)
 - [x] [Centralized storage for chaos experiments artifacts](https://github.com/krkn-chaos/krkn/issues/423)
-- [ ] [Support for causing DNS outages](https://github.com/krkn-chaos/krkn/issues/394)
+- [x] [Support for causing DNS outages](https://github.com/krkn-chaos/krkn/issues/394)
 - [x] [Chaos recommender](https://github.com/krkn-chaos/krkn/tree/main/utils/chaos-recommender) to suggest scenarios having probability of impacting the service under test using profiling results
-- [] Chaos AI integration to improve test coverage while reducing fault space to save costs and execution time
+- [x] Chaos AI integration to improve test coverage while reducing fault space to save costs and execution time [krkn-chaos-ai](https://github.com/krkn-chaos/krkn-chaos-ai)
 - [x] [Support for pod level network traffic shaping](https://github.com/krkn-chaos/krkn/issues/393)
 - [ ] [Ability to visualize the metrics that are being captured by Kraken and stored in Elasticsearch](https://github.com/krkn-chaos/krkn/issues/124)
 - [x] Support for running all the scenarios of Kraken on Kubernetes distribution - see https://github.com/krkn-chaos/krkn/issues/185, https://github.com/redhat-chaos/krkn/issues/186
@@ -14,3 +14,7 @@ Following are a list of enhancements that we are planning to work on adding supp
 - [x] [Switch documentation references to Kubernetes](https://github.com/krkn-chaos/krkn/issues/495)
 - [x] [OCP and Kubernetes functionalities segregation](https://github.com/krkn-chaos/krkn/issues/497)
 - [x] [Krknctl - client for running Krkn scenarios with ease](https://github.com/krkn-chaos/krknctl)
 - [x] [AI Chat bot to help get started with Krkn and commands](https://github.com/krkn-chaos/krkn-lightspeed)
 - [ ] [Ability to roll back cluster to original state if chaos fails](https://github.com/krkn-chaos/krkn/issues/804)
 - [ ] Add recovery time metrics to each scenario for each better regression analysis
 - [ ] [Add resiliency scoring to chaos scenarios ran on cluster](https://github.com/krkn-chaos/krkn/issues/125)
@@ -57,9 +57,7 @@ cerberus:
     check_applicaton_routes: False # When enabled will look for application unavailability using the routes specified in the cerberus config and fails the run

 performance_monitoring:
-    deploy_dashboards: False # Install a mutable grafana and load the performance dashboards. Enable this only when running on OpenShift
-    repo: "https://github.com/cloud-bulldozer/performance-dashboards.git"
     prometheus_url: '' # The prometheus url/route is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes.
     prometheus_bearer_token: # The bearer token is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes. This is needed to authenticate with prometheus.
     uuid: # uuid for the run is generated by default if not set
     enable_alerts: False # Runs the queries specified in the alert profile and displays the info or exits 1 when severity=error
@@ -16,8 +16,6 @@ cerberus:
     check_applicaton_routes: False # When enabled will look for application unavailability using the routes specified in the cerberus config and fails the run

 performance_monitoring:
-    deploy_dashboards: False # Install a mutable grafana and load the performance dashboards. Enable this only when running on OpenShift
-    repo: "https://github.com/cloud-bulldozer/performance-dashboards.git"
     prometheus_url: # The prometheus url/route is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes.
     prometheus_bearer_token: # The bearer token is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes. This is needed to authenticate with prometheus.
     uuid: # uuid for the run is generated by default if not set
@@ -17,8 +17,6 @@ cerberus:
     check_applicaton_routes: False # When enabled will look for application unavailability using the routes specified in the cerberus config and fails the run

 performance_monitoring:
-    deploy_dashboards: False # Install a mutable grafana and load the performance dashboards. Enable this only when running on OpenShift
-    repo: "https://github.com/cloud-bulldozer/performance-dashboards.git"
     prometheus_url: # The prometheus url/route is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes.
     prometheus_bearer_token: # The bearer token is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes. This is needed to authenticate with prometheus.
     uuid: # uuid for the run is generated by default if not set
@@ -10,7 +10,7 @@ RUN go mod edit -go 1.23.1 &&\
     go get github.com/docker/docker@v25.0.6&&\
     go get github.com/opencontainers/runc@v1.1.14&&\
     go get github.com/go-git/go-git/v5@v5.13.0&&\
-    go get golang.org/x/net@v0.36.0&&\
+    go get golang.org/x/net@v0.38.0&&\
     go get github.com/containerd/containerd@v1.7.27&&\
     go get golang.org/x/oauth2@v0.27.0&&\
     go get golang.org/x/crypto@v0.35.0&&\
@@ -47,7 +47,7 @@ RUN if [ -n "$PR_NUMBER" ]; then git fetch origin pull/${PR_NUMBER}/head:pr-${PR
 RUN if [ -n "$TAG" ]; then git checkout "$TAG";fi

 RUN python3.9 -m ensurepip --upgrade --default-pip
-RUN python3.9 -m pip install --upgrade pip setuptools==70.0.0
+RUN python3.9 -m pip install --upgrade pip setuptools==78.1.1
 RUN pip3.9 install -r requirements.txt
 RUN pip3.9 install jsonschema
@@ -19,7 +19,6 @@ def invoke_no_exit(command, timeout=None):
     output = ""
     try:
         output = subprocess.check_output(command, shell=True, universal_newlines=True, timeout=timeout)
-        logging.info("output " + str(output))
     except Exception as e:
         logging.error("Failed to run %s, error: %s" % (command, e))
         return str(e)
@@ -1,28 +0,0 @@
-import subprocess
-import logging
-import git
-import sys
-
-
-# Installs a mutable grafana on the Kubernetes/OpenShift cluster and loads the performance dashboards
-def setup(repo, distribution):
-    if distribution == "kubernetes":
-        command = "cd performance-dashboards/dittybopper && ./k8s-deploy.sh"
-    elif distribution == "openshift":
-        command = "cd performance-dashboards/dittybopper && ./deploy.sh"
-    else:
-        logging.error("Provided distribution: %s is not supported" % (distribution))
-        sys.exit(1)
-    delete_repo = "rm -rf performance-dashboards || exit 0"
-    logging.info(
-        "Cloning, installing mutable grafana on the cluster and loading the dashboards"
-    )
-    try:
-        # delete repo to clone the latest copy if exists
-        subprocess.run(delete_repo, shell=True, universal_newlines=True, timeout=45)
-        # clone the repo
-        git.Repo.clone_from(repo, "performance-dashboards")
-        # deploy performance dashboards
-        subprocess.run(command, shell=True, universal_newlines=True)
-    except Exception as e:
-        logging.error("Failed to install performance-dashboards, error: %s" % (e))
@@ -1,6 +1,5 @@
 from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
 from krkn.scenario_plugins.native.plugins import PLUGINS
 from krkn_lib.k8s.pods_monitor_pool import PodsMonitorPool
 from krkn_lib.models.telemetry import ScenarioTelemetry
 from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
 from typing import Any
@@ -28,7 +27,6 @@ class NativeScenarioPlugin(AbstractScenarioPlugin):

         except Exception as e:
             logging.error("NativeScenarioPlugin exiting due to Exception %s" % e)
             pool.cancel()
             return 1
         else:
             return 0
@@ -28,6 +28,14 @@ class NetworkScenarioConfig:
         },
     )

+    image: typing.Annotated[str, validation.min(1)]= field(
+        default="quay.io/krkn-chaos/krkn:tools",
+        metadata={
+            "name": "Image",
+            "description": "Image of krkn tools to run"
+        }
+    )
+
     label_selector: typing.Annotated[
         typing.Optional[str], validation.required_if_not("node_interface_name")
     ] = field(
@@ -142,7 +150,7 @@ class NetworkScenarioErrorOutput:
     )


-def get_default_interface(node: str, pod_template, cli: CoreV1Api) -> str:
+def get_default_interface(node: str, pod_template, cli: CoreV1Api, image: str) -> str:
     """
     Function that returns a random interface from a node

@@ -161,7 +169,7 @@ def get_default_interface(node: str, pod_template, cli: CoreV1Api) -> str:
         Default interface (string) belonging to the node
     """

-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     logging.info("Creating pod to query interface on node %s" % node)
     kube_helper.create_pod(cli, pod_body, "default", 300)

@@ -189,7 +197,7 @@ def get_default_interface(node: str, pod_template, cli: CoreV1Api) -> str:


 def verify_interface(
-    input_interface_list: typing.List[str], node: str, pod_template, cli: CoreV1Api
+    input_interface_list: typing.List[str], node: str, pod_template, cli: CoreV1Api, image: str
 ) -> typing.List[str]:
     """
     Function that verifies whether a list of interfaces is present in the node.
@@ -212,7 +220,7 @@ def verify_interface(
     Returns:
         The interface list for the node
     """
-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     logging.info("Creating pod to query interface on node %s" % node)
     kube_helper.create_pod(cli, pod_body, "default", 300)
     try:
@@ -268,6 +276,7 @@ def get_node_interfaces(
     instance_count: int,
     pod_template,
     cli: CoreV1Api,
+    image: str
 ) -> typing.Dict[str, typing.List[str]]:
     """
     Function that is used to process the input dictionary with the nodes and
@@ -309,7 +318,7 @@ def get_node_interfaces(
         nodes = kube_helper.get_node(None, label_selector, instance_count, cli)
         node_interface_dict = {}
         for node in nodes:
-            node_interface_dict[node] = get_default_interface(node, pod_template, cli)
+            node_interface_dict[node] = get_default_interface(node, pod_template, cli, image)
     else:
         node_name_list = node_interface_dict.keys()
         filtered_node_list = []
@@ -321,7 +330,7 @@ def get_node_interfaces(

         for node in filtered_node_list:
             node_interface_dict[node] = verify_interface(
-                node_interface_dict[node], node, pod_template, cli
+                node_interface_dict[node], node, pod_template, cli, image
             )

     return node_interface_dict
@@ -337,6 +346,7 @@ def apply_ingress_filter(
     cli: CoreV1Api,
     create_interfaces: bool = True,
     param_selector: str = "all",
+    image:str = "quay.io/krkn-chaos/krkn:tools",
 ) -> str:
     """
     Function that applies the filters to shape incoming traffic to
@@ -382,14 +392,14 @@ def apply_ingress_filter(
         network_params = {param_selector: cfg.network_params[param_selector]}

     if create_interfaces:
-        create_virtual_interfaces(cli, interface_list, node, pod_template)
+        create_virtual_interfaces(cli, interface_list, node, pod_template, image)

     exec_cmd = get_ingress_cmd(
         interface_list, network_params, duration=cfg.test_duration
     )
     logging.info("Executing %s on node %s" % (exec_cmd, node))
     job_body = yaml.safe_load(
-        job_template.render(jobname=str(hash(node))[:5], nodename=node, cmd=exec_cmd)
+        job_template.render(jobname=str(hash(node))[:5], nodename=node, image=image, cmd=exec_cmd)
     )
     api_response = kube_helper.create_job(batch_cli, job_body)

@@ -400,7 +410,7 @@ def apply_ingress_filter(


 def create_virtual_interfaces(
-    cli: CoreV1Api, interface_list: typing.List[str], node: str, pod_template
+    cli: CoreV1Api, interface_list: typing.List[str], node: str, pod_template, image: str
 ) -> None:
     """
     Function that creates a privileged pod and uses it to create
@@ -421,7 +431,7 @@ def create_virtual_interfaces(
         - The YAML template used to instantiate a pod to create
           virtual interfaces on the node
     """
-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     kube_helper.create_pod(cli, pod_body, "default", 300)
     logging.info(
         "Creating {0} virtual interfaces on node {1} using a pod".format(
@@ -434,7 +444,7 @@ def create_virtual_interfaces(


 def delete_virtual_interfaces(
-    cli: CoreV1Api, node_list: typing.List[str], pod_template
+    cli: CoreV1Api, node_list: typing.List[str], pod_template, image: str
 ):
     """
     Function that creates a privileged pod and uses it to delete all
@@ -457,7 +467,7 @@ def delete_virtual_interfaces(
     """

     for node in node_list:
-        pod_body = yaml.safe_load(pod_template.render(nodename=node))
+        pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
         kube_helper.create_pod(cli, pod_body, "default", 300)
         logging.info("Deleting all virtual interfaces on node {0}".format(node))
         delete_ifb(cli, "modtools")
@@ -700,7 +710,7 @@ def network_chaos(
     pod_interface_template = env.get_template("pod_interface.j2")
     pod_module_template = env.get_template("pod_module.j2")
     cli, batch_cli = kube_helper.setup_kubernetes(cfg.kubeconfig_path)
-
+    test_image = cfg.image
     logging.info("Starting Ingress Network Chaos")
     try:
         node_interface_dict = get_node_interfaces(
@@ -709,6 +719,7 @@ def network_chaos(
             cfg.instance_count,
             pod_interface_template,
             cli,
+            test_image
         )
     except Exception:
         return "error", NetworkScenarioErrorOutput(format_exc())
@@ -726,6 +737,7 @@ def network_chaos(
                         job_template,
                         batch_cli,
                         cli,
+                        test_image
                     )
                 )
             logging.info("Waiting for parallel job to finish")
@@ -746,6 +758,7 @@ def network_chaos(
                             cli,
                             create_interfaces=create_interfaces,
                             param_selector=param,
+                            image=test_image
                         )
                     )
                 logging.info("Waiting for serial job to finish")
@@ -772,6 +785,6 @@ def network_chaos(
         logging.error("Ingress Network Chaos exiting due to Exception - %s" % e)
         return "error", NetworkScenarioErrorOutput(format_exc())
     finally:
-        delete_virtual_interfaces(cli, node_interface_dict.keys(), pod_module_template)
+        delete_virtual_interfaces(cli, node_interface_dict.keys(), pod_module_template, test_image)
         logging.info("Deleting jobs(if any)")
         delete_jobs(cli, batch_cli, job_list[:])
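Taken together, these changes thread a configurable tools image through the ingress node network chaos module in place of the hardcoded docker.io/fedora/tools. A scenario input could then pin the image explicitly; the snippet below is a hypothetical sketch using field names visible in NetworkScenarioConfig above (values are examples only):

    label_selector: "node-role.kubernetes.io/worker"   # example node selector
    instance_count: 1
    test_duration: 120                                 # example duration in seconds
    image: quay.io/krkn-chaos/krkn:tools               # new field; this is also the default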
@@ -9,7 +9,7 @@ spec:
       hostNetwork: true
       containers:
         - name: networkchaos
-          image: docker.io/fedora/tools
+          image: {{image}}
           command: ["/bin/sh", "-c", "{{cmd}}"]
           securityContext:
             privileged: true
@@ -22,4 +22,4 @@ spec:
         hostPath:
           path: /lib/modules
       restartPolicy: Never
-  backoffLimit: 0
+  backoffLimit: 0
@@ -7,7 +7,7 @@ spec:
   nodeName: {{nodename}}
   containers:
     - name: fedtools
-      image: docker.io/fedora/tools
+      image: {{image}}
      command:
        - /bin/sh
        - -c
@@ -6,7 +6,7 @@ spec:
   nodeName: {{nodename}}
   containers:
     - name: modtools
-      image: docker.io/fedora/tools
+      image: {{image}}
      imagePullPolicy: IfNotPresent
      command:
        - /bin/sh
@@ -27,4 +27,4 @@ spec:
   hostNetwork: true
   hostIPC: true
   hostPID: true
-  restartPolicy: Never
+  restartPolicy: Never
@@ -9,7 +9,7 @@ spec:
   hostNetwork: true
   containers:
     - name: networkchaos
-      image: docker.io/fedora/tools
+      image: {{image}}
      command: ["chroot", "/host", "/bin/sh", "-c", "{{cmd}}"]
      securityContext:
        privileged: true
@@ -6,7 +6,7 @@ spec:
   nodeName: {{nodename}}
   containers:
     - name: modtools
-      image: docker.io/fedora/tools
+      image: {{image}}
      imagePullPolicy: IfNotPresent
      command:
        - /bin/sh
@@ -27,4 +27,4 @@ spec:
   hostNetwork: true
   hostIPC: true
   hostPID: true
-  restartPolicy: Never
+  restartPolicy: Never
@@ -192,6 +192,7 @@ def apply_outage_policy(
     duration: str,
     bridge_name: str,
     kubecli: KrknKubernetes,
+    image: str
 ) -> typing.List[str]:
     """
     Function that applies filters(ingress or egress) to block traffic.
@@ -223,6 +224,8 @@ def apply_outage_policy(
         batch_cli (BatchV1Api)
             - Object to interact with Kubernetes Python client's BatchV1Api API

+        image (string)
+            - Image of network chaos tool
     Returns:
         The name of the job created that executes the commands on a node
         for ingress chaos scenario
@@ -239,7 +242,7 @@ def apply_outage_policy(
         br = "br-int"
         table = 8
     for node, ips in node_dict.items():
-        while len(check_cookie(node, pod_template, br, cookie, kubecli)) > 2 or cookie in cookie_list:
+        while len(check_cookie(node, pod_template, br, cookie, kubecli, image)) > 2 or cookie in cookie_list:
             cookie = random.randint(100, 10000)
         exec_cmd = ""
         for ip in ips:
@@ -257,6 +260,7 @@ def apply_outage_policy(
             job_template.render(
                 jobname=str(hash(node))[:5] + str(random.randint(0, 10000)),
                 nodename=node,
+                image=image,
                 cmd=exec_cmd,
             )
         )
@@ -281,6 +285,7 @@ def apply_ingress_policy(
     bridge_name: str,
     kubecli: KrknKubernetes,
     test_execution: str,
+    image: str,
 ) -> typing.List[str]:
     """
     Function that applies ingress traffic shaping to pod interface.
@@ -327,22 +332,23 @@ def apply_ingress_policy(
         job_list = []
         yml_list = []

-        create_virtual_interfaces(kubecli, len(ips), node, pod_template)
+        create_virtual_interfaces(kubecli, len(ips), node, pod_template, image)

         for count, pod_ip in enumerate(set(ips)):
-            pod_inf = get_pod_interface(node, pod_ip, pod_template, bridge_name, kubecli)
+            pod_inf = get_pod_interface(node, pod_ip, pod_template, bridge_name, kubecli, image)
             exec_cmd = get_ingress_cmd(
                 test_execution, pod_inf, mod, count, network_params, duration
             )
             logging.info("Executing %s on pod %s in node %s" % (exec_cmd, pod_ip, node))
             job_body = yaml.safe_load(
-                job_template.render(jobname=mod + str(pod_ip), nodename=node, cmd=exec_cmd)
+                job_template.render(jobname=mod + str(pod_ip), nodename=node, image=image, cmd=exec_cmd)
             )
             yml_list.append(job_body)
             if pod_ip == node:
                 break

         for job_body in yml_list:
             print('jbo body' + str(job_body))
             api_response = kubecli.create_job(job_body)
             if api_response is None:
                 raise Exception("Error creating job")
@@ -362,6 +368,7 @@ def apply_net_policy(
     bridge_name: str,
     kubecli: KrknKubernetes,
     test_execution: str,
+    image: str,
 ) -> typing.List[str]:
     """
     Function that applies egress traffic shaping to pod interface.
@@ -415,7 +422,7 @@ def apply_net_policy(
         )
         logging.info("Executing %s on pod %s in node %s" % (exec_cmd, pod_ip, node))
         job_body = yaml.safe_load(
-            job_template.render(jobname=mod + str(pod_ip), nodename=node, cmd=exec_cmd)
+            job_template.render(jobname=mod + str(pod_ip), nodename=node, image=image, cmd=exec_cmd)
         )
         yml_list.append(job_body)

@@ -530,7 +537,7 @@ def get_egress_cmd(


 def create_virtual_interfaces(
-    kubecli: KrknKubernetes, nummber: int, node: str, pod_template
+    kubecli: KrknKubernetes, nummber: int, node: str, pod_template, image: str,
 ) -> None:
     """
     Function that creates a privileged pod and uses it to create
@@ -550,8 +557,11 @@ def create_virtual_interfaces(
         pod_template (jinja2.environment.Template))
             - The YAML template used to instantiate a pod to create
               virtual interfaces on the node
+
+        image (string)
+            - Image of network chaos tool
     """
-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     kubecli.create_pod(pod_body, "default", 300)
     logging.info(
         "Creating {0} virtual interfaces on node {1} using a pod".format(nummber, node)
@@ -562,7 +572,7 @@ def create_virtual_interfaces(


 def delete_virtual_interfaces(
-    kubecli: KrknKubernetes, node_list: typing.List[str], pod_template
+    kubecli: KrknKubernetes, node_list: typing.List[str], pod_template, image: str,
 ):
     """
     Function that creates a privileged pod and uses it to delete all
@@ -582,10 +592,13 @@ def delete_virtual_interfaces(
         pod_template (jinja2.environment.Template))
             - The YAML template used to instantiate a pod to delete
               virtual interfaces on the node
+
+        image (string)
+            - Image of network chaos tool
     """

     for node in node_list:
-        pod_body = yaml.safe_load(pod_template.render(nodename=node))
+        pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
         kubecli.create_pod(pod_body, "default", 300)
         logging.info("Deleting all virtual interfaces on node {0}".format(node))
         delete_ifb(kubecli, "modtools")
@@ -619,7 +632,7 @@ def delete_ifb(kubecli: KrknKubernetes, pod_name: str):
     kubecli.exec_cmd_in_pod(exec_command, pod_name, "default", base_command="chroot")


-def list_bridges(node: str, pod_template, kubecli: KrknKubernetes) -> typing.List[str]:
+def list_bridges(node: str, pod_template, kubecli: KrknKubernetes, image: str) -> typing.List[str]:
     """
     Function that returns a list of bridges on the node

@@ -634,11 +647,13 @@ def list_bridges(node: str, pod_template, kubecli: KrknKubernetes) -> typing.Lis
         kubecli (KrknKubernetes)
             - Object to interact with Kubernetes Python client

+        image (string)
+            - Image of network chaos tool
     Returns:
         List of bridges on the node.
     """

-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     logging.info("Creating pod to query bridge on node %s" % node)
     kubecli.create_pod(pod_body, "default", 300)

@@ -662,7 +677,7 @@ def list_bridges(node: str, pod_template, kubecli: KrknKubernetes) -> typing.Lis


 def check_cookie(
-    node: str, pod_template, br_name, cookie, kubecli: KrknKubernetes
+    node: str, pod_template, br_name, cookie, kubecli: KrknKubernetes, image: str
 ) -> str:
     """
     Function to check for matching flow rules
@@ -684,11 +699,13 @@ def check_cookie(
         cli (CoreV1Api)
             - Object to interact with Kubernetes Python client's CoreV1 API

+        image (string)
+            - Image of network chaos tool
     Returns
         Returns the matching flow rules
     """

-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     logging.info("Creating pod to query duplicate rules on node %s" % node)
     kubecli.create_pod(pod_body, "default", 300)

@@ -721,7 +738,7 @@ def check_cookie(


 def get_pod_interface(
-    node: str, ip: str, pod_template, br_name, kubecli: KrknKubernetes
+    node: str, ip: str, pod_template, br_name, kubecli: KrknKubernetes, image: str = "quay.io/krkn-chaos/krkn:tools"
 ) -> str:
     """
     Function to query the pod interface on a node
@@ -747,7 +764,7 @@ def get_pod_interface(
         Returns the pod interface name
     """

-    pod_body = yaml.safe_load(pod_template.render(nodename=node))
+    pod_body = yaml.safe_load(pod_template.render(nodename=node, image=image))
     logging.info("Creating pod to query pod interface on node %s" % node)
     kubecli.create_pod(pod_body, "default", 300)
     inf = ""
@@ -788,7 +805,8 @@ def get_pod_interface(


 def check_bridge_interface(
-    node_name: str, pod_template, bridge_name: str, kubecli: KrknKubernetes
+    node_name: str, pod_template, bridge_name: str, kubecli: KrknKubernetes,
+    image: str = "quay.io/krkn-chaos/krkn:tools"
 ) -> bool:
     """
     Function is used to check if the required OVS or OVN bridge is found in
@@ -814,7 +832,7 @@ def check_bridge_interface(
     nodes = kubecli.get_node(node_name, None, 1)
     node_bridge = []
     for node in nodes:
-        node_bridge = list_bridges(node, pod_template, kubecli)
+        node_bridge = list_bridges(node, pod_template, kubecli, image=image)
     if bridge_name not in node_bridge:
         raise Exception(f"OVS bridge {bridge_name} not found on the node ")

@@ -835,6 +853,14 @@ class InputParams:
         }
     )

+    image: typing.Annotated[str, validation.min(1)]= field(
+        default="quay.io/krkn-chaos/krkn:tools",
+        metadata={
+            "name": "Image",
+            "description": "Image of krkn tools to run"
+        }
+    )
+
     direction: typing.List[str] = field(
         default_factory=lambda: ["ingress", "egress"],
         metadata={
@@ -1004,6 +1030,7 @@ def pod_outage(
     test_namespace = params.namespace
     test_label_selector = params.label_selector
     test_pod_name = params.pod_name
+    test_image = params.image
     filter_dict = {}
     job_list = []
     publish = False
@@ -1040,7 +1067,7 @@ def pod_outage(
             label_set.add("%s=%s" % (key, value))

         check_bridge_interface(
-            list(node_dict.keys())[0], pod_module_template, br_name, kubecli
+            list(node_dict.keys())[0], pod_module_template, br_name, kubecli, test_image
         )

         for direction, ports in filter_dict.items():
@@ -1055,6 +1082,7 @@ def pod_outage(
                     params.test_duration,
                     br_name,
                     kubecli,
+                    test_image
                 )
             )

@@ -1095,7 +1123,16 @@ class EgressParams:
         }
     )

+    image: typing.Annotated[str, validation.min(1)]= field(
+        default="quay.io/krkn-chaos/krkn:tools",
+        metadata={
+            "name": "Image",
+            "description": "Image of krkn tools to run"
+        }
+    )
+
     network_params: typing.Dict[str, str] = field(
         default=None,
         metadata={
             "name": "Network Parameters",
             "description": "The network filters that are applied on the interface. "
@@ -1254,6 +1291,7 @@ def pod_egress_shaping(
     test_namespace = params.namespace
     test_label_selector = params.label_selector
     test_pod_name = params.pod_name
+    test_image = params.image
     job_list = []
     publish = False

@@ -1287,7 +1325,7 @@ def pod_egress_shaping(
             label_set.add("%s=%s" % (key, value))

         check_bridge_interface(
-            list(node_dict.keys())[0], pod_module_template, br_name, kubecli
+            list(node_dict.keys())[0], pod_module_template, br_name, kubecli, test_image
         )

         for mod in mod_lst:
@@ -1304,6 +1342,7 @@ def pod_egress_shaping(
                         br_name,
                         kubecli,
                         params.execution_type,
+                        test_image
                     )
                 )
             if params.execution_type == "serial":
@@ -1357,8 +1396,17 @@ class IngressParams:
         "for details.",
         }
     )

+    image: typing.Annotated[str, validation.min(1)] = field(
+        default="quay.io/krkn-chaos/krkn:tools",
+        metadata={
+            "name": "Image",
+            "description": "Image to use for injecting network chaos",
+        }
+    )
+
     network_params: typing.Dict[str, str] = field(
         default=None,
         metadata={
             "name": "Network Parameters",
             "description": "The network filters that are applied on the interface. "
@@ -1518,6 +1566,7 @@ def pod_ingress_shaping(
     test_namespace = params.namespace
     test_label_selector = params.label_selector
     test_pod_name = params.pod_name
+    test_image = params.image
     job_list = []
     publish = False

@@ -1551,7 +1600,7 @@ def pod_ingress_shaping(
             label_set.add("%s=%s" % (key, value))

         check_bridge_interface(
-            list(node_dict.keys())[0], pod_module_template, br_name, kubecli
+            list(node_dict.keys())[0], pod_module_template, br_name, kubecli, test_image
         )

         for mod in mod_lst:
@@ -1568,6 +1617,7 @@ def pod_ingress_shaping(
                         br_name,
                         kubecli,
                         params.execution_type,
+                        image=test_image
                     )
                 )
             if params.execution_type == "serial":
@@ -1604,6 +1654,6 @@ def pod_ingress_shaping(
         logging.error("Pod network Shaping scenario exiting due to Exception - %s" % e)
         return "error", PodIngressNetShapingErrorOutput(format_exc())
     finally:
-        delete_virtual_interfaces(kubecli, node_dict.keys(), pod_module_template)
+        delete_virtual_interfaces(kubecli, node_dict.keys(), pod_module_template, test_image)
         logging.info("Deleting jobs(if any)")
         delete_jobs(kubecli, job_list[:])
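The pod-level scenarios gain the same knob: pod_outage, pod_egress_shaping, and pod_ingress_shaping all read params.image and fall back to quay.io/krkn-chaos/krkn:tools. A hypothetical ingress-shaping input based on the params classes above might look like this (example values, not a canonical config):

    namespace: default
    label_selector: "app=my-app"          # example pod selector
    execution_type: serial
    network_params:
      latency: 50ms                       # example filter; see the field description above
    image: quay.io/krkn-chaos/krkn:tools  # new field; this is also the default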
@@ -9,7 +9,7 @@ spec:
      hostNetwork: true
      containers:
        - name: networkchaos
-          image: docker.io/fedora/tools
+          image: {{image}}
          command: ["/bin/sh", "-c", "{{cmd}}"]
          securityContext:
            privileged: true
@@ -42,7 +42,9 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
             test_egress = get_yaml_item_value(
                 test_dict, "egress", {"bandwidth": "100mbit"}
             )
-
+            test_image = get_yaml_item_value(
+                test_dict, "image", "quay.io/krkn-chaos/krkn:tools"
+            )
             if test_node:
                 node_name_list = test_node.split(",")
                 nodelst = common_node_functions.get_node_by_name(node_name_list, lib_telemetry.get_lib_kubernetes())
@@ -60,6 +62,7 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
                     nodelst,
                     pod_template,
                     lib_telemetry.get_lib_kubernetes(),
+                    image=test_image
                 )
             joblst = []
             egress_lst = [i for i in param_lst if i in test_egress]
@@ -71,6 +74,7 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
                         "execution": test_execution,
                         "instance_count": test_instance_count,
                         "egress": test_egress,
+                        "image": test_image
                     }
                 }
                 logging.info(
@@ -94,6 +98,7 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
                             jobname=i + str(hash(node))[:5],
                             nodename=node,
                             cmd=exec_cmd,
+                            image=test_image
                         )
                     )
                     joblst.append(job_body["metadata"]["name"])
@@ -153,10 +158,10 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
         return 0

     def verify_interface(
-        self, test_interface, nodelst, template, kubecli: KrknKubernetes
+        self, test_interface, nodelst, template, kubecli: KrknKubernetes, image: str
     ):
         pod_index = random.randint(0, len(nodelst) - 1)
-        pod_body = yaml.safe_load(template.render(nodename=nodelst[pod_index]))
+        pod_body = yaml.safe_load(template.render(nodename=nodelst[pod_index], image=image))
         logging.info("Creating pod to query interface on node %s" % nodelst[pod_index])
         kubecli.create_pod(pod_body, "default", 300)
         try:
@@ -177,7 +182,7 @@ class NetworkChaosScenarioPlugin(AbstractScenarioPlugin):
                 raise RuntimeError()
             return test_interface
         finally:
-            logging.info("Deleteing pod to query interface on node")
+            logging.info("Deleting pod to query interface on node")
             kubecli.delete_pod("fedtools", "default")

     # krkn_lib
@@ -7,7 +7,7 @@ spec:
   nodeName: {{nodename}}
   containers:
     - name: fedtools
-      image: docker.io/fedora/tools
+      image: {{image}}
      command:
        - /bin/sh
        - -c
@@ -14,6 +14,7 @@ class BaseNetworkChaosConfig:
     wait_duration: int
     test_duration: int
     label_selector: str
+    service_account: str
     instance_count: int
     execution: str
     namespace: str
@@ -4,6 +4,9 @@ metadata:
   name: {{pod_name}}
   namespace: {{namespace}}
 spec:
+  {% if service_account %}
+  serviceAccountName: {{ service_account }}
+  {%endif%}
   {% if host_network %}
   hostNetwork: true
   {%endif%}
@@ -87,7 +87,8 @@ def deploy_network_filter_pod(
                 target=target_node,
                 container_name=container_name,
                 workload_image=config.image,
-                taints=tolerations
+                taints=tolerations,
+                service_account=config.service_account
             )
         )
@@ -84,7 +84,7 @@ class abstract_node_scenarios:
             )
             logging.error("stop_kubelet_scenario injection failed!")
             raise e
-        self.add_affected_node(affected_node)
+        self.affected_nodes_status.affected_nodes.append(affected_node)

     # Node scenario to stop and start the kubelet
     def stop_start_kubelet_scenario(self, instance_kill_count, node, timeout):
@@ -106,7 +106,6 @@ class abstract_node_scenarios:
                 + node
                 + " -- chroot /host systemctl restart kubelet &"
             )
-            nodeaction.wait_for_not_ready_status(node, timeout, self.kubecli, affected_node)
             nodeaction.wait_for_ready_status(node, timeout, self.kubecli,affected_node)
             logging.info("The kubelet of the node %s has been restarted" % (node))
             logging.info("restart_kubelet_scenario has been successfuly injected!")
@@ -117,7 +116,7 @@ class abstract_node_scenarios:
             )
             logging.error("restart_kubelet_scenario injection failed!")
             raise e
-        self.add_affected_node(affected_node)
+        self.affected_nodes_status.affected_nodes.append(affected_node)

     # Node scenario to crash the node
     def node_crash_scenario(self, instance_kill_count, node, timeout):
@@ -125,7 +124,7 @@ class abstract_node_scenarios:
         try:
             logging.info("Starting node_crash_scenario injection")
             logging.info("Crashing the node %s" % (node))
-            runcommand.invoke(
+            runcommand.run(
                 "oc debug node/" + node + " -- chroot /host "
                 "dd if=/dev/urandom of=/proc/sysrq-trigger"
             )
@@ -136,7 +135,7 @@ class abstract_node_scenarios:
                 "Test Failed" % (e)
             )
             logging.error("node_crash_scenario injection failed!")
-            raise e
+            return 1

     # Node scenario to check service status on helper node
     def node_service_status(self, node, service, ssh_private_key, timeout):
@@ -38,3 +38,4 @@ zope.interface==5.4.0

 git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.0.0
 cryptography>=42.0.4 # not directly required, pinned by Snyk to avoid a vulnerability
+protobuf>=4.25.8 # not directly required, pinned by Snyk to avoid a vulnerability
@@ -16,7 +16,6 @@ from krkn_lib.elastic.krkn_elastic import KrknElastic
 from krkn_lib.models.elastic import ElasticChaosRunTelemetry
 from krkn_lib.models.krkn import ChaosRunOutput, ChaosRunAlertSummary
 from krkn_lib.prometheus.krkn_prometheus import KrknPrometheus
-import krkn.performance_dashboards.setup as performance_dashboards
 import krkn.prometheus as prometheus_plugin
 import server as server
 from krkn_lib.k8s import KrknKubernetes
@@ -69,14 +68,6 @@ def main(cfg) -> int:
     wait_duration = get_yaml_item_value(config["tunings"], "wait_duration", 60)
     iterations = get_yaml_item_value(config["tunings"], "iterations", 1)
     daemon_mode = get_yaml_item_value(config["tunings"], "daemon_mode", False)
-    deploy_performance_dashboards = get_yaml_item_value(
-        config["performance_monitoring"], "deploy_dashboards", False
-    )
-    dashboard_repo = get_yaml_item_value(
-        config["performance_monitoring"],
-        "repo",
-        "https://github.com/cloud-bulldozer/performance-dashboards.git",
-    )

     prometheus_url = config["performance_monitoring"].get("prometheus_url")
     prometheus_bearer_token = config["performance_monitoring"].get(
@@ -240,10 +231,6 @@ def main(cfg) -> int:

     logging.info("Server URL: %s" % kubecli.get_host())

-    # Deploy performance dashboards
-    if deploy_performance_dashboards:
-        performance_dashboards.setup(dashboard_repo, distribution)
-
     # Initialize the start iteration to 0
     iteration = 0
@@ -3,6 +3,7 @@
  wait_duration: 1
  test_duration: 10
  label_selector: "<node_selector>"
+ service_account: ""
  taints: [] # example ["node-role.kubernetes.io/master:NoSchedule"]
  namespace: 'default'
  instance_count: 1
@@ -3,6 +3,7 @@
  wait_duration: 1
  test_duration: 60
  label_selector: "<pod_selector>"
+ service_account: ""
  taints: [] # example ["node-role.kubernetes.io/master:NoSchedule"]
  namespace: 'default'
  instance_count: 1
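The new service_account key pairs with the serviceAccountName block in the pod template above: when set, the injected workload pod runs under that ServiceAccount instead of the namespace default. A hypothetical override (the ServiceAccount name is illustrative):

    service_account: "network-chaos-sa"   # hypothetical ServiceAccount; "" keeps the default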
File diff suppressed because it is too large