mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-03-16 16:40:42 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f154bcb692 | ||
|
|
60ece4b1b8 | ||
|
|
d660542a40 | ||
|
|
2e651798fa | ||
|
|
f801dfce54 |
16
.github/workflows/tests.yml
vendored
16
.github/workflows/tests.yml
vendored
@@ -68,14 +68,14 @@ jobs:
|
||||
yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml
|
||||
yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml
|
||||
yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml
|
||||
echo "test_app_outages" > ./CI/tests/my_tests
|
||||
echo "test_container" >> ./CI/tests/my_tests
|
||||
echo "test_namespace" >> ./CI/tests/my_tests
|
||||
echo "test_net_chaos" >> ./CI/tests/my_tests
|
||||
echo "test_time" >> ./CI/tests/my_tests
|
||||
echo "test_arca_cpu_hog" >> ./CI/tests/my_tests
|
||||
echo "test_arca_memory_hog" >> ./CI/tests/my_tests
|
||||
echo "test_arca_io_hog" >> ./CI/tests/my_tests
|
||||
echo "test_app_outages" > ./CI/tests/functional_tests
|
||||
echo "test_container" >> ./CI/tests/functional_tests
|
||||
echo "test_namespace" >> ./CI/tests/functional_tests
|
||||
echo "test_net_chaos" >> ./CI/tests/functional_tests
|
||||
echo "test_time" >> ./CI/tests/functional_tests
|
||||
echo "test_arca_cpu_hog" >> ./CI/tests/functional_tests
|
||||
echo "test_arca_memory_hog" >> ./CI/tests/functional_tests
|
||||
echo "test_arca_io_hog" >> ./CI/tests/functional_tests
|
||||
- name: Run Functional tests
|
||||
run: |
|
||||
./CI/run.sh
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -61,7 +61,7 @@ inspect.local.*
|
||||
!CI/config/common_test_config.yaml
|
||||
CI/out/*
|
||||
CI/ci_results
|
||||
CI/scenarios/*node.yaml
|
||||
CI/legacy/*node.yaml
|
||||
CI/results.markdown
|
||||
|
||||
#env
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
## CI Tests
|
||||
|
||||
### First steps
|
||||
Edit [my_tests](tests/my_tests) with tests you want to run
|
||||
Edit [functional_tests](tests/functional_tests) with tests you want to run
|
||||
|
||||
### How to run
|
||||
```./CI/run.sh```
|
||||
@@ -11,7 +11,7 @@ This will run kraken using python, make sure python3 is set up and configured pr
|
||||
|
||||
### Adding a test case
|
||||
|
||||
1. Add in simple scenario yaml file to execute under [../CI/scenarios/](scenarios)
|
||||
1. Add in simple scenario yaml file to execute under [../CI/scenarios/](legacy)
|
||||
|
||||
2. Copy [test_application_outages.sh](tests/test_app_outages.sh) for example on how to get started
|
||||
|
||||
@@ -27,7 +27,7 @@ This will run kraken using python, make sure python3 is set up and configured pr
|
||||
|
||||
e. 15: Make sure name of config in line 14 matches what you pass on this line
|
||||
|
||||
4. Add test name to [my_tests](../CI/tests/my_tests) file
|
||||
4. Add test name to [functional_tests](../CI/tests/functional_tests) file
|
||||
|
||||
a. This will be the name of the file without ".sh"
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ wait_cluster_become_ready() {
|
||||
|
||||
|
||||
|
||||
ci_tests_loc="CI/tests/my_tests"
|
||||
ci_tests_loc="CI/tests/functional_tests"
|
||||
|
||||
echo -e "********* Running Functional Tests Suite *********\n\n"
|
||||
|
||||
@@ -37,7 +37,7 @@ echo '-----------------------|--------|---------' >> $results
|
||||
|
||||
# Run each test
|
||||
failed_tests=()
|
||||
for test_name in `cat CI/tests/my_tests`
|
||||
for test_name in `cat CI/tests/functional_tests`
|
||||
do
|
||||
wait_cluster_become_ready
|
||||
return_value=`./CI/run_test.sh $test_name $results`
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
application_outage: # Scenario to create an outage of an application by blocking traffic
|
||||
duration: 10 # Duration in seconds after which the routes will be accessible
|
||||
namespace: openshift-monitoring # Namespace to target - all application routes will go inaccessible if pod selector is empty
|
||||
pod_selector: {} # Pods to target
|
||||
block: [Ingress, Egress] # It can be Ingress or Egress or Ingress, Egress
|
||||
@@ -1,12 +0,0 @@
|
||||
---
|
||||
deployers:
|
||||
image:
|
||||
connection: {}
|
||||
deployer_name: kubernetes
|
||||
log:
|
||||
level: debug
|
||||
logged_outputs:
|
||||
error:
|
||||
level: error
|
||||
success:
|
||||
level: debug
|
||||
@@ -1,9 +0,0 @@
|
||||
input_list:
|
||||
- cpu_count: 1
|
||||
cpu_load_percentage: 80
|
||||
cpu_method: all
|
||||
duration: 1s
|
||||
kubeconfig: ''
|
||||
namespace: default
|
||||
node_selector:
|
||||
kubernetes.io/hostname: kind-worker2
|
||||
@@ -1,98 +0,0 @@
|
||||
version: v0.2.0
|
||||
input:
|
||||
root: RootObject
|
||||
objects:
|
||||
RootObject:
|
||||
id: input_item
|
||||
properties:
|
||||
kubeconfig:
|
||||
display:
|
||||
description: The complete kubeconfig file as a string
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
namespace:
|
||||
display:
|
||||
description: The namespace where the container will be deployed
|
||||
name: Namespace
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
node_selector:
|
||||
display:
|
||||
description: kubernetes node name where the plugin must be deployed
|
||||
type:
|
||||
type_id: map
|
||||
values:
|
||||
type_id: string
|
||||
keys:
|
||||
type_id: string
|
||||
required: true
|
||||
duration:
|
||||
display:
|
||||
name: duration the scenario expressed in seconds
|
||||
description: stop stress test after T seconds. One can also specify the units of time in
|
||||
seconds, minutes, hours, days or years with the suffix s, m, h, d or y
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
cpu_count:
|
||||
display:
|
||||
description: Number of CPU cores to be used (0 means all)
|
||||
name: number of CPUs
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
cpu_method:
|
||||
display:
|
||||
description: CPU stress method
|
||||
name: fine grained control of which cpu stressors to use (ackermann, cfloat etc.)
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
cpu_load_percentage:
|
||||
display:
|
||||
description: load CPU by percentage
|
||||
name: CPU load
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
|
||||
steps:
|
||||
kubeconfig:
|
||||
plugin:
|
||||
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
|
||||
deployment_type: image
|
||||
input:
|
||||
kubeconfig: !expr $.input.kubeconfig
|
||||
stressng:
|
||||
plugin:
|
||||
src: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
|
||||
deployment_type: image
|
||||
step: workload
|
||||
input:
|
||||
cleanup: "true"
|
||||
StressNGParams:
|
||||
timeout: !expr $.input.duration
|
||||
stressors:
|
||||
- stressor: cpu
|
||||
cpu_count: !expr $.input.cpu_count
|
||||
cpu_method: !expr $.input.cpu_method
|
||||
cpu_load: !expr $.input.cpu_load_percentage
|
||||
deploy:
|
||||
deployer_name: kubernetes
|
||||
connection: !expr $.steps.kubeconfig.outputs.success.connection
|
||||
pod:
|
||||
metadata:
|
||||
namespace: !expr $.input.namespace
|
||||
labels:
|
||||
arcaflow: stressng
|
||||
spec:
|
||||
nodeSelector: !expr $.input.node_selector
|
||||
pluginContainer:
|
||||
imagePullPolicy: Always
|
||||
outputs:
|
||||
success:
|
||||
stressng: !expr $.steps.stressng.outputs.success
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
version: v0.2.0
|
||||
input:
|
||||
root: RootObject
|
||||
objects:
|
||||
RootObject:
|
||||
id: RootObject
|
||||
properties:
|
||||
input_list:
|
||||
type:
|
||||
type_id: list
|
||||
items:
|
||||
id: input_item
|
||||
type_id: object
|
||||
properties:
|
||||
kubeconfig:
|
||||
display:
|
||||
description: The complete kubeconfig file as a string
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
namespace:
|
||||
display:
|
||||
description: The namespace where the container will be deployed
|
||||
name: Namespace
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
node_selector:
|
||||
display:
|
||||
description: kubernetes node name where the plugin must be deployed
|
||||
type:
|
||||
type_id: map
|
||||
values:
|
||||
type_id: string
|
||||
keys:
|
||||
type_id: string
|
||||
required: true
|
||||
duration:
|
||||
display:
|
||||
name: duration the scenario expressed in seconds
|
||||
description: stop stress test after T seconds. One can also specify the units of time in
|
||||
seconds, minutes, hours, days or years with the suffix s, m, h, d or y
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
cpu_count:
|
||||
display:
|
||||
description: Number of CPU cores to be used (0 means all)
|
||||
name: number of CPUs
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
cpu_method:
|
||||
display:
|
||||
description: CPU stress method
|
||||
name: fine grained control of which cpu stressors to use (ackermann, cfloat etc.)
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
cpu_load_percentage:
|
||||
display:
|
||||
description: load CPU by percentage
|
||||
name: CPU load
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
steps:
|
||||
workload_loop:
|
||||
kind: foreach
|
||||
items: !expr $.input.input_list
|
||||
workflow: sub-workflow.yaml
|
||||
parallelism: 1000
|
||||
outputs:
|
||||
success:
|
||||
workloads: !expr $.steps.workload_loop.outputs.success.data
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
deployers:
|
||||
image:
|
||||
connection: {}
|
||||
deployer_name: kubernetes
|
||||
log:
|
||||
level: debug
|
||||
logged_outputs:
|
||||
error:
|
||||
level: error
|
||||
success:
|
||||
level: debug
|
||||
@@ -1,14 +0,0 @@
|
||||
input_list:
|
||||
- duration: 30s
|
||||
io_block_size: 1m
|
||||
io_workers: 1
|
||||
io_write_bytes: 10m
|
||||
kubeconfig: ''
|
||||
namespace: default
|
||||
node_selector:
|
||||
kubernetes.io/hostname: kind-worker2
|
||||
target_pod_folder: /hog-data
|
||||
target_pod_volume:
|
||||
hostPath:
|
||||
path: /tmp
|
||||
name: node-volume
|
||||
@@ -1,142 +0,0 @@
|
||||
version: v0.2.0
|
||||
input:
|
||||
root: RootObject
|
||||
objects:
|
||||
hostPath:
|
||||
id: HostPathVolumeSource
|
||||
properties:
|
||||
path:
|
||||
type:
|
||||
type_id: string
|
||||
Volume:
|
||||
id: Volume
|
||||
properties:
|
||||
name:
|
||||
type:
|
||||
type_id: string
|
||||
hostPath:
|
||||
type:
|
||||
id: hostPath
|
||||
type_id: ref
|
||||
RootObject:
|
||||
id: input_item
|
||||
properties:
|
||||
kubeconfig:
|
||||
display:
|
||||
description: The complete kubeconfig file as a string
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
namespace:
|
||||
display:
|
||||
description: The namespace where the container will be deployed
|
||||
name: Namespace
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
node_selector:
|
||||
display:
|
||||
description: kubernetes node name where the plugin must be deployed
|
||||
type:
|
||||
type_id: map
|
||||
values:
|
||||
type_id: string
|
||||
keys:
|
||||
type_id: string
|
||||
required: true
|
||||
duration:
|
||||
display:
|
||||
name: duration the scenario expressed in seconds
|
||||
description: stop stress test after T seconds. One can also specify the units of time in
|
||||
seconds, minutes, hours, days or years with the suffix s, m, h, d or y
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
io_workers:
|
||||
display:
|
||||
description: number of workers
|
||||
name: start N workers continually writing, reading and removing temporary files
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
io_block_size:
|
||||
display:
|
||||
description: single write size
|
||||
name: specify size of each write in bytes. Size can be from 1 byte to 4MB.
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
io_write_bytes:
|
||||
display:
|
||||
description: Total number of bytes written
|
||||
name: write N bytes for each hdd process, the default is 1 GB. One can specify the size
|
||||
as % of free space on the file system or in units of Bytes, KBytes, MBytes and
|
||||
GBytes using the suffix b, k, m or g
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
target_pod_folder:
|
||||
display:
|
||||
description: Target Folder
|
||||
name: Folder in the pod where the test will be executed and the test files will be written
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
target_pod_volume:
|
||||
display:
|
||||
name: kubernetes volume definition
|
||||
description: the volume that will be attached to the pod. In order to stress
|
||||
the node storage only hosPath mode is currently supported
|
||||
type:
|
||||
type_id: ref
|
||||
id: Volume
|
||||
required: true
|
||||
|
||||
steps:
|
||||
kubeconfig:
|
||||
plugin:
|
||||
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
|
||||
deployment_type: image
|
||||
input:
|
||||
kubeconfig: !expr $.input.kubeconfig
|
||||
stressng:
|
||||
plugin:
|
||||
src: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
|
||||
deployment_type: image
|
||||
step: workload
|
||||
input:
|
||||
cleanup: "true"
|
||||
StressNGParams:
|
||||
timeout: !expr $.input.duration
|
||||
workdir: !expr $.input.target_pod_folder
|
||||
stressors:
|
||||
- stressor: hdd
|
||||
hdd: !expr $.input.io_workers
|
||||
hdd_bytes: !expr $.input.io_write_bytes
|
||||
hdd_write_size: !expr $.input.io_block_size
|
||||
|
||||
deploy:
|
||||
deployer_name: kubernetes
|
||||
connection: !expr $.steps.kubeconfig.outputs.success.connection
|
||||
pod:
|
||||
metadata:
|
||||
namespace: !expr $.input.namespace
|
||||
labels:
|
||||
arcaflow: stressng
|
||||
spec:
|
||||
nodeSelector: !expr $.input.node_selector
|
||||
pluginContainer:
|
||||
imagePullPolicy: Always
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts:
|
||||
- mountPath: /hog-data
|
||||
name: node-volume
|
||||
volumes:
|
||||
- !expr $.input.target_pod_volume
|
||||
|
||||
outputs:
|
||||
success:
|
||||
stressng: !expr $.steps.stressng.outputs.success
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
version: v0.2.0
|
||||
input:
|
||||
root: RootObject
|
||||
objects:
|
||||
hostPath:
|
||||
id: HostPathVolumeSource
|
||||
properties:
|
||||
path:
|
||||
type:
|
||||
type_id: string
|
||||
Volume:
|
||||
id: Volume
|
||||
properties:
|
||||
name:
|
||||
type:
|
||||
type_id: string
|
||||
hostPath:
|
||||
type:
|
||||
id: hostPath
|
||||
type_id: ref
|
||||
RootObject:
|
||||
id: RootObject
|
||||
properties:
|
||||
input_list:
|
||||
type:
|
||||
type_id: list
|
||||
items:
|
||||
id: input_item
|
||||
type_id: object
|
||||
properties:
|
||||
kubeconfig:
|
||||
display:
|
||||
description: The complete kubeconfig file as a string
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
namespace:
|
||||
display:
|
||||
description: The namespace where the container will be deployed
|
||||
name: Namespace
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
node_selector:
|
||||
display:
|
||||
description: kubernetes node name where the plugin must be deployed
|
||||
type:
|
||||
type_id: map
|
||||
values:
|
||||
type_id: string
|
||||
keys:
|
||||
type_id: string
|
||||
required: true
|
||||
duration:
|
||||
display:
|
||||
name: duration the scenario expressed in seconds
|
||||
description: stop stress test after T seconds. One can also specify the units of time in
|
||||
seconds, minutes, hours, days or years with the suffix s, m, h, d or y
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
io_workers:
|
||||
display:
|
||||
description: number of workers
|
||||
name: start N workers continually writing, reading and removing temporary files
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
io_block_size:
|
||||
display:
|
||||
description: single write size
|
||||
name: specify size of each write in bytes. Size can be from 1 byte to 4MB.
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
io_write_bytes:
|
||||
display:
|
||||
description: Total number of bytes written
|
||||
name: write N bytes for each hdd process, the default is 1 GB. One can specify the size
|
||||
as % of free space on the file system or in units of Bytes, KBytes, MBytes and
|
||||
GBytes using the suffix b, k, m or g
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
target_pod_folder:
|
||||
display:
|
||||
description: Target Folder
|
||||
name: Folder in the pod where the test will be executed and the test files will be written
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
target_pod_volume:
|
||||
display:
|
||||
name: kubernetes volume definition
|
||||
description: the volume that will be attached to the pod. In order to stress
|
||||
the node storage only hosPath mode is currently supported
|
||||
type:
|
||||
type_id: ref
|
||||
id: Volume
|
||||
required: true
|
||||
steps:
|
||||
workload_loop:
|
||||
kind: foreach
|
||||
items: !expr $.input.input_list
|
||||
workflow: sub-workflow.yaml
|
||||
parallelism: 1000
|
||||
outputs:
|
||||
success:
|
||||
workloads: !expr $.steps.workload_loop.outputs.success.data
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
---
|
||||
deployers:
|
||||
image:
|
||||
connection: {}
|
||||
deployer_name: kubernetes
|
||||
log:
|
||||
level: debug
|
||||
logged_outputs:
|
||||
error:
|
||||
level: error
|
||||
success:
|
||||
level: debug
|
||||
@@ -1,14 +0,0 @@
|
||||
input_list:
|
||||
- duration: 30s
|
||||
vm_bytes: 10%
|
||||
vm_workers: 2
|
||||
node_selector:
|
||||
kubernetes.io/hostname: kind-worker2
|
||||
# node selector example
|
||||
# node_selector:
|
||||
# kubernetes.io/hostname: master
|
||||
kubeconfig: ""
|
||||
namespace: default
|
||||
|
||||
# duplicate this section to run simultaneous stressors in the same run
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
version: v0.2.0
|
||||
input:
|
||||
root: RootObject
|
||||
objects:
|
||||
RootObject:
|
||||
id: input_item
|
||||
properties:
|
||||
kubeconfig:
|
||||
display:
|
||||
description: The complete kubeconfig file as a string
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
namespace:
|
||||
display:
|
||||
description: The namespace where the container will be deployed
|
||||
name: Namespace
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
node_selector:
|
||||
display:
|
||||
description: kubernetes node name where the plugin must be deployed
|
||||
type:
|
||||
type_id: map
|
||||
values:
|
||||
type_id: string
|
||||
keys:
|
||||
type_id: string
|
||||
required: true
|
||||
duration:
|
||||
display:
|
||||
name: duration the scenario expressed in seconds
|
||||
description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
vm_workers:
|
||||
display:
|
||||
description: Number of VM stressors to be run (0 means 1 stressor per CPU)
|
||||
name: Number of VM stressors
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
vm_bytes:
|
||||
display:
|
||||
description: N bytes per vm process, the default is 256MB. The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
|
||||
steps:
|
||||
kubeconfig:
|
||||
plugin:
|
||||
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
|
||||
deployment_type: image
|
||||
input:
|
||||
kubeconfig: !expr $.input.kubeconfig
|
||||
stressng:
|
||||
plugin:
|
||||
src: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
|
||||
deployment_type: image
|
||||
step: workload
|
||||
input:
|
||||
cleanup: "true"
|
||||
StressNGParams:
|
||||
timeout: !expr $.input.duration
|
||||
stressors:
|
||||
- stressor: vm
|
||||
vm: !expr $.input.vm_workers
|
||||
vm_bytes: !expr $.input.vm_bytes
|
||||
deploy:
|
||||
deployer_name: kubernetes
|
||||
connection: !expr $.steps.kubeconfig.outputs.success.connection
|
||||
pod:
|
||||
metadata:
|
||||
namespace: !expr $.input.namespace
|
||||
labels:
|
||||
arcaflow: stressng
|
||||
spec:
|
||||
nodeSelector: !expr $.input.node_selector
|
||||
pluginContainer:
|
||||
imagePullPolicy: Always
|
||||
|
||||
outputs:
|
||||
success:
|
||||
stressng: !expr $.steps.stressng.outputs.success
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
version: v0.2.0
|
||||
input:
|
||||
root: RootObject
|
||||
objects:
|
||||
RootObject:
|
||||
id: RootObject
|
||||
properties:
|
||||
input_list:
|
||||
type:
|
||||
type_id: list
|
||||
items:
|
||||
id: input_item
|
||||
type_id: object
|
||||
properties:
|
||||
kubeconfig:
|
||||
display:
|
||||
description: The complete kubeconfig file as a string
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
namespace:
|
||||
display:
|
||||
description: The namespace where the container will be deployed
|
||||
name: Namespace
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
node_selector:
|
||||
display:
|
||||
description: kubernetes node name where the plugin must be deployed
|
||||
type:
|
||||
type_id: map
|
||||
values:
|
||||
type_id: string
|
||||
keys:
|
||||
type_id: string
|
||||
required: true
|
||||
duration:
|
||||
display:
|
||||
name: duration the scenario expressed in seconds
|
||||
description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
vm_workers:
|
||||
display:
|
||||
description: Number of VM stressors to be run (0 means 1 stressor per CPU)
|
||||
name: Number of VM stressors
|
||||
type:
|
||||
type_id: integer
|
||||
required: true
|
||||
vm_bytes:
|
||||
display:
|
||||
description: N bytes per vm process, the default is 256MB. The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
|
||||
name: Kubeconfig file contents
|
||||
type:
|
||||
type_id: string
|
||||
required: true
|
||||
steps:
|
||||
workload_loop:
|
||||
kind: foreach
|
||||
items: !expr $.input.input_list
|
||||
workflow: sub-workflow.yaml
|
||||
parallelism: 1000
|
||||
outputs:
|
||||
success:
|
||||
workloads: !expr $.steps.workload_loop.outputs.success.data
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
scenarios:
|
||||
- name: "kill test container"
|
||||
namespace: "default"
|
||||
label_selector: "scenario=container"
|
||||
container_name: "fedtools"
|
||||
action: 1
|
||||
count: 1
|
||||
retry_wait: 60
|
||||
@@ -1,8 +0,0 @@
|
||||
network_chaos: # Scenario to create an outage by simulating random variations in the network.
|
||||
duration: 10 # seconds
|
||||
instance_count: 1
|
||||
node_name: kind-worker2
|
||||
execution: serial
|
||||
egress:
|
||||
bandwidth: 100mbit
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
scenarios:
|
||||
- action: delete
|
||||
namespace: "^namespace-scenario$"
|
||||
label_selector:
|
||||
runs: 1
|
||||
sleep: 15
|
||||
wait_time: 30
|
||||
@@ -1,5 +0,0 @@
|
||||
time_scenarios:
|
||||
- action: skew_time
|
||||
object_type: pod
|
||||
label_selector: scenario=time-skew
|
||||
container_name: ""
|
||||
@@ -7,10 +7,11 @@ trap finish EXIT
|
||||
|
||||
|
||||
function functional_test_app_outage {
|
||||
yq -i '.application_outage.pod_selector={"scenario":"outage"}' CI/scenarios/app_outage.yaml
|
||||
yq -i '.application_outage.namespace="default"' CI/scenarios/app_outage.yaml
|
||||
yq -i '.application_outage.duration=10' scenarios/openshift/app_outage.yaml
|
||||
yq -i '.application_outage.pod_selector={"scenario":"outage"}' scenarios/openshift/app_outage.yaml
|
||||
yq -i '.application_outage.namespace="default"' scenarios/openshift/app_outage.yaml
|
||||
export scenario_type="application_outages"
|
||||
export scenario_file="CI/scenarios/app_outage.yaml"
|
||||
export scenario_file="scenarios/openshift/app_outage.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/app_outage.yaml
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/app_outage.yaml
|
||||
|
||||
@@ -7,8 +7,9 @@ trap finish EXIT
|
||||
|
||||
|
||||
function functional_test_arca_cpu_hog {
|
||||
yq -i '.input_list[0].node_selector={"kubernetes.io/hostname":"kind-worker2"}' scenarios/arcaflow/cpu-hog/input.yaml
|
||||
export scenario_type="arcaflow_scenarios"
|
||||
export scenario_file="CI/scenarios/arcaflow/cpu-hog/input.yaml"
|
||||
export scenario_file="scenarios/arcaflow/cpu-hog/input.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/arca_cpu_hog.yaml
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/arca_cpu_hog.yaml
|
||||
|
||||
@@ -7,8 +7,9 @@ trap finish EXIT
|
||||
|
||||
|
||||
function functional_test_arca_io_hog {
|
||||
yq -i '.input_list[0].node_selector={"kubernetes.io/hostname":"kind-worker2"}' scenarios/arcaflow/io-hog/input.yaml
|
||||
export scenario_type="arcaflow_scenarios"
|
||||
export scenario_file="CI/scenarios/arcaflow/io-hog/input.yaml"
|
||||
export scenario_file="scenarios/arcaflow/io-hog/input.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/arca_io_hog.yaml
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/arca_io_hog.yaml
|
||||
|
||||
@@ -7,8 +7,9 @@ trap finish EXIT
|
||||
|
||||
|
||||
function functional_test_arca_memory_hog {
|
||||
yq -i '.input_list[0].node_selector={"kubernetes.io/hostname":"kind-worker2"}' scenarios/arcaflow/memory-hog/input.yaml
|
||||
export scenario_type="arcaflow_scenarios"
|
||||
export scenario_file="CI/scenarios/arcaflow/memory-hog/input.yaml"
|
||||
export scenario_file="scenarios/arcaflow/memory-hog/input.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/arca_memory_hog.yaml
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/arca_memory_hog.yaml
|
||||
|
||||
@@ -8,9 +8,11 @@ trap finish EXIT
|
||||
pod_file="CI/scenarios/hello_pod.yaml"
|
||||
|
||||
function functional_test_container_crash {
|
||||
|
||||
yq -i '.scenarios[0].namespace="default"' scenarios/openshift/app_outage.yaml
|
||||
yq -i '.scenarios[0].label_selector="scenario=container"' scenarios/openshift/app_outage.yaml
|
||||
yq -i '.scenarios[0].container_name="fedtools"' scenarios/openshift/app_outage.yaml
|
||||
export scenario_type="container_scenarios"
|
||||
export scenario_file="- CI/scenarios/container_scenario.yml"
|
||||
export scenario_file="- scenarios/openshift/app_outage.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/container_config.yaml
|
||||
|
||||
|
||||
@@ -7,12 +7,13 @@ trap finish EXIT
|
||||
|
||||
function funtional_test_namespace_deletion {
|
||||
export scenario_type="namespace_scenarios"
|
||||
export scenario_file="- CI/scenarios/network_diagnostics_namespace.yaml"
|
||||
export scenario_file="- scenarios/openshift/ingress_namespace.yaml"
|
||||
export post_config=""
|
||||
yq '.scenarios.[0].namespace="^openshift-network-diagnostics$"' -i CI/scenarios/network_diagnostics_namespace.yaml
|
||||
yq '.scenarios[0].namespace="^namespace-scenario$"' -i scenarios/openshift/ingress_namespace.yaml
|
||||
yq '.scenarios[0].wait_time=30' -i scenarios/openshift/ingress_namespace.yaml
|
||||
yq '.scenarios[0].action="delete"' -i scenarios/openshift/ingress_namespace.yaml
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/namespace_config.yaml
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/namespace_config.yaml
|
||||
echo $?
|
||||
echo "Namespace scenario test: Success"
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,16 @@ trap finish EXIT
|
||||
|
||||
|
||||
function functional_test_network_chaos {
|
||||
yq -i '.network_chaos.duration=10' scenarios/openshift/network_chaos.yaml
|
||||
yq -i '.network_chaos.node_name="kind-worker2"' scenarios/openshift/network_chaos.yaml
|
||||
yq -i '.network_chaos.egress.bandwidth="100mbit"' scenarios/openshift/network_chaos.yaml
|
||||
yq -i 'del(.network_chaos.interfaces)' scenarios/openshift/network_chaos.yaml
|
||||
yq -i 'del(.network_chaos.label_selector)' scenarios/openshift/network_chaos.yaml
|
||||
yq -i 'del(.network_chaos.egress.latency)' scenarios/openshift/network_chaos.yaml
|
||||
yq -i 'del(.network_chaos.egress.loss)' scenarios/openshift/network_chaos.yaml
|
||||
|
||||
export scenario_type="network_chaos"
|
||||
export scenario_file="CI/scenarios/network_chaos.yaml"
|
||||
export scenario_file="scenarios/openshift/network_chaos.yaml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/network_chaos.yaml
|
||||
python3 -m coverage run -a run_kraken.py -c CI/config/network_chaos.yaml
|
||||
|
||||
@@ -7,8 +7,12 @@ trap finish EXIT
|
||||
|
||||
|
||||
function functional_test_time_scenario {
|
||||
yq -i '.time_scenarios[0].label_selector="scenario=time-skew"' scenarios/openshift/time_scenarios_example.yml
|
||||
yq -i '.time_scenarios[0].container_name=""' scenarios/openshift/time_scenarios_example.yml
|
||||
yq -i '.time_scenarios[0].namespace="default"' scenarios/openshift/time_scenarios_example.yml
|
||||
yq -i '.time_scenarios[1].label_selector="kubernetes.io/hostname=kind-worker2"' scenarios/openshift/time_scenarios_example.yml
|
||||
export scenario_type="time_scenarios"
|
||||
export scenario_file="CI/scenarios/time_scenarios.yml"
|
||||
export scenario_file="scenarios/openshift/time_scenarios_example.yml"
|
||||
export post_config=""
|
||||
envsubst < CI/config/common_test_config.yaml > CI/config/time_config.yaml
|
||||
|
||||
|
||||
15
README.md
15
README.md
@@ -1,6 +1,6 @@
|
||||
# Krkn aka Kraken
|
||||
[](https://quay.io/repository/redhat-chaos/krkn?tab=tags&tag=latest)
|
||||

|
||||
[](https://quay.io/repository/krkn-chaos/krkn?tab=tags&tag=latest)
|
||||

|
||||
|
||||

|
||||
|
||||
@@ -79,7 +79,7 @@ Scenario type | Kubernetes
|
||||
### Kraken scenario pass/fail criteria and report
|
||||
It is important to make sure to check if the targeted component recovered from the chaos injection and also if the Kubernetes cluster is healthy as failures in one component can have an adverse impact on other components. Kraken does this by:
|
||||
- Having built in checks for pod and node based scenarios to ensure the expected number of replicas and nodes are up. It also supports running custom scripts with the checks.
|
||||
- Leveraging [Cerberus](https://github.com/redhat-chaos/cerberus) to monitor the cluster under test and consuming the aggregated go/no-go signal to determine pass/fail post chaos. It is highly recommended to turn on the Cerberus health check feature available in Kraken. Instructions on installing and setting up Cerberus can be found [here](https://github.com/openshift-scale/cerberus#installation) or can be installed from Kraken using the [instructions](https://github.com/redhat-chaos/krkn#setting-up-infrastructure-dependencies). Once Cerberus is up and running, set cerberus_enabled to True and cerberus_url to the url where Cerberus publishes go/no-go signal in the Kraken config file. Cerberus can monitor [application routes](https://github.com/redhat-chaos/cerberus/blob/main/docs/config.md#watch-routes) during the chaos and fails the run if it encounters downtime as it is a potential downtime in a customers, or users environment as well. It is especially important during the control plane chaos scenarios including the API server, Etcd, Ingress etc. It can be enabled by setting `check_applicaton_routes: True` in the [Kraken config](https://github.com/redhat-chaos/krkn/blob/main/config/config.yaml) provided application routes are being monitored in the [cerberus config](https://github.com/redhat-chaos/krkn/blob/main/config/cerberus.yaml).
|
||||
- Leveraging [Cerberus](https://github.com/krkn-chaos/cerberus) to monitor the cluster under test and consuming the aggregated go/no-go signal to determine pass/fail post chaos. It is highly recommended to turn on the Cerberus health check feature available in Kraken. Instructions on installing and setting up Cerberus can be found [here](https://github.com/openshift-scale/cerberus#installation) or can be installed from Kraken using the [instructions](https://github.com/krkn-chaos/krkn#setting-up-infrastructure-dependencies). Once Cerberus is up and running, set cerberus_enabled to True and cerberus_url to the url where Cerberus publishes go/no-go signal in the Kraken config file. Cerberus can monitor [application routes](https://github.com/redhat-chaos/cerberus/blob/main/docs/config.md#watch-routes) during the chaos and fails the run if it encounters downtime as it is a potential downtime in a customers, or users environment as well. It is especially important during the control plane chaos scenarios including the API server, Etcd, Ingress etc. It can be enabled by setting `check_applicaton_routes: True` in the [Kraken config](https://github.com/redhat-chaos/krkn/blob/main/config/config.yaml) provided application routes are being monitored in the [cerberus config](https://github.com/redhat-chaos/krkn/blob/main/config/cerberus.yaml).
|
||||
- Leveraging built-in alert collection feature to fail the runs in case of critical alerts.
|
||||
|
||||
### Signaling
|
||||
@@ -103,7 +103,7 @@ Information on enabling and leveraging this feature can be found [here](docs/SLO
|
||||
|
||||
### OCM / ACM integration
|
||||
|
||||
Kraken supports injecting faults into [Open Cluster Management (OCM)](https://open-cluster-management.io/) and [Red Hat Advanced Cluster Management for Kubernetes (ACM)](https://www.redhat.com/en/technologies/management/advanced-cluster-management) managed clusters through [ManagedCluster Scenarios](docs/managedcluster_scenarios.md).
|
||||
Kraken supports injecting faults into [Open Cluster Management (OCM)](https://open-cluster-management.io/) and [Red Hat Advanced Cluster Management for Kubernetes (ACM)](https://www.krkn.com/en/technologies/management/advanced-cluster-management) managed clusters through [ManagedCluster Scenarios](docs/managedcluster_scenarios.md).
|
||||
|
||||
|
||||
### Blogs and other useful resources
|
||||
@@ -129,6 +129,7 @@ Please read [this file]((CI/README.md#adding-a-test-case)) for more information
|
||||
|
||||
|
||||
### Community
|
||||
Key Members(slack_usernames/full name): paigerube14/Paige Rubendall, mffiedler/Mike Fiedler, ravielluri/Naga Ravi Chaitanya Elluri.
|
||||
* [**#krkn on Kubernetes Slack**](https://kubernetes.slack.com)
|
||||
* [**#forum-chaos on CoreOS Slack internal to Red Hat**](https://coreos.slack.com)
|
||||
Key Members(slack_usernames/full name): paigerube14/Paige Rubendall, mffiedler/Mike Fiedler, tsebasti/Tullio Sebastiani, yogi/Yogananth Subramanian, sahil/Sahil Shah, pradeep/Pradeep Surisetty and ravielluri/Naga Ravi Chaitanya Elluri.
|
||||
* [**#krkn on Kubernetes Slack**](https://kubernetes.slack.com/messages/C05SFMHRWK1)
|
||||
|
||||
The Linux Foundation® (TLF) has registered trademarks and uses trademarks. For a list of TLF trademarks, see [Trademark Usage](https://www.linuxfoundation.org/legal/trademark-usage).
|
||||
|
||||
20
ROADMAP.md
20
ROADMAP.md
@@ -2,14 +2,14 @@
|
||||
|
||||
Following are a list of enhancements that we are planning to work on adding support in Krkn. Of course any help/contributions are greatly appreciated.
|
||||
|
||||
- [ ] [Ability to run multiple chaos scenarios in parallel under load to mimic real world outages](https://github.com/redhat-chaos/krkn/issues/424)
|
||||
- [x] [Centralized storage for chaos experiments artifacts](https://github.com/redhat-chaos/krkn/issues/423)
|
||||
- [ ] [Support for causing DNS outages](https://github.com/redhat-chaos/krkn/issues/394)
|
||||
- [x] [Chaos recommender](https://github.com/redhat-chaos/krkn/tree/main/utils/chaos-recommender) to suggest scenarios having probability of impacting the service under test using profiling results
|
||||
- [ ] [Ability to run multiple chaos scenarios in parallel under load to mimic real world outages](https://github.com/krkn-chaos/krkn/issues/424)
|
||||
- [x] [Centralized storage for chaos experiments artifacts](https://github.com/krkn-chaos/krkn/issues/423)
|
||||
- [ ] [Support for causing DNS outages](https://github.com/krkn-chaos/krkn/issues/394)
|
||||
- [x] [Chaos recommender](https://github.com/krkn-chaos/krkn/tree/main/utils/chaos-recommender) to suggest scenarios having probability of impacting the service under test using profiling results
|
||||
- [ ] Chaos AI integration to improve and automate test coverage
|
||||
- [x] [Support for pod level network traffic shaping](https://github.com/redhat-chaos/krkn/issues/393)
|
||||
- [ ] [Ability to visualize the metrics that are being captured by Kraken and stored in Elasticsearch](https://github.com/redhat-chaos/krkn/issues/124)
|
||||
- [ ] Support for running all the scenarios of Kraken on Kubernetes distribution - see https://github.com/redhat-chaos/krkn/issues/185, https://github.com/redhat-chaos/krkn/issues/186
|
||||
- [ ] Continue to improve [Chaos Testing Guide](https://redhat-chaos.github.io/krkn) in terms of adding best practices, test environment recommendations and scenarios to make sure the OpenShift platform, as well the applications running on top it, are resilient and performant under chaotic conditions.
|
||||
- [ ] [Switch documentation references to Kubernetes](https://github.com/redhat-chaos/krkn/issues/495)
|
||||
- [ ] [OCP and Kubernetes functionalities segregation](https://github.com/redhat-chaos/krkn/issues/497)
|
||||
- [x] [Support for pod level network traffic shaping](https://github.com/krkn-chaos/krkn/issues/393)
|
||||
- [ ] [Ability to visualize the metrics that are being captured by Kraken and stored in Elasticsearch](https://github.com/krkn-chaos/krkn/issues/124)
|
||||
- [ ] Support for running all the scenarios of Kraken on Kubernetes distribution - see https://github.com/krkn-chaos/krkn/issues/185, https://github.com/redhat-chaos/krkn/issues/186
|
||||
- [ ] Continue to improve [Chaos Testing Guide](https://krkn-chaos.github.io/krkn) in terms of adding best practices, test environment recommendations and scenarios to make sure the OpenShift platform, as well the applications running on top it, are resilient and performant under chaotic conditions.
|
||||
- [ ] [Switch documentation references to Kubernetes](https://github.com/krkn-chaos/krkn/issues/495)
|
||||
- [ ] [OCP and Kubernetes functionalities segregation](https://github.com/krkn-chaos/krkn/issues/497)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#### Kubernetes/OpenShift cluster shut down scenario
|
||||
Scenario to shut down all the nodes including the masters and restart them after specified duration. Cluster shut down scenario can be injected by placing the shut_down config file under cluster_shut_down_scenario option in the kraken config. Refer to [cluster_shut_down_scenario](https://github.com/redhat-chaos/krkn/blob/main/scenarios/cluster_shut_down_scenario.yml) config file.
|
||||
#### Kubernetes cluster shut down scenario
|
||||
Scenario to shut down all the nodes including the masters and restart them after specified duration. Cluster shut down scenario can be injected by placing the shut_down config file under cluster_shut_down_scenario option in the kraken config. Refer to [cluster_shut_down_scenario](https://github.com/krkn-chaos/krkn/blob/main/scenarios/cluster_shut_down_scenario.yml) config file.
|
||||
|
||||
Refer to [cloud setup](cloud_setup.md) to configure your cli properly for the cloud provider of the cluster you want to shut down.
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ This can be based on the pods namespace or labels. If you know the exact object
|
||||
These scenarios are in a simple yaml format that you can manipulate to run your specific tests or use the pre-existing scenarios to see how it works.
|
||||
|
||||
#### Example Config
|
||||
The following are the components of Kubernetes/OpenShift for which a basic chaos scenario config exists today.
|
||||
The following are the components of Kubernetes for which a basic chaos scenario config exists today.
|
||||
|
||||
```
|
||||
scenarios:
|
||||
@@ -25,7 +25,7 @@ In all scenarios we do a post chaos check to wait and verify the specific compon
|
||||
Here there are two options:
|
||||
1. Pass a custom script in the main config scenario list that will run before the chaos and verify the output matches post chaos scenario.
|
||||
|
||||
See [scenarios/post_action_etcd_container.py](https://github.com/redhat-chaos/krkn/blob/main/scenarios/post_action_etcd_container.py) for an example.
|
||||
See [scenarios/post_action_etcd_container.py](https://github.com/krkn-chaos/krkn/blob/main/scenarios/post_action_etcd_container.py) for an example.
|
||||
```
|
||||
- container_scenarios: # List of chaos pod scenarios to load.
|
||||
- - scenarios/container_etcd.yml
|
||||
|
||||
@@ -62,7 +62,7 @@ If changes go into the main repository while you're working on your code it is b
|
||||
|
||||
If not already configured, set the upstream url for kraken.
|
||||
```
|
||||
git remote add upstream https://github.com/redhat-chaos/krkn.git
|
||||
git remote add upstream https://github.com/krkn-chaos/krkn.git
|
||||
```
|
||||
|
||||
Rebase to upstream master branch.
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
The following ways are supported to run Kraken:
|
||||
|
||||
- Standalone python program through Git.
|
||||
- Containerized version using either Podman or Docker as the runtime via [Krkn-hub](https://github.com/redhat-chaos/krkn-hub)
|
||||
- Containerized version using either Podman or Docker as the runtime via [Krkn-hub](https://github.com/krkn-chaos/krkn-hub)
|
||||
- Kubernetes or OpenShift deployment ( unsupported )
|
||||
|
||||
**NOTE**: It is recommended to run Kraken external to the cluster ( Standalone or Containerized ) hitting the Kubernetes/OpenShift API as running it internal to the cluster might be disruptive to itself and also might not report back the results if the chaos leads to cluster's API server instability.
|
||||
|
||||
**NOTE**: To run Kraken on Power (ppc64le) architecture, build and run a containerized version by following the
|
||||
instructions given [here](https://github.com/redhat-chaos/krkn/blob/main/containers/build_own_image-README.md).
|
||||
instructions given [here](https://github.com/krkn-chaos/krkn/blob/main/containers/build_own_image-README.md).
|
||||
|
||||
**NOTE**: Helper functions for interactions in Krkn are part of [krkn-lib](https://github.com/redhat-chaos/krkn-lib).
|
||||
Please feel free to reuse and expand them as you see fit when adding a new scenario or expanding
|
||||
@@ -19,9 +19,9 @@ the capabilities of the current supported scenarios.
|
||||
### Git
|
||||
|
||||
#### Clone the repository
|
||||
Pick the latest stable release to install [here](https://github.com/redhat-chaos/krkn/releases).
|
||||
Pick the latest stable release to install [here](https://github.com/krkn-chaos/krkn/releases).
|
||||
```
|
||||
$ git clone https://github.com/redhat-chaos/krkn.git --branch <release version>
|
||||
$ git clone https://github.com/krkn-chaos/krkn.git --branch <release version>
|
||||
$ cd kraken
|
||||
```
|
||||
|
||||
@@ -40,13 +40,13 @@ $ python3.9 run_kraken.py --config <config_file_location>
|
||||
```
|
||||
|
||||
### Run containerized version
|
||||
[Krkn-hub](https://github.com/redhat-chaos/krkn-hub) is a wrapper that allows running Krkn chaos scenarios via podman or docker runtime with scenario parameters/configuration defined as environment variables.
|
||||
[Krkn-hub](https://github.com/krkn-chaos/krkn-hub) is a wrapper that allows running Krkn chaos scenarios via podman or docker runtime with scenario parameters/configuration defined as environment variables.
|
||||
|
||||
Refer [instructions](https://github.com/redhat-chaos/krkn-hub#supported-chaos-scenarios) to get started.
|
||||
Refer [instructions](https://github.com/krkn-chaos/krkn-hub#supported-chaos-scenarios) to get started.
|
||||
|
||||
|
||||
### Run Kraken as a Kubernetes deployment ( unsupported option - standalone or containerized deployers are recommended )
|
||||
Refer [Instructions](https://github.com/redhat-chaos/krkn/blob/main/containers/README.md) on how to deploy and run Kraken as a Kubernetes/OpenShift deployment.
|
||||
Refer [Instructions](https://github.com/krkn-chaos/krkn/blob/main/containers/README.md) on how to deploy and run Kraken as a Kubernetes/OpenShift deployment.
|
||||
|
||||
|
||||
Refer to the [chaos-kraken chart manpage](https://artifacthub.io/packages/helm/startx/chaos-kraken)
|
||||
|
||||
@@ -16,7 +16,7 @@ Set to '^.*$' and label_selector to "" to randomly select any namespace in your
|
||||
|
||||
**sleep:** Number of seconds to wait between each iteration/count of killing namespaces. Defaults to 10 seconds if not set
|
||||
|
||||
Refer to [namespace_scenarios_example](https://github.com/redhat-chaos/krkn/blob/main/scenarios/regex_namespace.yaml) config file.
|
||||
Refer to [namespace_scenarios_example](https://github.com/krkn-chaos/krkn/blob/main/scenarios/regex_namespace.yaml) config file.
|
||||
|
||||
```
|
||||
scenarios:
|
||||
|
||||
@@ -16,7 +16,7 @@ Configuration Options:
|
||||
|
||||
**object_name:** List of the names of pods or nodes you want to skew.
|
||||
|
||||
Refer to [time_scenarios_example](https://github.com/redhat-chaos/krkn/blob/main/scenarios/time_scenarios_example.yml) config file.
|
||||
Refer to [time_scenarios_example](https://github.com/krkn-chaos/krkn/blob/main/scenarios/time_scenarios_example.yml) config file.
|
||||
|
||||
```
|
||||
time_scenarios:
|
||||
|
||||
@@ -11,7 +11,7 @@ coverage
|
||||
datetime
|
||||
docker
|
||||
docker-compose
|
||||
git+https://github.com/redhat-chaos/arcaflow-plugin-kill-pod.git
|
||||
git+https://github.com/krkn-chaos/arcaflow-plugin-kill-pod.git
|
||||
git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.0.0
|
||||
gitpython
|
||||
google-api-python-client
|
||||
@@ -35,6 +35,6 @@ requests
|
||||
service_identity
|
||||
setuptools==65.5.1
|
||||
werkzeug==3.0.1
|
||||
wheel
|
||||
wheel>=0.38.0
|
||||
zope.interface==5.4.0
|
||||
pandas<2.0.0
|
||||
pandas>=2.2.0
|
||||
|
||||
@@ -34,7 +34,7 @@ from krkn_lib.utils import SafeLogger
|
||||
from krkn_lib.utils.functions import get_yaml_item_value
|
||||
|
||||
|
||||
|
||||
report_file = ""
|
||||
|
||||
# Main function
|
||||
def main(cfg):
|
||||
@@ -414,10 +414,9 @@ def main(cfg):
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
run_dir = os.getcwd() + "/kraken.report"
|
||||
logging.info(
|
||||
"Successfully finished running Kraken. UUID for the run: "
|
||||
"%s. Report generated at %s. Exiting" % (run_uuid, run_dir)
|
||||
"%s. Report generated at %s. Exiting" % (run_uuid, report_file)
|
||||
)
|
||||
else:
|
||||
logging.error("Cannot find a config at %s, please check" % (cfg))
|
||||
@@ -434,12 +433,21 @@ if __name__ == "__main__":
|
||||
help="config location",
|
||||
default="config/config.yaml",
|
||||
)
|
||||
parser.add_option(
|
||||
"-o",
|
||||
"--output",
|
||||
dest="output",
|
||||
help="output report location",
|
||||
default="kraken.report",
|
||||
)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
report_file = options.output
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("kraken.report", mode="w"),
|
||||
logging.FileHandler(report_file, mode="w"),
|
||||
logging.StreamHandler(),
|
||||
],
|
||||
)
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
input_list:
|
||||
- cpu_count: 1
|
||||
cpu_load_percentage: 80
|
||||
cpu_method: all
|
||||
duration: 1s
|
||||
kubeconfig: ''
|
||||
namespace: default
|
||||
node_selector: {}
|
||||
- cpu_count: 1
|
||||
cpu_load_percentage: 80
|
||||
cpu_method: all
|
||||
duration: 1s
|
||||
kubeconfig: ''
|
||||
namespace: default
|
||||
node_selector:
|
||||
kubernetes.io/hostname: kind-worker2
|
||||
|
||||
@@ -3,6 +3,6 @@ scenarios:
|
||||
namespace: "kube-system"
|
||||
label_selector: "k8s-app=kube-dns"
|
||||
container_name: ""
|
||||
action: "kill 1"
|
||||
action: 1
|
||||
count: 1
|
||||
retry_wait: 60
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
network_chaos: # Scenario to create an outage by simulating random variations in the network.
|
||||
duration: 300 # seconds
|
||||
node_name: # node on which scenario has to be injected;
|
||||
label_selector: <label_selector> # when node_name is not specified, a node with matching label_selector is selected for running the scenario.
|
||||
network_chaos: # Scenario to create an outage by simulating random variations in the network.
|
||||
duration: 300 # seconds
|
||||
node_name: # node on which scenario has to be injected;
|
||||
label_selector: <label_selector> # when node_name is not specified, a node with matching label_selector is selected for running the scenario.
|
||||
instance_count: 1
|
||||
interfaces: # Interface name would be the Kernel host network interface name.
|
||||
- "<interface_name>"
|
||||
interfaces: # Interface name would be the Kernel host network interface name.
|
||||
- "<interface_name>"
|
||||
execution: serial
|
||||
egress:
|
||||
latency: 50ms # 50ms
|
||||
loss: 0.02 # percentage
|
||||
bandwidth: 100mbit
|
||||
latency: 50ms # 50ms
|
||||
loss: 0.02 # percentage
|
||||
|
||||
@@ -17,7 +17,7 @@ This tool profiles an application and gathers telemetry data such as CPU, Memory
|
||||
```
|
||||
$ python3.9 -m venv chaos
|
||||
$ source chaos/bin/activate
|
||||
$ git clone https://github.com/redhat-chaos/krkn.git
|
||||
$ git clone https://github.com/krkn-chaos/krkn.git
|
||||
$ cd krkn
|
||||
$ pip3 install -r requirements.txt
|
||||
$ python3.9 utils/chaos_recommender/chaos_recommender.py
|
||||
@@ -89,7 +89,7 @@ If you provide the input values through command-line arguments, the correspondin
|
||||
|
||||
## Podman & Docker image
|
||||
|
||||
To run the recommender image please visit the [krkn-hub](https://github.com/redhat-chaos/krkn-hub for further infos.
|
||||
To run the recommender image please visit the [krkn-hub](https://github.com/krkn-chaos/krkn-hub for further infos.
|
||||
|
||||
## How it works
|
||||
|
||||
|
||||
Reference in New Issue
Block a user