Compare commits

...

11 Commits

Author SHA1 Message Date
Tullio Sebastiani
04e44738d9 updated deprecated upload artifact action (#717)
Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-11 17:03:24 +02:00
Tullio Sebastiani
f810cadad2 Fixes the Plugin scenario schema error (#718)
* reformatting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* schema refactoring

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* plugin refactoring

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

---------

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-10 09:59:53 -04:00
Tullio Sebastiani
4b869bad83 added fallback on dd if fallocate is not in the $PATH (#716)
Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-10 11:15:03 +02:00
Matt Leader
a36b0c76b2 OCP Chaos Arcaflow Workflow (#699)
* add workflows

Signed-off-by: Matthew F Leader <mleader@redhat.com>

* update readme

Signed-off-by: Matthew F Leader <mleader@redhat.com>

* rm my kubeconfig path

Signed-off-by: Matthew F Leader <mleader@redhat.com>

* add workflow details to readme

Signed-off-by: Matthew F Leader <mleader@redhat.com>

* mv arcaflow to utils

Signed-off-by: Matthew F Leader <mleader@redhat.com>

---------

Signed-off-by: Matthew F Leader <mleader@redhat.com>
2024-10-09 14:46:08 -04:00
Tullio Sebastiani
a17e16390c cluster events check removed from funtest (deprecated krkn-lib v4.0.0)
Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-09 10:19:24 -04:00
Paige Patton
f8534d616c v4.0.3
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-10-08 23:30:28 -04:00
Paige Patton
9670ce82f5 adding container updates
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-10-08 14:31:29 -04:00
Paige Patton
95e4b68389 plural pod network
Signed-off-by: Paige Patton <prubenda@redhat.com>
2024-10-08 11:14:54 -04:00
Tullio Sebastiani
0aac6119b0 hotfix: krkn-lib update (#709)
Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-07 08:22:31 -04:00
Tullio Sebastiani
7e5bdfd5cf disabled elastic (#708)
Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-04 12:42:34 -04:00
Tullio Sebastiani
3c207ab2ea hotfix: krkn-lib update (#706)
Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
2024-10-04 11:11:20 -04:00
16 changed files with 950 additions and 463 deletions

View File

@@ -126,7 +126,7 @@ jobs:
cat ./CI/results.markdown >> $GITHUB_STEP_SUMMARY
echo >> $GITHUB_STEP_SUMMARY
- name: Upload CI logs
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: ci-logs
path: CI/out
@@ -140,13 +140,13 @@ jobs:
pip install html2text
html2text --ignore-images --ignore-links -b 0 htmlcov/index.html >> $GITHUB_STEP_SUMMARY
- name: Upload coverage data
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: coverage
path: htmlcov
if-no-files-found: error
- name: Upload json coverage
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: coverage.json
path: coverage.json
@@ -169,7 +169,7 @@ jobs:
path: krkn-lib-docs
ssh-key: ${{ secrets.KRKN_LIB_DOCS_PRIV_KEY }}
- name: Download json coverage
uses: actions/download-artifact@v4.1.7
uses: actions/download-artifact@v4
with:
name: coverage.json
- name: Set up Python

View File

@@ -51,7 +51,7 @@ telemetry:
events_backup: True # enables/disables cluster events collection
telemetry_group: "funtests"
elastic:
enable_elastic: True
enable_elastic: False
collect_metrics: False
collect_alerts: False
verify_certs: False

View File

@@ -26,7 +26,6 @@ function functional_test_telemetry {
RUN_FOLDER=`cat CI/out/test_telemetry.out | grep amazonaws.com | sed -rn "s#.*https:\/\/.*\/files/(.*)#\1#p"`
$AWS_CLI s3 ls "s3://$AWS_BUCKET/$RUN_FOLDER/" | awk '{ print $4 }' > s3_remote_files
echo "checking if telemetry files are uploaded on s3"
cat s3_remote_files | grep events-00.json || ( echo "FAILED: events-00.json not uploaded" && exit 1 )
cat s3_remote_files | grep critical-alerts-00.log || ( echo "FAILED: critical-alerts-00.log not uploaded" && exit 1 )
cat s3_remote_files | grep prometheus-00.tar || ( echo "FAILED: prometheus backup not uploaded" && exit 1 )
cat s3_remote_files | grep telemetry.json || ( echo "FAILED: telemetry.json not uploaded" && exit 1 )

View File

@@ -28,14 +28,15 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
try:
with open(scenario, "r") as f:
cont_scenario_config = yaml.full_load(f)
self.start_monitoring(
kill_scenarios=cont_scenario_config["scenarios"], pool=pool
)
killed_containers = self.container_killing_in_pod(
cont_scenario_config, lib_telemetry.get_lib_kubernetes()
)
logging.info(f"killed containers: {str(killed_containers)}")
result = pool.join()
for kill_scenario in cont_scenario_config["scenarios"]:
self.start_monitoring(
kill_scenario, pool
)
killed_containers = self.container_killing_in_pod(
kill_scenario, lib_telemetry.get_lib_kubernetes()
)
result = pool.join()
if result.error:
logging.error(
logging.error(
@@ -61,16 +62,16 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
def get_scenario_types(self) -> list[str]:
return ["container_scenarios"]
def start_monitoring(self, kill_scenarios: list[any], pool: PodsMonitorPool):
for kill_scenario in kill_scenarios:
namespace_pattern = f"^{kill_scenario['namespace']}$"
label_selector = kill_scenario["label_selector"]
recovery_time = kill_scenario["expected_recovery_time"]
pool.select_and_monitor_by_namespace_pattern_and_label(
namespace_pattern=namespace_pattern,
label_selector=label_selector,
max_timeout=recovery_time,
)
def start_monitoring(self, kill_scenario: dict, pool: PodsMonitorPool):
namespace_pattern = f"^{kill_scenario['namespace']}$"
label_selector = kill_scenario["label_selector"]
recovery_time = kill_scenario["expected_recovery_time"]
pool.select_and_monitor_by_namespace_pattern_and_label(
namespace_pattern=namespace_pattern,
label_selector=label_selector,
max_timeout=recovery_time,
)
def container_killing_in_pod(self, cont_scenario, kubecli: KrknKubernetes):
scenario_name = get_yaml_item_value(cont_scenario, "name", "")
@@ -128,7 +129,6 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
container.name for container in pod_output.containers
]
container_pod_list.append([pod, namespace, container_names])
killed_count = 0
killed_container_list = []
while killed_count < kill_count:
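Because the old and new lines are interleaved in the hunks above, the net effect of this refactor can be easier to see as a standalone sketch: monitoring is armed and the pool joined once per kill scenario instead of once for the whole scenario list. The function name and return codes below are illustrative; the calls mirror the new signatures shown in the diff.

```python
def run_container_scenarios(cont_scenario_config: dict, pool, plugin, kubecli) -> int:
    """Illustrative sketch of the refactored per-scenario flow (not the actual plugin code)."""
    for kill_scenario in cont_scenario_config["scenarios"]:
        # monitoring is now started for this scenario only (new start_monitoring signature)
        plugin.start_monitoring(kill_scenario, pool)
        # the config passed down is the single kill scenario, not the whole file
        plugin.container_killing_in_pod(kill_scenario, kubecli)
        # wait for the monitored pods to recover before moving to the next scenario
        result = pool.join()
        if result.error:
            return 1  # illustrative failure code
    return 0
```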

View File

@@ -48,7 +48,7 @@ class NativeScenarioPlugin(AbstractScenarioPlugin):
def get_scenario_types(self) -> list[str]:
return [
"pod_disruption_scenarios",
"pod_network_scenario",
"pod_network_scenarios",
"vmware_node_scenarios",
"ibmcloud_node_scenarios",
]

View File

@@ -18,17 +18,14 @@ from kubernetes.client.api.batch_v1_api import BatchV1Api as BatchV1Api
@dataclass
class NetworkScenarioConfig:
node_interface_name: typing.Dict[
str, typing.List[str]
] = field(
node_interface_name: typing.Dict[str, typing.List[str]] = field(
default=None,
metadata={
"name": "Node Interface Name",
"description":
"Dictionary with node names as key and values as a list of "
"their test interfaces. "
"Required if label_selector is not set.",
}
"description": "Dictionary with node names as key and values as a list of "
"their test interfaces. "
"Required if label_selector is not set.",
},
)
label_selector: typing.Annotated[
@@ -37,93 +34,76 @@ class NetworkScenarioConfig:
default=None,
metadata={
"name": "Label selector",
"description":
"Kubernetes label selector for the target nodes. "
"Required if node_interface_name is not set.\n"
"See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ " # noqa
"for details.",
}
)
test_duration: typing.Annotated[
typing.Optional[int],
validation.min(1)
] = field(
default=120,
metadata={
"name": "Test duration",
"description":
"Duration for which each step of the ingress chaos testing "
"is to be performed.",
"description": "Kubernetes label selector for the target nodes. "
"Required if node_interface_name is not set.\n"
"See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ " # noqa
"for details.",
},
)
wait_duration: typing.Annotated[
typing.Optional[int],
validation.min(1)
] = field(
test_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
default=120,
metadata={
"name": "Test duration",
"description": "Duration for which each step of the ingress chaos testing "
"is to be performed.",
},
)
wait_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
default=30,
metadata={
"name": "Wait Duration",
"description":
"Wait duration for finishing a test and its cleanup."
"Ensure that it is significantly greater than wait_duration"
}
"description": "Wait duration for finishing a test and its cleanup."
"Ensure that it is significantly greater than wait_duration",
},
)
instance_count: typing.Annotated[
typing.Optional[int],
validation.min(1)
] = field(
instance_count: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
default=1,
metadata={
"name": "Instance Count",
"description":
"Number of nodes to perform action/select that match "
"the label selector.",
}
"description": "Number of nodes to perform action/select that match "
"the label selector.",
},
)
kubeconfig_path: typing.Optional[str] = field(
default=None,
metadata={
"name": "Kubeconfig path",
"description":
"Path to your Kubeconfig file. Defaults to ~/.kube/config.\n"
"See https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ " # noqa
"for details.",
}
"description": "Path to your Kubeconfig file. Defaults to ~/.kube/config.\n"
"See https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ " # noqa
"for details.",
},
)
execution_type: typing.Optional[str] = field(
default='parallel',
default="parallel",
metadata={
"name": "Execution Type",
"description":
"The order in which the ingress filters are applied. "
"Execution type can be 'serial' or 'parallel'"
}
"description": "The order in which the ingress filters are applied. "
"Execution type can be 'serial' or 'parallel'",
},
)
network_params: typing.Dict[str, str] = field(
default=None,
metadata={
"name": "Network Parameters",
"description":
"The network filters that are applied on the interface. "
"The currently supported filters are latency, "
"loss and bandwidth"
}
"description": "The network filters that are applied on the interface. "
"The currently supported filters are latency, "
"loss and bandwidth",
},
)
kraken_config: typing.Optional[str] = field(
default='',
default="",
metadata={
"name": "Kraken Config",
"description":
"Path to the config file of Kraken. "
"Set this field if you wish to publish status onto Cerberus"
}
"description": "Path to the config file of Kraken. "
"Set this field if you wish to publish status onto Cerberus",
},
)
@@ -132,33 +112,30 @@ class NetworkScenarioSuccessOutput:
filter_direction: str = field(
metadata={
"name": "Filter Direction",
"description":
"Direction in which the traffic control filters are applied "
"on the test interfaces"
"description": "Direction in which the traffic control filters are applied "
"on the test interfaces",
}
)
test_interfaces: typing.Dict[str, typing.List[str]] = field(
metadata={
"name": "Test Interfaces",
"description":
"Dictionary of nodes and their interfaces on which "
"the chaos experiment was performed"
"description": "Dictionary of nodes and their interfaces on which "
"the chaos experiment was performed",
}
)
network_parameters: typing.Dict[str, str] = field(
metadata={
"name": "Network Parameters",
"description":
"The network filters that are applied on the interfaces"
"description": "The network filters that are applied on the interfaces",
}
)
execution_type: str = field(
metadata={
"name": "Execution Type",
"description": "The order in which the filters are applied"
"description": "The order in which the filters are applied",
}
)
@@ -168,18 +145,13 @@ class NetworkScenarioErrorOutput:
error: str = field(
metadata={
"name": "Error",
"description":
"Error message when there is a run-time error during "
"the execution of the scenario"
"description": "Error message when there is a run-time error during "
"the execution of the scenario",
}
)
def get_default_interface(
node: str,
pod_template,
cli: CoreV1Api
) -> str:
def get_default_interface(node: str, pod_template, cli: CoreV1Api) -> str:
"""
Function that returns a random interface from a node
@@ -210,9 +182,9 @@ def get_default_interface(
logging.error("Exception occurred while executing command in pod")
sys.exit(1)
routes = output.split('\n')
routes = output.split("\n")
for route in routes:
if 'default' in route:
if "default" in route:
default_route = route
break
@@ -226,10 +198,7 @@ def get_default_interface(
def verify_interface(
input_interface_list: typing.List[str],
node: str,
pod_template,
cli: CoreV1Api
input_interface_list: typing.List[str], node: str, pod_template, cli: CoreV1Api
) -> typing.List[str]:
"""
Function that verifies whether a list of interfaces is present in the node.
@@ -258,22 +227,15 @@ def verify_interface(
try:
if input_interface_list == []:
cmd = ["ip", "r"]
output = kube_helper.exec_cmd_in_pod(
cli,
cmd,
"fedtools",
"default"
)
output = kube_helper.exec_cmd_in_pod(cli, cmd, "fedtools", "default")
if not output:
logging.error(
"Exception occurred while executing command in pod"
)
logging.error("Exception occurred while executing command in pod")
sys.exit(1)
routes = output.split('\n')
routes = output.split("\n")
for route in routes:
if 'default' in route:
if "default" in route:
default_route = route
break
@@ -281,20 +243,13 @@ def verify_interface(
else:
cmd = ["ip", "-br", "addr", "show"]
output = kube_helper.exec_cmd_in_pod(
cli,
cmd,
"fedtools",
"default"
)
output = kube_helper.exec_cmd_in_pod(cli, cmd, "fedtools", "default")
if not output:
logging.error(
"Exception occurred while executing command in pod"
)
logging.error("Exception occurred while executing command in pod")
sys.exit(1)
interface_ip = output.split('\n')
interface_ip = output.split("\n")
node_interface_list = [
interface.split()[0] for interface in interface_ip[:-1]
]
@@ -302,12 +257,12 @@ def verify_interface(
for interface in input_interface_list:
if interface not in node_interface_list:
logging.error(
"Interface %s not found in node %s interface list %s" %
(interface, node, node_interface_list)
"Interface %s not found in node %s interface list %s"
% (interface, node, node_interface_list)
)
raise Exception(
"Interface %s not found in node %s interface list %s" %
(interface, node, node_interface_list)
"Interface %s not found in node %s interface list %s"
% (interface, node, node_interface_list)
)
finally:
logging.info("Deleteing pod to query interface on node")
@@ -321,9 +276,8 @@ def get_node_interfaces(
label_selector: str,
instance_count: int,
pod_template,
cli: CoreV1Api
cli: CoreV1Api,
) -> typing.Dict[str, typing.List[str]]:
"""
Function that is used to process the input dictionary with the nodes and
its test interfaces.
@@ -364,11 +318,7 @@ def get_node_interfaces(
nodes = kube_helper.get_node(None, label_selector, instance_count, cli)
node_interface_dict = {}
for node in nodes:
node_interface_dict[node] = get_default_interface(
node,
pod_template,
cli
)
node_interface_dict[node] = get_default_interface(node, pod_template, cli)
else:
node_name_list = node_interface_dict.keys()
filtered_node_list = []
@@ -395,9 +345,8 @@ def apply_ingress_filter(
batch_cli: BatchV1Api,
cli: CoreV1Api,
create_interfaces: bool = True,
param_selector: str = 'all'
param_selector: str = "all",
) -> str:
"""
Function that applies the filters to shape incoming traffic to
the provided node's interfaces.
@@ -438,22 +387,18 @@ def apply_ingress_filter(
"""
network_params = cfg.network_params
if param_selector != 'all':
if param_selector != "all":
network_params = {param_selector: cfg.network_params[param_selector]}
if create_interfaces:
create_virtual_interfaces(cli, interface_list, node, pod_template)
exec_cmd = get_ingress_cmd(
interface_list, network_params, duration=cfg.test_duration
)
interface_list, network_params, duration=cfg.test_duration
)
logging.info("Executing %s on node %s" % (exec_cmd, node))
job_body = yaml.safe_load(
job_template.render(
jobname=str(hash(node))[:5],
nodename=node,
cmd=exec_cmd
)
job_template.render(jobname=str(hash(node))[:5], nodename=node, cmd=exec_cmd)
)
api_response = kube_helper.create_job(batch_cli, job_body)
@@ -464,10 +409,7 @@ def apply_ingress_filter(
def create_virtual_interfaces(
cli: CoreV1Api,
interface_list: typing.List[str],
node: str,
pod_template
cli: CoreV1Api, interface_list: typing.List[str], node: str, pod_template
) -> None:
"""
Function that creates a privileged pod and uses it to create
@@ -488,25 +430,20 @@ def create_virtual_interfaces(
- The YAML template used to instantiate a pod to create
virtual interfaces on the node
"""
pod_body = yaml.safe_load(
pod_template.render(nodename=node)
)
pod_body = yaml.safe_load(pod_template.render(nodename=node))
kube_helper.create_pod(cli, pod_body, "default", 300)
logging.info(
"Creating {0} virtual interfaces on node {1} using a pod".format(
len(interface_list),
node
len(interface_list), node
)
)
create_ifb(cli, len(interface_list), 'modtools')
create_ifb(cli, len(interface_list), "modtools")
logging.info("Deleting pod used to create virtual interfaces")
kube_helper.delete_pod(cli, "modtools", "default")
def delete_virtual_interfaces(
cli: CoreV1Api,
node_list: typing.List[str],
pod_template
cli: CoreV1Api, node_list: typing.List[str], pod_template
):
"""
Function that creates a privileged pod and uses it to delete all
@@ -529,14 +466,10 @@ def delete_virtual_interfaces(
"""
for node in node_list:
pod_body = yaml.safe_load(
pod_template.render(nodename=node)
)
pod_body = yaml.safe_load(pod_template.render(nodename=node))
kube_helper.create_pod(cli, pod_body, "default", 300)
logging.info(
"Deleting all virtual interfaces on node {0}".format(node)
)
delete_ifb(cli, 'modtools')
logging.info("Deleting all virtual interfaces on node {0}".format(node))
delete_ifb(cli, "modtools")
kube_helper.delete_pod(cli, "modtools", "default")
@@ -546,21 +479,13 @@ def create_ifb(cli: CoreV1Api, number: int, pod_name: str):
Makes use of modprobe commands
"""
exec_command = [
'chroot', '/host',
'modprobe', 'ifb', 'numifbs=' + str(number)
]
kube_helper.exec_cmd_in_pod(cli, exec_command, pod_name, 'default')
exec_command = ["chroot", "/host", "modprobe", "ifb", "numifbs=" + str(number)]
kube_helper.exec_cmd_in_pod(cli, exec_command, pod_name, "default")
for i in range(0, number):
exec_command = ['chroot', '/host', 'ip', 'link', 'set', 'dev']
exec_command += ['ifb' + str(i), 'up']
kube_helper.exec_cmd_in_pod(
cli,
exec_command,
pod_name,
'default'
)
exec_command = ["chroot", "/host", "ip", "link", "set", "dev"]
exec_command += ["ifb" + str(i), "up"]
kube_helper.exec_cmd_in_pod(cli, exec_command, pod_name, "default")
def delete_ifb(cli: CoreV1Api, pod_name: str):
@@ -569,8 +494,8 @@ def delete_ifb(cli: CoreV1Api, pod_name: str):
Makes use of modprobe command
"""
exec_command = ['chroot', '/host', 'modprobe', '-r', 'ifb']
kube_helper.exec_cmd_in_pod(cli, exec_command, pod_name, 'default')
exec_command = ["chroot", "/host", "modprobe", "-r", "ifb"]
kube_helper.exec_cmd_in_pod(cli, exec_command, pod_name, "default")
def get_job_pods(cli: CoreV1Api, api_response):
@@ -591,18 +516,14 @@ def get_job_pods(cli: CoreV1Api, api_response):
controllerUid = api_response.metadata.labels["controller-uid"]
pod_label_selector = "controller-uid=" + controllerUid
pods_list = kube_helper.list_pods(
cli,
label_selector=pod_label_selector,
namespace="default"
cli, label_selector=pod_label_selector, namespace="default"
)
return pods_list[0]
def wait_for_job(
batch_cli: BatchV1Api,
job_list: typing.List[str],
timeout: int = 300
batch_cli: BatchV1Api, job_list: typing.List[str], timeout: int = 300
) -> None:
"""
Function that waits for a list of jobs to finish within a time period
@@ -625,13 +546,11 @@ def wait_for_job(
for job_name in job_list:
try:
api_response = kube_helper.get_job_status(
batch_cli,
job_name,
namespace="default"
batch_cli, job_name, namespace="default"
)
if (
api_response.status.succeeded is not None or
api_response.status.failed is not None
api_response.status.succeeded is not None
or api_response.status.failed is not None
):
count += 1
job_list.remove(job_name)
@@ -645,11 +564,7 @@ def wait_for_job(
time.sleep(5)
def delete_jobs(
cli: CoreV1Api,
batch_cli: BatchV1Api,
job_list: typing.List[str]
):
def delete_jobs(cli: CoreV1Api, batch_cli: BatchV1Api, job_list: typing.List[str]):
"""
Function that deletes jobs
@@ -667,38 +582,28 @@ def delete_jobs(
for job_name in job_list:
try:
api_response = kube_helper.get_job_status(
batch_cli,
job_name,
namespace="default"
batch_cli, job_name, namespace="default"
)
if api_response.status.failed is not None:
pod_name = get_job_pods(cli, api_response)
pod_stat = kube_helper.read_pod(
cli,
name=pod_name,
namespace="default"
)
pod_stat = kube_helper.read_pod(cli, name=pod_name, namespace="default")
logging.error(pod_stat.status.container_statuses)
pod_log_response = kube_helper.get_pod_log(
cli,
name=pod_name,
namespace="default"
cli, name=pod_name, namespace="default"
)
pod_log = pod_log_response.data.decode("utf-8")
logging.error(pod_log)
except Exception as e:
logging.warn("Exception in getting job status: %s" % str(e))
api_response = kube_helper.delete_job(
batch_cli,
name=job_name,
namespace="default"
batch_cli, name=job_name, namespace="default"
)
def get_ingress_cmd(
interface_list: typing.List[str],
network_parameters: typing.Dict[str, str],
duration: int = 300
duration: int = 300,
):
"""
Function that returns the commands to the ingress traffic shaping on
@@ -736,9 +641,7 @@ def get_ingress_cmd(
for i, interface in enumerate(interface_list):
if not interface_pattern.match(interface):
logging.error(
"Interface name can only consist of alphanumeric characters"
)
logging.error("Interface name can only consist of alphanumeric characters")
raise Exception(
"Interface '{0}' does not match the required regex pattern :"
r" ^[a-z0-9\-\@\_]+$".format(interface)
@@ -752,33 +655,23 @@ def get_ingress_cmd(
"follow the regex pattern ^ifb[0-9]+$".format(ifb_name)
)
tc_set += "tc qdisc add dev {0} handle ffff: ingress;".format(
interface
)
tc_set += "tc qdisc add dev {0} handle ffff: ingress;".format(interface)
tc_set += "tc filter add dev {0} parent ffff: protocol ip u32 match u32 0 0 action mirred egress redirect dev {1};".format( # noqa
interface,
ifb_name
interface, ifb_name
)
tc_set = "{0} tc qdisc add dev {1} root netem".format(tc_set, ifb_name)
tc_unset = "{0} tc qdisc del dev {1} root ;".format(tc_unset, ifb_name)
tc_unset += "tc qdisc del dev {0} handle ffff: ingress;".format(
interface
)
tc_unset += "tc qdisc del dev {0} handle ffff: ingress;".format(interface)
tc_ls = "{0} tc qdisc ls dev {1} ;".format(tc_ls, ifb_name)
for parameter in network_parameters.keys():
tc_set += " {0} {1} ".format(
param_map[parameter],
network_parameters[parameter]
param_map[parameter], network_parameters[parameter]
)
tc_set += ";"
exec_cmd = "{0} {1} sleep {2};{3} sleep 20;{4}".format(
tc_set,
tc_ls,
duration,
tc_unset,
tc_ls
tc_set, tc_ls, duration, tc_unset, tc_ls
)
return exec_cmd
@@ -790,17 +683,14 @@ def get_ingress_cmd(
description="Applies filters to ihe ingress side of node(s) interfaces",
outputs={
"success": NetworkScenarioSuccessOutput,
"error": NetworkScenarioErrorOutput
"error": NetworkScenarioErrorOutput,
},
)
def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[
str,
typing.Union[
NetworkScenarioSuccessOutput,
NetworkScenarioErrorOutput
]
def network_chaos(
cfg: NetworkScenarioConfig,
) -> typing.Tuple[
str, typing.Union[NetworkScenarioSuccessOutput, NetworkScenarioErrorOutput]
]:
"""
Function that performs the ingress network chaos scenario based
on the provided configuration
@@ -826,12 +716,10 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[
cfg.label_selector,
cfg.instance_count,
pod_interface_template,
cli
cli,
)
except Exception:
return "error", NetworkScenarioErrorOutput(
format_exc()
)
return "error", NetworkScenarioErrorOutput(format_exc())
job_list = []
publish = False
if cfg.kraken_config:
@@ -840,16 +728,12 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[
with open(cfg.kraken_config, "r") as f:
config = yaml.full_load(f)
except Exception:
logging.error(
"Error reading Kraken config from %s" % cfg.kraken_config
)
return "error", NetworkScenarioErrorOutput(
format_exc()
)
logging.error("Error reading Kraken config from %s" % cfg.kraken_config)
return "error", NetworkScenarioErrorOutput(format_exc())
publish = True
try:
if cfg.execution_type == 'parallel':
if cfg.execution_type == "parallel":
for node in node_interface_dict:
job_list.append(
apply_ingress_filter(
@@ -859,22 +743,19 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[
pod_module_template,
job_template,
batch_cli,
cli
cli,
)
)
logging.info("Waiting for parallel job to finish")
start_time = int(time.time())
wait_for_job(batch_cli, job_list[:], cfg.test_duration+100)
wait_for_job(batch_cli, job_list[:], cfg.test_duration + 100)
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config,
failed_post_scenarios,
start_time,
end_time
config, failed_post_scenarios, start_time, end_time
)
elif cfg.execution_type == 'serial':
elif cfg.execution_type == "serial":
create_interfaces = True
for param in cfg.network_params:
for node in node_interface_dict:
@@ -888,50 +769,39 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[
batch_cli,
cli,
create_interfaces=create_interfaces,
param_selector=param
param_selector=param,
)
)
logging.info("Waiting for serial job to finish")
start_time = int(time.time())
wait_for_job(batch_cli, job_list[:], cfg.test_duration+100)
wait_for_job(batch_cli, job_list[:], cfg.test_duration + 100)
logging.info("Deleting jobs")
delete_jobs(cli, batch_cli, job_list[:])
job_list = []
logging.info(
"Waiting for wait_duration : %ss" % cfg.wait_duration
)
logging.info("Waiting for wait_duration : %ss" % cfg.wait_duration)
time.sleep(cfg.wait_duration)
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config,
failed_post_scenarios,
start_time,
end_time
config, failed_post_scenarios, start_time, end_time
)
create_interfaces = False
else:
return "error", NetworkScenarioErrorOutput(
"Invalid execution type - serial and parallel are "
"the only accepted types"
)
"Invalid execution type - serial and parallel are "
"the only accepted types"
)
return "success", NetworkScenarioSuccessOutput(
filter_direction="ingress",
test_interfaces=node_interface_dict,
network_parameters=cfg.network_params,
execution_type=cfg.execution_type
execution_type=cfg.execution_type,
)
except Exception as e:
logging.error("Network Chaos exiting due to Exception - %s" % e)
return "error", NetworkScenarioErrorOutput(
format_exc()
)
return "error", NetworkScenarioErrorOutput(format_exc())
finally:
delete_virtual_interfaces(
cli,
node_interface_dict.keys(),
pod_module_template
)
delete_virtual_interfaces(cli, node_interface_dict.keys(), pod_module_template)
logging.info("Deleting jobs(if any)")
delete_jobs(cli, batch_cli, job_list[:])

View File

@@ -42,8 +42,7 @@ def get_test_pods(
pod names (string) in the namespace
"""
pods_list = []
pods_list = kubecli.list_pods(
label_selector=pod_label, namespace=namespace)
pods_list = kubecli.list_pods(label_selector=pod_label, namespace=namespace)
if pod_name and pod_name not in pods_list:
raise Exception("pod name not found in namespace ")
elif pod_name and pod_name in pods_list:
@@ -92,8 +91,7 @@ def delete_jobs(kubecli: KrknKubernetes, job_list: typing.List[str]):
for job_name in job_list:
try:
api_response = kubecli.get_job_status(
job_name, namespace="default")
api_response = kubecli.get_job_status(job_name, namespace="default")
if api_response.status.failed is not None:
pod_name = get_job_pods(kubecli, api_response)
pod_stat = kubecli.read_pod(name=pod_name, namespace="default")
@@ -131,8 +129,7 @@ def wait_for_job(
while count != job_len:
for job_name in job_list:
try:
api_response = kubecli.get_job_status(
job_name, namespace="default")
api_response = kubecli.get_job_status(job_name, namespace="default")
if (
api_response.status.succeeded is not None
or api_response.status.failed is not None
@@ -149,8 +146,7 @@ def wait_for_job(
time.sleep(5)
def get_bridge_name(cli: ApiextensionsV1Api,
custom_obj: CustomObjectsApi) -> str:
def get_bridge_name(cli: ApiextensionsV1Api, custom_obj: CustomObjectsApi) -> str:
"""
Function that gets OVS bridge present in node.
@@ -328,16 +324,13 @@ def apply_ingress_policy(
create_virtual_interfaces(kubecli, len(ips), node, pod_template)
for count, pod_ip in enumerate(set(ips)):
pod_inf = get_pod_interface(
node, pod_ip, pod_template, bridge_name, kubecli)
pod_inf = get_pod_interface(node, pod_ip, pod_template, bridge_name, kubecli)
exec_cmd = get_ingress_cmd(
test_execution, pod_inf, mod, count, network_params, duration
)
logging.info("Executing %s on pod %s in node %s" %
(exec_cmd, pod_ip, node))
logging.info("Executing %s on pod %s in node %s" % (exec_cmd, pod_ip, node))
job_body = yaml.safe_load(
job_template.render(jobname=mod + str(pod_ip),
nodename=node, cmd=exec_cmd)
job_template.render(jobname=mod + str(pod_ip), nodename=node, cmd=exec_cmd)
)
job_list.append(job_body["metadata"]["name"])
api_response = kubecli.create_job(job_body)
@@ -405,16 +398,13 @@ def apply_net_policy(
job_list = []
for pod_ip in set(ips):
pod_inf = get_pod_interface(
node, pod_ip, pod_template, bridge_name, kubecli)
pod_inf = get_pod_interface(node, pod_ip, pod_template, bridge_name, kubecli)
exec_cmd = get_egress_cmd(
test_execution, pod_inf, mod, network_params, duration
)
logging.info("Executing %s on pod %s in node %s" %
(exec_cmd, pod_ip, node))
logging.info("Executing %s on pod %s in node %s" % (exec_cmd, pod_ip, node))
job_body = yaml.safe_load(
job_template.render(jobname=mod + str(pod_ip),
nodename=node, cmd=exec_cmd)
job_template.render(jobname=mod + str(pod_ip), nodename=node, cmd=exec_cmd)
)
job_list.append(job_body["metadata"]["name"])
api_response = kubecli.create_job(job_body)
@@ -456,18 +446,16 @@ def get_ingress_cmd(
Returns:
str: ingress filter
"""
ifb_dev = 'ifb{0}'.format(count)
ifb_dev = "ifb{0}".format(count)
tc_set = tc_unset = tc_ls = ""
param_map = {"latency": "delay", "loss": "loss", "bandwidth": "rate"}
tc_set = "tc qdisc add dev {0} ingress ;".format(test_interface)
tc_set = "{0} tc filter add dev {1} ingress matchall action mirred egress redirect dev {2} ;".format(
tc_set, test_interface, ifb_dev)
tc_set = "{0} tc qdisc replace dev {1} root netem".format(
tc_set, ifb_dev)
tc_unset = "{0} tc qdisc del dev {1} root ;".format(
tc_unset, ifb_dev)
tc_unset = "{0} tc qdisc del dev {1} ingress".format(
tc_unset, test_interface)
tc_set, test_interface, ifb_dev
)
tc_set = "{0} tc qdisc replace dev {1} root netem".format(tc_set, ifb_dev)
tc_unset = "{0} tc qdisc del dev {1} root ;".format(tc_unset, ifb_dev)
tc_unset = "{0} tc qdisc del dev {1} ingress".format(tc_unset, test_interface)
tc_ls = "{0} tc qdisc ls dev {1} ;".format(tc_ls, ifb_dev)
if execution == "parallel":
for val in vallst.keys():
@@ -475,8 +463,7 @@ def get_ingress_cmd(
tc_set += ";"
else:
tc_set += " {0} {1} ;".format(param_map[mod], vallst[mod])
exec_cmd = "{0} {1} sleep {2};{3}".format(
tc_set, tc_ls, duration, tc_unset)
exec_cmd = "{0} {1} sleep {2};{3}".format(tc_set, tc_ls, duration, tc_unset)
return exec_cmd
@@ -512,10 +499,8 @@ def get_egress_cmd(
"""
tc_set = tc_unset = tc_ls = ""
param_map = {"latency": "delay", "loss": "loss", "bandwidth": "rate"}
tc_set = "{0} tc qdisc replace dev {1} root netem".format(
tc_set, test_interface)
tc_unset = "{0} tc qdisc del dev {1} root ;".format(
tc_unset, test_interface)
tc_set = "{0} tc qdisc replace dev {1} root netem".format(tc_set, test_interface)
tc_unset = "{0} tc qdisc del dev {1} root ;".format(tc_unset, test_interface)
tc_ls = "{0} tc qdisc ls dev {1} ;".format(tc_ls, test_interface)
if execution == "parallel":
for val in vallst.keys():
@@ -523,17 +508,13 @@ def get_egress_cmd(
tc_set += ";"
else:
tc_set += " {0} {1} ;".format(param_map[mod], vallst[mod])
exec_cmd = "{0} {1} sleep {2};{3}".format(
tc_set, tc_ls, duration, tc_unset)
exec_cmd = "{0} {1} sleep {2};{3}".format(tc_set, tc_ls, duration, tc_unset)
return exec_cmd
def create_virtual_interfaces(
kubecli: KrknKubernetes,
nummber: int,
node: str,
pod_template
kubecli: KrknKubernetes, nummber: int, node: str, pod_template
) -> None:
"""
Function that creates a privileged pod and uses it to create
@@ -554,25 +535,18 @@ def create_virtual_interfaces(
- The YAML template used to instantiate a pod to create
virtual interfaces on the node
"""
pod_body = yaml.safe_load(
pod_template.render(nodename=node)
)
pod_body = yaml.safe_load(pod_template.render(nodename=node))
kubecli.create_pod(pod_body, "default", 300)
logging.info(
"Creating {0} virtual interfaces on node {1} using a pod".format(
nummber,
node
)
"Creating {0} virtual interfaces on node {1} using a pod".format(nummber, node)
)
create_ifb(kubecli, nummber, 'modtools')
create_ifb(kubecli, nummber, "modtools")
logging.info("Deleting pod used to create virtual interfaces")
kubecli.delete_pod("modtools", "default")
def delete_virtual_interfaces(
kubecli: KrknKubernetes,
node_list: typing.List[str],
pod_template
kubecli: KrknKubernetes, node_list: typing.List[str], pod_template
):
"""
Function that creates a privileged pod and uses it to delete all
@@ -595,14 +569,10 @@ def delete_virtual_interfaces(
"""
for node in node_list:
pod_body = yaml.safe_load(
pod_template.render(nodename=node)
)
pod_body = yaml.safe_load(pod_template.render(nodename=node))
kubecli.create_pod(pod_body, "default", 300)
logging.info(
"Deleting all virtual interfaces on node {0}".format(node)
)
delete_ifb(kubecli, 'modtools')
logging.info("Deleting all virtual interfaces on node {0}".format(node))
delete_ifb(kubecli, "modtools")
kubecli.delete_pod("modtools", "default")
@@ -612,24 +582,14 @@ def create_ifb(kubecli: KrknKubernetes, number: int, pod_name: str):
Makes use of modprobe commands
"""
exec_command = [
'/host',
'modprobe', 'ifb', 'numifbs=' + str(number)
]
kubecli.exec_cmd_in_pod(
exec_command,
pod_name,
'default',
base_command="chroot")
exec_command = ["/host", "modprobe", "ifb", "numifbs=" + str(number)]
kubecli.exec_cmd_in_pod(exec_command, pod_name, "default", base_command="chroot")
for i in range(0, number):
exec_command = ['/host', 'ip', 'link', 'set', 'dev']
exec_command += ['ifb' + str(i), 'up']
exec_command = ["/host", "ip", "link", "set", "dev"]
exec_command += ["ifb" + str(i), "up"]
kubecli.exec_cmd_in_pod(
exec_command,
pod_name,
'default',
base_command="chroot"
exec_command, pod_name, "default", base_command="chroot"
)
@@ -639,17 +599,11 @@ def delete_ifb(kubecli: KrknKubernetes, pod_name: str):
Makes use of modprobe command
"""
exec_command = ['/host', 'modprobe', '-r', 'ifb']
kubecli.exec_cmd_in_pod(
exec_command,
pod_name,
'default',
base_command="chroot")
exec_command = ["/host", "modprobe", "-r", "ifb"]
kubecli.exec_cmd_in_pod(exec_command, pod_name, "default", base_command="chroot")
def list_bridges(
node: str, pod_template, kubecli: KrknKubernetes
) -> typing.List[str]:
def list_bridges(node: str, pod_template, kubecli: KrknKubernetes) -> typing.List[str]:
"""
Function that returns a list of bridges on the node
@@ -787,7 +741,7 @@ def get_pod_interface(
find_ip = f"external-ids:ip_addresses={ip}/23"
else:
find_ip = f"external-ids:ip={ip}"
cmd = [
"/host",
"ovs-vsctl",
@@ -797,24 +751,20 @@ def get_pod_interface(
"interface",
find_ip,
]
output = kubecli.exec_cmd_in_pod(
cmd, "modtools", "default", base_command="chroot"
)
if not output:
cmd= [
"/host",
"ip",
"addr",
"show"
]
cmd = ["/host", "ip", "addr", "show"]
output = kubecli.exec_cmd_in_pod(
cmd, "modtools", "default", base_command="chroot")
cmd, "modtools", "default", base_command="chroot"
)
for if_str in output.split("\n"):
if re.search(ip,if_str):
inf = if_str.split(' ')[-1]
if re.search(ip, if_str):
inf = if_str.split(" ")[-1]
else:
inf = output
inf = output
finally:
logging.info("Deleting pod to query interface on node")
kubecli.delete_pod("modtools", "default")
@@ -927,11 +877,11 @@ class InputParams:
},
)
kraken_config: typing.Optional[str] = field(
kraken_config: typing.Dict[str, typing.Any] = field(
default=None,
metadata={
"name": "Kraken Config",
"description": "Path to the config file of Kraken. "
"description": "Kraken config file dictionary "
"Set this field if you wish to publish status onto Cerberus",
},
)
@@ -1043,14 +993,6 @@ def pod_outage(
publish = False
if params.kraken_config:
failed_post_scenarios = ""
try:
with open(params.kraken_config, "r") as f:
config = yaml.full_load(f)
except Exception:
logging.error("Error reading Kraken config from %s" %
params.kraken_config)
return "error", PodOutageErrorOutput(format_exc())
publish = True
for i in params.direction:
@@ -1106,7 +1048,7 @@ def pod_outage(
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config, failed_post_scenarios, start_time, end_time
params.kraken_config, "", start_time, end_time
)
return "success", PodOutageSuccessOutput(
@@ -1116,8 +1058,7 @@ def pod_outage(
egress_ports=params.egress_ports,
)
except Exception as e:
logging.error(
"Pod network outage scenario exiting due to Exception - %s" % e)
logging.error("Pod network outage scenario exiting due to Exception - %s" % e)
return "error", PodOutageErrorOutput(format_exc())
finally:
logging.info("Deleting jobs(if any)")
@@ -1179,11 +1120,11 @@ class EgressParams:
},
)
kraken_config: typing.Optional[str] = field(
kraken_config: typing.Dict[str, typing.Any] = field(
default=None,
metadata={
"name": "Kraken Config",
"description": "Path to the config file of Kraken. "
"description": "Krkn config file dictionary "
"Set this field if you wish to publish status onto Cerberus",
},
)
@@ -1276,8 +1217,7 @@ class PodEgressNetShapingErrorOutput:
def pod_egress_shaping(
params: EgressParams,
) -> typing.Tuple[
str, typing.Union[PodEgressNetShapingSuccessOutput,
PodEgressNetShapingErrorOutput]
str, typing.Union[PodEgressNetShapingSuccessOutput, PodEgressNetShapingErrorOutput]
]:
"""
Function that performs egress pod traffic shaping based
@@ -1302,14 +1242,6 @@ def pod_egress_shaping(
publish = False
if params.kraken_config:
failed_post_scenarios = ""
try:
with open(params.kraken_config, "r") as f:
config = yaml.full_load(f)
except Exception:
logging.error("Error reading Kraken config from %s" %
params.kraken_config)
return "error", PodEgressNetShapingErrorOutput(format_exc())
publish = True
try:
@@ -1344,30 +1276,30 @@ def pod_egress_shaping(
for mod in mod_lst:
for node, ips in node_dict.items():
job_list.extend( apply_net_policy(
mod,
node,
ips,
job_template,
pod_module_template,
params.network_params,
params.test_duration,
br_name,
kubecli,
params.execution_type,
))
job_list.extend(
apply_net_policy(
mod,
node,
ips,
job_template,
pod_module_template,
params.network_params,
params.test_duration,
br_name,
kubecli,
params.execution_type,
)
)
if params.execution_type == "serial":
logging.info("Waiting for serial job to finish")
start_time = int(time.time())
wait_for_job(job_list[:], kubecli,
params.test_duration + 20)
logging.info("Waiting for wait_duration %s" %
params.test_duration)
wait_for_job(job_list[:], kubecli, params.test_duration + 20)
logging.info("Waiting for wait_duration %s" % params.test_duration)
time.sleep(params.test_duration)
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config, failed_post_scenarios, start_time, end_time
params.kraken_config, "", start_time, end_time
)
if params.execution_type == "parallel":
break
@@ -1380,7 +1312,7 @@ def pod_egress_shaping(
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config, failed_post_scenarios, start_time, end_time
params.kraken_config, "", start_time, end_time
)
return "success", PodEgressNetShapingSuccessOutput(
@@ -1389,8 +1321,7 @@ def pod_egress_shaping(
execution_type=params.execution_type,
)
except Exception as e:
logging.error(
"Pod network Shaping scenario exiting due to Exception - %s" % e)
logging.error("Pod network Shaping scenario exiting due to Exception - %s" % e)
return "error", PodEgressNetShapingErrorOutput(format_exc())
finally:
logging.info("Deleting jobs(if any)")
@@ -1452,7 +1383,7 @@ class IngressParams:
},
)
kraken_config: typing.Optional[str] = field(
kraken_config: typing.Dict[str, typing.Any] = field(
default=None,
metadata={
"name": "Kraken Config",
@@ -1549,8 +1480,8 @@ class PodIngressNetShapingErrorOutput:
def pod_ingress_shaping(
params: IngressParams,
) -> typing.Tuple[
str, typing.Union[PodIngressNetShapingSuccessOutput,
PodIngressNetShapingErrorOutput]
str,
typing.Union[PodIngressNetShapingSuccessOutput, PodIngressNetShapingErrorOutput],
]:
"""
Function that performs ingress pod traffic shaping based
@@ -1575,14 +1506,6 @@ def pod_ingress_shaping(
publish = False
if params.kraken_config:
failed_post_scenarios = ""
try:
with open(params.kraken_config, "r") as f:
config = yaml.full_load(f)
except Exception:
logging.error("Error reading Kraken config from %s" %
params.kraken_config)
return "error", PodIngressNetShapingErrorOutput(format_exc())
publish = True
try:
@@ -1617,30 +1540,30 @@ def pod_ingress_shaping(
for mod in mod_lst:
for node, ips in node_dict.items():
job_list.extend(apply_ingress_policy(
mod,
node,
ips,
job_template,
pod_module_template,
params.network_params,
params.test_duration,
br_name,
kubecli,
params.execution_type,
))
job_list.extend(
apply_ingress_policy(
mod,
node,
ips,
job_template,
pod_module_template,
params.network_params,
params.test_duration,
br_name,
kubecli,
params.execution_type,
)
)
if params.execution_type == "serial":
logging.info("Waiting for serial job to finish")
start_time = int(time.time())
wait_for_job(job_list[:], kubecli,
params.test_duration + 20)
logging.info("Waiting for wait_duration %s" %
params.test_duration)
wait_for_job(job_list[:], kubecli, params.test_duration + 20)
logging.info("Waiting for wait_duration %s" % params.test_duration)
time.sleep(params.test_duration)
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config, failed_post_scenarios, start_time, end_time
params.kraken_config, "", start_time, end_time
)
if params.execution_type == "parallel":
break
@@ -1653,7 +1576,7 @@ def pod_ingress_shaping(
end_time = int(time.time())
if publish:
cerberus.publish_kraken_status(
config, failed_post_scenarios, start_time, end_time
params.kraken_config, "", start_time, end_time
)
return "success", PodIngressNetShapingSuccessOutput(
@@ -1662,14 +1585,9 @@ def pod_ingress_shaping(
execution_type=params.execution_type,
)
except Exception as e:
logging.error(
"Pod network Shaping scenario exiting due to Exception - %s" % e)
logging.error("Pod network Shaping scenario exiting due to Exception - %s" % e)
return "error", PodIngressNetShapingErrorOutput(format_exc())
finally:
delete_virtual_interfaces(
kubecli,
node_dict.keys(),
pod_module_template
)
delete_virtual_interfaces(kubecli, node_dict.keys(), pod_module_template)
logging.info("Deleting jobs(if any)")
delete_jobs(kubecli, job_list[:])

View File

@@ -176,10 +176,37 @@ class PvcScenarioPlugin(AbstractScenarioPlugin):
start_time = int(time.time())
# Create temp file in the PVC
full_path = "%s/%s" % (str(mount_path), str(file_name))
command = "fallocate -l $((%s*1024)) %s" % (
str(file_size_kb),
str(full_path),
fallocate = lib_telemetry.get_lib_kubernetes().exec_cmd_in_pod(
["command -v fallocate"],
pod_name,
namespace,
container_name,
)
dd = lib_telemetry.get_lib_kubernetes().exec_cmd_in_pod(
["command -v dd"],
pod_name,
namespace,
container_name,
)
if fallocate:
command = "fallocate -l $((%s*1024)) %s" % (
str(file_size_kb),
str(full_path),
)
elif dd is not None:
logging.warning(
"fallocate not found, using dd, it may take longer based on the amount of data, please wait..."
)
command = f"dd if=/dev/urandom of={str(full_path)} bs=1024 count={str(file_size_kb)} oflag=direct"
else:
logging.error(
"failed to locate required binaries fallocate or dd to execute the scenario"
)
return 1
logging.debug("Create temp file in the PVC command:\n %s" % command)
lib_telemetry.get_lib_kubernetes().exec_cmd_in_pod(
[command],
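The hunk above makes the PVC fill step resilient to minimal container images: it probes the target container with `command -v`, prefers `fallocate`, and falls back to a slower `dd` write only when `fallocate` is absent. A minimal standalone sketch of that selection logic, assuming a generic `exec_in_pod(cmd)` helper that returns the command's output (empty when the binary is missing):

```python
def build_fill_command(exec_in_pod, full_path: str, file_size_kb: int) -> str:
    """Illustrative sketch: choose how to create a file of file_size_kb KB at full_path."""
    if exec_in_pod("command -v fallocate"):
        # fallocate allocates the requested size immediately
        return "fallocate -l $((%s*1024)) %s" % (file_size_kb, full_path)
    if exec_in_pod("command -v dd"):
        # dd writes the data block by block; slower, but available almost everywhere
        return (
            "dd if=/dev/urandom of=%s bs=1024 count=%s oflag=direct"
            % (full_path, file_size_kb)
        )
    raise RuntimeError("neither fallocate nor dd is available in the container")
```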

View File

@@ -15,7 +15,7 @@ google-api-python-client==2.116.0
ibm_cloud_sdk_core==3.18.0
ibm_vpc==0.20.0
jinja2==3.1.4
krkn-lib==4.0.0
krkn-lib==4.0.3
lxml==5.1.0
kubernetes==28.1.0
numpy==1.26.4

View File

@@ -0,0 +1,304 @@
# OpenShift Shenanigans
## Workflow Description
Given a target OpenShift cluster, this workflow executes a
[kube-burner plugin](https://github.com/redhat-performance/arcaflow-plugin-kube-burner)
workflow to place a load on the cluster, repeatedly removes a targeted pod at a given frequency with the [kill-pod plugin](https://github.com/krkn-chaos/arcaflow-plugin-kill-pod),
and runs a [stress-ng](https://github.com/ColinIanKing/stress-ng) CPU workload on the cluster.
Target your OpenShift cluster with the appropriate `kubeconfig` file, and add its file path as
the value for `kubernetes_target.kubeconfig_path` in the input file. Any combination of subworkflows can be disabled in the input file by setting `cpu_hog_enabled`, `pod_chaos_enabled`, or `kubeburner_enabled` to `false`.
## Files
- [`workflow.yaml`](workflow.yaml) -- Defines the workflow input schema, the plugins to run
and their data relationships, and the output to present to the user
- [`input.yaml`](input.yaml) -- The input parameters that the user provides for running
the workflow
- [`config.yaml`](config.yaml) -- Global config parameters that are passed to the Arcaflow
engine
- [`cpu-hog.yaml`](subworkflows/cpu-hog.yaml) -- The StressNG workload on the CPU.
- [`kubeburner.yaml`](subworkflows/kubeburner.yaml) -- The KubeBurner workload for the Kubernetes Cluster API.
- [`pod-chaos.yaml`](subworkflows/pod-chaos.yaml) -- The Kill Pod workflow for the Kubernetes infrastructure pods.
## Running the Workflow
### Workflow Dependencies
Install Python `3.9` or later.
First, add the path to your Python interpreter to `config.yaml` as the value
for `pythonPath`, as shown below. A common choice on Linux distributions is
`/usr/bin/python`. Second, set `workdir` to a directory that your Arcaflow
process can write to; `/tmp` is a common choice.
```yaml
deployers:
python:
pythonPath: /usr/bin/python
workdir: /tmp
```
To use this Python interpreter with our `kill-pod` plugin, go to the `deploy` section of the `kill_pod` step in [`pod-chaos.yaml`](subworkflows/pod-chaos.yaml). You can use the same `pythonPath` and `workdir` that you used in
your `config.yaml`.
```yaml
deploy:
deployer_name: python
modulePullPolicy: Always
pythonPath: /usr/bin/python
workdir: /tmp
```
Download a Go binary of the latest version of the Arcaflow engine from: https://github.com/arcalot/arcaflow-engine/releases.
#### OpenShift Target
Target your desired OpenShift cluster by setting the `kubeconfig_path` variable for each subworkflow's parameter list in [`input.yaml`](input.yaml).
#### Kube-Burner Plugin
The `kube-burner` plugin generates and reports the UUID with which the
`kube-burner` data is associated in your search database. The `uuidgen`
workflow step uses the `uuid` step of the `arcaflow-plugin-utilities` plugin to
generate a random UUID for you.
### Workflow Execution
Run the workflow:
```
$ export WFPATH=<path to this workflow directory>
$ arcaflow --context ${WFPATH} --input input.yaml --config config.yaml --workflow workflow.yaml
```
## Workflow Diagram
This diagram shows the complete end-to-end workflow logic.
### Main Workflow
```mermaid
%% Mermaid markdown workflow
flowchart LR
%% Success path
input-->steps.cpu_hog_wf.enabling
input-->steps.cpu_hog_wf.execute
input-->steps.kubeburner_wf.enabling
input-->steps.kubeburner_wf.execute
input-->steps.pod_chaos_wf.enabling
input-->steps.pod_chaos_wf.execute
outputs.workflow_success.cpu_hog-->outputs.workflow_success
outputs.workflow_success.cpu_hog.disabled-->outputs.workflow_success.cpu_hog
outputs.workflow_success.cpu_hog.enabled-->outputs.workflow_success.cpu_hog
outputs.workflow_success.kubeburner-->outputs.workflow_success
outputs.workflow_success.kubeburner.disabled-->outputs.workflow_success.kubeburner
outputs.workflow_success.kubeburner.enabled-->outputs.workflow_success.kubeburner
outputs.workflow_success.pod_chaos-->outputs.workflow_success
outputs.workflow_success.pod_chaos.disabled-->outputs.workflow_success.pod_chaos
outputs.workflow_success.pod_chaos.enabled-->outputs.workflow_success.pod_chaos
steps.cpu_hog_wf.closed-->steps.cpu_hog_wf.closed.result
steps.cpu_hog_wf.disabled-->steps.cpu_hog_wf.disabled.output
steps.cpu_hog_wf.disabled.output-->outputs.workflow_success.cpu_hog.disabled
steps.cpu_hog_wf.enabling-->steps.cpu_hog_wf.closed
steps.cpu_hog_wf.enabling-->steps.cpu_hog_wf.disabled
steps.cpu_hog_wf.enabling-->steps.cpu_hog_wf.enabling.resolved
steps.cpu_hog_wf.enabling-->steps.cpu_hog_wf.execute
steps.cpu_hog_wf.execute-->steps.cpu_hog_wf.outputs
steps.cpu_hog_wf.outputs-->steps.cpu_hog_wf.outputs.success
steps.cpu_hog_wf.outputs.success-->outputs.workflow_success.cpu_hog.enabled
steps.kubeburner_wf.closed-->steps.kubeburner_wf.closed.result
steps.kubeburner_wf.disabled-->steps.kubeburner_wf.disabled.output
steps.kubeburner_wf.disabled.output-->outputs.workflow_success.kubeburner.disabled
steps.kubeburner_wf.enabling-->steps.kubeburner_wf.closed
steps.kubeburner_wf.enabling-->steps.kubeburner_wf.disabled
steps.kubeburner_wf.enabling-->steps.kubeburner_wf.enabling.resolved
steps.kubeburner_wf.enabling-->steps.kubeburner_wf.execute
steps.kubeburner_wf.execute-->steps.kubeburner_wf.outputs
steps.kubeburner_wf.outputs-->steps.kubeburner_wf.outputs.success
steps.kubeburner_wf.outputs.success-->outputs.workflow_success.kubeburner.enabled
steps.pod_chaos_wf.closed-->steps.pod_chaos_wf.closed.result
steps.pod_chaos_wf.disabled-->steps.pod_chaos_wf.disabled.output
steps.pod_chaos_wf.disabled.output-->outputs.workflow_success.pod_chaos.disabled
steps.pod_chaos_wf.enabling-->steps.pod_chaos_wf.closed
steps.pod_chaos_wf.enabling-->steps.pod_chaos_wf.disabled
steps.pod_chaos_wf.enabling-->steps.pod_chaos_wf.enabling.resolved
steps.pod_chaos_wf.enabling-->steps.pod_chaos_wf.execute
steps.pod_chaos_wf.execute-->steps.pod_chaos_wf.outputs
steps.pod_chaos_wf.outputs-->steps.pod_chaos_wf.outputs.success
steps.pod_chaos_wf.outputs.success-->outputs.workflow_success.pod_chaos.enabled
%% Error path
steps.cpu_hog_wf.execute-->steps.cpu_hog_wf.failed
steps.cpu_hog_wf.failed-->steps.cpu_hog_wf.failed.error
steps.kubeburner_wf.execute-->steps.kubeburner_wf.failed
steps.kubeburner_wf.failed-->steps.kubeburner_wf.failed.error
steps.pod_chaos_wf.execute-->steps.pod_chaos_wf.failed
steps.pod_chaos_wf.failed-->steps.pod_chaos_wf.failed.error
%% Mermaid end
```
### Pod Chaos Workflow
```mermaid
%% Mermaid markdown workflow
flowchart LR
%% Success path
input-->steps.kill_pod.starting
steps.kill_pod.cancelled-->steps.kill_pod.closed
steps.kill_pod.cancelled-->steps.kill_pod.outputs
steps.kill_pod.closed-->steps.kill_pod.closed.result
steps.kill_pod.deploy-->steps.kill_pod.closed
steps.kill_pod.deploy-->steps.kill_pod.starting
steps.kill_pod.disabled-->steps.kill_pod.disabled.output
steps.kill_pod.enabling-->steps.kill_pod.closed
steps.kill_pod.enabling-->steps.kill_pod.disabled
steps.kill_pod.enabling-->steps.kill_pod.enabling.resolved
steps.kill_pod.enabling-->steps.kill_pod.starting
steps.kill_pod.outputs-->steps.kill_pod.outputs.success
steps.kill_pod.outputs.success-->outputs.success
steps.kill_pod.running-->steps.kill_pod.closed
steps.kill_pod.running-->steps.kill_pod.outputs
steps.kill_pod.starting-->steps.kill_pod.closed
steps.kill_pod.starting-->steps.kill_pod.running
steps.kill_pod.starting-->steps.kill_pod.starting.started
%% Error path
steps.kill_pod.cancelled-->steps.kill_pod.crashed
steps.kill_pod.cancelled-->steps.kill_pod.deploy_failed
steps.kill_pod.crashed-->steps.kill_pod.crashed.error
steps.kill_pod.deploy-->steps.kill_pod.deploy_failed
steps.kill_pod.deploy_failed-->steps.kill_pod.deploy_failed.error
steps.kill_pod.enabling-->steps.kill_pod.crashed
steps.kill_pod.outputs-->steps.kill_pod.outputs.error
steps.kill_pod.running-->steps.kill_pod.crashed
steps.kill_pod.starting-->steps.kill_pod.crashed
%% Mermaid end
```
### StressNG (CPU Hog) Workflow
```mermaid
%% Mermaid markdown workflow
flowchart LR
%% Success path
input-->steps.kubeconfig.starting
input-->steps.stressng.deploy
input-->steps.stressng.starting
steps.kubeconfig.cancelled-->steps.kubeconfig.closed
steps.kubeconfig.cancelled-->steps.kubeconfig.outputs
steps.kubeconfig.closed-->steps.kubeconfig.closed.result
steps.kubeconfig.deploy-->steps.kubeconfig.closed
steps.kubeconfig.deploy-->steps.kubeconfig.starting
steps.kubeconfig.disabled-->steps.kubeconfig.disabled.output
steps.kubeconfig.enabling-->steps.kubeconfig.closed
steps.kubeconfig.enabling-->steps.kubeconfig.disabled
steps.kubeconfig.enabling-->steps.kubeconfig.enabling.resolved
steps.kubeconfig.enabling-->steps.kubeconfig.starting
steps.kubeconfig.outputs-->steps.kubeconfig.outputs.success
steps.kubeconfig.outputs.success-->steps.stressng.deploy
steps.kubeconfig.running-->steps.kubeconfig.closed
steps.kubeconfig.running-->steps.kubeconfig.outputs
steps.kubeconfig.starting-->steps.kubeconfig.closed
steps.kubeconfig.starting-->steps.kubeconfig.running
steps.kubeconfig.starting-->steps.kubeconfig.starting.started
steps.stressng.cancelled-->steps.stressng.closed
steps.stressng.cancelled-->steps.stressng.outputs
steps.stressng.closed-->steps.stressng.closed.result
steps.stressng.deploy-->steps.stressng.closed
steps.stressng.deploy-->steps.stressng.starting
steps.stressng.disabled-->steps.stressng.disabled.output
steps.stressng.enabling-->steps.stressng.closed
steps.stressng.enabling-->steps.stressng.disabled
steps.stressng.enabling-->steps.stressng.enabling.resolved
steps.stressng.enabling-->steps.stressng.starting
steps.stressng.outputs-->steps.stressng.outputs.success
steps.stressng.outputs.success-->outputs.success
steps.stressng.running-->steps.stressng.closed
steps.stressng.running-->steps.stressng.outputs
steps.stressng.starting-->steps.stressng.closed
steps.stressng.starting-->steps.stressng.running
steps.stressng.starting-->steps.stressng.starting.started
%% Error path
steps.kubeconfig.cancelled-->steps.kubeconfig.crashed
steps.kubeconfig.cancelled-->steps.kubeconfig.deploy_failed
steps.kubeconfig.crashed-->steps.kubeconfig.crashed.error
steps.kubeconfig.deploy-->steps.kubeconfig.deploy_failed
steps.kubeconfig.deploy_failed-->steps.kubeconfig.deploy_failed.error
steps.kubeconfig.enabling-->steps.kubeconfig.crashed
steps.kubeconfig.outputs-->steps.kubeconfig.outputs.error
steps.kubeconfig.running-->steps.kubeconfig.crashed
steps.kubeconfig.starting-->steps.kubeconfig.crashed
steps.stressng.cancelled-->steps.stressng.crashed
steps.stressng.cancelled-->steps.stressng.deploy_failed
steps.stressng.crashed-->steps.stressng.crashed.error
steps.stressng.deploy-->steps.stressng.deploy_failed
steps.stressng.deploy_failed-->steps.stressng.deploy_failed.error
steps.stressng.enabling-->steps.stressng.crashed
steps.stressng.outputs-->steps.stressng.outputs.error
steps.stressng.running-->steps.stressng.crashed
steps.stressng.starting-->steps.stressng.crashed
%% Mermaid end
```
### Kube-Burner Workflow
```mermaid
%% Mermaid markdown workflow
flowchart LR
%% Success path
input-->steps.kubeburner.starting
steps.kubeburner.cancelled-->steps.kubeburner.closed
steps.kubeburner.cancelled-->steps.kubeburner.outputs
steps.kubeburner.closed-->steps.kubeburner.closed.result
steps.kubeburner.deploy-->steps.kubeburner.closed
steps.kubeburner.deploy-->steps.kubeburner.starting
steps.kubeburner.disabled-->steps.kubeburner.disabled.output
steps.kubeburner.enabling-->steps.kubeburner.closed
steps.kubeburner.enabling-->steps.kubeburner.disabled
steps.kubeburner.enabling-->steps.kubeburner.enabling.resolved
steps.kubeburner.enabling-->steps.kubeburner.starting
steps.kubeburner.outputs-->steps.kubeburner.outputs.success
steps.kubeburner.outputs.success-->outputs.success
steps.kubeburner.running-->steps.kubeburner.closed
steps.kubeburner.running-->steps.kubeburner.outputs
steps.kubeburner.starting-->steps.kubeburner.closed
steps.kubeburner.starting-->steps.kubeburner.running
steps.kubeburner.starting-->steps.kubeburner.starting.started
steps.uuidgen.cancelled-->steps.uuidgen.closed
steps.uuidgen.cancelled-->steps.uuidgen.outputs
steps.uuidgen.closed-->steps.uuidgen.closed.result
steps.uuidgen.deploy-->steps.uuidgen.closed
steps.uuidgen.deploy-->steps.uuidgen.starting
steps.uuidgen.disabled-->steps.uuidgen.disabled.output
steps.uuidgen.enabling-->steps.uuidgen.closed
steps.uuidgen.enabling-->steps.uuidgen.disabled
steps.uuidgen.enabling-->steps.uuidgen.enabling.resolved
steps.uuidgen.enabling-->steps.uuidgen.starting
steps.uuidgen.outputs-->steps.uuidgen.outputs.success
steps.uuidgen.outputs.success-->steps.kubeburner.starting
steps.uuidgen.running-->steps.uuidgen.closed
steps.uuidgen.running-->steps.uuidgen.outputs
steps.uuidgen.starting-->steps.uuidgen.closed
steps.uuidgen.starting-->steps.uuidgen.running
steps.uuidgen.starting-->steps.uuidgen.starting.started
%% Error path
steps.kubeburner.cancelled-->steps.kubeburner.crashed
steps.kubeburner.cancelled-->steps.kubeburner.deploy_failed
steps.kubeburner.crashed-->steps.kubeburner.crashed.error
steps.kubeburner.deploy-->steps.kubeburner.deploy_failed
steps.kubeburner.deploy_failed-->steps.kubeburner.deploy_failed.error
steps.kubeburner.enabling-->steps.kubeburner.crashed
steps.kubeburner.outputs-->steps.kubeburner.outputs.error
steps.kubeburner.running-->steps.kubeburner.crashed
steps.kubeburner.starting-->steps.kubeburner.crashed
steps.uuidgen.cancelled-->steps.uuidgen.crashed
steps.uuidgen.cancelled-->steps.uuidgen.deploy_failed
steps.uuidgen.crashed-->steps.uuidgen.crashed.error
steps.uuidgen.deploy-->steps.uuidgen.deploy_failed
steps.uuidgen.deploy_failed-->steps.uuidgen.deploy_failed.error
steps.uuidgen.enabling-->steps.uuidgen.crashed
steps.uuidgen.outputs-->steps.uuidgen.outputs.error
steps.uuidgen.running-->steps.uuidgen.crashed
steps.uuidgen.starting-->steps.uuidgen.crashed
%% Mermaid end
```


@@ -0,0 +1,18 @@
---
deployers:
image:
deployer_name: podman
deployment:
imagePullPolicy: IfNotPresent
python:
deployer_name: python
modulePullPolicy: Always
pythonPath: /usr/bin/python
workdir: /tmp
log:
level: debug
logged_outputs:
error:
level: debug
success:
level: debug
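This engine configuration runs image-based plugin steps with Podman and Python plugins with the local Python deployer, logging at debug level. As a minimal sketch of a variation (assuming the standard Arcaflow Docker deployer is available; that assumption is not part of this change), only the image deployer name needs to change:

```yaml
# Illustrative variation only: use the Docker deployer for image-based plugin
# steps instead of Podman. Assumes the stock Arcaflow Docker deployer is
# available to the engine; everything else stays the same.
deployers:
  image:
    deployer_name: docker
    deployment:
      imagePullPolicy: IfNotPresent
```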


@@ -0,0 +1,41 @@
kubernetes_target:
kubeconfig_path:
cpu_hog_enabled: true
pod_chaos_enabled: true
kubeburner_enabled: true
kubeburner_list:
- kubeburner:
kubeconfig: 'given later in the workflow by the kubeconfig plugin'
workload: 'cluster-density'
qps: 20
burst: 20
log_level: 'info'
timeout: '1m'
iterations: 1
churn: 'true'
churn_duration: 1s
churn_delay: 1s
churn_percent: 10
alerting: 'true'
gc: 'true'
pod_chaos_list:
- namespace_pattern: ^openshift-etcd$
label_selector: k8s-app=etcd
kill: 1
krkn_pod_recovery_time: 1
cpu_hog_list:
- namespace: default
# set the node selector as a key-value pair eg.
# node_selector:
# kubernetes.io/hostname: kind-worker2
node_selector: {}
stressng_params:
timeout: 1
stressors:
- stressor: cpu
workers: 1
cpu-load: 20
cpu-method: all
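The `cpu_hog_enabled`, `pod_chaos_enabled`, and `kubeburner_enabled` flags gate the corresponding foreach steps in the top-level workflow, so a scenario can be switched off without deleting its list entry. A minimal sketch of such a variation, reusing only values already shown above (the hostname comes from the comment and is not a real node):

```yaml
# Illustrative input variation: run only the CPU hog scenario and pin it to a
# single node. The hostname is an example value, not a real cluster node.
cpu_hog_enabled: true
pod_chaos_enabled: false
kubeburner_enabled: false
cpu_hog_list:
  - namespace: default
    node_selector:
      kubernetes.io/hostname: kind-worker2
    stressng_params:
      timeout: 1
      stressors:
        - stressor: cpu
          workers: 1
          cpu-load: 20
          cpu-method: all
```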


@@ -0,0 +1,75 @@
version: v0.2.0
input:
root: CpuHog__KubernetesTarget
objects:
CpuHog__KubernetesTarget:
id: CpuHog__KubernetesTarget
properties:
constant:
type:
type_id: ref
id: KubernetesTarget
item:
type:
type_id: ref
id: CpuHog
KubernetesTarget:
id: KubernetesTarget
properties:
kubeconfig_path:
type:
type_id: string
CpuHog:
id: CpuHog
properties:
namespace:
display:
description: The namespace where the container will be deployed
name: Namespace
type:
type_id: string
required: true
node_selector:
display:
description: Kubernetes node selector (key/value pairs) identifying the node where the plugin pod must be deployed
type:
type_id: map
values:
type_id: string
keys:
type_id: string
required: true
stressng_params:
type:
type_id: ref
id: StressNGParams
namespace: $.steps.stressng.starting.inputs.input
steps:
kubeconfig:
plugin:
src: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.3.1
deployment_type: image
input:
kubeconfig: !expr 'readFile($.input.constant.kubeconfig_path)'
stressng:
plugin:
src: quay.io/arcalot/arcaflow-plugin-stressng:0.8.0
deployment_type: image
step: workload
input: !expr $.input.item.stressng_params
deploy:
deployer_name: kubernetes
connection: !expr $.steps.kubeconfig.outputs.success.connection
pod:
metadata:
namespace: !expr $.input.item.namespace
labels:
arcaflow: stressng
spec:
nodeSelector: !expr $.input.item.node_selector
pluginContainer:
imagePullPolicy: Always
outputs:
success: !expr $.steps.stressng.outputs.success


@@ -0,0 +1,54 @@
version: v0.2.0
input:
root: KubeBurner__KubernetesTarget
objects:
KubeBurner__KubernetesTarget:
id: KubeBurner__KubernetesTarget
properties:
constant:
type:
type_id: ref
id: KubernetesTarget
item:
type:
type_id: ref
id: KubeBurner
KubernetesTarget:
id: KubernetesTarget
properties:
kubeconfig_path:
type:
type_id: string
KubeBurner:
id: KubeBurner
properties:
kubeburner:
type:
type_id: ref
id: KubeBurnerInputParams
namespace: $.steps.kubeburner.starting.inputs.input
steps:
uuidgen:
plugin:
deployment_type: image
src: quay.io/arcalot/arcaflow-plugin-utilities:0.6.0
step: uuid
input: {}
kubeburner:
plugin:
deployment_type: image
src: quay.io/redhat-performance/arcaflow-plugin-kube-burner:latest
step: kube-burner
input:
kubeconfig: !expr 'readFile($.input.constant.kubeconfig_path)'
uuid: !expr $.steps.uuidgen.outputs.success.uuid
workload: !expr $.input.item.kubeburner.workload
iterations: !expr $.input.item.kubeburner.iterations
churn: !expr $.input.item.kubeburner.churn
churn_duration: !expr $.input.item.kubeburner.churn_duration
churn_delay: !expr $.input.item.kubeburner.churn_delay
outputs:
success:
burner: !expr $.steps.kubeburner.outputs.success


@@ -0,0 +1,108 @@
version: v0.2.0
input:
root: KillPodConfig__KubernetesTarget
objects:
KillPodConfig__KubernetesTarget:
id: KillPodConfig__KubernetesTarget
properties:
constant:
type:
type_id: ref
id: KubernetesTarget
item:
type:
type_id: ref
id: KillPodConfig
KubernetesTarget:
id: KubernetesTarget
properties:
kubeconfig_path:
type:
type_id: string
KillPodConfig:
id: KillPodConfig
properties:
backoff:
default: '1'
display:
description: How many seconds to wait between checks for the target
pod status.
name: Backoff
required: false
type:
type_id: integer
kill:
default: '1'
display:
description: How many pods should we attempt to kill?
name: Number of pods to kill
required: false
type:
min: 1
type_id: integer
krkn_pod_recovery_time:
default: '60'
display:
description: The expected recovery time of the pod (used by Krkn to
monitor the pod lifecycle)
name: Recovery Time
required: false
type:
type_id: integer
label_selector:
display:
description: 'Kubernetes label selector for the target pods. Required
if name_pattern is not set.
See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
for details.'
name: Label selector
required: false
required_if_not:
- name_pattern
type:
type_id: string
name_pattern:
display:
description: Regular expression for target pods. Required if label_selector
is not set.
name: Name pattern
required: false
required_if_not:
- label_selector
type:
type_id: pattern
namespace_pattern:
display:
description: Regular expression for target pod namespaces.
name: Namespace pattern
required: true
type:
type_id: pattern
timeout:
default: '180'
display:
description: Timeout, in seconds, to wait for the target pod(s) to be removed.
name: Timeout
required: false
type:
type_id: integer
steps:
kill_pod:
step: kill-pods
plugin:
deployment_type: python
src: arcaflow-plugin-kill-pod@git+https://github.com/krkn-chaos/arcaflow-plugin-kill-pod.git@a9f87f88d8e7763d111613bd8b2c7862fc49624f
input:
namespace_pattern: !expr $.input.item.namespace_pattern
label_selector: !expr $.input.item.label_selector
kubeconfig_path: !expr $.input.constant.kubeconfig_path
deploy:
deployer_name: python
modulePullPolicy: Always
pythonPath: /usr/bin/python
workdir: /tmp
outputs:
success: !expr $.steps.kill_pod.outputs.success
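The `KillPodConfig` schema above also exposes tunables with defaults (`backoff`, `timeout`, `kill`, `krkn_pod_recovery_time`), and `required_if_not` means each entry must set at least one of `label_selector` or `name_pattern`. A minimal sketch of an alternative `pod_chaos_list` entry, with purely illustrative values:

```yaml
# Illustrative pod_chaos_list entry; values are examples only. Kills two etcd
# pods and allows two minutes of recovery time for Krkn to monitor.
- namespace_pattern: ^openshift-etcd$
  label_selector: k8s-app=etcd
  kill: 2
  krkn_pod_recovery_time: 120
```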


@@ -0,0 +1,73 @@
version: v0.2.0
input:
root: RootObject
objects:
KubernetesTarget:
id: KubernetesTarget
properties:
kubeconfig_path:
type:
type_id: string
RootObject:
id: RootObject
properties:
cpu_hog_enabled:
type:
type_id: bool
pod_chaos_enabled:
type:
type_id: bool
kubeburner_enabled:
type:
type_id: bool
kubernetes_target:
type:
type_id: ref
id: KubernetesTarget
kubeburner_list:
type:
type_id: list
items:
type_id: ref
id: KubeBurner
namespace: $.steps.kubeburner_wf.execute.inputs.items
pod_chaos_list:
type:
type_id: list
items:
type_id: ref
id: KillPodConfig
namespace: $.steps.pod_chaos_wf.execute.inputs.items
cpu_hog_list:
type:
type_id: list
items:
type_id: ref
id: CpuHog
namespace: $.steps.cpu_hog_wf.execute.inputs.items
steps:
kubeburner_wf:
kind: foreach
items: !expr 'bindConstants($.input.kubeburner_list, $.input.kubernetes_target)'
workflow: subworkflows/kubeburner.yaml
parallelism: 1
enabled: !expr $.input.kubeburner_enabled
pod_chaos_wf:
kind: foreach
items: !expr 'bindConstants($.input.pod_chaos_list, $.input.kubernetes_target)'
workflow: subworkflows/pod-chaos.yaml
parallelism: 1
enabled: !expr $.input.pod_chaos_enabled
cpu_hog_wf:
kind: foreach
items: !expr 'bindConstants($.input.cpu_hog_list, $.input.kubernetes_target)'
workflow: subworkflows/cpu-hog.yaml
parallelism: 1
enabled: !expr $.input.cpu_hog_enabled
outputs:
workflow_success:
kubeburner: !ordisabled $.steps.kubeburner_wf.outputs.success
pod_chaos: !ordisabled $.steps.pod_chaos_wf.outputs.success
cpu_hog: !ordisabled $.steps.cpu_hog_wf.outputs.success
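Each foreach step uses `bindConstants()` to pair every list entry with the shared `kubernetes_target`, which is why the sub-workflow input roots declare a `constant`/`item` pair. A rough sketch of the shape a single pod-chaos item takes after binding (the kubeconfig path is a placeholder, not a real file):

```yaml
# Approximate shape of one foreach item produced by bindConstants() for the
# pod-chaos sub-workflow; the kubeconfig path is a placeholder.
constant:
  kubeconfig_path: /path/to/kubeconfig
item:
  namespace_pattern: ^openshift-etcd$
  label_selector: k8s-app=etcd
  kill: 1
  krkn_pod_recovery_time: 1
```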