mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-20 13:00:28 +00:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dbf02a6c22 | ||
|
|
94bec8dc9b | ||
|
|
2111bab9a4 | ||
|
|
b734f1dd05 |
@@ -14,7 +14,7 @@ COPY --from=azure-cli /usr/local/bin/az /usr/bin/az
|
||||
# Install dependencies
|
||||
RUN yum install -y git python39 python3-pip jq gettext wget && \
|
||||
python3.9 -m pip install -U pip && \
|
||||
git clone https://github.com/redhat-chaos/krkn.git --branch v1.5.0 /root/kraken && \
|
||||
git clone https://github.com/redhat-chaos/krkn.git --branch v1.5.2 /root/kraken && \
|
||||
mkdir -p /root/.kube && cd /root/kraken && \
|
||||
pip3.9 install -r requirements.txt && \
|
||||
pip3.9 install virtualenv && \
|
||||
|
||||
@@ -14,7 +14,7 @@ COPY --from=azure-cli /usr/local/bin/az /usr/bin/az
|
||||
# Install dependencies
|
||||
RUN yum install -y git python39 python3-pip jq gettext wget && \
|
||||
python3.9 -m pip install -U pip && \
|
||||
git clone https://github.com/redhat-chaos/krkn.git --branch v1.5.0 /root/kraken && \
|
||||
git clone https://github.com/redhat-chaos/krkn.git --branch v1.5.2 /root/kraken && \
|
||||
mkdir -p /root/.kube && cd /root/kraken && \
|
||||
pip3.9 install -r requirements.txt && \
|
||||
pip3.9 install virtualenv && \
|
||||
|
||||
@@ -62,7 +62,7 @@ If changes go into the main repository while you're working on your code it is b
|
||||
|
||||
If not already configured, set the upstream url for kraken.
|
||||
```
|
||||
git remote add upstream https://github.com/cloud-bulldozer/kraken.git
|
||||
git remote add upstream https://github.com/redhat-chaos/krkn.git
|
||||
```
|
||||
|
||||
Rebase to upstream master branch.
|
||||
|
||||
@@ -12,9 +12,9 @@ network_chaos: # Scenario to create an outage
|
||||
- "ens5" # Interface name would be the Kernel host network interface name.
|
||||
execution: serial|parallel # Execute each of the egress options as a single scenario(parallel) or as separate scenario(serial).
|
||||
egress:
|
||||
latency: 50ms
|
||||
loss: 0.02 # percentage
|
||||
bandwidth: 100mbit
|
||||
latency: 500ms
|
||||
loss: 50% # percentage
|
||||
bandwidth: 10mbit
|
||||
```
|
||||
|
||||
##### Sample scenario config for ingress traffic shaping (using a plugin)
|
||||
@@ -30,9 +30,9 @@ network_chaos: # Scenario to create an outage
|
||||
kubeconfig_path: ~/.kube/config # Path to kubernetes config file. If not specified, it defaults to ~/.kube/config
|
||||
execution_type: parallel # Execute each of the ingress options as a single scenario(parallel) or as separate scenario(serial).
|
||||
network_params:
|
||||
latency: 50ms
|
||||
loss: '0.02'
|
||||
bandwidth: 100mbit
|
||||
latency: 500ms
|
||||
loss: '50%'
|
||||
bandwidth: 10mbit
|
||||
wait_duration: 120
|
||||
test_duration: 60
|
||||
'''
|
||||
|
||||
@@ -27,6 +27,15 @@ Scenario to introduce network latency, packet loss, and bandwidth restriction in
|
||||
network_params:
|
||||
latency: 500ms # Add 500ms latency to egress traffic from the pod.
|
||||
```
|
||||
##### Sample scenario config for ingress traffic shaping (using plugin)
|
||||
```
|
||||
- id: pod_ingress_shaping
|
||||
config:
|
||||
namespace: openshift-console # Required - Namespace of the pod to which filter need to be applied.
|
||||
label_selector: 'component=ui' # Applies traffic shaping to access openshift console.
|
||||
network_params:
|
||||
latency: 500ms # Add 500ms latency to egress traffic from the pod.
|
||||
```
|
||||
|
||||
##### Steps
|
||||
- Pick the pods to introduce the network anomaly either from label_selector or pod_name.
|
||||
|
||||
@@ -15,6 +15,8 @@ import kraken.cerberus.setup as cerberus
|
||||
from krkn_lib.k8s import KrknKubernetes
|
||||
from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes
|
||||
from krkn_lib.models.telemetry import ScenarioTelemetry
|
||||
from krkn_lib.utils.functions import get_yaml_item_value
|
||||
|
||||
node_general = False
|
||||
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from kraken.plugins.network.ingress_shaping import network_chaos
|
||||
from kraken.plugins.pod_network_outage.pod_network_outage_plugin import pod_outage
|
||||
from kraken.plugins.pod_network_outage.pod_network_outage_plugin import pod_egress_shaping
|
||||
from krkn_lib.telemetry.k8s import KrknTelemetryKubernetes
|
||||
from kraken.plugins.pod_network_outage.pod_network_outage_plugin import pod_ingress_shaping
|
||||
from krkn_lib.models.telemetry import ScenarioTelemetry
|
||||
from krkn_lib.utils.functions import log_exception
|
||||
|
||||
@@ -223,7 +224,13 @@ PLUGINS = Plugins(
|
||||
[
|
||||
"error"
|
||||
]
|
||||
)
|
||||
),
|
||||
PluginStep(
|
||||
pod_ingress_shaping,
|
||||
[
|
||||
"error"
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -269,6 +269,85 @@ def apply_outage_policy(
|
||||
return job_list
|
||||
|
||||
|
||||
def apply_ingress_policy(
|
||||
mod: str,
|
||||
node: str,
|
||||
ips: typing.List[str],
|
||||
job_template,
|
||||
pod_template,
|
||||
network_params: typing.Dict[str, str],
|
||||
duration: str,
|
||||
bridge_name: str,
|
||||
kubecli: KrknKubernetes,
|
||||
test_execution: str,
|
||||
) -> typing.List[str]:
|
||||
"""
|
||||
Function that applies ingress traffic shaping to pod interface.
|
||||
|
||||
Args:
|
||||
|
||||
mod (String)
|
||||
- Traffic shaping filter to apply
|
||||
|
||||
node (String)
|
||||
- node associated with the pod
|
||||
|
||||
ips (List)
|
||||
- IPs of pods found in the node
|
||||
|
||||
job_template (jinja2.environment.Template)
|
||||
- The YAML template used to instantiate a job to apply and remove
|
||||
the filters on the interfaces
|
||||
|
||||
pod_template (jinja2.environment.Template)
|
||||
- The YAML template used to instantiate a pod to query
|
||||
the node's interface
|
||||
|
||||
network_params (Dictionary with key and value as string)
|
||||
- Loss/Delay/Bandwidth and their corresponding value
|
||||
|
||||
duration (string)
|
||||
- Duration for which the traffic control is to be done
|
||||
|
||||
bridge_name (string):
|
||||
- bridge to which filter rules need to be applied
|
||||
|
||||
kubecli (KrknKubernetes)
|
||||
- Object to interact with Kubernetes Python client
|
||||
|
||||
test_execution (String)
|
||||
- The order in which the filters are applied
|
||||
|
||||
Returns:
|
||||
The name of the job created that executes the traffic shaping
|
||||
filter
|
||||
"""
|
||||
|
||||
job_list = []
|
||||
|
||||
create_virtual_interfaces(kubecli, len(ips), node, pod_template)
|
||||
|
||||
for count, pod_ip in enumerate(set(ips)):
|
||||
pod_inf = get_pod_interface(
|
||||
node, pod_ip, pod_template, bridge_name, kubecli)
|
||||
exec_cmd = get_ingress_cmd(
|
||||
test_execution, pod_inf, mod, count, network_params, duration
|
||||
)
|
||||
logging.info("Executing %s on pod %s in node %s" %
|
||||
(exec_cmd, pod_ip, node))
|
||||
job_body = yaml.safe_load(
|
||||
job_template.render(jobname=mod + str(pod_ip),
|
||||
nodename=node, cmd=exec_cmd)
|
||||
)
|
||||
job_list.append(job_body["metadata"]["name"])
|
||||
api_response = kubecli.create_job(job_body)
|
||||
if api_response is None:
|
||||
raise Exception("Error creating job")
|
||||
if pod_ip == node:
|
||||
break
|
||||
return job_list
|
||||
|
||||
|
||||
def apply_net_policy(
|
||||
mod: str,
|
||||
node: str,
|
||||
@@ -325,7 +404,7 @@ def apply_net_policy(
|
||||
|
||||
job_list = []
|
||||
|
||||
for pod_ip in ips:
|
||||
for pod_ip in set(ips):
|
||||
pod_inf = get_pod_interface(
|
||||
node, pod_ip, pod_template, bridge_name, kubecli)
|
||||
exec_cmd = get_egress_cmd(
|
||||
@@ -344,6 +423,64 @@ def apply_net_policy(
|
||||
return job_list
|
||||
|
||||
|
||||
def get_ingress_cmd(
|
||||
execution: str,
|
||||
test_interface: str,
|
||||
mod: str,
|
||||
count: int,
|
||||
vallst: typing.List[str],
|
||||
duration: str,
|
||||
) -> str:
|
||||
"""
|
||||
Function generates ingress filter to apply on pod
|
||||
|
||||
Args:
|
||||
execution (str):
|
||||
- The order in which the filters are applied
|
||||
|
||||
test_interface (str):
|
||||
- Pod interface
|
||||
|
||||
mod (str):
|
||||
- Filter to apply
|
||||
|
||||
count (int):
|
||||
- IFB device number
|
||||
|
||||
vallst (typing.List[str]):
|
||||
- List of filters to apply
|
||||
|
||||
duration (str):
|
||||
- Duration for which the traffic control is to be done
|
||||
|
||||
Returns:
|
||||
str: ingress filter
|
||||
"""
|
||||
ifb_dev = 'ifb{0}'.format(count)
|
||||
tc_set = tc_unset = tc_ls = ""
|
||||
param_map = {"latency": "delay", "loss": "loss", "bandwidth": "rate"}
|
||||
tc_set = "tc qdisc add dev {0} ingress ;".format(test_interface)
|
||||
tc_set = "{0} tc filter add dev {1} ingress matchall action mirred egress redirect dev {2} ;".format(
|
||||
tc_set, test_interface, ifb_dev)
|
||||
tc_set = "{0} tc qdisc replace dev {1} root netem".format(
|
||||
tc_set, ifb_dev)
|
||||
tc_unset = "{0} tc qdisc del dev {1} root ;".format(
|
||||
tc_unset, ifb_dev)
|
||||
tc_unset = "{0} tc qdisc del dev {1} ingress".format(
|
||||
tc_unset, test_interface)
|
||||
tc_ls = "{0} tc qdisc ls dev {1} ;".format(tc_ls, ifb_dev)
|
||||
if execution == "parallel":
|
||||
for val in vallst.keys():
|
||||
tc_set += " {0} {1} ".format(param_map[val], vallst[val])
|
||||
tc_set += ";"
|
||||
else:
|
||||
tc_set += " {0} {1} ;".format(param_map[mod], vallst[mod])
|
||||
exec_cmd = "{0} {1} sleep {2};{3}".format(
|
||||
tc_set, tc_ls, duration, tc_unset)
|
||||
|
||||
return exec_cmd
|
||||
|
||||
|
||||
def get_egress_cmd(
|
||||
execution: str,
|
||||
test_interface: str,
|
||||
@@ -392,6 +529,124 @@ def get_egress_cmd(
|
||||
return exec_cmd
|
||||
|
||||
|
||||
def create_virtual_interfaces(
|
||||
kubecli: KrknKubernetes,
|
||||
nummber: int,
|
||||
node: str,
|
||||
pod_template
|
||||
) -> None:
|
||||
"""
|
||||
Function that creates a privileged pod and uses it to create
|
||||
virtual interfaces on the node
|
||||
|
||||
Args:
|
||||
cli (CoreV1Api)
|
||||
- Object to interact with Kubernetes Python client's CoreV1 API
|
||||
|
||||
interface_list (List of strings)
|
||||
- The list of interfaces on the node for which virtual interfaces
|
||||
are to be created
|
||||
|
||||
node (string)
|
||||
- The node on which the virtual interfaces are created
|
||||
|
||||
pod_template (jinja2.environment.Template))
|
||||
- The YAML template used to instantiate a pod to create
|
||||
virtual interfaces on the node
|
||||
"""
|
||||
pod_body = yaml.safe_load(
|
||||
pod_template.render(nodename=node)
|
||||
)
|
||||
kubecli.create_pod(pod_body, "default", 300)
|
||||
logging.info(
|
||||
"Creating {0} virtual interfaces on node {1} using a pod".format(
|
||||
nummber,
|
||||
node
|
||||
)
|
||||
)
|
||||
create_ifb(kubecli, nummber, 'modtools')
|
||||
logging.info("Deleting pod used to create virtual interfaces")
|
||||
kubecli.delete_pod("modtools", "default")
|
||||
|
||||
|
||||
def delete_virtual_interfaces(
|
||||
kubecli: KrknKubernetes,
|
||||
node_list: typing.List[str],
|
||||
pod_template
|
||||
):
|
||||
"""
|
||||
Function that creates a privileged pod and uses it to delete all
|
||||
virtual interfaces on the specified nodes
|
||||
|
||||
Args:
|
||||
cli (CoreV1Api)
|
||||
- Object to interact with Kubernetes Python client's CoreV1 API
|
||||
|
||||
node_list (List of strings)
|
||||
- The list of nodes on which the list of virtual interfaces are
|
||||
to be deleted
|
||||
|
||||
node (string)
|
||||
- The node on which the virtual interfaces are created
|
||||
|
||||
pod_template (jinja2.environment.Template))
|
||||
- The YAML template used to instantiate a pod to delete
|
||||
virtual interfaces on the node
|
||||
"""
|
||||
|
||||
for node in node_list:
|
||||
pod_body = yaml.safe_load(
|
||||
pod_template.render(nodename=node)
|
||||
)
|
||||
kubecli.create_pod(pod_body, "default", 300)
|
||||
logging.info(
|
||||
"Deleting all virtual interfaces on node {0}".format(node)
|
||||
)
|
||||
delete_ifb(kubecli, 'modtools')
|
||||
kubecli.delete_pod("modtools", "default")
|
||||
|
||||
|
||||
def create_ifb(kubecli: KrknKubernetes, number: int, pod_name: str):
|
||||
"""
|
||||
Function that creates virtual interfaces in a pod.
|
||||
Makes use of modprobe commands
|
||||
"""
|
||||
|
||||
exec_command = [
|
||||
'/host',
|
||||
'modprobe', 'ifb', 'numifbs=' + str(number)
|
||||
]
|
||||
kubecli.exec_cmd_in_pod(
|
||||
exec_command,
|
||||
pod_name,
|
||||
'default',
|
||||
base_command="chroot")
|
||||
|
||||
for i in range(0, number):
|
||||
exec_command = ['/host', 'ip', 'link', 'set', 'dev']
|
||||
exec_command += ['ifb' + str(i), 'up']
|
||||
kubecli.exec_cmd_in_pod(
|
||||
exec_command,
|
||||
pod_name,
|
||||
'default',
|
||||
base_command="chroot"
|
||||
)
|
||||
|
||||
|
||||
def delete_ifb(kubecli: KrknKubernetes, pod_name: str):
|
||||
"""
|
||||
Function that deletes all virtual interfaces in a pod.
|
||||
Makes use of modprobe command
|
||||
"""
|
||||
|
||||
exec_command = ['/host', 'modprobe', '-r', 'ifb']
|
||||
kubecli.exec_cmd_in_pod(
|
||||
exec_command,
|
||||
pod_name,
|
||||
'default',
|
||||
base_command="chroot")
|
||||
|
||||
|
||||
def list_bridges(
|
||||
node: str, pod_template, kubecli: KrknKubernetes
|
||||
) -> typing.List[str]:
|
||||
@@ -424,7 +679,7 @@ def list_bridges(
|
||||
)
|
||||
|
||||
if not output:
|
||||
logging.error("Exception occurred while executing command in pod")
|
||||
logging.error(f"Exception occurred while executing command {cmd} in pod")
|
||||
sys.exit(1)
|
||||
|
||||
bridges = output.split("\n")
|
||||
@@ -483,7 +738,7 @@ def check_cookie(
|
||||
)
|
||||
|
||||
if not output:
|
||||
logging.error("Exception occurred while executing command in pod")
|
||||
logging.error(f"Exception occurred while executing command {cmd} in pod")
|
||||
sys.exit(1)
|
||||
|
||||
flow_list = output.split("\n")
|
||||
@@ -525,50 +780,41 @@ def get_pod_interface(
|
||||
pod_body = yaml.safe_load(pod_template.render(nodename=node))
|
||||
logging.info("Creating pod to query pod interface on node %s" % node)
|
||||
kubecli.create_pod(pod_body, "default", 300)
|
||||
inf = ""
|
||||
|
||||
try:
|
||||
if br_name == "br-int":
|
||||
find_ip = f"external-ids:ip_addresses={ip}/23"
|
||||
else:
|
||||
find_ip = f"external-ids:ip={ip}"
|
||||
|
||||
cmd = [
|
||||
"/host",
|
||||
"ovs-ofctl",
|
||||
"-O",
|
||||
"OpenFlow13",
|
||||
"dump-flows",
|
||||
br_name,
|
||||
f"ip,nw_src={ip}",
|
||||
"ovs-vsctl",
|
||||
"--bare",
|
||||
"--columns=name",
|
||||
"find",
|
||||
"interface",
|
||||
find_ip,
|
||||
]
|
||||
|
||||
output = kubecli.exec_cmd_in_pod(
|
||||
cmd, "modtools", "default", base_command="chroot"
|
||||
)
|
||||
if not output:
|
||||
logging.error("Exception occurred while executing command in pod")
|
||||
sys.exit(1)
|
||||
|
||||
flow_lists = output.split("\n")
|
||||
port = ""
|
||||
inf = ""
|
||||
for flow in flow_lists:
|
||||
match = re.search(r".*in_port=(.*),nw_src=.*", flow)
|
||||
if match is not None:
|
||||
port = match.group(1)
|
||||
exit
|
||||
if not re.findall("\\D", port):
|
||||
cmd = ["/host", "ovs-ofctl", "-O",
|
||||
"OpenFlow13", "dump-ports-desc", br_name]
|
||||
cmd= [
|
||||
"/host",
|
||||
"ip",
|
||||
"addr",
|
||||
"show"
|
||||
]
|
||||
output = kubecli.exec_cmd_in_pod(
|
||||
cmd, "modtools", "default", base_command="chroot"
|
||||
)
|
||||
if not output:
|
||||
logging.error(
|
||||
"Exception occurred while executing command in pod")
|
||||
sys.exit(1)
|
||||
ports_desc = output.split("\n")
|
||||
for desc in ports_desc:
|
||||
match = re.search(rf".*{port}\((.*)\):.*", desc)
|
||||
if match is not None:
|
||||
inf = match.group(1)
|
||||
exit
|
||||
cmd, "modtools", "default", base_command="chroot")
|
||||
for if_str in output.split("\n"):
|
||||
if re.search(ip,if_str):
|
||||
inf = if_str.split(' ')[-1]
|
||||
else:
|
||||
inf = port
|
||||
inf = output
|
||||
finally:
|
||||
logging.info("Deleting pod to query interface on node")
|
||||
kubecli.delete_pod("modtools", "default")
|
||||
@@ -1098,7 +1344,7 @@ def pod_egress_shaping(
|
||||
|
||||
for mod in mod_lst:
|
||||
for node, ips in node_dict.items():
|
||||
job_list = apply_net_policy(
|
||||
job_list.extend( apply_net_policy(
|
||||
mod,
|
||||
node,
|
||||
ips,
|
||||
@@ -1109,20 +1355,20 @@ def pod_egress_shaping(
|
||||
br_name,
|
||||
kubecli,
|
||||
params.execution_type,
|
||||
)
|
||||
if params.execution_type == "serial":
|
||||
logging.info("Waiting for serial job to finish")
|
||||
start_time = int(time.time())
|
||||
wait_for_job(job_list[:], kubecli,
|
||||
params.test_duration + 20)
|
||||
logging.info("Waiting for wait_duration %s" %
|
||||
params.test_duration)
|
||||
time.sleep(params.test_duration)
|
||||
end_time = int(time.time())
|
||||
if publish:
|
||||
cerberus.publish_kraken_status(
|
||||
config, failed_post_scenarios, start_time, end_time
|
||||
)
|
||||
))
|
||||
if params.execution_type == "serial":
|
||||
logging.info("Waiting for serial job to finish")
|
||||
start_time = int(time.time())
|
||||
wait_for_job(job_list[:], kubecli,
|
||||
params.test_duration + 20)
|
||||
logging.info("Waiting for wait_duration %s" %
|
||||
params.test_duration)
|
||||
time.sleep(params.test_duration)
|
||||
end_time = int(time.time())
|
||||
if publish:
|
||||
cerberus.publish_kraken_status(
|
||||
config, failed_post_scenarios, start_time, end_time
|
||||
)
|
||||
if params.execution_type == "parallel":
|
||||
break
|
||||
if params.execution_type == "parallel":
|
||||
@@ -1149,3 +1395,281 @@ def pod_egress_shaping(
|
||||
finally:
|
||||
logging.info("Deleting jobs(if any)")
|
||||
delete_jobs(kubecli, job_list[:])
|
||||
|
||||
|
||||
@dataclass
|
||||
class IngressParams:
|
||||
"""
|
||||
This is the data structure for the input parameters of the step defined below.
|
||||
"""
|
||||
|
||||
namespace: typing.Annotated[str, validation.min(1)] = field(
|
||||
metadata={
|
||||
"name": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be applied"
|
||||
"for details.",
|
||||
}
|
||||
)
|
||||
|
||||
network_params: typing.Dict[str, str] = field(
|
||||
metadata={
|
||||
"name": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. "
|
||||
"The currently supported filters are latency, "
|
||||
"loss and bandwidth",
|
||||
},
|
||||
)
|
||||
|
||||
kubeconfig_path: typing.Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\n"
|
||||
"See https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for "
|
||||
"details.",
|
||||
},
|
||||
)
|
||||
pod_name: typing.Annotated[
|
||||
typing.Optional[str],
|
||||
validation.required_if_not("label_selector"),
|
||||
] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will be"
|
||||
"selected for the chaos scenario",
|
||||
},
|
||||
)
|
||||
|
||||
label_selector: typing.Annotated[
|
||||
typing.Optional[str], validation.required_if_not("pod_name")
|
||||
] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. "
|
||||
"When pod_name is not specified, pod with matching label_selector is selected for chaos scenario",
|
||||
},
|
||||
)
|
||||
|
||||
kraken_config: typing.Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"name": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. "
|
||||
"Set this field if you wish to publish status onto Cerberus",
|
||||
},
|
||||
)
|
||||
|
||||
test_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
|
||||
default=90,
|
||||
metadata={
|
||||
"name": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing "
|
||||
"is to be performed.",
|
||||
},
|
||||
)
|
||||
|
||||
wait_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
|
||||
default=300,
|
||||
metadata={
|
||||
"name": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup."
|
||||
"Ensure that it is significantly greater than wait_duration",
|
||||
},
|
||||
)
|
||||
|
||||
instance_count: typing.Annotated[typing.Optional[int], validation.min(1)] = field(
|
||||
default=1,
|
||||
metadata={
|
||||
"name": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match "
|
||||
"the label selector.",
|
||||
},
|
||||
)
|
||||
|
||||
execution_type: typing.Optional[str] = field(
|
||||
default="parallel",
|
||||
metadata={
|
||||
"name": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. "
|
||||
"Execution type can be 'serial' or 'parallel'",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PodIngressNetShapingSuccessOutput:
|
||||
"""
|
||||
This is the output data structure for the success case.
|
||||
"""
|
||||
|
||||
test_pods: typing.List[str] = field(
|
||||
metadata={
|
||||
"name": "Test pods",
|
||||
"description": "List of test pods where the selected for chaos scenario",
|
||||
}
|
||||
)
|
||||
|
||||
network_parameters: typing.Dict[str, str] = field(
|
||||
metadata={
|
||||
"name": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interfaces",
|
||||
}
|
||||
)
|
||||
|
||||
execution_type: str = field(
|
||||
metadata={
|
||||
"name": "Execution Type",
|
||||
"description": "The order in which the filters are applied",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PodIngressNetShapingErrorOutput:
|
||||
error: str = field(
|
||||
metadata={
|
||||
"name": "Error",
|
||||
"description": "Error message when there is a run-time error during "
|
||||
"the execution of the scenario",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@plugin.step(
|
||||
id="pod_ingress_shaping",
|
||||
name="Pod ingress network Shaping",
|
||||
description="Does ingress network traffic shaping at pod level",
|
||||
outputs={
|
||||
"success": PodIngressNetShapingSuccessOutput,
|
||||
"error": PodIngressNetShapingErrorOutput,
|
||||
},
|
||||
)
|
||||
def pod_ingress_shaping(
|
||||
params: IngressParams,
|
||||
) -> typing.Tuple[
|
||||
str, typing.Union[PodIngressNetShapingSuccessOutput,
|
||||
PodIngressNetShapingErrorOutput]
|
||||
]:
|
||||
"""
|
||||
Function that performs ingress pod traffic shaping based
|
||||
on the provided configuration
|
||||
|
||||
Args:
|
||||
params (IngressParams,)
|
||||
- The object containing the configuration for the scenario
|
||||
|
||||
Returns
|
||||
A 'success' or 'error' message along with their details
|
||||
"""
|
||||
|
||||
file_loader = FileSystemLoader(os.path.abspath(os.path.dirname(__file__)))
|
||||
env = Environment(loader=file_loader)
|
||||
job_template = env.get_template("job.j2")
|
||||
pod_module_template = env.get_template("pod_module.j2")
|
||||
test_namespace = params.namespace
|
||||
test_label_selector = params.label_selector
|
||||
test_pod_name = params.pod_name
|
||||
job_list = []
|
||||
publish = False
|
||||
|
||||
if params.kraken_config:
|
||||
failed_post_scenarios = ""
|
||||
try:
|
||||
with open(params.kraken_config, "r") as f:
|
||||
config = yaml.full_load(f)
|
||||
except Exception:
|
||||
logging.error("Error reading Kraken config from %s" %
|
||||
params.kraken_config)
|
||||
return "error", PodIngressNetShapingErrorOutput(format_exc())
|
||||
publish = True
|
||||
|
||||
try:
|
||||
ip_set = set()
|
||||
node_dict = {}
|
||||
label_set = set()
|
||||
param_lst = ["latency", "loss", "bandwidth"]
|
||||
mod_lst = [i for i in param_lst if i in params.network_params]
|
||||
|
||||
kubecli = KrknKubernetes(kubeconfig_path=params.kubeconfig_path)
|
||||
api_ext = client.ApiextensionsV1Api(kubecli.api_client)
|
||||
custom_obj = client.CustomObjectsApi(kubecli.api_client)
|
||||
|
||||
br_name = get_bridge_name(api_ext, custom_obj)
|
||||
pods_list = get_test_pods(
|
||||
test_pod_name, test_label_selector, test_namespace, kubecli
|
||||
)
|
||||
|
||||
while not len(pods_list) <= params.instance_count:
|
||||
pods_list.pop(random.randint(0, len(pods_list) - 1))
|
||||
for pod_name in pods_list:
|
||||
pod_stat = kubecli.read_pod(pod_name, test_namespace)
|
||||
ip_set.add(pod_stat.status.pod_ip)
|
||||
node_dict.setdefault(pod_stat.spec.node_name, [])
|
||||
node_dict[pod_stat.spec.node_name].append(pod_stat.status.pod_ip)
|
||||
for key, value in pod_stat.metadata.labels.items():
|
||||
label_set.add("%s=%s" % (key, value))
|
||||
|
||||
check_bridge_interface(
|
||||
list(node_dict.keys())[0], pod_module_template, br_name, kubecli
|
||||
)
|
||||
|
||||
for mod in mod_lst:
|
||||
for node, ips in node_dict.items():
|
||||
job_list.extend(apply_ingress_policy(
|
||||
mod,
|
||||
node,
|
||||
ips,
|
||||
job_template,
|
||||
pod_module_template,
|
||||
params.network_params,
|
||||
params.test_duration,
|
||||
br_name,
|
||||
kubecli,
|
||||
params.execution_type,
|
||||
))
|
||||
if params.execution_type == "serial":
|
||||
logging.info("Waiting for serial job to finish")
|
||||
start_time = int(time.time())
|
||||
wait_for_job(job_list[:], kubecli,
|
||||
params.test_duration + 20)
|
||||
logging.info("Waiting for wait_duration %s" %
|
||||
params.test_duration)
|
||||
time.sleep(params.test_duration)
|
||||
end_time = int(time.time())
|
||||
if publish:
|
||||
cerberus.publish_kraken_status(
|
||||
config, failed_post_scenarios, start_time, end_time
|
||||
)
|
||||
if params.execution_type == "parallel":
|
||||
break
|
||||
if params.execution_type == "parallel":
|
||||
logging.info("Waiting for parallel job to finish")
|
||||
start_time = int(time.time())
|
||||
wait_for_job(job_list[:], kubecli, params.test_duration + 300)
|
||||
logging.info("Waiting for wait_duration %s" % params.test_duration)
|
||||
time.sleep(params.test_duration)
|
||||
end_time = int(time.time())
|
||||
if publish:
|
||||
cerberus.publish_kraken_status(
|
||||
config, failed_post_scenarios, start_time, end_time
|
||||
)
|
||||
|
||||
return "success", PodIngressNetShapingSuccessOutput(
|
||||
test_pods=pods_list,
|
||||
network_parameters=params.network_params,
|
||||
execution_type=params.execution_type,
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(
|
||||
"Pod network Shaping scenario exiting due to Exception - %s" % e)
|
||||
return "error", PodIngressNetShapingErrorOutput(format_exc())
|
||||
finally:
|
||||
delete_virtual_interfaces(
|
||||
kubecli,
|
||||
node_dict.keys(),
|
||||
pod_module_template
|
||||
)
|
||||
logging.info("Deleting jobs(if any)")
|
||||
delete_jobs(kubecli, job_list[:])
|
||||
|
||||
@@ -19,7 +19,7 @@ ibm_cloud_sdk_core
|
||||
ibm_vpc
|
||||
itsdangerous==2.0.1
|
||||
jinja2==3.0.3
|
||||
krkn-lib>=1.4.1
|
||||
krkn-lib>=1.4.2
|
||||
kubernetes
|
||||
lxml >= 4.3.0
|
||||
oauth2client>=4.1.3
|
||||
|
||||
14
scenarios/openshift/pod_ingress_shaping.yml
Normal file
14
scenarios/openshift/pod_ingress_shaping.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
# yaml-language-server: $schema=../plugin.schema.json
|
||||
- id: pod_ingress_shaping
|
||||
config:
|
||||
namespace: <namespace> # Required - Namespace of the pod to which traffic shaping need to be applied
|
||||
label_selector: <label_selector> # When pod_name is not specified, pod with matching label_selector is selected for chaos scenario
|
||||
pod_name: <pod name> # When label_selector is not specified, pod matching the name will be selected for the chaos scenario
|
||||
network_params: # latency, loss and bandwidth are the three supported network parameters to alter for the chaos test
|
||||
latency: <time> # Value is a string. For example : 50ms
|
||||
loss: <fraction> # Loss is a fraction between 0 and 1. It has to be enclosed in quotes to treat it as a string. For example, '0.02%' (not 0.02%)
|
||||
bandwidth: <rate> # Value is a string. For example: 100mbit
|
||||
execution_type: <serial/parallel> # Used to specify whether you want to apply filters on interfaces one at a time or all at once. Default is 'parallel'
|
||||
instance_count: <number> # Number of pods to perform action/select that match the label selector
|
||||
wait_duration: <time_duration> # Default is 300. Ensure that it is at least about twice of test_duration
|
||||
test_duration: <time_duration> # Default is 120
|
||||
@@ -2413,7 +2413,168 @@
|
||||
"id",
|
||||
"config"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"title": "pod_ingress_shaping Arcaflow scenarios",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"const": "pod_ingress_shaping"
|
||||
},
|
||||
"config": {
|
||||
"$defs": {
|
||||
"IngressParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"network_params": {
|
||||
"type": "object",
|
||||
"propertyNames": {},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
},
|
||||
"execution_type": {
|
||||
"type": "string",
|
||||
"default": "parallel",
|
||||
"title": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"title": "Namespace",
|
||||
"description": "Namespace of the pod to which filter need to be appliedfor details."
|
||||
},
|
||||
"network_params": {
|
||||
"type": "object",
|
||||
"propertyNames": {},
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Network Parameters",
|
||||
"description": "The network filters that are applied on the interface. The currently supported filters are latency, loss and bandwidth"
|
||||
},
|
||||
"kubeconfig_path": {
|
||||
"type": "string",
|
||||
"title": "Kubeconfig path",
|
||||
"description": "Kubeconfig file as string\nSee https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ for details."
|
||||
},
|
||||
"pod_name": {
|
||||
"type": "string",
|
||||
"title": "Pod name",
|
||||
"description": "When label_selector is not specified, pod matching the name will beselected for the chaos scenario"
|
||||
},
|
||||
"label_selector": {
|
||||
"type": "string",
|
||||
"title": "Label selector",
|
||||
"description": "Kubernetes label selector for the target pod. When pod_name is not specified, pod with matching label_selector is selected for chaos scenario"
|
||||
},
|
||||
"kraken_config": {
|
||||
"type": "string",
|
||||
"title": "Kraken Config",
|
||||
"description": "Path to the config file of Kraken. Set this field if you wish to publish status onto Cerberus"
|
||||
},
|
||||
"test_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"title": "Test duration",
|
||||
"description": "Duration for which each step of the ingress chaos testing is to be performed."
|
||||
},
|
||||
"wait_duration": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300,
|
||||
"title": "Wait Duration",
|
||||
"description": "Wait duration for finishing a test and its cleanup.Ensure that it is significantly greater than wait_duration"
|
||||
},
|
||||
"instance_count": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 1,
|
||||
"title": "Instance Count",
|
||||
"description": "Number of pods to perform action/select that match the label selector."
|
||||
},
|
||||
"execution_type": {
|
||||
"type": "string",
|
||||
"default": "parallel",
|
||||
"title": "Execution Type",
|
||||
"description": "The order in which the ingress filters are applied. Execution type can be 'serial' or 'parallel'"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"namespace"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"dependentRequired": {}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"config"
|
||||
]
|
||||
}
|
||||
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,12 +15,12 @@ This tool profiles an application and gathers telemetry data such as CPU, Memory
|
||||
1. To run
|
||||
|
||||
```
|
||||
$ python3 -m venv chaos
|
||||
$ python3.9 -m venv chaos
|
||||
$ source chaos/bin/activate
|
||||
$ git clone https://github.com/redhat-chaos/krkn.git
|
||||
$ cd krkn
|
||||
$ pip3 install -r requirements.txt
|
||||
$ python3 utils/chaos_recommender/chaos_recommender.py
|
||||
$ python3.9 utils/chaos_recommender/chaos_recommender.py
|
||||
```
|
||||
|
||||
2. Follow the prompts to provide the required information.
|
||||
|
||||
Reference in New Issue
Block a user