Mirror of https://github.com/krkn-chaos/krkn.git, synced 2026-04-15 06:57:28 +00:00

Compare commits (7 commits)
| Author | SHA1 | Date |
|---|---|---|
| | a3baffe8ee | |
| | 438b08fcd5 | |
| | 9b930a02a5 | |
| | 194e3b87ee | |
| | 8c05e44c23 | |
| | 88f8cf49f1 | |
| | 015ba4d90d | |
**.github/workflows/release.yml** (vendored, 13 changed lines)
```diff
@@ -16,6 +16,7 @@ jobs:
           PREVIOUS_TAG=$(git tag --sort=-creatordate | sed -n '2 p')
           echo $PREVIOUS_TAG
           echo "PREVIOUS_TAG=$PREVIOUS_TAG" >> "$GITHUB_ENV"
+
       - name: generate release notes from template
         id: release-notes
         env:
@@ -45,3 +46,15 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           gh release create ${{ github.ref_name }} --title "${{ github.ref_name }}" -F release-notes.md
+
+      - name: Install Syft
+        run: |
+          curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sudo sh -s -- -b /usr/local/bin
+
+      - name: Generate SBOM
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          syft . --scope all-layers --output cyclonedx-json > sbom.json
+          echo "SBOM generated successfully!"
+          gh release upload ${{ github.ref_name }} sbom.json
```
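The two new steps install Syft and attach a CycloneDX SBOM to each release. A minimal sketch for sanity-checking the generated file locally, assuming you ran the same `syft` command as the workflow; the top-level field names follow the CycloneDX JSON schema:

```python
# Checks that sbom.json is a CycloneDX document, as produced by
# `syft . --scope all-layers --output cyclonedx-json > sbom.json`.
import json
import sys

with open("sbom.json") as f:
    sbom = json.load(f)

# CycloneDX JSON carries its format and spec version at the top level.
if sbom.get("bomFormat") != "CycloneDX":
    sys.exit(f"unexpected bomFormat: {sbom.get('bomFormat')!r}")

components = sbom.get("components", [])
print(f"CycloneDX {sbom.get('specVersion')} SBOM with {len(components)} components")
```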
**CI functional test script** (`functional_pod_network_filter`)

```diff
@@ -11,6 +11,7 @@ function functional_pod_network_filter {
   yq -i '.[0].target="pod-network-filter-test"' scenarios/kube/pod-network-filter.yml
   yq -i '.[0].protocols=["tcp"]' scenarios/kube/pod-network-filter.yml
   yq -i '.[0].ports=[443]' scenarios/kube/pod-network-filter.yml
+  yq -i '.performance_monitoring.check_critical_alerts=False' CI/config/pod_network_filter.yaml

   ## Test webservice deployment
   kubectl apply -f ./CI/templates/pod_network_filter.yaml
```
**Dockerfile**

```diff
@@ -28,7 +28,7 @@ ENV KUBECONFIG /home/krkn/.kube/config

 # This overwrites any existing configuration in /etc/yum.repos.d/kubernetes.repo
 RUN dnf update && dnf install -y --setopt=install_weak_deps=False \
-    git python39 jq yq gettext wget which ipmitool &&\
+    git python39 jq yq gettext wget which ipmitool openssh-server &&\
     dnf clean all

 # Virtctl
```
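The only change is the addition of `openssh-server` to the installed packages, presumably to support the SSH-based VM reachability checks that the `virtctl ssh` change further down in this commit set relies on.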
**KubeVirt input definition (JSON)**

```diff
@@ -444,7 +444,7 @@
       "required": "false"
     },
     {
-      "name": "kubevirt-namespace",
+      "name": "kubevirt-name",
       "short_description": "KubeVirt regex names to watch",
       "description": "KubeVirt regex names to check VMs",
       "variable": "KUBE_VIRT_NAME",
```
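The input is renamed from `kubevirt-namespace` to `kubevirt-name`, which matches both the `KUBE_VIRT_NAME` variable it maps to and the name-regex semantics its description already declares.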
**`ContainerScenarioPlugin` (Python)**

```diff
@@ -1,10 +1,10 @@
 import logging
 import random
 import time
+from asyncio import Future

 import yaml
 from krkn_lib.k8s import KrknKubernetes
-from krkn_lib.k8s.pods_monitor_pool import PodsMonitorPool
+from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label
 from krkn_lib.models.telemetry import ScenarioTelemetry
 from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
 from krkn_lib.utils import get_yaml_item_value
@@ -22,27 +22,21 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
         lib_telemetry: KrknTelemetryOpenshift,
         scenario_telemetry: ScenarioTelemetry,
     ) -> int:
-        pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes())
         try:
             with open(scenario, "r") as f:
                 cont_scenario_config = yaml.full_load(f)

                 for kill_scenario in cont_scenario_config["scenarios"]:
-                    self.start_monitoring(
-                        kill_scenario, pool
+                    future_snapshot = self.start_monitoring(
+                        kill_scenario,
+                        lib_telemetry
                     )
-                    killed_containers = self.container_killing_in_pod(
+                    self.container_killing_in_pod(
                         kill_scenario, lib_telemetry.get_lib_kubernetes()
                     )
-            result = pool.join()
-            if result.error:
-                logging.error(
-                    f"ContainerScenarioPlugin pods failed to recovery: {result.error}"
-                )
-                return 1
-            scenario_telemetry.affected_pods = result
+                    snapshot = future_snapshot.result()
+                    result = snapshot.get_pods_status()
+                    scenario_telemetry.affected_pods = result

         except (RuntimeError, Exception):
             logging.error("ContainerScenarioPlugin exiting due to Exception %s")
@@ -53,17 +47,18 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin):
     def get_scenario_types(self) -> list[str]:
         return ["container_scenarios"]

-    def start_monitoring(self, kill_scenario: dict, pool: PodsMonitorPool):
+    def start_monitoring(self, kill_scenario: dict, lib_telemetry: KrknTelemetryOpenshift) -> Future:
         namespace_pattern = f"^{kill_scenario['namespace']}$"
         label_selector = kill_scenario["label_selector"]
         recovery_time = kill_scenario["expected_recovery_time"]
-        pool.select_and_monitor_by_namespace_pattern_and_label(
+        future_snapshot = select_and_monitor_by_namespace_pattern_and_label(
             namespace_pattern=namespace_pattern,
             label_selector=label_selector,
             max_timeout=recovery_time,
-            field_selector="status.phase=Running"
+            v1_client=lib_telemetry.get_lib_kubernetes().cli
         )
+        return future_snapshot

     def container_killing_in_pod(self, cont_scenario, kubecli: KrknKubernetes):
         scenario_name = get_yaml_item_value(cont_scenario, "name", "")
```
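The core of this change is a move from the blocking `PodsMonitorPool` to the module-level helpers in `krkn_lib.k8s.pod_monitor`, which return a `Future` that resolves to a pods snapshot. A minimal sketch of the new flow, using only the calls visible in the diff; the `CoreV1Api` client setup and the selector values are my assumptions for illustration:

```python
# Sketch of the monitoring pattern introduced here, assuming krkn-lib >= 5.1.5
# and a reachable cluster. In the plugin, v1_client comes from
# lib_telemetry.get_lib_kubernetes().cli.
from kubernetes import client, config
from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label

config.load_kube_config()
v1 = client.CoreV1Api()

# Start watching matching pods in the background; returns a Future.
future_snapshot = select_and_monitor_by_namespace_pattern_and_label(
    namespace_pattern="^default$",
    label_selector="app=nginx",  # hypothetical selector for illustration
    max_timeout=120,             # seconds to wait for pod recovery
    v1_client=v1,
)

# ... inject chaos here (kill containers/pods) ...

snapshot = future_snapshot.result()  # blocks until monitoring completes
result = snapshot.get_pods_status()  # recovered/unrecovered pod status
print(result)
```

Monitoring now runs per kill scenario and the result is attached to `scenario_telemetry.affected_pods` directly, instead of joining a shared pool after the loop.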
**`PodDisruptionScenarioPlugin` (Python)**

```diff
@@ -1,14 +1,16 @@
 import logging
 import random
 import time
+from asyncio import Future

 import yaml
 from krkn_lib.k8s import KrknKubernetes
-from krkn_lib.k8s.pods_monitor_pool import PodsMonitorPool
+from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label, \
+    select_and_monitor_by_name_pattern_and_namespace_pattern

 from krkn.scenario_plugins.pod_disruption.models.models import InputParams
 from krkn_lib.models.telemetry import ScenarioTelemetry
 from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
-from krkn_lib.utils import get_yaml_item_value
 from datetime import datetime
 from dataclasses import dataclass
@@ -29,31 +31,23 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
         lib_telemetry: KrknTelemetryOpenshift,
         scenario_telemetry: ScenarioTelemetry,
     ) -> int:
-        pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes())
         try:
             with open(scenario, "r") as f:
                 cont_scenario_config = yaml.full_load(f)
                 for kill_scenario in cont_scenario_config:
                     kill_scenario_config = InputParams(kill_scenario["config"])
-                    self.start_monitoring(
-                        kill_scenario_config, pool
+                    future_snapshot = self.start_monitoring(
+                        kill_scenario_config,
+                        lib_telemetry
                     )
-                    return_status = self.killing_pods(
+                    self.killing_pods(
                         kill_scenario_config, lib_telemetry.get_lib_kubernetes()
                     )
-                    if return_status != 0:
-                        result = pool.cancel()
-                    else:
-                        result = pool.join()
-                    if result.error:
-                        logging.error(
-                            f"PodDisruptionScenariosPlugin pods failed to recovery: {result.error}"
-                        )
-                        return 1
-                    scenario_telemetry.affected_pods = result
+                    snapshot = future_snapshot.result()
+                    result = snapshot.get_pods_status()
+                    scenario_telemetry.affected_pods = result

         except (RuntimeError, Exception) as e:
             logging.error("PodDisruptionScenariosPlugin exiting due to Exception %s" % e)
@@ -64,7 +58,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
     def get_scenario_types(self) -> list[str]:
         return ["pod_disruption_scenarios"]

-    def start_monitoring(self, kill_scenario: InputParams, pool: PodsMonitorPool):
+    def start_monitoring(self, kill_scenario: InputParams, lib_telemetry: KrknTelemetryOpenshift) -> Future:
         recovery_time = kill_scenario.krkn_pod_recovery_time
         if (
@@ -73,16 +67,17 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
         ):
             namespace_pattern = kill_scenario.namespace_pattern
             label_selector = kill_scenario.label_selector
-            pool.select_and_monitor_by_namespace_pattern_and_label(
+            future_snapshot = select_and_monitor_by_namespace_pattern_and_label(
                 namespace_pattern=namespace_pattern,
                 label_selector=label_selector,
                 max_timeout=recovery_time,
-                field_selector="status.phase=Running"
+                v1_client=lib_telemetry.get_lib_kubernetes().cli
             )
             logging.info(
                 f"waiting up to {recovery_time} seconds for pod recovery, "
                 f"pod label pattern: {label_selector} namespace pattern: {namespace_pattern}"
             )
+            return future_snapshot

         elif (
             kill_scenario.namespace_pattern
@@ -90,16 +85,17 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin):
         ):
             namespace_pattern = kill_scenario.namespace_pattern
             name_pattern = kill_scenario.name_pattern
-            pool.select_and_monitor_by_name_pattern_and_namespace_pattern(
+            future_snapshot = select_and_monitor_by_name_pattern_and_namespace_pattern(
                 pod_name_pattern=name_pattern,
                 namespace_pattern=namespace_pattern,
                 max_timeout=recovery_time,
-                field_selector="status.phase=Running"
+                v1_client=lib_telemetry.get_lib_kubernetes().cli
             )
             logging.info(
                 f"waiting up to {recovery_time} seconds for pod recovery, "
                 f"pod name pattern: {name_pattern} namespace pattern: {namespace_pattern}"
             )
+            return future_snapshot
         else:
             raise Exception(
                 f"impossible to determine monitor parameters, check {kill_scenario} configuration"
```
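The pod-disruption plugin gets the same treatment: `start_monitoring` now returns the `Future`, choosing between the label-based and the name-pattern-based monitor helper depending on which fields are set in `InputParams`, and the `field_selector="status.phase=Running"` argument gives way to passing the underlying kubernetes client (`.cli`) directly. The `pool.cancel()` path for failed kills drops out entirely, since there is no shared pool left to cancel.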
**`ServiceHijackingScenarioPlugin` (Python)**

```diff
@@ -5,7 +5,7 @@ import yaml
 from krkn_lib.models.telemetry import ScenarioTelemetry
 from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
 from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
+from krkn_lib.utils import get_yaml_item_value

 class ServiceHijackingScenarioPlugin(AbstractScenarioPlugin):
     def run(
@@ -25,6 +25,8 @@ class ServiceHijackingScenarioPlugin(AbstractScenarioPlugin):
         image = scenario_config["image"]
         target_port = scenario_config["service_target_port"]
         chaos_duration = scenario_config["chaos_duration"]
+        privileged = get_yaml_item_value(scenario_config,"privileged", True)
+
         logging.info(
             f"checking service {service_name} in namespace: {service_namespace}"
@@ -46,14 +48,14 @@ class ServiceHijackingScenarioPlugin(AbstractScenarioPlugin):
             logging.info(f"webservice will listen on port {target_port}")
             webservice = (
                 lib_telemetry.get_lib_kubernetes().deploy_service_hijacking(
-                    service_namespace, plan, image, port_number=target_port
+                    service_namespace, plan, image, port_number=target_port, privileged=privileged
                 )
             )
         else:
             logging.info(f"traffic will be redirected to named port: {target_port}")
             webservice = (
                 lib_telemetry.get_lib_kubernetes().deploy_service_hijacking(
-                    service_namespace, plan, image, port_name=target_port
+                    service_namespace, plan, image, port_name=target_port, privileged=privileged
                )
            )
            logging.info(
```
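The new `privileged` flag defaults to `True` when absent from the scenario file, via `get_yaml_item_value`, and is forwarded to `deploy_service_hijacking` on both the numbered-port and named-port paths. A small sketch of the default handling; the inline dict stands in for a parsed scenario config:

```python
# get_yaml_item_value(d, key, default) returns d[key] if present, else the
# default; here it decides whether the hijacking webservice pod requests a
# privileged securityContext. The dict is a stand-in for yaml.full_load() output.
from krkn_lib.utils import get_yaml_item_value

scenario_config = {"service_target_port": 8080, "chaos_duration": 30}
privileged = get_yaml_item_value(scenario_config, "privileged", True)
print(privileged)  # True: the key is absent, so the default applies
```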
**`VirtChecker` (Python)**

```diff
@@ -57,7 +57,7 @@ class VirtChecker:
         :param namespace:
         :return: virtctl_status 'True' if successful, or an error message if it fails.
         """
-        virtctl_vm_cmd = f"virtctl ssh --local-ssh-opts='-o BatchMode=yes' --local-ssh-opts='-o PasswordAuthentication=no' --local-ssh-opts='-o ConnectTimeout=2' root@{vm_name} -n {namespace}"
+        virtctl_vm_cmd = f"virtctl ssh --local-ssh-opts='-o BatchMode=yes' --local-ssh-opts='-o PasswordAuthentication=no' --local-ssh-opts='-o ConnectTimeout=2' root@vmi/{vm_name} -n {namespace} 2>&1 |egrep 'denied|verification failed' && echo 'True' || echo 'False'"
         check_virtctl_vm_cmd = f"virtctl ssh --local-ssh-opts='-o BatchMode=yes' --local-ssh-opts='-o PasswordAuthentication=no' --local-ssh-opts='-o ConnectTimeout=2' root@{vm_name} -n {namespace} 2>&1 |egrep 'denied|verification failed' && echo 'True' || echo 'False'"
         if 'True' in invoke_no_exit(check_virtctl_vm_cmd):
             return True
```
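The rewritten command targets `root@vmi/{vm_name}` and treats an SSH "denied" or "verification failed" response as proof of liveness: an authentication or host-key error means the guest's SSH endpoint answered, whereas an unreachable VM fails at connect time (the 2-second `ConnectTimeout`) and the `egrep` pipeline falls through to echo 'False'.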
**requirements.txt**

```diff
@@ -16,7 +16,7 @@ google-cloud-compute==1.22.0
 ibm_cloud_sdk_core==3.18.0
 ibm_vpc==0.20.0
 jinja2==3.1.6
-krkn-lib==5.1.1
+krkn-lib==5.1.5
 lxml==5.1.0
 kubernetes==28.1.0
 numpy==1.26.4
```
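The krkn-lib pin moves from 5.1.1 to 5.1.5, presumably the version that provides the `krkn_lib.k8s.pod_monitor` module and the `privileged` parameter to `deploy_service_hijacking` used by the plugin changes above.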
**Service-hijacking scenario config (YAML)**

```diff
@@ -5,6 +5,7 @@ service_name: nginx-service # name of the service to be hijacked
 service_namespace: default # The namespace where the target service is located
 image: quay.io/krkn-chaos/krkn-service-hijacking:v0.1.3 # Image of the krkn web service to be deployed to receive traffic.
 chaos_duration: 30 # Total duration of the chaos scenario in seconds.
+privileged: True # True or false if need privileged securityContext to run
 plan:
   - resource: "/list/index.php" # Specifies the resource or path to respond to in the scenario. For paths, both the path and query parameters are captured but ignored.
                                 # For resources, only query parameters are captured.
```