Core Refactoring, Krkn Scenario Plugin API (#694)

* relocated shared libraries from `kraken` to `krkn` folder

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* AbstractScenarioPlugin and ScenarioPluginFactory

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* application_outage porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* arcaflow_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* managedcluster_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* network_chaos porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* node_actions porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* plugin_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* pvc_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* service_disruption porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* service_hijacking porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* cluster_shut_down_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* syn_flood porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* time_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* zone_outages porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* ScenarioPluginFactory tests

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* unit tests update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* pod_scenarios and post actions deprecated

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

scenarios post_actions

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* funtests and config update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* run_krkn.py update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* utils porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* API Documentation

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* container_scenarios porting

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

fix

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* funtest fix

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* document gif update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* Documentation + tests update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* removed example plugin

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* global renaming

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

test fix

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

test fix

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* config.yaml typos

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

typos

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* removed `plugin_scenarios` from NativeScenarioPlugin class

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* pod_network_scenarios type added

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* documentation update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

* krkn-lib update

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

typo

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>

---------

Signed-off-by: Tullio Sebastiani <tsebasti@redhat.com>
This commit is contained in:
Tullio Sebastiani
2024-10-03 20:48:04 +02:00
committed by GitHub
parent a13fb43d94
commit d91172d9b2
154 changed files with 5412 additions and 4827 deletions

View File

@@ -1,29 +0,0 @@
#!/usr/bin/env python3
import subprocess
import logging
import time
def run(cmd):
    """Run ``cmd`` through the shell and return its captured output.

    stderr is redirected into stdout, so the returned text contains both
    streams. ``cmd`` is executed with ``shell=True``; pass trusted strings only.

    Args:
        cmd: shell command line to execute.

    Returns:
        The combined stdout/stderr text of the command, or an empty string
        when the command could not be started.
    """
    # Default value so a failed launch returns "" instead of raising
    # UnboundLocalError on the final return.
    out = ""
    try:
        output = subprocess.Popen(
            cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        # stderr is merged into stdout, so the second element is always None.
        out, _ = output.communicate()
    except Exception as e:
        logging.error("Failed to run %s, error: %s", cmd, e)
    return out
# Poll the etcd pods until three of them report all four containers ready
# ('4/4'), trying at most 100 times with a 5 second pause after each miss.
for _attempt in range(100):
    pods_running = run("oc get pods -n openshift-etcd -l app=etcd | grep -c '4/4'").rstrip()
    if pods_running == "3":
        break
    time.sleep(5)

# Report the outcome of the last poll.
if pods_running != "3":
    print("ERROR there were " + str(pods_running) + " pods running instead of 3")
else:
    print("There were 3 pods running properly")

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python3
import subprocess
import logging
def run(cmd):
    """Run ``cmd`` through the shell, log its output and return it.

    stderr is redirected into stdout, so the returned text contains both
    streams. ``cmd`` is executed with ``shell=True``; pass trusted strings only.

    Args:
        cmd: shell command line to execute.

    Returns:
        The combined stdout/stderr text of the command, or an empty string
        when the command could not be started.
    """
    # Default value so a failed launch returns "" instead of raising
    # UnboundLocalError on the final return.
    out = ""
    try:
        output = subprocess.Popen(
            cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        # stderr is merged into stdout, so the second element is always None.
        out, _ = output.communicate()
        logging.info("out " + str(out))
    except Exception as e:
        logging.error("Failed to run %s, error: %s", cmd, e)
    return out
# Count the pods in openshift-etcd whose listing line contains "Running"
# and compare that count with the three pods that are expected.
pods_running = run("oc get pods -n openshift-etcd | grep -c Running").rstrip()
if pods_running != "3":
    print("ERROR there were " + str(pods_running) + " pods running instead of 3")
else:
    print("There were 3 pods running properly")

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env python3
import subprocess
import time
def run(cmd):
    """Run ``cmd`` through the shell and return its captured output.

    stderr is redirected into stdout, so the returned text contains both
    streams. ``cmd`` is executed with ``shell=True``; pass trusted strings only.

    Args:
        cmd: shell command line to execute.

    Returns:
        The combined stdout/stderr text of the command, or an empty string
        when the command could not be started.
    """
    # Default value so a failed launch returns "" instead of raising
    # UnboundLocalError on the final return.
    out = ""
    try:
        output = subprocess.Popen(
            cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        # stderr is merged into stdout, so the second element is always None.
        out, _ = output.communicate()
    except Exception as e:
        print("Failed to run %s, error: %s" % (cmd, e))
    return out
# Poll until three projects whose listing line contains 'ingress' are Active,
# trying at most 100 times with a 5 second pause after each miss.
for _attempt in range(100):
    projects_active = run("oc get project | grep 'ingress' | grep -c Active").rstrip()
    if projects_active == "3":
        break
    time.sleep(5)

# Report the outcome of the last poll.
if projects_active != "3":
    print("ERROR there were " + str(projects_active) + " projects running instead of 3")
else:
    print("There were 3 projects running properly")

View File

@@ -1,6 +0,0 @@
# yaml-language-server: $schema=../plugin.schema.json
# Single plugin step with id "kill-pods".
- id: kill-pods
config:
# Anchored pattern (^...$): presumably a regex that matches only the
# openshift-monitoring namespace; confirm against the plugin schema.
namespace_pattern: ^openshift-monitoring$
# Label used to select the target pods (app=prometheus).
label_selector: app=prometheus
# NOTE(review): unit is not stated here; presumably seconds, verify against the schema.
krkn_pod_recovery_time: 120

View File

@@ -1,90 +0,0 @@
#!/usr/bin/env python3
import logging
import re
import subprocess
import sys
from kubernetes import client, config
from kubernetes.client.rest import ApiException
def list_namespaces():
    """
    List all namespaces

    Loads the local kube config, queries the core API for every namespace
    and returns their names as a list of strings. When the API call raises
    an ApiException the error is logged and an empty list is returned.
    """
    try:
        config.load_kube_config()
        cli = client.CoreV1Api()
        ret = cli.list_namespace(pretty=True)
    except ApiException as e:
        logging.error(
            "Exception when calling CoreV1Api->list_namespace: %s\n",
            e
        )
        # Without this early return the code below would read an unbound
        # `ret` and fail with UnboundLocalError instead of the logged error.
        return []
    return [current_namespace.metadata.name for current_namespace in ret.items]
def check_namespaces(namespaces):
    """
    Check if all the watch_namespaces are valid

    Every entry of `namespaces` that is not the exact name of an existing
    namespace is treated as a regular expression and expanded to all existing
    namespaces it matches (re.search). Returns the list of resolved namespace
    names, in no particular order. If an entry matches no namespace, or any
    other error occurs, the error is logged and the process exits with
    status 1.
    """
    try:
        valid_namespaces = list_namespaces()
        # Entries that are not literal namespace names are handled as regexes.
        regex_namespaces = set(namespaces) - set(valid_namespaces)
        final_namespaces = set(namespaces) - set(regex_namespaces)
        valid_regex = set()
        # Every regex is tested on its own against every namespace. Stopping
        # at the first regex that matches a namespace would leave other
        # regexes matching only that same namespace wrongly flagged as invalid.
        for regex_namespace in regex_namespaces:
            matching = [
                current_ns
                for current_ns in valid_namespaces
                if re.search(regex_namespace, current_ns)
            ]
            if matching:
                final_namespaces.update(matching)
                valid_regex.add(regex_namespace)
        invalid_namespaces = regex_namespaces - valid_regex
        if invalid_namespaces:
            raise Exception(
                "There exists no namespaces matching: %s" % (
                    invalid_namespaces
                )
            )
        return list(final_namespaces)
    except Exception as e:
        logging.error(str(e))
        sys.exit(1)
def run(cmd):
    """
    Run cmd through the shell and return its captured output

    stderr is redirected into stdout, so the returned text contains both
    streams. cmd is executed with shell=True; pass trusted strings only.
    Returns an empty string when the command could not be started.
    """
    # Default value so a failed launch returns "" instead of raising
    # UnboundLocalError on the final return.
    out = ""
    try:
        output = subprocess.Popen(
            cmd,
            shell=True,
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT
        )
        # stderr is merged into stdout, so the second element is always None.
        out, _ = output.communicate()
    except Exception as e:
        logging.error("Failed to run %s, error: %s", cmd, e)
    return out
def print_running_pods():
    """
    Print the number of Running pods across the openshift-.* namespaces

    The namespace pattern is resolved through check_namespaces; for each
    resulting namespace the lines of `oc get pods` containing "Running" are
    counted and the counts are summed. Namespaces whose count cannot be
    parsed as an integer are ignored.
    """
    total_running = 0
    for namespace in check_namespaces(["openshift-.*"]):
        count_text = run(
            "oc get pods -n " + namespace + " | grep -c Running"
        ).rstrip()
        try:
            total_running += int(count_text)
        except Exception:
            # Output that is not a plain number contributes nothing.
            continue
    print(total_running)


if __name__ == '__main__':
    print_running_pods()

View File

@@ -1,11 +0,0 @@
#!/bin/bash
# Count the lines of the openshift-etcd pod listing that contain "Running".
pods=$(oc get pods -n openshift-etcd | grep -c Running)
echo "$pods"

if [ "$pods" -eq 3 ]; then
    echo "Pods Pass"
else
    # The message has to contain an upper-case ERROR so that run_kraken
    # catches the failure.
    echo "ERROR pod count $pods doesnt match 3 expected pods"
fi

View File

@@ -1,76 +0,0 @@
#!/usr/bin/env python3
import subprocess
import logging
import time
import yaml
def run(cmd):
    """Run ``cmd`` through the shell and return its captured output.

    stderr is redirected into stdout, so the returned text contains both
    streams. ``cmd`` is executed with ``shell=True``; pass trusted strings only.

    Args:
        cmd: shell command line to execute.

    Returns:
        The combined stdout/stderr text of the command, or an empty string
        when the command could not be started.
    """
    out = ""
    try:
        output = subprocess.Popen(
            cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        # stderr is merged into stdout, so the second element is always None.
        out, _ = output.communicate()
    except Exception as e:
        # Logged at ERROR: an INFO record is dropped by the default logging
        # configuration, which would make a failed command completely silent.
        logging.error("Failed to run %s, error: %s", cmd, e)
    return out
# Get cluster operators and return yaml
def get_cluster_operators():
    """Return the parsed output of ``kubectl get co -o yaml``.

    Returns:
        The object produced by ``yaml.safe_load`` for the command output.
    """
    operators_status = run("kubectl get co -o yaml")
    # safe_load accepts only the stream; passing a Loader keyword raises
    # TypeError, so it is not given here.
    status_yaml = yaml.safe_load(operators_status)
    return status_yaml
# Monitor cluster operators
def monitor_cluster_operator(cluster_operators):
    """Return the names of the cluster operators considered failed.

    An operator is considered failed when it has a "Degraded" condition whose
    status is anything other than "False", or when it has no
    ``status.conditions`` section at all.

    Args:
        cluster_operators: mapping with an "items" list of operator mappings.

    Returns:
        List of operator names, in the order the operators appear in "items".
    """
    degraded_names = []
    for operator in cluster_operators["items"]:
        has_conditions = "status" in operator and "conditions" in operator["status"]
        if not has_conditions:
            # No conditions to inspect: report the operator as failed.
            logging.info("Can't find status of " + operator["metadata"]["name"])
            degraded_names.append(operator["metadata"]["name"])
            continue
        # Look for a Degraded condition whose status is not "False"; any()
        # stops at the first one found.
        is_degraded = any(
            condition["type"] == "Degraded" and condition["status"] != "False"
            for condition in operator["status"]["conditions"]
        )
        if is_degraded:
            degraded_names.append(operator["metadata"]["name"])
    return degraded_names
# Seconds to sleep between two consecutive polls.
wait_duration = 10
# Maximum number of seconds spent waiting before giving up.
timeout = 900
# Seconds slept so far.
# NOTE(review): the counter is not reset between the two waits below, so the
# timeout acts as a combined budget for both - confirm this is intended.
counter = 0

# Wait until no cluster operator is reported as failed.
co_yaml = get_cluster_operators()
failed_operators = monitor_cluster_operator(co_yaml)
while failed_operators:
    # The timeout is evaluated only while operators are still failing, so a
    # recovery seen on the last poll is not reported as a failure.
    if counter >= timeout:
        print("Cluster operators are still degraded after " + str(timeout) + " seconds")
        print("Degraded operators " + str(failed_operators))
        # SystemExit is a builtin; `exit` is only injected by the site module.
        raise SystemExit(1)
    time.sleep(wait_duration)
    counter += wait_duration
    co_yaml = get_cluster_operators()
    failed_operators = monitor_cluster_operator(co_yaml)

# Wait until no node is listed as NotReady.
not_ready = run("oc get nodes --no-headers | grep 'NotReady' | wc -l").rstrip()
while int(not_ready) > 0:
    if counter >= timeout:
        print("Nodes are still not ready after " + str(timeout) + " seconds")
        raise SystemExit(1)
    time.sleep(wait_duration)
    counter += wait_duration
    not_ready = run("oc get nodes --no-headers | grep 'NotReady' | wc -l").rstrip()

# Print the ready nodes of each role. Plain `grep -v` replaces `egrep -v`: the
# pattern is a fixed word, and recent GNU grep prints an obsolescence warning
# for egrep on stderr, which run() merges into the captured output.
worker_nodes = run("oc get nodes --no-headers | grep worker | grep -v NotReady | awk '{print $1}'").rstrip()
print("Worker nodes list \n" + str(worker_nodes))
master_nodes = run("oc get nodes --no-headers | grep master | grep -v NotReady | awk '{print $1}'").rstrip()
print("Master nodes list \n" + str(master_nodes))
infra_nodes = run("oc get nodes --no-headers | grep infra | grep -v NotReady | awk '{print $1}'").rstrip()
print("Infra nodes list \n" + str(infra_nodes))