From eb86885bcda3a961069765f054dbe888cf696810 Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Thu, 13 Nov 2025 10:37:42 -0500 Subject: [PATCH] adding kube virt check failure (#952) Signed-off-by: Paige Patton --- .github/workflows/tests.yml | 2 +- config/config.yaml | 4 +++- krkn/utils/VirtChecker.py | 11 ++++++++--- run_kraken.py | 6 +++++- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29b32cdb..e912faca 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -84,7 +84,7 @@ jobs: yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml yq -i '.elastic.elastic_port=9200' CI/config/common_test_config.yaml yq -i '.elastic.elastic_url="https://localhost"' CI/config/common_test_config.yaml - yq -i '.elastic.enable_elastic=True' CI/config/common_test_config.yaml + yq -i '.elastic.enable_elastic=False' CI/config/common_test_config.yaml yq -i '.elastic.password="${{env.ELASTIC_PASSWORD}}"' CI/config/common_test_config.yaml yq -i '.performance_monitoring.prometheus_url="http://localhost:9090"' CI/config/common_test_config.yaml echo "test_service_hijacking" > ./CI/tests/functional_tests diff --git a/config/config.yaml b/config/config.yaml index cc69b6ab..2632168f 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -126,4 +126,6 @@ kubevirt_checks: # Utilizing virt che name: # Regex Name style of VMI's to watch, optional, will watch all VMI names in the namespace if left blank only_failures: False # Boolean of whether to show all VMI's failures and successful ssh connection (False), or only failure status' (True) disconnected: False # Boolean of how to try to connect to the VMIs; if True will use the ip_address to try ssh from within a node, if false will use the name and uses virtctl to try to connect; Default is False - ssh_node: "" # If set, will be a backup way to ssh to a node. 
Will want to set to a node that isn't targeted in chaos \ No newline at end of file + ssh_node: "" # If set, will be a backup way to ssh to a node. Will want to set to a node that isn't targeted in chaos + node_names: "" + exit_on_failure: # If True, the run returns a failure when VMIs are still failing after the chaos run; valid values are True/False \ No newline at end of file diff --git a/krkn/utils/VirtChecker.py b/krkn/utils/VirtChecker.py index 11c4a6d8..d17c459d 100644 --- a/krkn/utils/VirtChecker.py +++ b/krkn/utils/VirtChecker.py @@ -23,9 +23,7 @@ class VirtChecker: self.threads_limit = threads_limit # setting to 0 in case no variables are set, so no threads later get made self.batch_size = 0 - if self.namespace == "": - logging.info("kube virt checks config is not defined, skipping them") - return + self.ret_value = 0 vmi_name_match = get_yaml_item_value(kubevirt_check_config, "name", ".*") self.krkn_lib = krkn_lib self.disconnected = get_yaml_item_value(kubevirt_check_config, "disconnected", False) @@ -33,6 +31,10 @@ class VirtChecker: self.interval = get_yaml_item_value(kubevirt_check_config, "interval", 2) self.ssh_node = get_yaml_item_value(kubevirt_check_config, "ssh_node", "") self.node_names = get_yaml_item_value(kubevirt_check_config, "node_names", "") + self.exit_on_failure = get_yaml_item_value(kubevirt_check_config, "exit_on_failure", False) + if self.namespace == "": + logging.info("kube virt checks config is not defined, skipping them") + return try: self.kube_vm_plugin = KubevirtVmOutageScenarioPlugin() self.kube_vm_plugin.init_clients(k8s_client=krkn_lib) @@ -254,4 +256,7 @@ class VirtChecker: thread.join() if not post_kubevirt_check_queue.empty(): kubevirt_check_telem.extend(post_kubevirt_check_queue.get_nowait()) + + if self.exit_on_failure and len(kubevirt_check_telem) > 0: + self.ret_value = 2 return kubevirt_check_telem \ No newline at end of file diff --git a/run_kraken.py b/run_kraken.py index 799d91df..154af637 100644 --- a/run_kraken.py +++ 
b/run_kraken.py @@ -133,7 +133,7 @@ def main(options, command: Optional[str]) -> int: telemetry_api_url = config["telemetry"].get("api_url") health_check_config = get_yaml_item_value(config, "health_checks",{}) kubevirt_check_config = get_yaml_item_value(config, "kubevirt_checks", {}) - + # Initialize clients if not os.path.isfile(kubeconfig_path) and not os.path.isfile( "/var/run/secrets/kubernetes.io/serviceaccount/token" @@ -557,6 +557,10 @@ def main(options, command: Optional[str]) -> int: logging.error("Health check failed for the applications, Please check; exiting") return health_checker.ret_value + if kubevirt_checker.ret_value != 0: + logging.error("Kubevirt check still had failed VMIs at end of run, Please check; exiting") + return kubevirt_checker.ret_value + logging.info( "Successfully finished running Kraken. UUID for the run: " "%s. Report generated at %s. Exiting" % (run_uuid, report_file)