From 197ce43f9a88b72bf05f6ccf941b202af10b5cec Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:10:05 -0500 Subject: [PATCH 01/11] adding test server (#982) Signed-off-by: Paige Patton --- .github/workflows/tests.yml | 6 +- CI/config/common_test_config.yaml | 4 + CI/tests/test_pod_server.sh | 35 +++ tests/test_server.py | 385 ++++++++++++++++++++++++++++++ 4 files changed, 426 insertions(+), 4 deletions(-) create mode 100755 CI/tests/test_pod_server.sh create mode 100644 tests/test_server.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5eb3e90c..d774b666 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -79,8 +79,6 @@ jobs: if: | github.event_name == 'pull_request' run: | - yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml - yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml yq -i '.elastic.elastic_port=9200' CI/config/common_test_config.yaml yq -i '.elastic.elastic_url="https://localhost"' CI/config/common_test_config.yaml @@ -100,6 +98,7 @@ jobs: echo "test_memory_hog" >> ./CI/tests/functional_tests echo "test_io_hog" >> ./CI/tests/functional_tests echo "test_pod_network_filter" >> ./CI/tests/functional_tests + echo "test_pod_server" >> ./CI/tests/functional_tests # Push on main only steps + all other functional to collect coverage # for the badge @@ -113,8 +112,6 @@ jobs: - name: Setup Post Merge Request Functional Tests if: github.ref == 'refs/heads/main' && github.event_name == 'push' run: | - yq -i '.kraken.port="8081"' CI/config/common_test_config.yaml - yq -i '.kraken.signal_address="0.0.0.0"' CI/config/common_test_config.yaml yq -i '.kraken.performance_monitoring="localhost:9090"' CI/config/common_test_config.yaml yq -i '.elastic.enable_elastic=False' CI/config/common_test_config.yaml yq -i '.elastic.password="${{env.ELASTIC_PASSWORD}}"' CI/config/common_test_config.yaml @@ -137,6 +134,7 @@ jobs: echo "test_memory_hog" >> ./CI/tests/functional_tests echo "test_io_hog" >> ./CI/tests/functional_tests echo "test_pod_network_filter" >> ./CI/tests/functional_tests + echo "test_pod_server" >> ./CI/tests/functional_tests # Final common steps - name: Run Functional tests diff --git a/CI/config/common_test_config.yaml b/CI/config/common_test_config.yaml index c12c9f30..ef3a4f80 100644 --- a/CI/config/common_test_config.yaml +++ b/CI/config/common_test_config.yaml @@ -2,6 +2,10 @@ kraken: distribution: kubernetes # Distribution can be kubernetes or openshift. kubeconfig_path: ~/.kube/config # Path to kubeconfig. exit_on_failure: False # Exit when a post action scenario fails. + publish_kraken_status: True # Can be accessed at http://0.0.0.0:8081 + signal_state: RUN # Will wait for the RUN signal when set to PAUSE before running the scenarios, refer docs/signal.md for more details + signal_address: 0.0.0.0 # Signal listening address + port: 8081 # Signal port auto_rollback: True # Enable auto rollback for scenarios. rollback_versions_directory: /tmp/kraken-rollback # Directory to store rollback version files. chaos_scenarios: # List of policies/chaos scenarios to load. 
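The four keys added above wire up Kraken's signal server. A minimal sketch of the resulting control API, assuming the signal_address/port defaults shown here (GET / returns the current state, POST switches it; the new CI test below drives it the same way):

    curl http://0.0.0.0:8081/                # prints the current state, e.g. RUN
    curl -X POST http://0.0.0.0:8081/PAUSE   # hold scenarios until a RUN signal
    curl -X POST http://0.0.0.0:8081/RUN     # resume a paused run
    curl -X POST http://0.0.0.0:8081/STOP    # stop a daemon-mode run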
diff --git a/CI/tests/test_pod_server.sh b/CI/tests/test_pod_server.sh
new file mode 100755
index 00000000..0f9b2c36
--- /dev/null
+++ b/CI/tests/test_pod_server.sh
@@ -0,0 +1,35 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+function functional_test_pod_server {
+  export scenario_type="pod_disruption_scenarios"
+  export scenario_file="scenarios/kind/pod_etcd.yml"
+  export post_config=""
+
+  envsubst < CI/config/common_test_config.yaml > CI/config/pod_config.yaml
+  yq -i '.[0].config.kill=1' scenarios/kind/pod_etcd.yml
+
+  yq -i '.tunings.daemon_mode=True' CI/config/pod_config.yaml
+  cat CI/config/pod_config.yaml
+  python3 -m coverage run -a run_kraken.py -c CI/config/pod_config.yaml &
+  sleep 15
+  curl -X POST http://0.0.0.0:8081/STOP
+
+  wait
+
+  yq -i '.kraken.signal_state="PAUSE"' CI/config/pod_config.yaml
+  yq -i '.tunings.daemon_mode=False' CI/config/pod_config.yaml
+  cat CI/config/pod_config.yaml
+  python3 -m coverage run -a run_kraken.py -c CI/config/pod_config.yaml &
+  sleep 5
+  curl -X POST http://0.0.0.0:8081/RUN
+  wait
+
+  echo "Pod disruption with server scenario test: Success"
+}
+
+functional_test_pod_server
diff --git a/tests/test_server.py b/tests/test_server.py
new file mode 100644
index 00000000..b970bffe
--- /dev/null
+++ b/tests/test_server.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+
+"""
+Test suite for SimpleHTTPRequestHandler class
+
+Usage:
+    python -m coverage run -a -m unittest tests/test_server.py -v
+
+Assisted By: Claude Code
+"""
+
+import unittest
+from unittest.mock import Mock, patch, MagicMock
+from io import BytesIO
+
+import server
+from server import SimpleHTTPRequestHandler
+
+
+class TestSimpleHTTPRequestHandler(unittest.TestCase):
+
+    def setUp(self):
+        """
+        Set up test fixtures for SimpleHTTPRequestHandler
+        """
+        # Reset the global server_status before each test
+        server.server_status = ""
+        # Reset the requests_served counter
+        SimpleHTTPRequestHandler.requests_served = 0
+
+        # Create a mock request
+        self.mock_request = MagicMock()
+        self.mock_client_address = ('127.0.0.1', 12345)
+        self.mock_server = MagicMock()
+
+    def _create_handler(self, method='GET', path='/'):
+        """
+        Helper method to create a handler instance with mocked request
+        """
+        # Create a mock request with proper attributes
+        mock_request = MagicMock()
+        mock_request.makefile.return_value = BytesIO(
+            f"{method} {path} HTTP/1.1\r\n\r\n".encode('utf-8')
+        )
+
+        # Create handler
+        handler = SimpleHTTPRequestHandler(
+            mock_request,
+            self.mock_client_address,
+            self.mock_server
+        )
+
+        # Mock the wfile (write file) for response
+        handler.wfile = BytesIO()
+
+        return handler
+
+    def test_do_GET_root_path_calls_do_status(self):
+        """
+        Test do_GET with root path calls do_status
+        """
+        handler = self._create_handler('GET', '/')
+
+        with patch.object(handler, 'do_status') as mock_do_status:
+            handler.do_GET()
+            mock_do_status.assert_called_once()
+
+    def test_do_GET_non_root_path_does_nothing(self):
+        """
+        Test do_GET with non-root path does not call do_status
+        """
+        handler = self._create_handler('GET', '/other')
+
+        with patch.object(handler, 'do_status') as mock_do_status:
+            handler.do_GET()
+            mock_do_status.assert_not_called()
+
+    def test_do_status_sends_200_response(self):
+        """
+        Test do_status sends 200 status code
+        """
+        server.server_status = "TEST_STATUS"
+        handler = self._create_handler()
+
+        with patch.object(handler, 'send_response') as mock_send_response:
+            with patch.object(handler, 'end_headers'):
+                handler.do_status()
+ mock_send_response.assert_called_once_with(200) + + def test_do_status_writes_server_status(self): + """ + Test do_status writes server_status to response + """ + server.server_status = "RUNNING" + handler = self._create_handler() + + with patch.object(handler, 'send_response'): + with patch.object(handler, 'end_headers'): + handler.do_status() + + # Check that the status was written to wfile + response_content = handler.wfile.getvalue().decode('utf-8') + self.assertEqual(response_content, "RUNNING") + + def test_do_status_increments_requests_served(self): + """ + Test do_status increments requests_served counter + """ + # Note: Creating a handler increments the counter by 1 + # Then do_status increments it again + SimpleHTTPRequestHandler.requests_served = 0 + handler = self._create_handler() + initial_count = SimpleHTTPRequestHandler.requests_served + + with patch.object(handler, 'send_response'): + with patch.object(handler, 'end_headers'): + handler.do_status() + + self.assertEqual( + SimpleHTTPRequestHandler.requests_served, + initial_count + 1 + ) + + def test_do_status_multiple_requests_increment_counter(self): + """ + Test multiple do_status calls increment counter correctly + """ + SimpleHTTPRequestHandler.requests_served = 0 + + for i in range(5): + handler = self._create_handler() + with patch.object(handler, 'send_response'): + with patch.object(handler, 'end_headers'): + handler.do_status() + + # Each iteration: handler creation increments by 1, do_status increments by 1 + # Total: 5 * 2 = 10 + self.assertEqual(SimpleHTTPRequestHandler.requests_served, 10) + + def test_do_POST_STOP_path_calls_set_stop(self): + """ + Test do_POST with /STOP path calls set_stop + """ + handler = self._create_handler('POST', '/STOP') + + with patch.object(handler, 'set_stop') as mock_set_stop: + handler.do_POST() + mock_set_stop.assert_called_once() + + def test_do_POST_RUN_path_calls_set_run(self): + """ + Test do_POST with /RUN path calls set_run + """ + handler = self._create_handler('POST', '/RUN') + + with patch.object(handler, 'set_run') as mock_set_run: + handler.do_POST() + mock_set_run.assert_called_once() + + def test_do_POST_PAUSE_path_calls_set_pause(self): + """ + Test do_POST with /PAUSE path calls set_pause + """ + handler = self._create_handler('POST', '/PAUSE') + + with patch.object(handler, 'set_pause') as mock_set_pause: + handler.do_POST() + mock_set_pause.assert_called_once() + + def test_do_POST_unknown_path_does_nothing(self): + """ + Test do_POST with unknown path does not call any setter + """ + handler = self._create_handler('POST', '/UNKNOWN') + + with patch.object(handler, 'set_stop') as mock_set_stop: + with patch.object(handler, 'set_run') as mock_set_run: + with patch.object(handler, 'set_pause') as mock_set_pause: + handler.do_POST() + mock_set_stop.assert_not_called() + mock_set_run.assert_not_called() + mock_set_pause.assert_not_called() + + def test_set_run_sets_status_to_RUN(self): + """ + Test set_run sets global server_status to 'RUN' + """ + handler = self._create_handler() + + with patch.object(handler, 'send_response'): + with patch.object(handler, 'end_headers'): + handler.set_run() + + self.assertEqual(server.server_status, 'RUN') + + def test_set_run_sends_200_response(self): + """ + Test set_run sends 200 status code + """ + handler = self._create_handler() + + with patch.object(handler, 'send_response') as mock_send_response: + with patch.object(handler, 'end_headers'): + handler.set_run() + mock_send_response.assert_called_once_with(200) + + def 
test_set_stop_sets_status_to_STOP(self): + """ + Test set_stop sets global server_status to 'STOP' + """ + handler = self._create_handler() + + with patch.object(handler, 'send_response'): + with patch.object(handler, 'end_headers'): + handler.set_stop() + + self.assertEqual(server.server_status, 'STOP') + + def test_set_stop_sends_200_response(self): + """ + Test set_stop sends 200 status code + """ + handler = self._create_handler() + + with patch.object(handler, 'send_response') as mock_send_response: + with patch.object(handler, 'end_headers'): + handler.set_stop() + mock_send_response.assert_called_once_with(200) + + def test_set_pause_sets_status_to_PAUSE(self): + """ + Test set_pause sets global server_status to 'PAUSE' + """ + handler = self._create_handler() + + with patch.object(handler, 'send_response'): + with patch.object(handler, 'end_headers'): + handler.set_pause() + + self.assertEqual(server.server_status, 'PAUSE') + + def test_set_pause_sends_200_response(self): + """ + Test set_pause sends 200 status code + """ + handler = self._create_handler() + + with patch.object(handler, 'send_response') as mock_send_response: + with patch.object(handler, 'end_headers'): + handler.set_pause() + mock_send_response.assert_called_once_with(200) + + def test_requests_served_is_class_variable(self): + """ + Test requests_served is shared across all instances + """ + SimpleHTTPRequestHandler.requests_served = 0 + + handler1 = self._create_handler() # Increments to 1 + handler2 = self._create_handler() # Increments to 2 + + with patch.object(handler1, 'send_response'): + with patch.object(handler1, 'end_headers'): + handler1.do_status() # Increments to 3 + + with patch.object(handler2, 'send_response'): + with patch.object(handler2, 'end_headers'): + handler2.do_status() # Increments to 4 + + # Both handlers should see the same counter + # 2 handler creations + 2 do_status calls = 4 + self.assertEqual(handler1.requests_served, 4) + self.assertEqual(handler2.requests_served, 4) + self.assertEqual(SimpleHTTPRequestHandler.requests_served, 4) + + +class TestServerModuleFunctions(unittest.TestCase): + + def setUp(self): + """ + Set up test fixtures for server module functions + """ + server.server_status = "" + + def test_publish_kraken_status_sets_server_status(self): + """ + Test publish_kraken_status sets global server_status + """ + server.publish_kraken_status("NEW_STATUS") + self.assertEqual(server.server_status, "NEW_STATUS") + + def test_publish_kraken_status_overwrites_existing_status(self): + """ + Test publish_kraken_status overwrites existing status + """ + server.server_status = "OLD_STATUS" + server.publish_kraken_status("NEW_STATUS") + self.assertEqual(server.server_status, "NEW_STATUS") + + @patch('server.HTTPServer') + @patch('server._thread') + def test_start_server_creates_http_server(self, mock_thread, mock_http_server): + """ + Test start_server creates HTTPServer with correct address + """ + address = ("localhost", 8080) + mock_server_instance = MagicMock() + mock_http_server.return_value = mock_server_instance + + server.start_server(address, "RUNNING") + + mock_http_server.assert_called_once_with( + address, + SimpleHTTPRequestHandler + ) + + @patch('server.HTTPServer') + @patch('server._thread') + def test_start_server_starts_thread(self, mock_thread, mock_http_server): + """ + Test start_server starts a new thread for serve_forever + """ + address = ("localhost", 8080) + mock_server_instance = MagicMock() + mock_http_server.return_value = mock_server_instance + + 
server.start_server(address, "RUNNING") + + mock_thread.start_new_thread.assert_called_once() + # Check that serve_forever was passed to the thread + args = mock_thread.start_new_thread.call_args[0] + self.assertEqual(args[0], mock_server_instance.serve_forever) + + @patch('server.HTTPServer') + @patch('server._thread') + def test_start_server_publishes_status(self, mock_thread, mock_http_server): + """ + Test start_server publishes the provided status + """ + address = ("localhost", 8080) + mock_server_instance = MagicMock() + mock_http_server.return_value = mock_server_instance + + server.start_server(address, "INITIAL_STATUS") + + self.assertEqual(server.server_status, "INITIAL_STATUS") + + @patch('server.HTTPConnection') + def test_get_status_makes_http_request(self, mock_http_connection): + """ + Test get_status makes HTTP GET request to root path + """ + address = ("localhost", 8080) + mock_connection = MagicMock() + mock_response = MagicMock() + mock_response.read.return_value = b"TEST_STATUS" + mock_connection.getresponse.return_value = mock_response + mock_http_connection.return_value = mock_connection + + result = server.get_status(address) + + mock_http_connection.assert_called_once_with("localhost", 8080) + mock_connection.request.assert_called_once_with("GET", "/") + self.assertEqual(result, "TEST_STATUS") + + @patch('server.HTTPConnection') + def test_get_status_returns_decoded_response(self, mock_http_connection): + """ + Test get_status returns decoded response string + """ + address = ("localhost", 8080) + mock_connection = MagicMock() + mock_response = MagicMock() + mock_response.read.return_value = b"RUNNING" + mock_connection.getresponse.return_value = mock_response + mock_http_connection.return_value = mock_connection + + result = server.get_status(address) + + self.assertEqual(result, "RUNNING") + self.assertIsInstance(result, str) + + +if __name__ == "__main__": + unittest.main() From eb7a1e243cc434e0dc838d0c840bb583820c6c6f Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:38:56 -0500 Subject: [PATCH 02/11] adding aws tests for node scenarios (#996) Assisted By: Claude Code Signed-off-by: Paige Patton --- tests/test_aws_node_scenarios.py | 984 +++++++++++++++++++++++++++++++ 1 file changed, 984 insertions(+) create mode 100644 tests/test_aws_node_scenarios.py diff --git a/tests/test_aws_node_scenarios.py b/tests/test_aws_node_scenarios.py new file mode 100644 index 00000000..be71dda0 --- /dev/null +++ b/tests/test_aws_node_scenarios.py @@ -0,0 +1,984 @@ +#!/usr/bin/env python3 + +""" +Test suite for AWS node scenarios + +This test suite covers both the AWS class and aws_node_scenarios class +using mocks to avoid actual AWS API calls. 
+ +Usage: + python -m coverage run -a -m unittest tests/test_aws_node_scenarios.py -v + +Assisted By: Claude Code +""" + +import unittest +import sys +from unittest.mock import MagicMock, patch + +# Mock external dependencies before any imports that use them +sys.modules['boto3'] = MagicMock() +sys.modules['paramiko'] = MagicMock() + +from krkn_lib.k8s import KrknKubernetes +from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus +from krkn.scenario_plugins.node_actions.aws_node_scenarios import AWS, aws_node_scenarios + + +class TestAWS(unittest.TestCase): + """Test cases for AWS class""" + + def setUp(self): + """Set up test fixtures""" + # Mock boto3 to avoid actual AWS calls + self.boto_client_patcher = patch('boto3.client') + self.boto_resource_patcher = patch('boto3.resource') + + self.mock_client = self.boto_client_patcher.start() + self.mock_resource = self.boto_resource_patcher.start() + + # Create AWS instance with mocked boto3 + self.aws = AWS() + + def tearDown(self): + """Clean up after tests""" + self.boto_client_patcher.stop() + self.boto_resource_patcher.stop() + + def test_aws_init(self): + """Test AWS class initialization""" + self.assertIsNotNone(self.aws.boto_client) + self.assertIsNotNone(self.aws.boto_resource) + self.assertIsNotNone(self.aws.boto_instance) + + def test_get_instance_id_by_dns_name(self): + """Test getting instance ID by DNS name""" + mock_response = { + 'Reservations': [{ + 'Instances': [{ + 'InstanceId': 'i-1234567890abcdef0' + }] + }] + } + self.aws.boto_client.describe_instances = MagicMock(return_value=mock_response) + + instance_id = self.aws.get_instance_id('ip-10-0-1-100.ec2.internal') + + self.assertEqual(instance_id, 'i-1234567890abcdef0') + self.aws.boto_client.describe_instances.assert_called_once() + + def test_get_instance_id_by_ip_address(self): + """Test getting instance ID by IP address when DNS name fails""" + # First call returns empty, second call returns the instance + mock_response_empty = {'Reservations': []} + mock_response_with_instance = { + 'Reservations': [{ + 'Instances': [{ + 'InstanceId': 'i-1234567890abcdef0' + }] + }] + } + self.aws.boto_client.describe_instances = MagicMock( + side_effect=[mock_response_empty, mock_response_with_instance] + ) + + instance_id = self.aws.get_instance_id('ip-10-0-1-100') + + self.assertEqual(instance_id, 'i-1234567890abcdef0') + self.assertEqual(self.aws.boto_client.describe_instances.call_count, 2) + + def test_start_instances_success(self): + """Test starting instances successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.start_instances = MagicMock() + + self.aws.start_instances(instance_id) + + self.aws.boto_client.start_instances.assert_called_once_with( + InstanceIds=[instance_id] + ) + + def test_start_instances_failure(self): + """Test starting instances with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.start_instances = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.start_instances(instance_id) + + def test_stop_instances_success(self): + """Test stopping instances successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.stop_instances = MagicMock() + + self.aws.stop_instances(instance_id) + + self.aws.boto_client.stop_instances.assert_called_once_with( + InstanceIds=[instance_id] + ) + + def test_stop_instances_failure(self): + """Test stopping instances with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.stop_instances 
= MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.stop_instances(instance_id) + + def test_terminate_instances_success(self): + """Test terminating instances successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.terminate_instances = MagicMock() + + self.aws.terminate_instances(instance_id) + + self.aws.boto_client.terminate_instances.assert_called_once_with( + InstanceIds=[instance_id] + ) + + def test_terminate_instances_failure(self): + """Test terminating instances with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.terminate_instances = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.terminate_instances(instance_id) + + def test_reboot_instances_success(self): + """Test rebooting instances successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.reboot_instances = MagicMock() + + self.aws.reboot_instances(instance_id) + + self.aws.boto_client.reboot_instances.assert_called_once_with( + InstanceIds=[instance_id] + ) + + def test_reboot_instances_failure(self): + """Test rebooting instances with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_client.reboot_instances = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.reboot_instances(instance_id) + + def test_wait_until_running_success(self): + """Test waiting until instance is running successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_instance.wait_until_running = MagicMock() + + result = self.aws.wait_until_running(instance_id, timeout=600, poll_interval=15) + + self.assertTrue(result) + self.aws.boto_instance.wait_until_running.assert_called_once() + + def test_wait_until_running_with_affected_node(self): + """Test waiting until running with affected node tracking""" + instance_id = 'i-1234567890abcdef0' + affected_node = MagicMock(spec=AffectedNode) + self.aws.boto_instance.wait_until_running = MagicMock() + + with patch('time.time', side_effect=[100, 110]): + result = self.aws.wait_until_running( + instance_id, + timeout=600, + affected_node=affected_node, + poll_interval=15 + ) + + self.assertTrue(result) + affected_node.set_affected_node_status.assert_called_once_with("running", 10) + + def test_wait_until_running_failure(self): + """Test waiting until running with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_instance.wait_until_running = MagicMock( + side_effect=Exception("Timeout") + ) + + result = self.aws.wait_until_running(instance_id) + + self.assertFalse(result) + + def test_wait_until_stopped_success(self): + """Test waiting until instance is stopped successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_instance.wait_until_stopped = MagicMock() + + result = self.aws.wait_until_stopped(instance_id, timeout=600, poll_interval=15) + + self.assertTrue(result) + self.aws.boto_instance.wait_until_stopped.assert_called_once() + + def test_wait_until_stopped_with_affected_node(self): + """Test waiting until stopped with affected node tracking""" + instance_id = 'i-1234567890abcdef0' + affected_node = MagicMock(spec=AffectedNode) + self.aws.boto_instance.wait_until_stopped = MagicMock() + + with patch('time.time', side_effect=[100, 115]): + result = self.aws.wait_until_stopped( + instance_id, + timeout=600, + affected_node=affected_node, + poll_interval=15 + ) + + self.assertTrue(result) + 
affected_node.set_affected_node_status.assert_called_once_with("stopped", 15) + + def test_wait_until_stopped_failure(self): + """Test waiting until stopped with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_instance.wait_until_stopped = MagicMock( + side_effect=Exception("Timeout") + ) + + result = self.aws.wait_until_stopped(instance_id) + + self.assertFalse(result) + + def test_wait_until_terminated_success(self): + """Test waiting until instance is terminated successfully""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_instance.wait_until_terminated = MagicMock() + + result = self.aws.wait_until_terminated(instance_id, timeout=600, poll_interval=15) + + self.assertTrue(result) + self.aws.boto_instance.wait_until_terminated.assert_called_once() + + def test_wait_until_terminated_with_affected_node(self): + """Test waiting until terminated with affected node tracking""" + instance_id = 'i-1234567890abcdef0' + affected_node = MagicMock(spec=AffectedNode) + self.aws.boto_instance.wait_until_terminated = MagicMock() + + with patch('time.time', side_effect=[100, 120]): + result = self.aws.wait_until_terminated( + instance_id, + timeout=600, + affected_node=affected_node, + poll_interval=15 + ) + + self.assertTrue(result) + affected_node.set_affected_node_status.assert_called_once_with("terminated", 20) + + def test_wait_until_terminated_failure(self): + """Test waiting until terminated with failure""" + instance_id = 'i-1234567890abcdef0' + self.aws.boto_instance.wait_until_terminated = MagicMock( + side_effect=Exception("Timeout") + ) + + result = self.aws.wait_until_terminated(instance_id) + + self.assertFalse(result) + + def test_create_default_network_acl_success(self): + """Test creating default network ACL successfully""" + vpc_id = 'vpc-12345678' + acl_id = 'acl-12345678' + mock_response = { + 'NetworkAcl': { + 'NetworkAclId': acl_id + } + } + self.aws.boto_client.create_network_acl = MagicMock(return_value=mock_response) + + result = self.aws.create_default_network_acl(vpc_id) + + self.assertEqual(result, acl_id) + self.aws.boto_client.create_network_acl.assert_called_once_with(VpcId=vpc_id) + + def test_create_default_network_acl_failure(self): + """Test creating default network ACL with failure""" + vpc_id = 'vpc-12345678' + self.aws.boto_client.create_network_acl = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.create_default_network_acl(vpc_id) + + def test_replace_network_acl_association_success(self): + """Test replacing network ACL association successfully""" + association_id = 'aclassoc-12345678' + acl_id = 'acl-12345678' + new_association_id = 'aclassoc-87654321' + mock_response = { + 'NewAssociationId': new_association_id + } + self.aws.boto_client.replace_network_acl_association = MagicMock( + return_value=mock_response + ) + + result = self.aws.replace_network_acl_association(association_id, acl_id) + + self.assertEqual(result, new_association_id) + self.aws.boto_client.replace_network_acl_association.assert_called_once_with( + AssociationId=association_id, NetworkAclId=acl_id + ) + + def test_replace_network_acl_association_failure(self): + """Test replacing network ACL association with failure""" + association_id = 'aclassoc-12345678' + acl_id = 'acl-12345678' + self.aws.boto_client.replace_network_acl_association = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.replace_network_acl_association(association_id, acl_id) + + def 
test_describe_network_acls_success(self): + """Test describing network ACLs successfully""" + vpc_id = 'vpc-12345678' + subnet_id = 'subnet-12345678' + acl_id = 'acl-12345678' + associations = [{'NetworkAclId': acl_id, 'SubnetId': subnet_id}] + mock_response = { + 'NetworkAcls': [{ + 'Associations': associations + }] + } + self.aws.boto_client.describe_network_acls = MagicMock(return_value=mock_response) + + result_associations, result_acl_id = self.aws.describe_network_acls(vpc_id, subnet_id) + + self.assertEqual(result_associations, associations) + self.assertEqual(result_acl_id, acl_id) + + def test_describe_network_acls_failure(self): + """Test describing network ACLs with failure""" + vpc_id = 'vpc-12345678' + subnet_id = 'subnet-12345678' + self.aws.boto_client.describe_network_acls = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.describe_network_acls(vpc_id, subnet_id) + + def test_delete_network_acl_success(self): + """Test deleting network ACL successfully""" + acl_id = 'acl-12345678' + self.aws.boto_client.delete_network_acl = MagicMock() + + self.aws.delete_network_acl(acl_id) + + self.aws.boto_client.delete_network_acl.assert_called_once_with(NetworkAclId=acl_id) + + def test_delete_network_acl_failure(self): + """Test deleting network ACL with failure""" + acl_id = 'acl-12345678' + self.aws.boto_client.delete_network_acl = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.delete_network_acl(acl_id) + + def test_detach_volumes_success(self): + """Test detaching volumes successfully""" + volume_ids = ['vol-12345678', 'vol-87654321'] + self.aws.boto_client.detach_volume = MagicMock() + + self.aws.detach_volumes(volume_ids) + + self.assertEqual(self.aws.boto_client.detach_volume.call_count, 2) + self.aws.boto_client.detach_volume.assert_any_call(VolumeId='vol-12345678', Force=True) + self.aws.boto_client.detach_volume.assert_any_call(VolumeId='vol-87654321', Force=True) + + def test_detach_volumes_partial_failure(self): + """Test detaching volumes with partial failure""" + volume_ids = ['vol-12345678', 'vol-87654321'] + # First call succeeds, second fails - should not raise exception + self.aws.boto_client.detach_volume = MagicMock( + side_effect=[None, Exception("AWS error")] + ) + + # Should not raise exception, just log error + self.aws.detach_volumes(volume_ids) + + self.assertEqual(self.aws.boto_client.detach_volume.call_count, 2) + + def test_attach_volume_success(self): + """Test attaching volume successfully""" + attachment = { + 'VolumeId': 'vol-12345678', + 'InstanceId': 'i-1234567890abcdef0', + 'Device': '/dev/sdf' + } + mock_volume = MagicMock() + mock_volume.state = 'available' + self.aws.boto_resource.Volume = MagicMock(return_value=mock_volume) + self.aws.boto_client.attach_volume = MagicMock() + + self.aws.attach_volume(attachment) + + self.aws.boto_client.attach_volume.assert_called_once_with( + InstanceId=attachment['InstanceId'], + Device=attachment['Device'], + VolumeId=attachment['VolumeId'] + ) + + def test_attach_volume_already_in_use(self): + """Test attaching volume that is already in use""" + attachment = { + 'VolumeId': 'vol-12345678', + 'InstanceId': 'i-1234567890abcdef0', + 'Device': '/dev/sdf' + } + mock_volume = MagicMock() + mock_volume.state = 'in-use' + self.aws.boto_resource.Volume = MagicMock(return_value=mock_volume) + self.aws.boto_client.attach_volume = MagicMock() + + self.aws.attach_volume(attachment) + + # Should not attempt to 
attach + self.aws.boto_client.attach_volume.assert_not_called() + + def test_attach_volume_failure(self): + """Test attaching volume with failure""" + attachment = { + 'VolumeId': 'vol-12345678', + 'InstanceId': 'i-1234567890abcdef0', + 'Device': '/dev/sdf' + } + mock_volume = MagicMock() + mock_volume.state = 'available' + self.aws.boto_resource.Volume = MagicMock(return_value=mock_volume) + self.aws.boto_client.attach_volume = MagicMock( + side_effect=Exception("AWS error") + ) + + with self.assertRaises(RuntimeError): + self.aws.attach_volume(attachment) + + def test_get_volumes_ids(self): + """Test getting volume IDs from instance""" + instance_id = ['i-1234567890abcdef0'] + mock_response = { + 'Reservations': [{ + 'Instances': [{ + 'BlockDeviceMappings': [ + {'DeviceName': '/dev/sda1', 'Ebs': {'VolumeId': 'vol-root'}}, + {'DeviceName': '/dev/sdf', 'Ebs': {'VolumeId': 'vol-12345678'}}, + {'DeviceName': '/dev/sdg', 'Ebs': {'VolumeId': 'vol-87654321'}} + ] + }] + }] + } + mock_instance = MagicMock() + mock_instance.root_device_name = '/dev/sda1' + self.aws.boto_resource.Instance = MagicMock(return_value=mock_instance) + self.aws.boto_client.describe_instances = MagicMock(return_value=mock_response) + + volume_ids = self.aws.get_volumes_ids(instance_id) + + self.assertEqual(len(volume_ids), 2) + self.assertIn('vol-12345678', volume_ids) + self.assertIn('vol-87654321', volume_ids) + self.assertNotIn('vol-root', volume_ids) + + def test_get_volume_attachment_details(self): + """Test getting volume attachment details""" + volume_ids = ['vol-12345678', 'vol-87654321'] + mock_response = { + 'Volumes': [ + {'VolumeId': 'vol-12345678', 'State': 'in-use'}, + {'VolumeId': 'vol-87654321', 'State': 'available'} + ] + } + self.aws.boto_client.describe_volumes = MagicMock(return_value=mock_response) + + details = self.aws.get_volume_attachment_details(volume_ids) + + self.assertEqual(len(details), 2) + self.assertEqual(details[0]['VolumeId'], 'vol-12345678') + self.assertEqual(details[1]['VolumeId'], 'vol-87654321') + + def test_get_root_volume_id(self): + """Test getting root volume ID""" + instance_id = ['i-1234567890abcdef0'] + mock_instance = MagicMock() + mock_instance.root_device_name = '/dev/sda1' + self.aws.boto_resource.Instance = MagicMock(return_value=mock_instance) + + root_volume = self.aws.get_root_volume_id(instance_id) + + self.assertEqual(root_volume, '/dev/sda1') + + def test_get_volume_state(self): + """Test getting volume state""" + volume_id = 'vol-12345678' + mock_volume = MagicMock() + mock_volume.state = 'available' + self.aws.boto_resource.Volume = MagicMock(return_value=mock_volume) + + state = self.aws.get_volume_state(volume_id) + + self.assertEqual(state, 'available') + + +class TestAWSNodeScenarios(unittest.TestCase): + """Test cases for aws_node_scenarios class""" + + def setUp(self): + """Set up test fixtures""" + self.kubecli = MagicMock(spec=KrknKubernetes) + self.affected_nodes_status = AffectedNodeStatus() + + # Mock the AWS class + with patch('krkn.scenario_plugins.node_actions.aws_node_scenarios.AWS') as mock_aws_class: + self.mock_aws = MagicMock() + mock_aws_class.return_value = self.mock_aws + self.scenario = aws_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=True, + affected_nodes_status=self.affected_nodes_status + ) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_start_scenario_success(self, mock_wait_ready): + """Test node start scenario successfully""" + node = 
'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.start_instances.return_value = None + self.mock_aws.wait_until_running.return_value = True + + self.scenario.node_start_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + self.mock_aws.get_instance_id.assert_called_once_with(node) + self.mock_aws.start_instances.assert_called_once_with(instance_id) + self.mock_aws.wait_until_running.assert_called_once() + mock_wait_ready.assert_called_once() + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1) + self.assertEqual(self.affected_nodes_status.affected_nodes[0].node_name, node) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_start_scenario_no_kube_check(self, mock_wait_ready): + """Test node start scenario without kube check""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + # Create scenario with node_action_kube_check=False + with patch('krkn.scenario_plugins.node_actions.aws_node_scenarios.AWS') as mock_aws_class: + mock_aws = MagicMock() + mock_aws_class.return_value = mock_aws + scenario = aws_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=False, + affected_nodes_status=AffectedNodeStatus() + ) + + mock_aws.get_instance_id.return_value = instance_id + mock_aws.start_instances.return_value = None + mock_aws.wait_until_running.return_value = True + + scenario.node_start_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + # Should not call wait_for_ready_status + mock_wait_ready.assert_not_called() + + def test_node_start_scenario_failure(self): + """Test node start scenario with failure""" + node = 'ip-10-0-1-100.ec2.internal' + + self.mock_aws.get_instance_id.side_effect = Exception("AWS error") + + with self.assertRaises(RuntimeError): + self.scenario.node_start_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status') + def test_node_stop_scenario_success(self, mock_wait_unknown): + """Test node stop scenario successfully""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.stop_instances.return_value = None + self.mock_aws.wait_until_stopped.return_value = True + + self.scenario.node_stop_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + self.mock_aws.get_instance_id.assert_called_once_with(node) + self.mock_aws.stop_instances.assert_called_once_with(instance_id) + self.mock_aws.wait_until_stopped.assert_called_once() + mock_wait_unknown.assert_called_once() + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status') + def test_node_stop_scenario_no_kube_check(self, mock_wait_unknown): + """Test node stop scenario without kube check""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + # Create scenario with node_action_kube_check=False + with patch('krkn.scenario_plugins.node_actions.aws_node_scenarios.AWS') as mock_aws_class: + mock_aws = MagicMock() + mock_aws_class.return_value = mock_aws + scenario = aws_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=False, + 
affected_nodes_status=AffectedNodeStatus() + ) + + mock_aws.get_instance_id.return_value = instance_id + mock_aws.stop_instances.return_value = None + mock_aws.wait_until_stopped.return_value = True + + scenario.node_stop_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + # Should not call wait_for_unknown_status + mock_wait_unknown.assert_not_called() + + def test_node_stop_scenario_failure(self): + """Test node stop scenario with failure""" + node = 'ip-10-0-1-100.ec2.internal' + + self.mock_aws.get_instance_id.side_effect = Exception("AWS error") + + with self.assertRaises(RuntimeError): + self.scenario.node_stop_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + @patch('time.sleep') + def test_node_termination_scenario_success(self, _mock_sleep): + """Test node termination scenario successfully""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.terminate_instances.return_value = None + self.mock_aws.wait_until_terminated.return_value = True + self.kubecli.list_nodes.return_value = [] + + self.scenario.node_termination_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + self.mock_aws.get_instance_id.assert_called_once_with(node) + self.mock_aws.terminate_instances.assert_called_once_with(instance_id) + self.mock_aws.wait_until_terminated.assert_called_once() + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1) + + @patch('time.sleep') + def test_node_termination_scenario_node_still_exists(self, _mock_sleep): + """Test node termination scenario when node still exists""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.terminate_instances.return_value = None + self.mock_aws.wait_until_terminated.return_value = True + # Node still in list after timeout + self.kubecli.list_nodes.return_value = [node] + + with self.assertRaises(RuntimeError): + self.scenario.node_termination_scenario( + instance_kill_count=1, + node=node, + timeout=2, + poll_interval=15 + ) + + def test_node_termination_scenario_failure(self): + """Test node termination scenario with failure""" + node = 'ip-10-0-1-100.ec2.internal' + + self.mock_aws.get_instance_id.side_effect = Exception("AWS error") + + with self.assertRaises(RuntimeError): + self.scenario.node_termination_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status') + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_reboot_scenario_success(self, mock_wait_ready, mock_wait_unknown): + """Test node reboot scenario successfully""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.reboot_instances.return_value = None + + self.scenario.node_reboot_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + self.mock_aws.get_instance_id.assert_called_once_with(node) + self.mock_aws.reboot_instances.assert_called_once_with(instance_id) + mock_wait_unknown.assert_called_once() + mock_wait_ready.assert_called_once() + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1) + + 
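+    # Note: unlike start/stop/terminate, reboot exposes no cloud-side
+    # wait_until_* call for these tests to assert on, so the reboot tests
+    # check the kube-side waiters (wait_for_unknown_status, then
+    # wait_for_ready_status) instead, or their absence when
+    # node_action_kube_check is False.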
@patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status') + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_reboot_scenario_no_kube_check(self, mock_wait_ready, mock_wait_unknown): + """Test node reboot scenario without kube check""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + # Create scenario with node_action_kube_check=False + with patch('krkn.scenario_plugins.node_actions.aws_node_scenarios.AWS') as mock_aws_class: + mock_aws = MagicMock() + mock_aws_class.return_value = mock_aws + scenario = aws_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=False, + affected_nodes_status=AffectedNodeStatus() + ) + + mock_aws.get_instance_id.return_value = instance_id + mock_aws.reboot_instances.return_value = None + + scenario.node_reboot_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + # Should not call wait functions + mock_wait_unknown.assert_not_called() + mock_wait_ready.assert_not_called() + + def test_node_reboot_scenario_failure(self): + """Test node reboot scenario with failure""" + node = 'ip-10-0-1-100.ec2.internal' + + self.mock_aws.get_instance_id.side_effect = Exception("AWS error") + + with self.assertRaises(RuntimeError): + self.scenario.node_reboot_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + def test_node_reboot_scenario_multiple_kills(self): + """Test node reboot scenario with multiple kill counts""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + with patch('krkn.scenario_plugins.node_actions.aws_node_scenarios.AWS') as mock_aws_class: + mock_aws = MagicMock() + mock_aws_class.return_value = mock_aws + scenario = aws_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=False, + affected_nodes_status=AffectedNodeStatus() + ) + + mock_aws.get_instance_id.return_value = instance_id + mock_aws.reboot_instances.return_value = None + + scenario.node_reboot_scenario( + instance_kill_count=3, + node=node, + timeout=600 + ) + + self.assertEqual(mock_aws.reboot_instances.call_count, 3) + self.assertEqual(len(scenario.affected_nodes_status.affected_nodes), 3) + + def test_get_disk_attachment_info_success(self): + """Test getting disk attachment info successfully""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + volume_ids = ['vol-12345678'] + attachment_details = [ + { + 'VolumeId': 'vol-12345678', + 'Attachments': [{ + 'InstanceId': instance_id, + 'Device': '/dev/sdf' + }] + } + ] + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.get_volumes_ids.return_value = volume_ids + self.mock_aws.get_volume_attachment_details.return_value = attachment_details + + result = self.scenario.get_disk_attachment_info( + instance_kill_count=1, + node=node + ) + + self.assertEqual(result, attachment_details) + self.mock_aws.get_instance_id.assert_called_once_with(node) + self.mock_aws.get_volumes_ids.assert_called_once() + self.mock_aws.get_volume_attachment_details.assert_called_once_with(volume_ids) + + def test_get_disk_attachment_info_no_volumes(self): + """Test getting disk attachment info when no volumes exist""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.get_volumes_ids.return_value = [] + + result = self.scenario.get_disk_attachment_info( + instance_kill_count=1, + node=node + ) + + 
self.assertIsNone(result) + self.mock_aws.get_volume_attachment_details.assert_not_called() + + def test_get_disk_attachment_info_failure(self): + """Test getting disk attachment info with failure""" + node = 'ip-10-0-1-100.ec2.internal' + + self.mock_aws.get_instance_id.side_effect = Exception("AWS error") + + with self.assertRaises(RuntimeError): + self.scenario.get_disk_attachment_info( + instance_kill_count=1, + node=node + ) + + def test_disk_detach_scenario_success(self): + """Test disk detach scenario successfully""" + node = 'ip-10-0-1-100.ec2.internal' + instance_id = 'i-1234567890abcdef0' + volume_ids = ['vol-12345678', 'vol-87654321'] + + self.mock_aws.get_instance_id.return_value = instance_id + self.mock_aws.get_volumes_ids.return_value = volume_ids + self.mock_aws.detach_volumes.return_value = None + + self.scenario.disk_detach_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + self.mock_aws.get_instance_id.assert_called_once_with(node) + self.mock_aws.get_volumes_ids.assert_called_once() + self.mock_aws.detach_volumes.assert_called_once_with(volume_ids) + + def test_disk_detach_scenario_failure(self): + """Test disk detach scenario with failure""" + node = 'ip-10-0-1-100.ec2.internal' + + self.mock_aws.get_instance_id.side_effect = Exception("AWS error") + + with self.assertRaises(RuntimeError): + self.scenario.disk_detach_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + def test_disk_attach_scenario_success(self): + """Test disk attach scenario successfully""" + attachment_details = [ + { + 'VolumeId': 'vol-12345678', + 'Attachments': [{ + 'InstanceId': 'i-1234567890abcdef0', + 'Device': '/dev/sdf', + 'VolumeId': 'vol-12345678' + }] + }, + { + 'VolumeId': 'vol-87654321', + 'Attachments': [{ + 'InstanceId': 'i-1234567890abcdef0', + 'Device': '/dev/sdg', + 'VolumeId': 'vol-87654321' + }] + } + ] + + self.mock_aws.attach_volume.return_value = None + + self.scenario.disk_attach_scenario( + instance_kill_count=1, + attachment_details=attachment_details, + timeout=600 + ) + + self.assertEqual(self.mock_aws.attach_volume.call_count, 2) + + def test_disk_attach_scenario_multiple_kills(self): + """Test disk attach scenario with multiple kill counts""" + attachment_details = [ + { + 'VolumeId': 'vol-12345678', + 'Attachments': [{ + 'InstanceId': 'i-1234567890abcdef0', + 'Device': '/dev/sdf', + 'VolumeId': 'vol-12345678' + }] + } + ] + + self.mock_aws.attach_volume.return_value = None + + self.scenario.disk_attach_scenario( + instance_kill_count=3, + attachment_details=attachment_details, + timeout=600 + ) + + # Should call attach_volume 3 times (once per kill count) + self.assertEqual(self.mock_aws.attach_volume.call_count, 3) + + +if __name__ == "__main__": + unittest.main() From 4b3617bd8a031630d4183628254d81453a48e35e Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:39:16 -0500 Subject: [PATCH 03/11] adding gcp tests for node actions (#997) Assisted By: Claude Code Signed-off-by: Paige Patton --- tests/test_gcp_node_scenarios.py | 781 +++++++++++++++++++++++++++++++ 1 file changed, 781 insertions(+) create mode 100644 tests/test_gcp_node_scenarios.py diff --git a/tests/test_gcp_node_scenarios.py b/tests/test_gcp_node_scenarios.py new file mode 100644 index 00000000..5d4a9c6e --- /dev/null +++ b/tests/test_gcp_node_scenarios.py @@ -0,0 +1,781 @@ +#!/usr/bin/env python3 + +""" +Test suite for GCP node scenarios + +This test suite covers both the GCP class and 
gcp_node_scenarios class +using mocks to avoid actual GCP API calls. + +Usage: + python -m coverage run -a -m unittest tests/test_gcp_node_scenarios.py -v + +Assisted By: Claude Code +""" + +import unittest +import sys +from unittest.mock import MagicMock, patch + +# Mock external dependencies before any imports that use them +# Create proper nested mock structure for google modules +mock_google = MagicMock() +mock_google_auth = MagicMock() +mock_google_auth_transport = MagicMock() +mock_google_cloud = MagicMock() +mock_google_cloud_compute = MagicMock() + +sys.modules['google'] = mock_google +sys.modules['google.auth'] = mock_google_auth +sys.modules['google.auth.transport'] = mock_google_auth_transport +sys.modules['google.auth.transport.requests'] = MagicMock() +sys.modules['google.cloud'] = mock_google_cloud +sys.modules['google.cloud.compute_v1'] = mock_google_cloud_compute +sys.modules['paramiko'] = MagicMock() + +from krkn_lib.k8s import KrknKubernetes +from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus +from krkn.scenario_plugins.node_actions.gcp_node_scenarios import GCP, gcp_node_scenarios + + +class TestGCP(unittest.TestCase): + """Test cases for GCP class""" + + def setUp(self): + """Set up test fixtures""" + # Mock google.auth before creating GCP instance + self.auth_patcher = patch('google.auth.default') + self.compute_patcher = patch('google.cloud.compute_v1.InstancesClient') + + self.mock_auth = self.auth_patcher.start() + self.mock_compute_client = self.compute_patcher.start() + + # Configure auth mock to return credentials and project_id + self.mock_auth.return_value = (MagicMock(), 'test-project-123') + + # Create GCP instance with mocked dependencies + self.gcp = GCP() + + def tearDown(self): + """Clean up after tests""" + self.auth_patcher.stop() + self.compute_patcher.stop() + + def test_gcp_init_success(self): + """Test GCP class initialization success""" + self.assertEqual(self.gcp.project_id, 'test-project-123') + self.assertIsNotNone(self.gcp.instance_client) + + def test_gcp_init_failure(self): + """Test GCP class initialization failure""" + with patch('google.auth.default', side_effect=Exception("Auth error")): + with self.assertRaises(Exception): + GCP() + + def test_get_node_instance_success(self): + """Test getting node instance successfully""" + # Create mock instance + mock_instance = MagicMock() + mock_instance.name = 'gke-cluster-node-1' + + # Create mock response + mock_response = MagicMock() + mock_response.instances = [mock_instance] + + # Mock aggregated_list to return our mock data + self.gcp.instance_client.aggregated_list = MagicMock( + return_value=[('zones/us-central1-a', mock_response)] + ) + + result = self.gcp.get_node_instance('gke-cluster-node-1') + + self.assertEqual(result, mock_instance) + self.assertEqual(result.name, 'gke-cluster-node-1') + + def test_get_node_instance_partial_match(self): + """Test getting node instance with partial name match""" + mock_instance = MagicMock() + mock_instance.name = 'node-1' + + mock_response = MagicMock() + mock_response.instances = [mock_instance] + + self.gcp.instance_client.aggregated_list = MagicMock( + return_value=[('zones/us-central1-a', mock_response)] + ) + + # instance.name ('node-1') in node ('gke-cluster-node-1-abc') == True + result = self.gcp.get_node_instance('gke-cluster-node-1-abc') + + self.assertIsNotNone(result) + self.assertEqual(result.name, 'node-1') + + def test_get_node_instance_not_found(self): + """Test getting node instance when not found""" + mock_response = 
MagicMock() + mock_response.instances = None + + self.gcp.instance_client.aggregated_list = MagicMock( + return_value=[('zones/us-central1-a', mock_response)] + ) + + result = self.gcp.get_node_instance('non-existent-node') + + self.assertIsNone(result) + + def test_get_node_instance_failure(self): + """Test getting node instance with failure""" + self.gcp.instance_client.aggregated_list = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(Exception): + self.gcp.get_node_instance('node-1') + + def test_get_instance_name(self): + """Test getting instance name""" + mock_instance = MagicMock() + mock_instance.name = 'gke-cluster-node-1' + + result = self.gcp.get_instance_name(mock_instance) + + self.assertEqual(result, 'gke-cluster-node-1') + + def test_get_instance_name_none(self): + """Test getting instance name when name is None""" + mock_instance = MagicMock() + mock_instance.name = None + + result = self.gcp.get_instance_name(mock_instance) + + self.assertIsNone(result) + + def test_get_instance_zone(self): + """Test getting instance zone""" + mock_instance = MagicMock() + mock_instance.zone = 'https://www.googleapis.com/compute/v1/projects/test-project/zones/us-central1-a' + + result = self.gcp.get_instance_zone(mock_instance) + + self.assertEqual(result, 'us-central1-a') + + def test_get_instance_zone_none(self): + """Test getting instance zone when zone is None""" + mock_instance = MagicMock() + mock_instance.zone = None + + result = self.gcp.get_instance_zone(mock_instance) + + self.assertIsNone(result) + + def test_get_node_instance_zone(self): + """Test getting node instance zone""" + mock_instance = MagicMock() + mock_instance.name = 'gke-cluster-node-1' + mock_instance.zone = 'https://www.googleapis.com/compute/v1/projects/test-project/zones/us-west1-b' + + # Patch get_node_instance to return our mock directly + with patch.object(self.gcp, 'get_node_instance', return_value=mock_instance): + result = self.gcp.get_node_instance_zone('node-1') + self.assertEqual(result, 'us-west1-b') + + def test_get_node_instance_name(self): + """Test getting node instance name""" + mock_instance = MagicMock() + mock_instance.name = 'gke-cluster-node-1' + + # Patch get_node_instance to return our mock directly + with patch.object(self.gcp, 'get_node_instance', return_value=mock_instance): + result = self.gcp.get_node_instance_name('node-1') + self.assertEqual(result, 'gke-cluster-node-1') + + def test_get_instance_id(self): + """Test getting instance ID (alias for get_node_instance_name)""" + # Patch get_node_instance_name since get_instance_id just calls it + with patch.object(self.gcp, 'get_node_instance_name', return_value='gke-cluster-node-1'): + result = self.gcp.get_instance_id('node-1') + self.assertEqual(result, 'gke-cluster-node-1') + + def test_start_instances_success(self): + """Test starting instances successfully""" + instance_id = 'gke-cluster-node-1' + + # Mock get_node_instance_zone + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.start = MagicMock() + + self.gcp.start_instances(instance_id) + + self.gcp.instance_client.start.assert_called_once() + + def test_start_instances_failure(self): + """Test starting instances with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.start = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(RuntimeError): + 
self.gcp.start_instances(instance_id) + + def test_stop_instances_success(self): + """Test stopping instances successfully""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.stop = MagicMock() + + self.gcp.stop_instances(instance_id) + + self.gcp.instance_client.stop.assert_called_once() + + def test_stop_instances_failure(self): + """Test stopping instances with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.stop = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(RuntimeError): + self.gcp.stop_instances(instance_id) + + def test_suspend_instances_success(self): + """Test suspending instances successfully""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.suspend = MagicMock() + + self.gcp.suspend_instances(instance_id) + + self.gcp.instance_client.suspend.assert_called_once() + + def test_suspend_instances_failure(self): + """Test suspending instances with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.suspend = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(RuntimeError): + self.gcp.suspend_instances(instance_id) + + def test_terminate_instances_success(self): + """Test terminating instances successfully""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.delete = MagicMock() + + self.gcp.terminate_instances(instance_id) + + self.gcp.instance_client.delete.assert_called_once() + + def test_terminate_instances_failure(self): + """Test terminating instances with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.delete = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(RuntimeError): + self.gcp.terminate_instances(instance_id) + + def test_reboot_instances_success(self): + """Test rebooting instances successfully""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.reset = MagicMock() + + self.gcp.reboot_instances(instance_id) + + self.gcp.instance_client.reset.assert_called_once() + + def test_reboot_instances_failure(self): + """Test rebooting instances with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.reset = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(RuntimeError): + self.gcp.reboot_instances(instance_id) + + @patch('time.sleep') + def test_get_instance_status_success(self, _mock_sleep): + """Test getting instance status successfully""" + instance_id = 'gke-cluster-node-1' + + mock_instance = MagicMock() + mock_instance.status = 'RUNNING' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.get = MagicMock(return_value=mock_instance) + + result = self.gcp.get_instance_status(instance_id, 'RUNNING', 60) + + self.assertTrue(result) + 
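+    # time.sleep is patched in these status tests so the polling loop inside
+    # get_instance_status runs without real delays; otherwise the timeout
+    # case below would block for the full timeout value.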
+ @patch('time.sleep') + def test_get_instance_status_timeout(self, _mock_sleep): + """Test getting instance status with timeout""" + instance_id = 'gke-cluster-node-1' + + mock_instance = MagicMock() + mock_instance.status = 'PROVISIONING' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.get = MagicMock(return_value=mock_instance) + + result = self.gcp.get_instance_status(instance_id, 'RUNNING', 5) + + self.assertFalse(result) + + @patch('time.sleep') + def test_get_instance_status_failure(self, _mock_sleep): + """Test getting instance status with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_node_instance_zone', return_value='us-central1-a'): + self.gcp.instance_client.get = MagicMock( + side_effect=Exception("GCP error") + ) + + with self.assertRaises(RuntimeError): + self.gcp.get_instance_status(instance_id, 'RUNNING', 60) + + def test_wait_until_suspended_success(self): + """Test waiting until instance is suspended""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_instance_status', return_value=True) as mock_get_status: + result = self.gcp.wait_until_suspended(instance_id, 60) + + self.assertTrue(result) + mock_get_status.assert_called_once_with(instance_id, 'SUSPENDED', 60) + + def test_wait_until_suspended_failure(self): + """Test waiting until instance is suspended with failure""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_instance_status', return_value=False): + result = self.gcp.wait_until_suspended(instance_id, 60) + + self.assertFalse(result) + + def test_wait_until_running_success(self): + """Test waiting until instance is running successfully""" + instance_id = 'gke-cluster-node-1' + affected_node = MagicMock(spec=AffectedNode) + + with patch('time.time', side_effect=[100, 110]): + with patch.object(self.gcp, 'get_instance_status', return_value=True): + result = self.gcp.wait_until_running(instance_id, 60, affected_node) + + self.assertTrue(result) + affected_node.set_affected_node_status.assert_called_once_with('running', 10) + + def test_wait_until_running_without_affected_node(self): + """Test waiting until running without affected node tracking""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_instance_status', return_value=True): + result = self.gcp.wait_until_running(instance_id, 60, None) + + self.assertTrue(result) + + def test_wait_until_stopped_success(self): + """Test waiting until instance is stopped successfully""" + instance_id = 'gke-cluster-node-1' + affected_node = MagicMock(spec=AffectedNode) + + with patch('time.time', side_effect=[100, 115]): + with patch.object(self.gcp, 'get_instance_status', return_value=True): + result = self.gcp.wait_until_stopped(instance_id, 60, affected_node) + + self.assertTrue(result) + affected_node.set_affected_node_status.assert_called_once_with('stopped', 15) + + def test_wait_until_stopped_without_affected_node(self): + """Test waiting until stopped without affected node tracking""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_instance_status', return_value=True): + result = self.gcp.wait_until_stopped(instance_id, 60, None) + + self.assertTrue(result) + + def test_wait_until_terminated_success(self): + """Test waiting until instance is terminated successfully""" + instance_id = 'gke-cluster-node-1' + affected_node = MagicMock(spec=AffectedNode) + + with patch('time.time', side_effect=[100, 120]): + with 
patch.object(self.gcp, 'get_instance_status', return_value=True): + result = self.gcp.wait_until_terminated(instance_id, 60, affected_node) + + self.assertTrue(result) + affected_node.set_affected_node_status.assert_called_once_with('terminated', 20) + + def test_wait_until_terminated_without_affected_node(self): + """Test waiting until terminated without affected node tracking""" + instance_id = 'gke-cluster-node-1' + + with patch.object(self.gcp, 'get_instance_status', return_value=True): + result = self.gcp.wait_until_terminated(instance_id, 60, None) + + self.assertTrue(result) + + +class TestGCPNodeScenarios(unittest.TestCase): + """Test cases for gcp_node_scenarios class""" + + def setUp(self): + """Set up test fixtures""" + self.kubecli = MagicMock(spec=KrknKubernetes) + self.affected_nodes_status = AffectedNodeStatus() + + # Mock the GCP class + with patch('krkn.scenario_plugins.node_actions.gcp_node_scenarios.GCP') as mock_gcp_class: + self.mock_gcp = MagicMock() + mock_gcp_class.return_value = self.mock_gcp + self.scenario = gcp_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=True, + affected_nodes_status=self.affected_nodes_status + ) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_start_scenario_success(self, mock_wait_ready): + """Test node start scenario successfully""" + node = 'gke-cluster-node-1' + instance_id = 'gke-cluster-node-1' + + mock_instance = MagicMock() + mock_instance.name = instance_id + + self.mock_gcp.get_node_instance.return_value = mock_instance + self.mock_gcp.get_instance_name.return_value = instance_id + self.mock_gcp.start_instances.return_value = None + self.mock_gcp.wait_until_running.return_value = True + + self.scenario.node_start_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + self.mock_gcp.get_node_instance.assert_called_once_with(node) + self.mock_gcp.get_instance_name.assert_called_once_with(mock_instance) + self.mock_gcp.start_instances.assert_called_once_with(instance_id) + self.mock_gcp.wait_until_running.assert_called_once() + mock_wait_ready.assert_called_once() + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1) + self.assertEqual(self.affected_nodes_status.affected_nodes[0].node_name, node) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_start_scenario_no_kube_check(self, mock_wait_ready): + """Test node start scenario without kube check""" + node = 'gke-cluster-node-1' + instance_id = 'gke-cluster-node-1' + + # Create scenario with node_action_kube_check=False + with patch('krkn.scenario_plugins.node_actions.gcp_node_scenarios.GCP') as mock_gcp_class: + mock_gcp = MagicMock() + mock_gcp_class.return_value = mock_gcp + scenario = gcp_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=False, + affected_nodes_status=AffectedNodeStatus() + ) + + mock_instance = MagicMock() + mock_instance.name = instance_id + + mock_gcp.get_node_instance.return_value = mock_instance + mock_gcp.get_instance_name.return_value = instance_id + mock_gcp.start_instances.return_value = None + mock_gcp.wait_until_running.return_value = True + + scenario.node_start_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + # Should not call wait_for_ready_status + mock_wait_ready.assert_not_called() + + def test_node_start_scenario_failure(self): + """Test node start scenario with failure""" + node = 'gke-cluster-node-1' + + 
self.mock_gcp.get_node_instance.side_effect = Exception("GCP error") + + with self.assertRaises(RuntimeError): + self.scenario.node_start_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status') + def test_node_stop_scenario_success(self, mock_wait_unknown): + """Test node stop scenario successfully""" + node = 'gke-cluster-node-1' + instance_id = 'gke-cluster-node-1' + + mock_instance = MagicMock() + mock_instance.name = instance_id + + self.mock_gcp.get_node_instance.return_value = mock_instance + self.mock_gcp.get_instance_name.return_value = instance_id + self.mock_gcp.stop_instances.return_value = None + self.mock_gcp.wait_until_stopped.return_value = True + + self.scenario.node_stop_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + self.mock_gcp.get_node_instance.assert_called_once_with(node) + self.mock_gcp.get_instance_name.assert_called_once_with(mock_instance) + self.mock_gcp.stop_instances.assert_called_once_with(instance_id) + self.mock_gcp.wait_until_stopped.assert_called_once() + mock_wait_unknown.assert_called_once() + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status') + def test_node_stop_scenario_no_kube_check(self, mock_wait_unknown): + """Test node stop scenario without kube check""" + node = 'gke-cluster-node-1' + instance_id = 'gke-cluster-node-1' + + # Create scenario with node_action_kube_check=False + with patch('krkn.scenario_plugins.node_actions.gcp_node_scenarios.GCP') as mock_gcp_class: + mock_gcp = MagicMock() + mock_gcp_class.return_value = mock_gcp + scenario = gcp_node_scenarios( + kubecli=self.kubecli, + node_action_kube_check=False, + affected_nodes_status=AffectedNodeStatus() + ) + + mock_instance = MagicMock() + mock_instance.name = instance_id + + mock_gcp.get_node_instance.return_value = mock_instance + mock_gcp.get_instance_name.return_value = instance_id + mock_gcp.stop_instances.return_value = None + mock_gcp.wait_until_stopped.return_value = True + + scenario.node_stop_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + # Should not call wait_for_unknown_status + mock_wait_unknown.assert_not_called() + + def test_node_stop_scenario_failure(self): + """Test node stop scenario with failure""" + node = 'gke-cluster-node-1' + + self.mock_gcp.get_node_instance.side_effect = Exception("GCP error") + + with self.assertRaises(RuntimeError): + self.scenario.node_stop_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + @patch('time.sleep') + def test_node_termination_scenario_success(self, _mock_sleep): + """Test node termination scenario successfully""" + node = 'gke-cluster-node-1' + instance_id = 'gke-cluster-node-1' + + mock_instance = MagicMock() + mock_instance.name = instance_id + + self.mock_gcp.get_node_instance.return_value = mock_instance + self.mock_gcp.get_instance_name.return_value = instance_id + self.mock_gcp.terminate_instances.return_value = None + self.mock_gcp.wait_until_terminated.return_value = True + self.kubecli.list_nodes.return_value = [] + + self.scenario.node_termination_scenario( + instance_kill_count=1, + node=node, + timeout=600, + poll_interval=15 + ) + + self.mock_gcp.get_node_instance.assert_called_once_with(node) + 
self.mock_gcp.get_instance_name.assert_called_once_with(mock_instance)
+        self.mock_gcp.terminate_instances.assert_called_once_with(instance_id)
+        self.mock_gcp.wait_until_terminated.assert_called_once()
+        self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1)
+
+    @patch('time.sleep')
+    def test_node_termination_scenario_node_still_exists(self, _mock_sleep):
+        """Test node termination scenario when node still exists"""
+        node = 'gke-cluster-node-1'
+        instance_id = 'gke-cluster-node-1'
+
+        mock_instance = MagicMock()
+        mock_instance.name = instance_id
+
+        self.mock_gcp.get_node_instance.return_value = mock_instance
+        self.mock_gcp.get_instance_name.return_value = instance_id
+        self.mock_gcp.terminate_instances.return_value = None
+        self.mock_gcp.wait_until_terminated.return_value = True
+        # Node still in list after timeout
+        self.kubecli.list_nodes.return_value = [node]
+
+        with self.assertRaises(RuntimeError):
+            self.scenario.node_termination_scenario(
+                instance_kill_count=1,
+                node=node,
+                timeout=2,
+                poll_interval=15
+            )
+
+    def test_node_termination_scenario_failure(self):
+        """Test node termination scenario with failure"""
+        node = 'gke-cluster-node-1'
+
+        self.mock_gcp.get_node_instance.side_effect = Exception("GCP error")
+
+        with self.assertRaises(RuntimeError):
+            self.scenario.node_termination_scenario(
+                instance_kill_count=1,
+                node=node,
+                timeout=600,
+                poll_interval=15
+            )
+
+    @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status')
+    @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status')
+    def test_node_reboot_scenario_success(self, mock_wait_ready, mock_wait_unknown):
+        """Test node reboot scenario successfully"""
+        node = 'gke-cluster-node-1'
+        instance_id = 'gke-cluster-node-1'
+
+        mock_instance = MagicMock()
+        mock_instance.name = instance_id
+
+        self.mock_gcp.get_node_instance.return_value = mock_instance
+        self.mock_gcp.get_instance_name.return_value = instance_id
+        self.mock_gcp.reboot_instances.return_value = None
+        self.mock_gcp.wait_until_running.return_value = True
+
+        self.scenario.node_reboot_scenario(
+            instance_kill_count=1,
+            node=node,
+            timeout=600
+        )
+
+        self.mock_gcp.get_node_instance.assert_called_once_with(node)
+        self.mock_gcp.get_instance_name.assert_called_once_with(mock_instance)
+        self.mock_gcp.reboot_instances.assert_called_once_with(instance_id)
+        self.mock_gcp.wait_until_running.assert_called_once()
+        # Each wait helper should be called exactly once per reboot
+        self.assertEqual(mock_wait_unknown.call_count, 1)
+        self.assertEqual(mock_wait_ready.call_count, 1)
+        self.assertEqual(len(self.affected_nodes_status.affected_nodes), 1)
+
+    @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_unknown_status')
+    @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status')
+    def test_node_reboot_scenario_no_kube_check(self, mock_wait_ready, mock_wait_unknown):
+        """Test node reboot scenario without kube check"""
+        node = 'gke-cluster-node-1'
+        instance_id = 'gke-cluster-node-1'
+
+        # Create scenario with node_action_kube_check=False
+        with patch('krkn.scenario_plugins.node_actions.gcp_node_scenarios.GCP') as mock_gcp_class:
+            mock_gcp = MagicMock()
+            mock_gcp_class.return_value = mock_gcp
+            scenario = gcp_node_scenarios(
+                kubecli=self.kubecli,
+                node_action_kube_check=False,
+                affected_nodes_status=AffectedNodeStatus()
+            )
+
+            mock_instance = MagicMock()
+            mock_instance.name = instance_id
+
+            mock_gcp.get_node_instance.return_value =
mock_instance + mock_gcp.get_instance_name.return_value = instance_id + mock_gcp.reboot_instances.return_value = None + mock_gcp.wait_until_running.return_value = True + + scenario.node_reboot_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + # Should not call wait functions + mock_wait_unknown.assert_not_called() + mock_wait_ready.assert_not_called() + + def test_node_reboot_scenario_failure(self): + """Test node reboot scenario with failure""" + node = 'gke-cluster-node-1' + + self.mock_gcp.get_node_instance.side_effect = Exception("GCP error") + + with self.assertRaises(RuntimeError): + self.scenario.node_reboot_scenario( + instance_kill_count=1, + node=node, + timeout=600 + ) + + @patch('krkn.scenario_plugins.node_actions.common_node_functions.wait_for_ready_status') + def test_node_start_scenario_multiple_kills(self, mock_wait_ready): + """Test node start scenario with multiple kill counts""" + node = 'gke-cluster-node-1' + instance_id = 'gke-cluster-node-1' + + mock_instance = MagicMock() + mock_instance.name = instance_id + + self.mock_gcp.get_node_instance.return_value = mock_instance + self.mock_gcp.get_instance_name.return_value = instance_id + self.mock_gcp.start_instances.return_value = None + self.mock_gcp.wait_until_running.return_value = True + + self.scenario.node_start_scenario( + instance_kill_count=3, + node=node, + timeout=600, + poll_interval=15 + ) + + self.assertEqual(self.mock_gcp.start_instances.call_count, 3) + self.assertEqual(len(self.affected_nodes_status.affected_nodes), 3) + + +if __name__ == "__main__": + unittest.main() From 42d18a8e047add12a1ba256dd749884ed6439afd Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Mon, 15 Dec 2025 10:04:35 -0500 Subject: [PATCH 04/11] adding fail scenario if unrecovered kubevirt vm killing (#994) Signed-off-by: Paige Patton --- .../kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py | 3 ++- tests/test_kubevirt_vm_outage.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/krkn/scenario_plugins/kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py b/krkn/scenario_plugins/kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py index 58a31364..f9a289f8 100644 --- a/krkn/scenario_plugins/kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py +++ b/krkn/scenario_plugins/kubevirt_vm_outage/kubevirt_vm_outage_scenario_plugin.py @@ -55,7 +55,8 @@ class KubevirtVmOutageScenarioPlugin(AbstractScenarioPlugin): pods_status.merge(single_pods_status) scenario_telemetry.affected_pods = pods_status - + if len(scenario_telemetry.affected_pods.unrecovered) > 0: + return 1 return 0 except Exception as e: logging.error(f"KubeVirt VM Outage scenario failed: {e}") diff --git a/tests/test_kubevirt_vm_outage.py b/tests/test_kubevirt_vm_outage.py index cded8b51..4d884a14 100644 --- a/tests/test_kubevirt_vm_outage.py +++ b/tests/test_kubevirt_vm_outage.py @@ -147,7 +147,7 @@ class TestKubevirtVmOutageScenarioPlugin(unittest.TestCase): with patch("builtins.open", unittest.mock.mock_open(read_data=yaml.dump(self.config))): result = self.plugin.run("test-uuid", self.scenario_file, {}, self.telemetry, self.scenario_telemetry) - self.assertEqual(result, 0) + self.assertEqual(result, 1) mock_delete.assert_called_once_with("test-vm", "default", False) mock_wait.assert_not_called() From ba3fdea40387ddcf2bbacc4793877c364c9a0fc4 Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Mon, 15 Dec 2025 11:46:48 -0500 
Subject: [PATCH 05/11] adding pvc tests (#1000)

Signed-off-by: Paige Patton
---
 .github/workflows/tests.yml              |  5 +++-
 CI/legacy/scenarios/volume_scenario.yaml | 38 +++++++++++++++++++++---
 CI/tests/test_pvc.sh                     | 18 +++++++++++
 scenarios/kind/pvc_scenario.yaml         |  7 +++++
 4 files changed, 63 insertions(+), 5 deletions(-)
 create mode 100755 CI/tests/test_pvc.sh
 create mode 100644 scenarios/kind/pvc_scenario.yaml

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d774b666..c943deb0 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -67,6 +67,8 @@ jobs:
          kubectl wait --for=condition=ready pod -l scenario=time-skew --timeout=300s
          kubectl apply -f CI/templates/service_hijacking.yaml
          kubectl wait --for=condition=ready pod -l "app.kubernetes.io/name=proxy" --timeout=300s
+         kubectl apply -f CI/legacy/scenarios/volume_scenario.yaml
+         kubectl wait --for=condition=ready pod kraken-test-pod -n kraken --timeout=300s
      - name: Get Kind nodes
        run: |
          kubectl get nodes --show-labels=true
@@ -99,6 +101,7 @@ jobs:
          echo "test_io_hog" >> ./CI/tests/functional_tests
          echo "test_pod_network_filter" >> ./CI/tests/functional_tests
          echo "test_pod_server" >> ./CI/tests/functional_tests
+         echo "test_pvc" >> ./CI/tests/functional_tests

      # Push on main only steps + all other functional to collect coverage
      # for the badge
@@ -135,7 +138,7 @@ jobs:
          echo "test_io_hog" >> ./CI/tests/functional_tests
          echo "test_pod_network_filter" >> ./CI/tests/functional_tests
          echo "test_pod_server" >> ./CI/tests/functional_tests
-
+         echo "test_pvc" >> ./CI/tests/functional_tests
      # Final common steps
      - name: Run Functional tests
        env:
diff --git a/CI/legacy/scenarios/volume_scenario.yaml b/CI/legacy/scenarios/volume_scenario.yaml
index 2ea0b476..3b261bcb 100644
--- a/CI/legacy/scenarios/volume_scenario.yaml
+++ b/CI/legacy/scenarios/volume_scenario.yaml
@@ -45,6 +45,31 @@ metadata:
   name: kraken-test-pod
   namespace: kraken
 spec:
+  securityContext:
+    fsGroup: 1001
+  # initContainer to fix permissions on the mounted volume
+  initContainers:
+  - name: fix-permissions
+    image: 'quay.io/centos7/httpd-24-centos7:centos7'
+    command:
+    - sh
+    - -c
+    - |
+      echo "Setting up permissions for /home/kraken..."
+      # Create the directory if it doesn't exist
+      mkdir -p /home/kraken
+      # Set ownership to user 1001 and group 1001
+      chown -R 1001:1001 /home/kraken
+      # Set permissions to allow read/write
+      chmod -R 755 /home/kraken
+      rm -rf /home/kraken/*
+      echo "Permissions fixed. 
Current state:" + ls -la /home/kraken + volumeMounts: + - mountPath: "/home/kraken" + name: kraken-test-pv + securityContext: + runAsUser: 0 # Run as root to fix permissions volumes: - name: kraken-test-pv persistentVolumeClaim: @@ -52,8 +77,13 @@ spec: containers: - name: kraken-test-container image: 'quay.io/centos7/httpd-24-centos7:centos7' - volumeMounts: - - mountPath: "/home/krake-dir/" - name: kraken-test-pv securityContext: - privileged: true + runAsUser: 1001 + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: + - mountPath: "/home/kraken" + name: kraken-test-pv diff --git a/CI/tests/test_pvc.sh b/CI/tests/test_pvc.sh new file mode 100755 index 00000000..c115716d --- /dev/null +++ b/CI/tests/test_pvc.sh @@ -0,0 +1,18 @@ +set -xeEo pipefail + +source CI/tests/common.sh + +trap error ERR +trap finish EXIT + +function functional_test_pvc_fill { + export scenario_type="pvc_scenarios" + export scenario_file="scenarios/kind/pvc_scenario.yaml" + export post_config="" + envsubst < CI/config/common_test_config.yaml > CI/config/pvc_config.yaml + cat CI/config/pvc_config.yaml + python3 -m coverage run -a run_kraken.py -c CI/config/pvc_config.yaml --debug True + echo "PVC Fill scenario test: Success" +} + +functional_test_pvc_fill diff --git a/scenarios/kind/pvc_scenario.yaml b/scenarios/kind/pvc_scenario.yaml new file mode 100644 index 00000000..9385ced9 --- /dev/null +++ b/scenarios/kind/pvc_scenario.yaml @@ -0,0 +1,7 @@ +pvc_scenario: + pvc_name: kraken-test-pvc # Name of the target PVC + pod_name: kraken-test-pod # Name of the pod where the PVC is mounted, it will be ignored if the pvc_name is defined + namespace: kraken # Namespace where the PVC is + fill_percentage: 38 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99 + duration: 10 # Duration in seconds for the fault + block_size: 102400 # used only by dd if fallocate not present in the container From f2ba8b85af6d4504ded60bbbf9b75d1b23f46817 Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Mon, 15 Dec 2025 11:52:30 -0500 Subject: [PATCH 06/11] adding podman support in docker configuration (#999) Signed-off-by: Paige Patton --- .github/workflows/tests.yml | 2 + CI/tests/test_node.sh | 18 +++ .../node_actions/docker_node_scenarios.py | 129 +++++++++++++++++- requirements.txt | 5 +- scenarios/kind/node_scenarios_example.yml | 10 +- 5 files changed, 157 insertions(+), 7 deletions(-) create mode 100755 CI/tests/test_node.sh diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c943deb0..a8498bb1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -101,6 +101,7 @@ jobs: echo "test_io_hog" >> ./CI/tests/functional_tests echo "test_pod_network_filter" >> ./CI/tests/functional_tests echo "test_pod_server" >> ./CI/tests/functional_tests + echo "test_node" >> ./CI/tests/functional_tests echo "test_pvc" >> ./CI/tests/functional_tests # Push on main only steps + all other functional to collect coverage @@ -138,6 +139,7 @@ jobs: echo "test_io_hog" >> ./CI/tests/functional_tests echo "test_pod_network_filter" >> ./CI/tests/functional_tests echo "test_pod_server" >> ./CI/tests/functional_tests + echo "test_node" >> ./CI/tests/functional_tests echo "test_pvc" >> ./CI/tests/functional_tests # Final common steps - name: Run Functional tests diff --git a/CI/tests/test_node.sh b/CI/tests/test_node.sh new file mode 100755 index 
00000000..b0057ed0
--- /dev/null
+++ b/CI/tests/test_node.sh
@@ -0,0 +1,18 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+function functional_test_node_stop_start {
+    export scenario_type="node_scenarios"
+    export scenario_file="scenarios/kind/node_scenarios_example.yml"
+    export post_config=""
+    envsubst < CI/config/common_test_config.yaml > CI/config/node_config.yaml
+    cat CI/config/node_config.yaml
+    python3 -m coverage run -a run_kraken.py -c CI/config/node_config.yaml
+    echo "Node Stop/Start scenario test: Success"
+}
+
+functional_test_node_stop_start
diff --git a/krkn/scenario_plugins/node_actions/docker_node_scenarios.py b/krkn/scenario_plugins/node_actions/docker_node_scenarios.py
index 04ad20fc..37dd0a0e 100644
--- a/krkn/scenario_plugins/node_actions/docker_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/docker_node_scenarios.py
@@ -2,46 +2,173 @@ import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
 from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
     abstract_node_scenarios,
 )
+import os
+import platform
 import logging
 import docker
 from krkn_lib.k8s import KrknKubernetes
 from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus


 class Docker:
+    """
+    Container runtime client wrapper supporting both Docker and Podman.
+
+    This class automatically detects and connects to either Docker or Podman
+    container runtimes using the Docker-compatible API. It tries multiple
+    connection methods in order of preference:
+
+    1. Docker Unix socket (unix:///var/run/docker.sock)
+    2. Platform-specific Podman sockets:
+       - macOS: ~/.local/share/containers/podman/machine/podman.sock
+       - Linux rootful: unix:///run/podman/podman.sock
+       - Linux rootless: unix:///run/user/<uid>/podman/podman.sock
+    3. Environment variables (DOCKER_HOST or CONTAINER_HOST)
+
+    The runtime type (docker/podman) is auto-detected and logged for debugging.
+    Supports Kind clusters running on Podman.
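+
+    A minimal usage sketch (illustrative only; assumes a Kind node container
+    named "kind-worker" exists on the local runtime):
+
+        runtime = Docker()
+        print(runtime.runtime)            # "docker" or "podman"
+        runtime.reboot_instances("kind-worker")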
+
+    Assisted By: Claude Code
+    """

     def __init__(self):
-        self.client = docker.from_env()
+        self.client = None
+        self.runtime = 'unknown'
+
+        # Try multiple connection methods in order of preference
+        # Supports both Docker and Podman
+        connection_methods = [
+            ('unix:///var/run/docker.sock', 'Docker Unix socket'),
+        ]
+
+        # Add platform-specific Podman sockets
+        if platform.system() == 'Darwin':  # macOS
+            # On macOS, Podman uses podman-machine with socket typically at:
+            # ~/.local/share/containers/podman/machine/podman.sock
+            # This is often symlinked to /var/run/docker.sock
+            podman_machine_sock = os.path.expanduser('~/.local/share/containers/podman/machine/podman.sock')
+            if os.path.exists(podman_machine_sock):
+                connection_methods.append((f'unix://{podman_machine_sock}', 'Podman machine socket (macOS)'))
+        else:  # Linux
+            connection_methods.extend([
+                ('unix:///run/podman/podman.sock', 'Podman Unix socket (rootful)'),
+                ('unix:///run/user/{uid}/podman/podman.sock', 'Podman Unix socket (rootless)'),
+            ])
+
+        # Always try from_env as last resort
+        connection_methods.append(('from_env', 'Environment variables (DOCKER_HOST/CONTAINER_HOST)'))
+
+        for method, description in connection_methods:
+            try:
+                # Handle rootless Podman socket path with {uid} placeholder
+                if '{uid}' in method:
+                    uid = os.getuid()
+                    method = method.format(uid=uid)
+
+                if method == 'from_env':
+                    logging.info(f'Attempting to connect using {description}')
+                    self.client = docker.from_env()
+                else:
+                    logging.info(f'Attempting to connect using {description}: {method}')
+                    self.client = docker.DockerClient(base_url=method)
+
+                # Test the connection
+                self.client.ping()
+
+                # Detect runtime type
+                try:
+                    version_info = self.client.version()
+                    version_str = version_info.get('Version', '')
+                    if 'podman' in version_str.lower():
+                        self.runtime = 'podman'
+                    else:
+                        self.runtime = 'docker'
+                    logging.debug(f'Runtime version info: {version_str}')
+                except Exception as version_err:
+                    logging.warning(f'Could not detect runtime version: {version_err}')
+                    self.runtime = 'unknown'
+
+                logging.info(f'Successfully connected to {self.runtime} using {description}')
+
+                # Log available containers for debugging
+                try:
+                    containers = self.client.containers.list(all=True)
+                    logging.info(f'Found {len(containers)} total containers')
+                    for container in containers[:5]:  # Log first 5
+                        logging.debug(f'  Container: {container.name} ({container.status})')
+                except Exception as list_err:
+                    logging.warning(f'Could not list containers: {list_err}')
+
+                break
+
+            except Exception as e:
+                logging.warning(f'Failed to connect using {description}: {e}')
+                continue
+
+        if self.client is None:
+            error_msg = 'Failed to initialize container runtime client (Docker/Podman) with any connection method'
+            logging.error(error_msg)
+            logging.error('Attempted connection methods:')
+            for method, desc in connection_methods:
+                logging.error(f'  - {desc}: {method}')
+            raise RuntimeError(error_msg)
+
+        logging.info(f'Container runtime client initialized successfully: {self.runtime}')

     def get_container_id(self, node_name):
+        """Get the container ID for a given node name."""
         container = self.client.containers.get(node_name)
+        logging.info(f'Found {self.runtime} container for node {node_name}: {container.id}')
         return container.id

     # Start the node instance
     def start_instances(self, node_name):
+        """Start a container instance (works with both Docker and Podman)."""
+        logging.info(f'Starting {self.runtime} 
container for node: {node_name}') container = self.client.containers.get(node_name) container.start() + logging.info(f'Container {container.id} started successfully') # Stop the node instance def stop_instances(self, node_name): + """Stop a container instance (works with both Docker and Podman).""" + logging.info(f'Stopping {self.runtime} container for node: {node_name}') container = self.client.containers.get(node_name) container.stop() + logging.info(f'Container {container.id} stopped successfully') # Reboot the node instance def reboot_instances(self, node_name): + """Restart a container instance (works with both Docker and Podman).""" + logging.info(f'Restarting {self.runtime} container for node: {node_name}') container = self.client.containers.get(node_name) container.restart() + logging.info(f'Container {container.id} restarted successfully') # Terminate the node instance def terminate_instances(self, node_name): + """Stop and remove a container instance (works with both Docker and Podman).""" + logging.info(f'Terminating {self.runtime} container for node: {node_name}') container = self.client.containers.get(node_name) container.stop() container.remove() + logging.info(f'Container {container.id} terminated and removed successfully') class docker_node_scenarios(abstract_node_scenarios): + """ + Node chaos scenarios for containerized Kubernetes nodes. + + Supports both Docker and Podman container runtimes. This class provides + methods to inject chaos into Kubernetes nodes running as containers + (e.g., Kind clusters, Podman-based clusters). + """ def __init__(self, kubecli: KrknKubernetes, node_action_kube_check: bool, affected_nodes_status: AffectedNodeStatus): + logging.info('Initializing docker_node_scenarios (supports Docker and Podman)') super().__init__(kubecli, node_action_kube_check, affected_nodes_status) self.docker = Docker() self.node_action_kube_check = node_action_kube_check + logging.info(f'Node scenarios initialized successfully using {self.docker.runtime} runtime') # Node scenario to start the node def node_start_scenario(self, instance_kill_count, node, timeout, poll_interval): diff --git a/requirements.txt b/requirements.txt index 60653ff5..2b0f577f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ azure-mgmt-network==27.0.0 itsdangerous==2.0.1 coverage==7.6.12 datetime==5.4 -docker==7.0.0 +docker>=6.0,<7.0 # docker 7.0+ has breaking changes with Unix sockets gitpython==3.1.41 google-auth==2.37.0 google-cloud-compute==1.22.0 @@ -28,7 +28,8 @@ pyfiglet==1.0.2 pytest==8.0.0 python-ipmi==0.5.4 python-openstackclient==6.5.0 -requests==2.32.4 +requests<2.32 # requests 2.32+ breaks Unix socket support (http+docker scheme) +requests-unixsocket>=0.4.0 # Required for Docker Unix socket support service_identity==24.1.0 PyYAML==6.0.1 setuptools==78.1.1 diff --git a/scenarios/kind/node_scenarios_example.yml b/scenarios/kind/node_scenarios_example.yml index e955795d..07f2b255 100644 --- a/scenarios/kind/node_scenarios_example.yml +++ b/scenarios/kind/node_scenarios_example.yml @@ -1,16 +1,18 @@ node_scenarios: - actions: # node chaos scenarios to be injected - node_stop_start_scenario - node_name: kind-worker # node on which scenario has to be injected; can set multiple names separated by comma - # label_selector: node-role.kubernetes.io/worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection + # node_name: kind-control-plane # node on which scenario has to be injected; can set multiple names 
separated by comma + label_selector: kubernetes.io/hostname=kind-worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection instance_count: 1 # Number of nodes to perform action/select that match the label selector runs: 1 # number of times to inject each scenario under actions (will perform on same node each time) timeout: 120 # duration to wait for completion of node scenario injection cloud_type: docker # cloud type on which Kubernetes/OpenShift runs + duration: 10 - actions: - node_reboot_scenario - node_name: kind-worker - # label_selector: node-role.kubernetes.io/infra + node_name: kind-control-plane + # label_selector: kubernetes.io/hostname=kind-worker instance_count: 1 timeout: 120 cloud_type: docker + kube_check: false From c3f6b1a7ff21a7835fcd381c4a7dd37f672d483c Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Tue, 16 Dec 2025 10:27:24 -0500 Subject: [PATCH 07/11] updating return code (#1001) Signed-off-by: Paige Patton --- run_kraken.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/run_kraken.py b/run_kraken.py index 5a86de33..b2a485e3 100644 --- a/run_kraken.py +++ b/run_kraken.py @@ -141,7 +141,7 @@ def main(options, command: Optional[str]) -> int: logging.error( "Cannot read the kubeconfig file at %s, please check" % kubeconfig_path ) - return 1 + return -1 logging.info("Initializing client to talk to the Kubernetes cluster") # Generate uuid for the run @@ -184,10 +184,10 @@ def main(options, command: Optional[str]) -> int: # Set up kraken url to track signal if not 0 <= int(port) <= 65535: logging.error("%s isn't a valid port number, please check" % (port)) - return 1 + return -1 if not signal_address: logging.error("Please set the signal address in the config") - return 1 + return -1 address = (signal_address, port) # If publish_running_status is False this should keep us going @@ -220,7 +220,7 @@ def main(options, command: Optional[str]) -> int: "invalid distribution selected, running openshift scenarios against kubernetes cluster." "Please set 'kubernetes' in config.yaml krkn.platform and try again" ) - return 1 + return -1 if cv != "": logging.info(cv) else: @@ -361,7 +361,7 @@ def main(options, command: Optional[str]) -> int: logging.error( f"impossible to find scenario {scenario_type}, plugin not found. 
Exiting" ) - sys.exit(1) + sys.exit(-1) failed_post_scenarios, scenario_telemetries = ( scenario_plugin.run_scenarios( @@ -522,7 +522,7 @@ def main(options, command: Optional[str]) -> int: else: logging.error("Alert profile is not defined") - return 1 + return -1 # sys.exit(1) if enable_metrics: logging.info(f'Capturing metrics using file {metrics_profile}') @@ -537,17 +537,20 @@ def main(options, command: Optional[str]) -> int: telemetry_json ) + # want to exit with 1 first to show failure of scenario + # even if alerts failing + if failed_post_scenarios: + logging.error( + "Post scenarios are still failing at the end of all iterations" + ) + # sys.exit(1) + return 1 + if post_critical_alerts > 0: logging.error("Critical alerts are firing, please check; exiting") # sys.exit(2) return 2 - if failed_post_scenarios: - logging.error( - "Post scenarios are still failing at the end of all iterations" - ) - # sys.exit(2) - return 2 if health_checker.ret_value != 0: logging.error("Health check failed for the applications, Please check; exiting") return health_checker.ret_value @@ -563,7 +566,7 @@ def main(options, command: Optional[str]) -> int: else: logging.error("Cannot find a config at %s, please check" % (cfg)) # sys.exit(1) - return 2 + return -1 return 0 From e7fa6bdebcb19054e6d80208a40e72b2bc915c9b Mon Sep 17 00:00:00 2001 From: Paige Patton <64206430+paigerube14@users.noreply.github.com> Date: Wed, 17 Dec 2025 15:09:15 -0500 Subject: [PATCH 08/11] checking chunk error in ci tests (#937) Signed-off-by: Paige Patton --- .github/workflows/tests.yml | 4 +- CI/tests/test_container.sh | 4 +- CI/tests/test_customapp_pod.sh | 2 +- CI/tests/test_pod.sh | 4 +- .../container/container_scenario_plugin.py | 4 +- .../pod_disruption_scenario_plugin.py | 87 +++++++++---------- requirements.txt | 2 +- scenarios/kind/pvc_scenario.yaml | 2 +- 8 files changed, 57 insertions(+), 52 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a8498bb1..7053c282 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -102,7 +102,7 @@ jobs: echo "test_pod_network_filter" >> ./CI/tests/functional_tests echo "test_pod_server" >> ./CI/tests/functional_tests echo "test_node" >> ./CI/tests/functional_tests - echo "test_pvc" >> ./CI/tests/functional_tests + # echo "test_pvc" >> ./CI/tests/functional_tests # Push on main only steps + all other functional to collect coverage # for the badge @@ -140,7 +140,7 @@ jobs: echo "test_pod_network_filter" >> ./CI/tests/functional_tests echo "test_pod_server" >> ./CI/tests/functional_tests echo "test_node" >> ./CI/tests/functional_tests - echo "test_pvc" >> ./CI/tests/functional_tests + # echo "test_pvc" >> ./CI/tests/functional_tests # Final common steps - name: Run Functional tests env: diff --git a/CI/tests/test_container.sh b/CI/tests/test_container.sh index 9042b021..271b43fa 100755 --- a/CI/tests/test_container.sh +++ b/CI/tests/test_container.sh @@ -16,8 +16,10 @@ function functional_test_container_crash { export post_config="" envsubst < CI/config/common_test_config.yaml > CI/config/container_config.yaml - python3 -m coverage run -a run_kraken.py -c CI/config/container_config.yaml + python3 -m coverage run -a run_kraken.py -c CI/config/container_config.yaml -d True echo "Container scenario test: Success" + + kubectl get pods -n kube-system -l component=etcd } functional_test_container_crash diff --git a/CI/tests/test_customapp_pod.sh b/CI/tests/test_customapp_pod.sh index c07869c8..6ae39230 100755 --- 
a/CI/tests/test_customapp_pod.sh +++ b/CI/tests/test_customapp_pod.sh @@ -11,7 +11,7 @@ function functional_test_customapp_pod_node_selector { export post_config="" envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml - python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml + python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml -d True echo "Pod disruption with node_label_selector test: Success" } diff --git a/CI/tests/test_pod.sh b/CI/tests/test_pod.sh index 97df491d..e09356ef 100755 --- a/CI/tests/test_pod.sh +++ b/CI/tests/test_pod.sh @@ -10,9 +10,11 @@ function functional_test_pod_crash { export scenario_file="scenarios/kind/pod_etcd.yml" export post_config="" envsubst < CI/config/common_test_config.yaml > CI/config/pod_config.yaml - cat CI/config/pod_config.yaml + python3 -m coverage run -a run_kraken.py -c CI/config/pod_config.yaml echo "Pod disruption scenario test: Success" + date + kubectl get pods -n kube-system -l component=etcd -o yaml } functional_test_pod_crash diff --git a/krkn/scenario_plugins/container/container_scenario_plugin.py b/krkn/scenario_plugins/container/container_scenario_plugin.py index 1c8e6ec2..21d67dcb 100644 --- a/krkn/scenario_plugins/container/container_scenario_plugin.py +++ b/krkn/scenario_plugins/container/container_scenario_plugin.py @@ -1,6 +1,7 @@ import logging import random import time +import traceback from asyncio import Future import yaml from krkn_lib.k8s import KrknKubernetes @@ -41,6 +42,7 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin): logging.info("ContainerScenarioPlugin failed with unrecovered containers") return 1 except (RuntimeError, Exception) as e: + logging.error("Stack trace:\n%s", traceback.format_exc()) logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e) return 1 else: @@ -50,7 +52,6 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin): return ["container_scenarios"] def start_monitoring(self, kill_scenario: dict, lib_telemetry: KrknTelemetryOpenshift) -> Future: - namespace_pattern = f"^{kill_scenario['namespace']}$" label_selector = kill_scenario["label_selector"] recovery_time = kill_scenario["expected_recovery_time"] @@ -232,4 +233,5 @@ class ContainerScenarioPlugin(AbstractScenarioPlugin): timer += 5 logging.info("Waiting 5 seconds for containers to become ready") time.sleep(5) + return killed_container_list diff --git a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py index 736f6d5f..df309cc9 100644 --- a/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py +++ b/krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py @@ -2,7 +2,7 @@ import logging import random import time from asyncio import Future - +import traceback import yaml from krkn_lib.k8s import KrknKubernetes from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label, \ @@ -74,6 +74,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): return 1 except (RuntimeError, Exception) as e: + logging.error("Stack trace:\n%s", traceback.format_exc()) logging.error("PodDisruptionScenariosPlugin exiting due to Exception %s" % e) return 1 else: @@ -150,7 +151,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): field_selector=combined_field_selector ) - def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: 
str = None, node_names: list = None, quiet: bool = False): + def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None): if label_selector and name_pattern: logging.error('Only, one of name pattern or label pattern can be specified') return [] @@ -161,8 +162,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): # If specific node names are provided, make multiple calls with field selector if node_names: - if not quiet: - logging.info(f"Targeting pods on {len(node_names)} specific nodes") + logging.debug(f"Targeting pods on {len(node_names)} specific nodes") all_pods = [] for node_name in node_names: pods = self._select_pods_with_field_selector( @@ -172,8 +172,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): if pods: all_pods.extend(pods) - if not quiet: - logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes") + logging.debug(f"Found {len(all_pods)} target pods across {len(node_names)} nodes") return all_pods # Node label selector approach - use field selectors @@ -181,11 +180,10 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): # Get nodes matching the label selector first nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector) if not nodes_with_label: - logging.info(f"No nodes found with label selector: {node_label_selector}") + logging.debug(f"No nodes found with label selector: {node_label_selector}") return [] - if not quiet: - logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}") + logging.debug(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}") # Use field selector for each node all_pods = [] for node_name in nodes_with_label: @@ -196,8 +194,7 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): if pods: all_pods.extend(pods) - if not quiet: - logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes") + logging.debug(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes") return all_pods # Standard pod selection (no node targeting) @@ -207,37 +204,40 @@ class PodDisruptionScenarioPlugin(AbstractScenarioPlugin): def killing_pods(self, config: InputParams, kubecli: KrknKubernetes): # region Select target pods + try: + namespace = config.namespace_pattern + if not namespace: + logging.error('Namespace pattern must be specified') + + pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names) + exclude_pods = set() + if config.exclude_label: + _exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names) + for pod in _exclude_pods: + exclude_pods.add(pod[0]) + + + pods_count = len(pods) + if len(pods) < config.kill: + logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format( + config.kill, len(pods))) + return 1 - namespace = config.namespace_pattern - if not namespace: - logging.error('Namespace pattern must be specified') - return 2 + random.shuffle(pods) + for i in range(config.kill): + pod = pods[i] + logging.info(pod) + if pod[0] in exclude_pods: + logging.info(f"Excluding {pod[0]} from chaos") + else: + logging.info(f'Deleting 
pod {pod[0]}')
+                    kubecli.delete_pod(pod[0], pod[1])
+
+            return_val = self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
+        except Exception:
+            raise

-        pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
-        exclude_pods = set()
-        if config.exclude_label:
-            _exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
-            for pod in _exclude_pods:
-                exclude_pods.add(pod[0])
-
-        pods_count = len(pods)
-        if len(pods) < config.kill:
-            logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
-                config.kill, len(pods)))
-            return 2
-
-        random.shuffle(pods)
-        for i in range(config.kill):
-            pod = pods[i]
-            logging.info(pod)
-            if pod[0] in exclude_pods:
-                logging.info(f"Excluding {pod[0]} from chaos")
-            else:
-                logging.info(f'Deleting pod {pod[0]}')
-                kubecli.delete_pod(pod[0], pod[1])
-
-        ret = self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
-        return ret
+        return return_val

     def wait_for_pods(
         self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
@@ -246,10 +246,10 @@
         start_time = datetime.now()

         while not timeout:
-            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
+            pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names)
             if pod_count == len(pods):
                 return 0
-
+
             time.sleep(duration)
             now_time = datetime.now()
@@ -258,6 +258,5 @@
         if time_diff.seconds > wait_timeout:
             logging.error("timeout while waiting for pods to come up")
             return 1
-
-        # should never get to this return
+
         return 0
diff --git a/requirements.txt b/requirements.txt
index 2b0f577f..494eb303 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ google-cloud-compute==1.22.0
 ibm_cloud_sdk_core==3.18.0
 ibm_vpc==0.20.0
 jinja2==3.1.6
-krkn-lib==5.1.12
+krkn-lib==5.1.13
 lxml==5.1.0
 kubernetes==34.1.0
 numpy==1.26.4
diff --git a/scenarios/kind/pvc_scenario.yaml b/scenarios/kind/pvc_scenario.yaml
index 9385ced9..28c8d272 100644
--- a/scenarios/kind/pvc_scenario.yaml
+++ b/scenarios/kind/pvc_scenario.yaml
@@ -2,6 +2,6 @@ pvc_scenario:
   pvc_name: kraken-test-pvc # Name of the target PVC
   pod_name: kraken-test-pod # Name of the pod where the PVC is mounted, it will be ignored if the pvc_name is defined
   namespace: kraken # Namespace where the PVC is
-  fill_percentage: 38 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
+  fill_percentage: 98 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
   duration: 10 # Duration in seconds for the fault
   block_size: 102400 # used only by dd if fallocate not present in the container

From 18385cba2b2b864ff562b6ef67e5ea3bf44d7feb Mon Sep 17 00:00:00 2001
From: Paige Patton <64206430+paigerube14@users.noreply.github.com>
Date: Wed, 17 Dec 2025 15:09:47 -0500
Subject: [PATCH 09/11] adding run unit tests on main (#1004)

Signed-off-by: Paige Patton
---
 .github/workflows/tests.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7053c282..154e8e06 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -74,7 +74,6 @@ jobs:
          kubectl get nodes --show-labels=true
      # Pull request only steps
      - name: Run unit tests
-       if: github.event_name == 'pull_request'
        run: python -m coverage run -a -m unittest discover -s tests -v

      - name: Setup Pull Request Functional Tests

From 2458022248a5e76a8ca3b266c87a10d1f52ff5b3 Mon Sep 17 00:00:00 2001
From: Paige Patton <64206430+paigerube14@users.noreply.github.com>
Date: Thu, 18 Dec 2025 14:59:37 -0500
Subject: [PATCH 10/11] moving telemetry (#1008)

Signed-off-by: Paige Patton
---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 154e8e06..718ed7de 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -123,11 +123,11 @@ jobs:
          yq -i '.performance_monitoring.prometheus_url="http://localhost:9090"' CI/config/common_test_config.yaml
          yq -i '.telemetry.username="${{secrets.TELEMETRY_USERNAME}}"' CI/config/common_test_config.yaml
          yq -i '.telemetry.password="${{secrets.TELEMETRY_PASSWORD}}"' CI/config/common_test_config.yaml
-         echo "test_telemetry" > ./CI/tests/functional_tests
          echo "test_service_hijacking" >> ./CI/tests/functional_tests
          echo "test_app_outages" >> ./CI/tests/functional_tests
          echo "test_container" >> ./CI/tests/functional_tests
          echo "test_pod" >> ./CI/tests/functional_tests
+         echo "test_telemetry" >> ./CI/tests/functional_tests
          echo "test_pod_error" >> ./CI/tests/functional_tests
          echo "test_customapp_pod" >> ./CI/tests/functional_tests
          echo "test_namespace" >> ./CI/tests/functional_tests

From b60b66de43330230855f4dc6c3a2f37c562aa896 Mon Sep 17 00:00:00 2001
From: Parag Kamble
Date: Fri, 19 Dec 2025 20:36:17 +0530
Subject: [PATCH 11/11] Fixed IBM node_reboot_scenario failure (#1007)

Signed-off-by: Parag Kamble
Co-authored-by: Paige Patton <64206430+paigerube14@users.noreply.github.com>
---
 .../node_actions/ibmcloud_node_scenarios.py | 58 ++++++++++++-------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py b/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
index 30bca718..039d766a 100644
--- a/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
+++ b/krkn/scenario_plugins/node_actions/ibmcloud_node_scenarios.py
@@ -327,14 +327,20 @@ class ibm_node_scenarios(abstract_node_scenarios):
                 vm_stopped = self.ibmcloud.stop_instances(instance_id)
                 if vm_stopped:
                     self.ibmcloud.wait_until_stopped(instance_id, timeout, affected_node)
-            logging.info(
-                "Node with instance ID: %s is in stopped state" % node
-            )
-            logging.info(
-                "node_stop_scenario has been successfully injected!"
-            )
+                    logging.info(
+                        "Node with instance ID: %s is in stopped state" % node
+                    )
+                    logging.info(
+                        "node_stop_scenario has been successfully injected!"
+ ) + else: + logging.error( + "Failed to stop node instance %s. Stop command failed." % instance_id + ) + raise Exception("Stop command failed for instance %s" % instance_id) + self.affected_nodes_status.affected_nodes.append(affected_node) except Exception as e: - logging.error("Failed to stop node instance. Test Failed") + logging.error("Failed to stop node instance. Test Failed: %s" % str(e)) logging.error("node_stop_scenario injection failed!") @@ -345,24 +351,31 @@ class ibm_node_scenarios(abstract_node_scenarios): affected_node = AffectedNode(node, node_id=instance_id) logging.info("Starting node_reboot_scenario injection") logging.info("Rebooting the node %s " % (node)) - self.ibmcloud.reboot_instances(instance_id) - self.ibmcloud.wait_until_rebooted(instance_id, timeout, affected_node) - if self.node_action_kube_check: - nodeaction.wait_for_unknown_status( - node, timeout, affected_node + vm_rebooted = self.ibmcloud.reboot_instances(instance_id) + if vm_rebooted: + self.ibmcloud.wait_until_rebooted(instance_id, timeout, affected_node) + if self.node_action_kube_check: + nodeaction.wait_for_unknown_status( + node, timeout, self.kubecli, affected_node + ) + nodeaction.wait_for_ready_status( + node, timeout, self.kubecli, affected_node + ) + logging.info( + "Node with instance ID: %s has rebooted successfully" % node ) - nodeaction.wait_for_ready_status( - node, timeout, affected_node + logging.info( + "node_reboot_scenario has been successfully injected!" ) - logging.info( - "Node with instance ID: %s has rebooted successfully" % node - ) - logging.info( - "node_reboot_scenario has been successfully injected!" - ) + else: + logging.error( + "Failed to reboot node instance %s. Reboot command failed." % instance_id + ) + raise Exception("Reboot command failed for instance %s" % instance_id) + self.affected_nodes_status.affected_nodes.append(affected_node) except Exception as e: - logging.error("Failed to reboot node instance. Test Failed") + logging.error("Failed to reboot node instance. Test Failed: %s" % str(e)) logging.error("node_reboot_scenario injection failed!") @@ -383,7 +396,8 @@ class ibm_node_scenarios(abstract_node_scenarios): logging.info( "node_terminate_scenario has been successfully injected!" ) + self.affected_nodes_status.affected_nodes.append(affected_node) except Exception as e: - logging.error("Failed to terminate node instance. Test Failed") + logging.error("Failed to terminate node instance. Test Failed: %s" % str(e)) logging.error("node_terminate_scenario injection failed!")
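
The IBM fix in PATCH 11 settles on a command-then-verify shape for each node action: issue the cloud call, trust only its boolean result, raise when the command itself fails, verify the state transition, and register the affected node only after the action is confirmed. A minimal sketch of that shape (illustrative only; `client` stands in for the plugin's `self.ibmcloud` helper, and the method names mirror the diff above):

    import logging

    from krkn_lib.models.k8s import AffectedNode


    def stop_node(client, node, instance_id, timeout):
        """Illustrative command-then-verify sketch, not the plugin itself."""
        affected_node = AffectedNode(node, node_id=instance_id)
        # 1. Issue the cloud command and trust only its boolean result.
        if not client.stop_instances(instance_id):
            raise Exception("Stop command failed for instance %s" % instance_id)
        # 2. Verify the state transition before declaring the injection a success.
        client.wait_until_stopped(instance_id, timeout, affected_node)
        logging.info("node_stop_scenario has been successfully injected!")
        # 3. Record the affected node only once the action is confirmed.
        return affected_node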