mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-14 18:10:00 +00:00
adding podman support in docker configuration (#999)
Some checks failed
Functional & Unit Tests / Functional & Unit Tests (push) Failing after 1s
Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped
Some checks failed
Functional & Unit Tests / Functional & Unit Tests (push) Failing after 1s
Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped
Signed-off-by: Paige Patton <prubenda@redhat.com>
This commit is contained in:
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@@ -101,6 +101,7 @@ jobs:
|
||||
echo "test_io_hog" >> ./CI/tests/functional_tests
|
||||
echo "test_pod_network_filter" >> ./CI/tests/functional_tests
|
||||
echo "test_pod_server" >> ./CI/tests/functional_tests
|
||||
echo "test_node" >> ./CI/tests/functional_tests
|
||||
echo "test_pvc" >> ./CI/tests/functional_tests
|
||||
|
||||
# Push on main only steps + all other functional to collect coverage
|
||||
@@ -138,6 +139,7 @@ jobs:
|
||||
echo "test_io_hog" >> ./CI/tests/functional_tests
|
||||
echo "test_pod_network_filter" >> ./CI/tests/functional_tests
|
||||
echo "test_pod_server" >> ./CI/tests/functional_tests
|
||||
echo "test_node" >> ./CI/tests/functional_tests
|
||||
echo "test_pvc" >> ./CI/tests/functional_tests
|
||||
# Final common steps
|
||||
- name: Run Functional tests
|
||||
|
||||
18
CI/tests/test_node.sh
Executable file
18
CI/tests/test_node.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
# Shell options for the whole test:
#   -x  trace each command
#   -e  exit on the first failing command
#   -E  let functions inherit the ERR trap
#   -o pipefail  fail a pipeline when any stage fails
# (The original spelled the builtin "uset", so none of these were applied.)
set -xeEo pipefail

# Shared CI helpers; presumably defines the `error` and `finish` handlers
# registered below -- confirm against CI/tests/common.sh.
source CI/tests/common.sh

trap error ERR
trap finish EXIT
|
||||
|
||||
# Render the node-scenario config from the common CI template, print it, and
# run run_kraken.py against it under coverage (appending to existing data).
function functional_test_node_stop_start {
  local rendered_config="CI/config/node_config.yaml"

  # Exported so envsubst can see them; presumably these are the placeholders
  # used by common_test_config.yaml -- confirm against the template.
  scenario_type="node_scenarios"
  scenario_file="scenarios/kind/node_scenarios_example.yml"
  post_config=""
  export scenario_type scenario_file post_config

  envsubst < CI/config/common_test_config.yaml > "${rendered_config}"
  cat "${rendered_config}"
  python3 -m coverage run -a run_kraken.py -c "${rendered_config}"
  echo "Node Stop/Start scenario test: Success"
}

functional_test_node_stop_start
|
||||
@@ -2,46 +2,173 @@ import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
|
||||
from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
|
||||
abstract_node_scenarios,
|
||||
)
|
||||
import os
|
||||
import platform
|
||||
import logging
|
||||
import docker
|
||||
from krkn_lib.k8s import KrknKubernetes
|
||||
from krkn_lib.models.k8s import AffectedNode, AffectedNodeStatus
|
||||
|
||||
class Docker:
    """
    Container runtime client wrapper supporting both Docker and Podman.

    This class connects to either a Docker or a Podman container runtime
    through the Docker-compatible API. It tries multiple connection methods
    in order of preference and keeps the first one that answers ping():

    1. Docker Unix socket (unix:///var/run/docker.sock)
    2. Platform-specific Podman sockets:
       - macOS: ~/.local/share/containers/podman/machine/podman.sock
         (only when that path exists)
       - otherwise rootful: unix:///run/podman/podman.sock
       - otherwise rootless: unix:///run/user/<uid>/podman/podman.sock
         (only when os.getuid is available)
    3. docker.from_env() as a last resort

    The runtime type (docker/podman) is auto-detected and logged for debugging.
    Supports Kind clusters running on Podman.

    Attributes:
        client: the connected client object (never None after __init__ returns)
        runtime: 'podman', 'docker' or 'unknown'

    Assisted By: Claude Code
    """

    def __init__(self):
        """
        Connect to the first reachable container runtime endpoint.

        Raises:
            RuntimeError: if no connection method produces a client that
                answers ping().
        """
        self.client = None
        self.runtime = 'unknown'

        # Try multiple connection methods in order of preference
        # Supports both Docker and Podman
        connection_methods = self._connection_methods()

        for method, description in connection_methods:
            candidate = None
            try:
                if method == 'from_env':
                    logging.info(f'Attempting to connect using {description}')
                    candidate = docker.from_env()
                else:
                    logging.info(f'Attempting to connect using {description}: {method}')
                    candidate = docker.DockerClient(base_url=method)

                # Test the connection
                candidate.ping()
            except Exception as e:
                # Only a client that passed ping() may be stored on self:
                # keeping a failed one would defeat the "is None" check below
                # and let callers proceed with an unusable client.
                logging.warning(f'Failed to connect using {description}: {e}')
                self._close_quietly(candidate)
                continue

            self.client = candidate
            self.runtime = self._detect_runtime(candidate)
            logging.info(f'Successfully connected to {self.runtime} using {description}')
            self._log_containers(candidate)
            break

        if self.client is None:
            error_msg = 'Failed to initialize container runtime client (Docker/Podman) with any connection method'
            logging.error(error_msg)
            logging.error('Attempted connection methods:')
            for method, desc in connection_methods:
                logging.error(f'  - {desc}: {method}')
            raise RuntimeError(error_msg)

        logging.info(f'Container runtime client initialized successfully: {self.runtime}')

    @staticmethod
    def _connection_methods():
        """Return the ordered list of (endpoint, description) pairs to try."""
        methods = [
            ('unix:///var/run/docker.sock', 'Docker Unix socket'),
        ]

        # Add platform-specific Podman sockets
        if platform.system() == 'Darwin':  # macOS
            # On macOS, Podman uses podman-machine with socket typically at:
            # ~/.local/share/containers/podman/machine/podman.sock
            # This is often symlinked to /var/run/docker.sock
            podman_machine_sock = os.path.expanduser('~/.local/share/containers/podman/machine/podman.sock')
            if os.path.exists(podman_machine_sock):
                methods.append((f'unix://{podman_machine_sock}', 'Podman machine socket (macOS)'))
        else:  # Linux
            methods.append(('unix:///run/podman/podman.sock', 'Podman Unix socket (rootful)'))
            # os.getuid does not exist on every platform that reaches this
            # branch; skip the per-user socket there instead of failing.
            getuid = getattr(os, 'getuid', None)
            if getuid is not None:
                methods.append(
                    (f'unix:///run/user/{getuid()}/podman/podman.sock', 'Podman Unix socket (rootless)')
                )

        # Always try from_env as last resort.
        # NOTE(review): docker-py documents DOCKER_HOST (plus TLS variables)
        # for from_env(); CONTAINER_HOST does not appear to be read -- confirm
        # before relying on it.
        methods.append(('from_env', 'Environment variables (DOCKER_HOST/CONTAINER_HOST)'))
        return methods

    @staticmethod
    def _detect_runtime(client):
        """Return 'podman', 'docker' or 'unknown' from the client's version info."""
        try:
            version_info = client.version()
            version_str = version_info.get('Version') or ''
            # The 'Version' field is typically a bare version number, so also
            # look at component names (Podman's compat API is expected to
            # report a component such as "Podman Engine").
            identifiers = [str(version_str)]
            for component in version_info.get('Components') or []:
                if isinstance(component, dict):
                    identifiers.append(str(component.get('Name') or ''))
            logging.debug(f'Runtime version info: {version_str}')
            if any('podman' in ident.lower() for ident in identifiers):
                return 'podman'
            return 'docker'
        except Exception as version_err:
            logging.warning(f'Could not detect runtime version: {version_err}')
            return 'unknown'

    @staticmethod
    def _log_containers(client):
        """Log how many containers exist and the first few names (best effort)."""
        try:
            containers = client.containers.list(all=True)
            logging.info(f'Found {len(containers)} total containers')
            for container in containers[:5]:  # Log first 5
                logging.debug(f'  Container: {container.name} ({container.status})')
        except Exception as list_err:
            logging.warning(f'Could not list containers: {list_err}')

    @staticmethod
    def _close_quietly(client):
        """Release a client that failed its connection test, ignoring errors."""
        if client is None:
            return
        try:
            client.close()
        except Exception as close_err:
            logging.debug(f'Ignoring error while closing failed client: {close_err}')

    def get_container_id(self, node_name):
        """Get the container ID for a given node name."""
        container = self.client.containers.get(node_name)
        logging.info(f'Found {self.runtime} container for node {node_name}: {container.id}')
        return container.id

    # Start the node instance
    def start_instances(self, node_name):
        """Start a container instance (works with both Docker and Podman)."""
        logging.info(f'Starting {self.runtime} container for node: {node_name}')
        container = self.client.containers.get(node_name)
        container.start()
        logging.info(f'Container {container.id} started successfully')

    # Stop the node instance
    def stop_instances(self, node_name):
        """Stop a container instance (works with both Docker and Podman)."""
        logging.info(f'Stopping {self.runtime} container for node: {node_name}')
        container = self.client.containers.get(node_name)
        container.stop()
        logging.info(f'Container {container.id} stopped successfully')

    # Reboot the node instance
    def reboot_instances(self, node_name):
        """Restart a container instance (works with both Docker and Podman)."""
        logging.info(f'Restarting {self.runtime} container for node: {node_name}')
        container = self.client.containers.get(node_name)
        container.restart()
        logging.info(f'Container {container.id} restarted successfully')

    # Terminate the node instance
    def terminate_instances(self, node_name):
        """Stop and remove a container instance (works with both Docker and Podman)."""
        logging.info(f'Terminating {self.runtime} container for node: {node_name}')
        container = self.client.containers.get(node_name)
        container.stop()
        container.remove()
        logging.info(f'Container {container.id} terminated and removed successfully')
|
||||
|
||||
class docker_node_scenarios(abstract_node_scenarios):
|
||||
"""
|
||||
Node chaos scenarios for containerized Kubernetes nodes.
|
||||
|
||||
Supports both Docker and Podman container runtimes. This class provides
|
||||
methods to inject chaos into Kubernetes nodes running as containers
|
||||
(e.g., Kind clusters, Podman-based clusters).
|
||||
"""
|
||||
def __init__(
    self,
    kubecli: KrknKubernetes,
    node_action_kube_check: bool,
    affected_nodes_status: AffectedNodeStatus,
):
    """
    Initialise the base scenario state and connect to the container runtime.

    Args:
        kubecli: Kubernetes client forwarded to the base class.
        node_action_kube_check: flag forwarded to the base class and also
            stored on this instance.
        affected_nodes_status: status collector forwarded to the base class.
    """
    logging.info('Initializing docker_node_scenarios (supports Docker and Podman)')
    super().__init__(kubecli, node_action_kube_check, affected_nodes_status)

    # Docker() raises if no runtime endpoint is reachable, so construction
    # of this object fails early in that case.
    runtime_client = Docker()
    self.docker = runtime_client
    self.node_action_kube_check = node_action_kube_check
    logging.info(f'Node scenarios initialized successfully using {runtime_client.runtime} runtime')
|
||||
|
||||
# Node scenario to start the node
|
||||
def node_start_scenario(self, instance_kill_count, node, timeout, poll_interval):
|
||||
|
||||
@@ -9,7 +9,7 @@ azure-mgmt-network==27.0.0
|
||||
itsdangerous==2.0.1
|
||||
coverage==7.6.12
|
||||
datetime==5.4
|
||||
docker==7.0.0
|
||||
docker>=6.0,<7.0 # docker 7.0+ has breaking changes with Unix sockets
|
||||
gitpython==3.1.41
|
||||
google-auth==2.37.0
|
||||
google-cloud-compute==1.22.0
|
||||
@@ -28,7 +28,8 @@ pyfiglet==1.0.2
|
||||
pytest==8.0.0
|
||||
python-ipmi==0.5.4
|
||||
python-openstackclient==6.5.0
|
||||
requests==2.32.4
|
||||
requests<2.32 # requests 2.32+ breaks Unix socket support (http+docker scheme)
|
||||
requests-unixsocket>=0.4.0 # Required for Docker Unix socket support
|
||||
service_identity==24.1.0
|
||||
PyYAML==6.0.1
|
||||
setuptools==78.1.1
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
node_scenarios:
|
||||
- actions: # node chaos scenarios to be injected
|
||||
- node_stop_start_scenario
|
||||
node_name: kind-worker # node on which scenario has to be injected; can set multiple names separated by comma
|
||||
# label_selector: node-role.kubernetes.io/worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
|
||||
# node_name: kind-control-plane # node on which scenario has to be injected; can set multiple names separated by comma
|
||||
label_selector: kubernetes.io/hostname=kind-worker # when node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection
|
||||
instance_count: 1 # Number of nodes to perform action/select that match the label selector
|
||||
runs: 1 # number of times to inject each scenario under actions (will perform on same node each time)
|
||||
timeout: 120 # duration to wait for completion of node scenario injection
|
||||
cloud_type: docker # cloud type on which Kubernetes/OpenShift runs
|
||||
duration: 10
|
||||
- actions:
|
||||
- node_reboot_scenario
|
||||
node_name: kind-worker
|
||||
# label_selector: node-role.kubernetes.io/infra
|
||||
node_name: kind-control-plane
|
||||
# label_selector: kubernetes.io/hostname=kind-worker
|
||||
instance_count: 1
|
||||
timeout: 120
|
||||
cloud_type: docker
|
||||
kube_check: false
|
||||
|
||||
Reference in New Issue
Block a user