mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-04-15 06:57:28 +00:00
Merge pull request #50 from paigerube14/node_clouds
Adding a general cloud provider file for kubelet reset and fork bomb on all clouds
This commit is contained in:
@@ -28,7 +28,9 @@ A google service account is required to give proper authentication to GCP for no
|
||||
|
||||
After creating the service account you'll need to enable the account using the following: ```export GOOGLE_APPLICATION_CREDENTIALS="<serviceaccount.json>"```
|
||||
|
||||
**NOTE**: The `stop_start_kubelet_scenario` and `node_crash_scenario` scenarios are supported as they are independent of the cloud platform.
|
||||
**NOTE**: The `node_crash_scenario` and `stop_kubelet_scenario` scenarios are supported independent of the cloud platform.
|
||||
|
||||
Use 'generic' or do not add the 'cloud_type' key to your scenario if your cluster is not set up using one of the currently supported cloud types.
|
||||
|
||||
|
||||
Node scenarios can be injected by placing the node scenarios config files under node_scenarios option in the kraken config. Refer to [node_scenarios_example](https://github.com/openshift-scale/kraken/blob/master/scenarios/node_scenarios_example.yml) config file.
|
||||
@@ -51,4 +53,10 @@ node_scenarios:
|
||||
instance_kill_count: 1
|
||||
timeout: 120
|
||||
cloud_type: aws
|
||||
- actions:
|
||||
- node_crash_scenario
|
||||
node_name:
|
||||
label_selector: node-role.kubernetes.io/infra
|
||||
instance_kill_count: 1
|
||||
timeout: 120
|
||||
```
|
||||
|
||||
@@ -15,7 +15,8 @@ class GCP:
|
||||
self.project = runcommand.invoke('gcloud config get-value project').split('/n')[0].strip()
|
||||
logging.info("project " + str(self.project) + "!")
|
||||
credentials = GoogleCredentials.get_application_default()
|
||||
self.client = discovery.build('compute', 'v1', credentials=credentials, cache_discovery=False)
|
||||
self.client = discovery.build('compute', 'v1', credentials=credentials,
|
||||
cache_discovery=False)
|
||||
|
||||
# Get the instance ID of the node
|
||||
def get_instance_id(self, node):
|
||||
@@ -23,50 +24,56 @@ class GCP:
|
||||
while zone_request is not None:
|
||||
zone_response = zone_request.execute()
|
||||
for zone in zone_response['items']:
|
||||
instances_request = self.client.instances().list(project=self.project, zone=zone['name'])
|
||||
instances_request = self.client.instances().list(project=self.project,
|
||||
zone=zone['name'])
|
||||
while instances_request is not None:
|
||||
instance_response = instances_request.execute()
|
||||
if "items" in instance_response.keys():
|
||||
for instance in instance_response['items']:
|
||||
if instance['name'] in node:
|
||||
return instance['name'], zone['name']
|
||||
instances_request = self.client.zones().list_next(previous_request=instances_request,
|
||||
previous_response=instance_response)
|
||||
zone_request = self.client.zones().list_next(previous_request=zone_request, previous_response=zone_response)
|
||||
instances_request = self.client.zones().list_next(
|
||||
previous_request=instances_request,
|
||||
previous_response=instance_response)
|
||||
zone_request = self.client.zones().list_next(previous_request=zone_request,
|
||||
previous_response=zone_response)
|
||||
logging.info('no instances ')
|
||||
|
||||
# Start the node instance
|
||||
def start_instances(self, zone, instance_id):
|
||||
self.client.instances().start(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
self.client.instances().start(project=self.project, zone=zone, instance=instance_id)\
|
||||
.execute()
|
||||
|
||||
# Stop the node instance
|
||||
def stop_instances(self, zone, instance_id):
|
||||
self.client.instances().stop(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
self.client.instances().stop(project=self.project, zone=zone, instance=instance_id)\
|
||||
.execute()
|
||||
|
||||
# Start the node instance
|
||||
def suspend_instances(self, zone, instance_id):
|
||||
self.client.instances().suspend(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
|
||||
# Stop the node instance
|
||||
def stop_instances(self, zone, instance_id):
|
||||
self.client.instances().stop(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
self.client.instances().suspend(project=self.project, zone=zone, instance=instance_id)\
|
||||
.execute()
|
||||
|
||||
# Terminate the node instance
|
||||
def terminate_instances(self, zone, instance_id):
|
||||
self.client.instances().delete(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
self.client.instances().delete(project=self.project, zone=zone, instance=instance_id)\
|
||||
.execute()
|
||||
|
||||
# Reboot the node instance
|
||||
def reboot_instances(self, zone, instance_id):
|
||||
response = self.client.instances().reset(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
response = self.client.instances().reset(project=self.project, zone=zone,
|
||||
instance=instance_id).execute()
|
||||
logging.info('response reboot ' + str(response))
|
||||
|
||||
# Get instance status
|
||||
def get_instance_status(self, zone, instance_id, expected_status, timeout):
|
||||
# statuses: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING, and TERMINATED.
|
||||
# statuses: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING,
|
||||
# and TERMINATED.
|
||||
i = 0
|
||||
sleeper = 5
|
||||
while i <= timeout:
|
||||
instStatus = self.client.instances().get(project=self.project, zone=zone, instance=instance_id).execute()
|
||||
instStatus = self.client.instances().get(project=self.project, zone=zone,
|
||||
instance=instance_id).execute()
|
||||
logging.info("Status of vm " + str(instStatus['status']))
|
||||
if instStatus['status'] == expected_status:
|
||||
return True
|
||||
|
||||
32
kraken/node_actions/general_cloud_node_scenarios.py
Normal file
32
kraken/node_actions/general_cloud_node_scenarios.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import logging
|
||||
from kraken.node_actions.abstract_node_scenarios import abstract_node_scenarios
|
||||
|
||||
|
||||
class GENERAL:
    """Stateless placeholder used when no specific cloud provider is configured."""

    def __init__(self):
        """Create the placeholder; there is nothing to initialise."""
|
||||
|
||||
|
||||
class general_node_scenarios(abstract_node_scenarios):
    """Node scenarios for the generic cloud type.

    The start, stop, termination and reboot scenarios defined here only log
    that no action is taken; they never touch the node.
    """

    def __init__(self):
        self.general = GENERAL()

    # Node scenario to start the node
    def node_start_scenario(self, instance_kill_count, node, timeout):
        """Log that node start is not available; the arguments are unused."""
        logging.info(
            "Node start is not set up yet for this cloud type, "
            "no action is going to be taken"
        )

    # Node scenario to stop the node
    def node_stop_scenario(self, instance_kill_count, node, timeout):
        """Log that node stop is not available; the arguments are unused."""
        logging.info(
            "Node stop is not set up yet for this cloud type, "
            "no action is going to be taken"
        )

    # Node scenario to terminate the node
    def node_termination_scenario(self, instance_kill_count, node, timeout):
        """Log that node termination is not available; the arguments are unused."""
        logging.info(
            "Node termination is not set up yet for this cloud type, "
            "no action is going to be taken"
        )

    # Node scenario to reboot the node
    def node_reboot_scenario(self, instance_kill_count, node, timeout):
        """Log that node reboot is not available; the arguments are unused."""
        logging.info(
            "Node reboot is not set up yet for this cloud type, "
            "no action is going to be taken"
        )
|
||||
@@ -12,20 +12,34 @@ import kraken.kubernetes.client as kubecli
|
||||
import kraken.invoke.command as runcommand
|
||||
import kraken.node_actions.common_node_functions as nodeaction
|
||||
from kraken.node_actions.aws_node_scenarios import aws_node_scenarios
|
||||
from kraken.node_actions.general_cloud_node_scenarios import general_node_scenarios
|
||||
from kraken.node_actions.gcp_node_scenarios import gcp_node_scenarios
|
||||
import kraken.time_actions.common_time_functions as time_actions
|
||||
|
||||
|
||||
node_general = False
|
||||
|
||||
|
||||
# Get the node scenarios object of specified cloud type
|
||||
def get_node_scenario_object(node_scenario):
    """Return the node scenarios object matching the scenario's cloud type.

    A missing ``cloud_type`` key or the value ``"generic"`` selects
    ``general_node_scenarios``; ``"aws"`` and ``"gcp"`` select their own
    scenario classes. Any other value is logged as an error and the process
    exits with status 1.

    The module-level ``node_general`` flag is refreshed on every call so it
    always describes the most recently requested scenario object.
    """
    global node_general
    cloud_type = node_scenario.get("cloud_type", "generic")
    # Recompute the flag each time: it used to be set to True and never reset,
    # so one generic scenario made every later aws/gcp scenario look generic
    # to the code that checks node_general before running an action.
    node_general = cloud_type == "generic"
    if node_general:
        return general_node_scenarios()
    if cloud_type == "aws":
        return aws_node_scenarios()
    if cloud_type == "gcp":
        return gcp_node_scenarios()
    # str() keeps the message buildable even when the configured value is not a string
    logging.error("Cloud type " + str(cloud_type) + " is not currently supported; "
                  "try using 'generic' if wanting to stop/start kubelet or fork bomb on any "
                  "cluster")
    sys.exit(1)
|
||||
|
||||
|
||||
# Inject the specified node scenario
|
||||
def inject_node_scenario(action, node_scenario, node_scenario_object):
|
||||
generic_cloud_scenarios = ("stop_kubelet_scenario", "node_crash_scenario")
|
||||
# Get the node scenario configurations
|
||||
instance_kill_count = node_scenario.get("instance_kill_count", 1)
|
||||
node_name = node_scenario.get("node_name", "")
|
||||
@@ -33,22 +47,26 @@ def inject_node_scenario(action, node_scenario, node_scenario_object):
|
||||
timeout = node_scenario.get("timeout", 120)
|
||||
# Get the node to apply the scenario
|
||||
node = nodeaction.get_node(node_name, label_selector)
|
||||
if action == "node_start_scenario":
|
||||
node_scenario_object.node_start_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_stop_scenario":
|
||||
node_scenario_object.node_stop_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_stop_start_scenario":
|
||||
node_scenario_object.node_stop_start_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_termination_scenario":
|
||||
node_scenario_object.node_termination_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_reboot_scenario":
|
||||
node_scenario_object.node_reboot_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "stop_kubelet_scenario":
|
||||
node_scenario_object.stop_kubelet_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "stop_start_kubelet_scenario":
|
||||
node_scenario_object.stop_start_kubelet_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_crash_scenario":
|
||||
node_scenario_object.node_crash_scenario(instance_kill_count, node, timeout)
|
||||
|
||||
if node_general and action not in generic_cloud_scenarios:
|
||||
logging.info("Scenario: " + action + " is not set up for generic cloud type, skipping action")
|
||||
else:
|
||||
if action == "node_start_scenario":
|
||||
node_scenario_object.node_start_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_stop_scenario":
|
||||
node_scenario_object.node_stop_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_stop_start_scenario":
|
||||
node_scenario_object.node_stop_start_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_termination_scenario":
|
||||
node_scenario_object.node_termination_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_reboot_scenario":
|
||||
node_scenario_object.node_reboot_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "stop_start_kubelet_scenario":
|
||||
node_scenario_object.stop_start_kubelet_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "stop_kubelet_scenario":
|
||||
node_scenario_object.stop_kubelet_scenario(instance_kill_count, node, timeout)
|
||||
elif action == "node_crash_scenario":
|
||||
node_scenario_object.node_crash_scenario(instance_kill_count, node, timeout)
|
||||
|
||||
|
||||
# Get cerberus status
|
||||
|
||||
Reference in New Issue
Block a user