Mirror of https://github.com/krkn-chaos/krkn.git (synced 2026-02-14 09:59:59 +00:00)
Install pre-commit and use GitHub Actions (#94)
* added pre-commit and code-cleaning
* removed tox and TravisCI
.github/workflows/ci.yml (new file, +22 lines, vendored)
@@ -0,0 +1,22 @@
+name: Code Quality Check
+
+on:
+  - push
+  - pull_request
+
+jobs:
+  lint-ci:
+    runs-on: ubuntu-latest
+    name: Run pre-commit and install test
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v2
+      - name: Set up Python environment
+        uses: actions/setup-python@v1
+        with:
+          python-version: "3.8"
+      - name: Run pre-commit
+        uses: pre-commit/action@v2.0.3
+      - name: Install Kraken
+        run: |
+          python setup.py develop
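For reference, the job above can be reproduced locally before pushing. This is a minimal sketch, assuming Python 3.8 and the repository root as the working directory; the `pre-commit run --all-files` call mirrors what the pre-commit/action step runs by default.

```sh
# Reproduce the "Code Quality Check" job locally (sketch, not part of the commit)
pip install pre-commit          # linting framework used by the new CI job
pre-commit run --all-files      # roughly what the pre-commit/action step executes
python setup.py develop         # the "Install Kraken" step from the workflow
```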
.gitignore (vendored, 1 line removed)
@@ -31,7 +31,6 @@ tags
 # Unittest and coverage
 htmlcov/*
 .coverage
-.tox
 junit.xml
 coverage.xml
 .pytest_cache/
.pre-commit-config.yaml (new file, +30 lines)
@@ -0,0 +1,30 @@
+---
+repos:
+  - repo: git://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.1.1
+    hooks:
+      - id: remove-tabs
+
+  - repo: git://github.com/pre-commit/pre-commit-hooks
+    rev: v2.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: check-merge-conflict
+      - id: end-of-file-fixer
+      - id: check-case-conflict
+      - id: detect-private-key
+      - id: check-ast
+
+  - repo: https://github.com/psf/black
+    rev: 19.10b0
+    hooks:
+      - id: black
+        args: ['--line-length', '120']
+
+  - repo: https://gitlab.com/PyCQA/flake8
+    rev: '3.7.8'
+    hooks:
+      - id: flake8
+        additional_dependencies: ['pep8-naming']
+        # Ignore all format-related checks as Black takes care of those.
+        args: ['--ignore', 'E123,E125', '--select', 'E,W,F', '--max-line-length=120']
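Individual hooks from this configuration can also be run on their own, which helps when iterating on one class of findings. A small sketch using the hook ids defined above:

```sh
# Run only the formatter or only the linter from .pre-commit-config.yaml
pre-commit run black --all-files    # black at line length 120
pre-commit run flake8 --all-files   # flake8 with pep8-naming; format checks are left to black
```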
.travis.yml (deleted, -15 lines)
@@ -1,15 +0,0 @@
-dist: xenial
-
-langauge: python
-
-python: "3.6.8"
-
-before_install:
-  - sudo apt-get update
-
-install:
-  - sudo apt install python3-pip
-  - sudo pip3 install -r test-requirements.txt
-  - sudo pip3 install tox
-
-script: tox .
@@ -5,9 +5,9 @@ import logging
 
 def run(cmd):
     try:
-        output = subprocess.Popen(cmd, shell=True,
-                                  universal_newlines=True, stdout=subprocess.PIPE,
-                                  stderr=subprocess.STDOUT)
+        output = subprocess.Popen(
+            cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+        )
         (out, err) = output.communicate()
         logging.info("out " + str(out))
     except Exception as e:
@@ -19,4 +19,3 @@ scenarios:
     actions:
       - checkPodCount:
           count: 2
-
@@ -15,8 +15,11 @@ def list_namespaces():
         cli = client.CoreV1Api()
         ret = cli.list_namespace(pretty=True)
     except ApiException as e:
-        logging.error("Exception when calling \
-            CoreV1Api->list_namespaced_pod: %s\n" % e)
+        logging.error(
+            "Exception when calling \
+            CoreV1Api->list_namespaced_pod: %s\n"
+            % e
+        )
     for namespace in ret.items:
         namespaces.append(namespace.metadata.name)
     return namespaces
@@ -47,9 +50,9 @@ def check_namespaces(namespaces):
 
 def run(cmd):
     try:
-        output = subprocess.Popen(cmd, shell=True,
-                                  universal_newlines=True, stdout=subprocess.PIPE,
-                                  stderr=subprocess.STDOUT)
+        output = subprocess.Popen(
+            cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+        )
         (out, err) = output.communicate()
     except Exception as e:
         logging.error("Failed to run %s, error: %s" % (cmd, e))
@@ -22,15 +22,17 @@ $ git push
 ```
 
 ## Fix Formatting
-You can do this before your first commit but please take a look at the formatting outlined using tox.
+Kraken uses [pre-commit](https://pre-commit.com) framework to maintain the code linting and python code styling.
+The CI would run the pre-commit check on each pull request.
+We encourage our contributors to follow the same pattern, while contributing to the code.
 
-To run:
+The pre-commit configuration file is present in the repository `.pre-commit-config.yaml`
+It contains the different code styling and linting guide which we use for the application.
 
-```pip install tox ```(if not already installed)
+Following command can be used to run the pre-commit:
+`pre-commit run --all-files`
 
-```tox```
+If pre-commit is not installed in your system, it can be install with : `pip install pre-commit`
 
-Fix all spacing, import issues and other formatting issues
-
 ## Squash Commits
 If there are mutliple commits, please rebase/squash multiple commits
@@ -50,6 +52,3 @@ Push your rebased commits (you may need to force), then issue your PR.
 ```
 $ git push origin <my-working-branch> --force
 ```
-
-
-
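A typical local setup matching the updated contributing guide might look like the sketch below; the `pre-commit install` step is an optional extra not mentioned in the diff, which registers the hooks as a git pre-commit hook so they run automatically on each commit.

```sh
pip install pre-commit        # install the framework if it is not already present
pre-commit install            # optional: run the hooks automatically on every git commit
pre-commit run --all-files    # one-off run across the whole repository, as the guide suggests
```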
@@ -36,6 +36,3 @@ Following are the start of scenarios for which a chaos scenario config exists today
 Component | Description | Working
 ------------------------ | ---------------------------------------------------------------------------------------------------| ------------------------- |
 Node CPU Hog | Chaos scenario that hogs up the CPU on a defined node for a specific amount of time | :heavy_check_mark: |
-
-
-
@@ -5,9 +5,9 @@ import logging
 # Invokes a given command and returns the stdout
 def invoke(command):
     try:
-        output = subprocess.Popen(command, shell=True,
-                                  universal_newlines=True, stdout=subprocess.PIPE,
-                                  stderr=subprocess.STDOUT)
+        output = subprocess.Popen(
+            command, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+        )
         (out, err) = output.communicate()
     except Exception as e:
         logging.error("Failed to run %s, error: %s" % (command, e))
@@ -55,8 +55,11 @@ def list_pods(namespace):
     try:
         ret = cli.list_namespaced_pod(namespace, pretty=True)
     except ApiException as e:
-        logging.error("Exception when calling \
-            CoreV1Api->list_namespaced_pod: %s\n" % e)
+        logging.error(
+            "Exception when calling \
+            CoreV1Api->list_namespaced_pod: %s\n"
+            % e
+        )
     for pod in ret.items:
         pods.append(pod.metadata.name)
     return pods
@@ -76,11 +79,18 @@ def get_all_pods(label_selector=None):
 # Execute command in pod
 def exec_cmd_in_pod(command, pod_name, namespace):
 
-    exec_command = ['bash', '-c', command]
+    exec_command = ["bash", "-c", command]
     try:
-        ret = stream(cli.connect_get_namespaced_pod_exec, pod_name, namespace,
-                     command=exec_command, stderr=True, stdin=False,
-                     stdout=True, tty=False)
+        ret = stream(
+            cli.connect_get_namespaced_pod_exec,
+            pod_name,
+            namespace,
+            command=exec_command,
+            stderr=True,
+            stdin=False,
+            stdout=True,
+            tty=False,
+        )
     except Exception:
         return False
     return ret
@@ -91,8 +101,11 @@ def get_node_status(node):
     try:
         node_info = cli.read_node_status(node, pretty=True)
     except ApiException as e:
-        logging.error("Exception when calling \
-            CoreV1Api->read_node_status: %s\n" % e)
+        logging.error(
+            "Exception when calling \
+            CoreV1Api->read_node_status: %s\n"
+            % e
+        )
     for condition in node_info.status.conditions:
         if condition.type == "Ready":
             return condition.status
@@ -107,8 +120,11 @@ def monitor_nodes():
         try:
             node_info = cli.read_node_status(node, pretty=True)
         except ApiException as e:
-            logging.error("Exception when calling \
-                CoreV1Api->read_node_status: %s\n" % e)
+            logging.error(
+                "Exception when calling \
+                CoreV1Api->read_node_status: %s\n"
+                % e
+            )
         for condition in node_info.status.conditions:
             if condition.type == "KernelDeadlock":
                 node_kerneldeadlock_status = condition.status
@@ -116,10 +132,7 @@ def monitor_nodes():
                 node_ready_status = condition.status
             else:
                 continue
-        if (
-            node_kerneldeadlock_status != "False"  # noqa
-            or node_ready_status != "True"  # noqa
-        ):
+        if node_kerneldeadlock_status != "False" or node_ready_status != "True":  # noqa # noqa
             notready_nodes.append(node)
     if len(notready_nodes) != 0:
         status = False
@@ -135,15 +148,15 @@ def monitor_namespace(namespace):
     notready_pods = []
     for pod in pods:
         try:
-            pod_info = cli.read_namespaced_pod_status(pod, namespace,
-                                                      pretty=True)
+            pod_info = cli.read_namespaced_pod_status(pod, namespace, pretty=True)
         except ApiException as e:
-            logging.error("Exception when calling \
-                CoreV1Api->read_namespaced_pod_status: %s\n" % e)
+            logging.error(
+                "Exception when calling \
+                CoreV1Api->read_namespaced_pod_status: %s\n"
+                % e
+            )
         pod_status = pod_info.status.phase
-        if pod_status != "Running" \
-            and pod_status != "Completed" \
-            and pod_status != "Succeeded":
+        if pod_status != "Running" and pod_status != "Completed" and pod_status != "Succeeded":
             notready_pods.append(pod)
     if len(notready_pods) != 0:
         status = False
@@ -154,10 +167,8 @@ def monitor_namespace(namespace):
 
 # Monitor component namespace
 def monitor_component(iteration, component_namespace):
-    watch_component_status, failed_component_pods = \
-        monitor_namespace(component_namespace)
-    logging.info("Iteration %s: %s: %s"
-                 % (iteration, component_namespace, watch_component_status))
+    watch_component_status, failed_component_pods = monitor_namespace(component_namespace)
+    logging.info("Iteration %s: %s: %s" % (iteration, component_namespace, watch_component_status))
     return watch_component_status, failed_component_pods
 
 
@@ -178,7 +189,7 @@ def find_kraken_node():
     runcommand.invoke("kubectl config set-context --current --namespace=" + str(kraken_project))
     pod_json_str = runcommand.invoke("kubectl get pods/" + str(kraken_pod_name) + " -o json")
     pod_json = json.loads(pod_json_str)
-    node_name = pod_json['spec']['nodeName']
+    node_name = pod_json["spec"]["nodeName"]
 
     # Reset to the default project
     runcommand.invoke("kubectl config set-context --current --namespace=default")
@@ -6,12 +6,13 @@ import sys
 
 # Install litmus and wait until pod is running
 def install_litmus(version):
-    runcommand.invoke("kubectl apply -f "
-                      "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version)
+    runcommand.invoke("kubectl apply -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version)
 
-    runcommand.invoke("oc patch -n litmus deployment.apps/chaos-operator-ce --type=json --patch ' "
-                      "[ { \"op\": \"add\", \"path\": \"/spec/template/spec/containers/0/env/-\", "
-                      "\"value\": { \"name\": \"ANALYTICS\", \"value\": \"FALSE\" } } ]'")
+    runcommand.invoke(
+        "oc patch -n litmus deployment.apps/chaos-operator-ce --type=json --patch ' "
+        '[ { "op": "add", "path": "/spec/template/spec/containers/0/env/-", '
+        '"value": { "name": "ANALYTICS", "value": "FALSE" } } ]\''
+    )
 
     runcommand.invoke("oc wait deploy -n litmus chaos-operator-ce --for=condition=Available")
 
@@ -19,14 +20,13 @@ def install_litmus(version):
 def deploy_all_experiments(version_string):
 
     if not version_string.startswith("v"):
-        logging.error("Incorrect version string for litmus, needs to start with 'v' "
-                      "followed by a number")
+        logging.error("Incorrect version string for litmus, needs to start with 'v' " "followed by a number")
         sys.exit(1)
     version = version_string[1:]
 
-    runcommand.invoke("kubectl apply -f "
-                      "https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml"
-                      % version)
+    runcommand.invoke(
+        "kubectl apply -f " "https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" % version
+    )
 
 
 def delete_experiments():
@@ -35,16 +35,18 @@ def delete_experiments():
 
 # Check status of experiment
 def check_experiment(engine_name, experiment_name, namespace):
-    chaos_engine = runcommand.invoke("kubectl get chaosengines/%s -n %s -o jsonpath="
-                                     "'{.status.engineStatus}'" % (engine_name, namespace))
+    chaos_engine = runcommand.invoke(
+        "kubectl get chaosengines/%s -n %s -o jsonpath=" "'{.status.engineStatus}'" % (engine_name, namespace)
+    )
     engine_status = chaos_engine.strip()
     max_tries = 30
     engine_counter = 0
     while engine_status.lower() != "running" and engine_status.lower() != "completed":
         time.sleep(10)
         logging.info("Waiting for engine to start running.")
-        chaos_engine = runcommand.invoke("kubectl get chaosengines/%s -n %s -o jsonpath="
-                                         "'{.status.engineStatus}'" % (engine_name, namespace))
+        chaos_engine = runcommand.invoke(
+            "kubectl get chaosengines/%s -n %s -o jsonpath=" "'{.status.engineStatus}'" % (engine_name, namespace)
+        )
         engine_status = chaos_engine.strip()
         if engine_counter >= max_tries:
             logging.error("Chaos engine took longer than 5 minutes to be running or complete")
@@ -57,19 +59,21 @@ def check_experiment(engine_name, experiment_name, namespace):
 
     if not chaos_engine:
         return False
-    chaos_result = runcommand.invoke("kubectl get chaosresult %s"
-                                     "-%s -n %s -o "
-                                     "jsonpath='{.status.experimentstatus.verdict}'"
-                                     % (engine_name, experiment_name, namespace))
+    chaos_result = runcommand.invoke(
+        "kubectl get chaosresult %s"
+        "-%s -n %s -o "
+        "jsonpath='{.status.experimentstatus.verdict}'" % (engine_name, experiment_name, namespace)
+    )
     result_counter = 0
     status = chaos_result.strip()
     while status == "Awaited":
         logging.info("Waiting for chaos result to finish, sleeping 10 seconds")
         time.sleep(10)
-        chaos_result = runcommand.invoke("kubectl get chaosresult %s"
-                                         "-%s -n %s -o "
-                                         "jsonpath='{.status.experimentstatus.verdict}'"
-                                         % (engine_name, experiment_name, namespace))
+        chaos_result = runcommand.invoke(
+            "kubectl get chaosresult %s"
+            "-%s -n %s -o "
+            "jsonpath='{.status.experimentstatus.verdict}'" % (engine_name, experiment_name, namespace)
+        )
         status = chaos_result.strip()
         if result_counter >= max_tries:
             logging.error("Chaos results took longer than 5 minutes to get a final result")
@@ -82,10 +86,11 @@ def check_experiment(engine_name, experiment_name, namespace):
     if status == "Pass":
         return True
     else:
-        chaos_result = runcommand.invoke("kubectl get chaosresult %s"
-                                         "-%s -n %s -o jsonpath="
-                                         "'{.status.experimentstatus.failStep}'" %
-                                         (engine_name, experiment_name, namespace))
+        chaos_result = runcommand.invoke(
+            "kubectl get chaosresult %s"
+            "-%s -n %s -o jsonpath="
+            "'{.status.experimentstatus.failStep}'" % (engine_name, experiment_name, namespace)
+        )
         logging.info("Chaos result failed information: " + str(chaos_result))
         return False
 
@@ -99,5 +104,4 @@ def delete_chaos(namespace):
 
 # Uninstall litmus operator
 def uninstall_litmus(version):
-    runcommand.invoke("kubectl delete -f "
-                      "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version)
+    runcommand.invoke("kubectl delete -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version)
@@ -46,8 +46,9 @@ class abstract_node_scenarios:
             logging.info("The kubelet of the node %s has been stopped" % (node))
             logging.info("stop_kubelet_scenario has been successfuly injected!")
         except Exception as e:
-            logging.error("Failed to stop the kubelet of the node. Encountered following "
-                          "exception: %s. Test Failed" % (e))
+            logging.error(
+                "Failed to stop the kubelet of the node. Encountered following " "exception: %s. Test Failed" % (e)
+            )
             logging.error("stop_kubelet_scenario injection failed!")
             sys.exit(1)
 
@@ -64,12 +65,12 @@ class abstract_node_scenarios:
         try:
             logging.info("Starting node_crash_scenario injection")
             logging.info("Crashing the node %s" % (node))
-            runcommand.invoke("oc debug node/" + node + " -- chroot /host "
-                              "dd if=/dev/urandom of=/proc/sysrq-trigger")
+            runcommand.invoke(
+                "oc debug node/" + node + " -- chroot /host " "dd if=/dev/urandom of=/proc/sysrq-trigger"
+            )
             logging.info("node_crash_scenario has been successfuly injected!")
         except Exception as e:
-            logging.error("Failed to crash the node. Encountered following exception: %s. "
-                          "Test Failed" % (e))
+            logging.error("Failed to crash the node. Encountered following exception: %s. " "Test Failed" % (e))
             logging.error("node_crash_scenario injection failed!")
             sys.exit(1)
 
@@ -9,56 +9,42 @@ from kraken.node_actions.abstract_node_scenarios import abstract_node_scenarios
 
 class AWS:
     def __init__(self):
-        self.boto_client = boto3.client('ec2')
-        self.boto_instance = boto3.resource('ec2').Instance('id')
+        self.boto_client = boto3.client("ec2")
+        self.boto_instance = boto3.resource("ec2").Instance("id")
 
     # Get the instance ID of the node
     def get_instance_id(self, node):
-        return self.boto_client.describe_instances(
-            Filters=[{'Name': 'private-dns-name', 'Values': [node]}]
-        )['Reservations'][0]['Instances'][0]['InstanceId']
+        return self.boto_client.describe_instances(Filters=[{"Name": "private-dns-name", "Values": [node]}])[
+            "Reservations"
+        ][0]["Instances"][0]["InstanceId"]
 
     # Start the node instance
     def start_instances(self, instance_id):
-        self.boto_client.start_instances(
-            InstanceIds=[instance_id]
-        )
+        self.boto_client.start_instances(InstanceIds=[instance_id])
 
     # Stop the node instance
     def stop_instances(self, instance_id):
-        self.boto_client.stop_instances(
-            InstanceIds=[instance_id]
-        )
+        self.boto_client.stop_instances(InstanceIds=[instance_id])
 
     # Terminate the node instance
     def terminate_instances(self, instance_id):
-        self.boto_client.terminate_instances(
-            InstanceIds=[instance_id]
-        )
+        self.boto_client.terminate_instances(InstanceIds=[instance_id])
 
     # Reboot the node instance
    def reboot_instances(self, instance_id):
-        self.boto_client.reboot_instances(
-            InstanceIds=[instance_id]
-        )
+        self.boto_client.reboot_instances(InstanceIds=[instance_id])
 
     # Wait until the node instance is running
     def wait_until_running(self, instance_id):
-        self.boto_instance.wait_until_running(
-            InstanceIds=[instance_id]
-        )
+        self.boto_instance.wait_until_running(InstanceIds=[instance_id])
 
     # Wait until the node instance is stopped
     def wait_until_stopped(self, instance_id):
-        self.boto_instance.wait_until_stopped(
-            InstanceIds=[instance_id]
-        )
+        self.boto_instance.wait_until_stopped(InstanceIds=[instance_id])
 
     # Wait until the node instance is terminated
     def wait_until_terminated(self, instance_id):
-        self.boto_instance.wait_until_terminated(
-            InstanceIds=[instance_id]
-        )
+        self.boto_instance.wait_until_terminated(InstanceIds=[instance_id])
 
 
 class aws_node_scenarios(abstract_node_scenarios):
@@ -78,8 +64,9 @@ class aws_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s is in running state" % (instance_id))
                 logging.info("node_start_scenario has been successfully injected!")
             except Exception as e:
-                logging.error("Failed to start node instance. Encountered following "
-                              "exception: %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e)
+                )
                 logging.error("node_start_scenario injection failed!")
                 sys.exit(1)
 
@@ -95,8 +82,7 @@ class aws_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s is in stopped state" % (instance_id))
                 nodeaction.wait_for_unknown_status(node, timeout)
             except Exception as e:
-                logging.error("Failed to stop node instance. Encountered following exception: %s. "
-                              "Test Failed" % (e))
+                logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e))
                 logging.error("node_stop_scenario injection failed!")
                 sys.exit(1)
 
@@ -118,8 +104,9 @@ class aws_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s has been terminated" % (instance_id))
                 logging.info("node_termination_scenario has been successfuly injected!")
             except Exception as e:
-                logging.error("Failed to terminate node instance. Encountered following exception:"
-                              " %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to terminate node instance. Encountered following exception:" " %s. Test Failed" % (e)
+                )
                 logging.error("node_termination_scenario injection failed!")
                 sys.exit(1)
 
@@ -136,7 +123,8 @@ class aws_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s has been rebooted" % (instance_id))
                 logging.info("node_reboot_scenario has been successfuly injected!")
             except Exception as e:
-                logging.error("Failed to reboot node instance. Encountered following exception:"
-                              " %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e)
+                )
                 logging.error("node_reboot_scenario injection failed!")
                 sys.exit(1)
@@ -12,13 +12,13 @@ import yaml
 
 class Azure:
     def __init__(self):
-        logging.info('azure ' + str(self))
+        logging.info("azure " + str(self))
         # Acquire a credential object using CLI-based authentication.
         credentials = DefaultAzureCredential()
         logging.info("credential " + str(credentials))
         az_account = runcommand.invoke("az account list -o yaml")
         az_account_yaml = yaml.load(az_account, Loader=yaml.FullLoader)
-        subscription_id = az_account_yaml[0]['id']
+        subscription_id = az_account_yaml[0]["id"]
         self.compute_client = ComputeManagementClient(credentials, subscription_id)
 
     # Get the instance ID of the node
@@ -49,8 +49,7 @@ class Azure:
         self.compute_client.virtual_machines.begin_restart(group_name, vm_name)
 
     def get_vm_status(self, resource_group, vm_name):
-        statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name)\
-            .statuses
+        statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name).statuses
         status = len(statuses) >= 2 and statuses[1]
         return status
 
@@ -58,7 +57,7 @@ class Azure:
     def wait_until_running(self, resource_group, vm_name, timeout):
         time_counter = 0
         status = self.get_vm_status(resource_group, vm_name)
-        while status and status.code != 'PowerState/running':
+        while status and status.code != "PowerState/running":
             status = self.get_vm_status(resource_group, vm_name)
             logging.info("Vm %s is still not running, sleeping for 5 seconds" % vm_name)
             time.sleep(5)
@@ -71,7 +70,7 @@ class Azure:
     def wait_until_stopped(self, resource_group, vm_name, timeout):
         time_counter = 0
         status = self.get_vm_status(resource_group, vm_name)
-        while status and status.code != 'PowerState/stopped':
+        while status and status.code != "PowerState/stopped":
             status = self.get_vm_status(resource_group, vm_name)
             logging.info("Vm %s is still stopping, sleeping for 5 seconds" % vm_name)
             time.sleep(5)
@@ -82,13 +81,11 @@ class Azure:
 
     # Wait until the node instance is terminated
     def wait_until_terminated(self, resource_group, vm_name):
-        statuses = self.compute_client.virtual_machines.instance_view(resource_group,
-                                                                      vm_name).statuses[0]
+        statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name).statuses[0]
         logging.info("vm status " + str(statuses))
         while statuses.code == "ProvisioningState/deleting":
             try:
-                statuses = self.compute_client.virtual_machines.instance_view(resource_group,
-                                                                              vm_name).statuses[0]
+                statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name).statuses[0]
                 logging.info("Vm %s is still deleting, waiting 10 seconds" % vm_name)
                 time.sleep(10)
             except Exception:
@@ -114,8 +111,9 @@ class azure_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s is in running state" % node)
                 logging.info("node_start_scenario has been successfully injected!")
             except Exception as e:
-                logging.error("Failed to start node instance. Encountered following "
-                              "exception: %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e)
+                )
                 logging.error("node_start_scenario injection failed!")
                 sys.exit(1)
 
@@ -131,8 +129,7 @@ class azure_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s is in stopped state" % node)
                 nodeaction.wait_for_unknown_status(node, timeout)
             except Exception as e:
-                logging.error("Failed to stop node instance. Encountered following exception: %s. "
-                              "Test Failed" % e)
+                logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % e)
                 logging.error("node_stop_scenario injection failed!")
                 sys.exit(1)
 
@@ -142,8 +139,7 @@ class azure_node_scenarios(abstract_node_scenarios):
             try:
                 logging.info("Starting node_termination_scenario injection")
                 resource_group = self.azure.get_instance_id(node)
-                logging.info("Terminating the node %s with instance ID: %s "
-                             % (node, resource_group))
+                logging.info("Terminating the node %s with instance ID: %s " % (node, resource_group))
                 self.azure.terminate_instances(resource_group, node)
                 self.azure.wait_until_terminated(resource_group, node)
                 for _ in range(timeout):
@@ -155,8 +151,9 @@ class azure_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s has been terminated" % node)
                 logging.info("node_termination_scenario has been successfully injected!")
             except Exception as e:
-                logging.error("Failed to terminate node instance. Encountered following exception:"
-                              " %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to terminate node instance. Encountered following exception:" " %s. Test Failed" % (e)
+                )
                 logging.error("node_termination_scenario injection failed!")
                 sys.exit(1)
 
@@ -173,7 +170,8 @@ class azure_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s has been rebooted" % (node))
                 logging.info("node_reboot_scenario has been successfully injected!")
             except Exception as e:
-                logging.error("Failed to reboot node instance. Encountered following exception:"
-                              " %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e)
+                )
                 logging.error("node_reboot_scenario injection failed!")
                 sys.exit(1)
@@ -11,8 +11,7 @@ def get_node(node_name, label_selector):
     if node_name in kubecli.list_killable_nodes():
         return node_name
     else:
-        logging.info("Node with provided node_name does not exist or the node might "
-                     "be in NotReady state.")
+        logging.info("Node with provided node_name does not exist or the node might " "be in NotReady state.")
     nodes = kubecli.list_killable_nodes(label_selector)
     if not nodes:
         raise Exception("Ready nodes with the provided label selector do not exist")
@@ -24,8 +23,7 @@ def get_node(node_name, label_selector):
 
 # Wait till node status becomes Ready
 def wait_for_ready_status(node, timeout):
-    runcommand.invoke("kubectl wait --for=condition=Ready "
-                      "node/" + node + " --timeout=" + str(timeout) + "s")
+    runcommand.invoke("kubectl wait --for=condition=Ready " "node/" + node + " --timeout=" + str(timeout) + "s")
 
 
 # Wait till node status becomes NotReady
@@ -40,9 +38,9 @@ def wait_for_unknown_status(node, timeout):
 
 # Get the ip of the cluster node
 def get_node_ip(node):
-    return runcommand.invoke("kubectl get node %s -o "
-                             "jsonpath='{.status.addresses[?(@.type==\"InternalIP\")].address}'"
-                             % (node))
+    return runcommand.invoke(
+        "kubectl get node %s -o " "jsonpath='{.status.addresses[?(@.type==\"InternalIP\")].address}'" % (node)
+    )
 
 
 def check_service_status(node, service, ssh_private_key, timeout):
@@ -55,18 +53,20 @@ def check_service_status(node, service, ssh_private_key, timeout):
             time.sleep(sleeper)
             i += sleeper
             logging.info("Trying to ssh to instance: %s" % (node))
-            connection = ssh.connect(node, username='root', key_filename=ssh_private_key,
-                                     timeout=800, banner_timeout=400)
+            connection = ssh.connect(
+                node, username="root", key_filename=ssh_private_key, timeout=800, banner_timeout=400
+            )
            if connection is None:
                 break
         except Exception:
             pass
     for service_name in service:
         logging.info("Checking status of Service: %s" % (service_name))
-        stdin, stdout, stderr = ssh.exec_command("systemctl status %s | grep '^ Active' "
-                                                 "| awk '{print $2}'" % (service_name))
+        stdin, stdout, stderr = ssh.exec_command(
+            "systemctl status %s | grep '^ Active' " "| awk '{print $2}'" % (service_name)
+        )
         service_status = stdout.readlines()[0]
         logging.info("Status of service %s is %s \n" % (service_name, service_status.strip()))
-        if(service_status.strip() != "active"):
+        if service_status.strip() != "active":
             logging.error("Service %s is in %s state" % (service_name, service_status.strip()))
             ssh.close()
@@ -12,58 +12,50 @@ import kraken.invoke.command as runcommand
 class GCP:
     def __init__(self):
 
-        self.project = runcommand.invoke('gcloud config get-value project').split('/n')[0].strip()
+        self.project = runcommand.invoke("gcloud config get-value project").split("/n")[0].strip()
         logging.info("project " + str(self.project) + "!")
         credentials = GoogleCredentials.get_application_default()
-        self.client = discovery.build('compute', 'v1', credentials=credentials,
-                                      cache_discovery=False)
+        self.client = discovery.build("compute", "v1", credentials=credentials, cache_discovery=False)
 
     # Get the instance ID of the node
     def get_instance_id(self, node):
         zone_request = self.client.zones().list(project=self.project)
         while zone_request is not None:
             zone_response = zone_request.execute()
-            for zone in zone_response['items']:
-                instances_request = self.client.instances().list(project=self.project,
-                                                                  zone=zone['name'])
+            for zone in zone_response["items"]:
+                instances_request = self.client.instances().list(project=self.project, zone=zone["name"])
                 while instances_request is not None:
                     instance_response = instances_request.execute()
                     if "items" in instance_response.keys():
-                        for instance in instance_response['items']:
-                            if instance['name'] in node:
-                                return instance['name'], zone['name']
+                        for instance in instance_response["items"]:
+                            if instance["name"] in node:
+                                return instance["name"], zone["name"]
                     instances_request = self.client.zones().list_next(
-                        previous_request=instances_request,
-                        previous_response=instance_response)
-            zone_request = self.client.zones().list_next(previous_request=zone_request,
-                                                         previous_response=zone_response)
-        logging.info('no instances ')
+                        previous_request=instances_request, previous_response=instance_response
+                    )
+            zone_request = self.client.zones().list_next(previous_request=zone_request, previous_response=zone_response)
+        logging.info("no instances ")
 
     # Start the node instance
     def start_instances(self, zone, instance_id):
-        self.client.instances().start(project=self.project, zone=zone, instance=instance_id)\
-            .execute()
+        self.client.instances().start(project=self.project, zone=zone, instance=instance_id).execute()
 
     # Stop the node instance
     def stop_instances(self, zone, instance_id):
-        self.client.instances().stop(project=self.project, zone=zone, instance=instance_id)\
-            .execute()
+        self.client.instances().stop(project=self.project, zone=zone, instance=instance_id).execute()
 
     # Start the node instance
     def suspend_instances(self, zone, instance_id):
-        self.client.instances().suspend(project=self.project, zone=zone, instance=instance_id)\
-            .execute()
+        self.client.instances().suspend(project=self.project, zone=zone, instance=instance_id).execute()
 
     # Terminate the node instance
     def terminate_instances(self, zone, instance_id):
-        self.client.instances().delete(project=self.project, zone=zone, instance=instance_id)\
-            .execute()
+        self.client.instances().delete(project=self.project, zone=zone, instance=instance_id).execute()
 
     # Reboot the node instance
     def reboot_instances(self, zone, instance_id):
-        response = self.client.instances().reset(project=self.project, zone=zone,
-                                                 instance=instance_id).execute()
-        logging.info('response reboot ' + str(response))
+        response = self.client.instances().reset(project=self.project, zone=zone, instance=instance_id).execute()
+        logging.info("response reboot " + str(response))
 
     # Get instance status
     def get_instance_status(self, zone, instance_id, expected_status, timeout):
@@ -72,10 +64,9 @@ class GCP:
         i = 0
         sleeper = 5
         while i <= timeout:
-            instStatus = self.client.instances().get(project=self.project, zone=zone,
-                                                     instance=instance_id).execute()
-            logging.info("Status of vm " + str(instStatus['status']))
-            if instStatus['status'] == expected_status:
+            instStatus = self.client.instances().get(project=self.project, zone=zone, instance=instance_id).execute()
+            logging.info("Status of vm " + str(instStatus["status"]))
+            if instStatus["status"] == expected_status:
                 return True
             time.sleep(sleeper)
             i += sleeper
@@ -83,15 +74,15 @@ class GCP:
 
     # Wait until the node instance is suspended
     def wait_until_suspended(self, zone, instance_id, timeout):
-        self.get_instance_status(zone, instance_id, 'SUSPENDED', timeout)
+        self.get_instance_status(zone, instance_id, "SUSPENDED", timeout)
 
     # Wait until the node instance is running
     def wait_until_running(self, zone, instance_id, timeout):
-        self.get_instance_status(zone, instance_id, 'RUNNING', timeout)
+        self.get_instance_status(zone, instance_id, "RUNNING", timeout)
 
     # Wait until the node instance is stopped
     def wait_until_stopped(self, zone, instance_id, timeout):
-        self.get_instance_status(zone, instance_id, 'TERMINATED', timeout)
+        self.get_instance_status(zone, instance_id, "TERMINATED", timeout)
 
     # Wait until the node instance is terminated
     def wait_until_terminated(self, zone, instance_id, timeout):
@@ -99,9 +90,10 @@ class GCP:
             i = 0
             sleeper = 5
             while i <= timeout:
-                instStatus = self.client.instances().get(project=self.project, zone=zone,
-                                                         instance=instance_id).execute()
-                logging.info("Status of vm " + str(instStatus['status']))
+                instStatus = (
+                    self.client.instances().get(project=self.project, zone=zone, instance=instance_id).execute()
+                )
+                logging.info("Status of vm " + str(instStatus["status"]))
                 time.sleep(sleeper)
         except Exception as e:
             logging.info("here " + str(e))
@@ -125,14 +117,15 @@ class gcp_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s is in running state" % instance_id)
                 logging.info("node_start_scenario has been successfully injected!")
             except Exception as e:
-                logging.error("Failed to start node instance. Encountered following "
-                              "exception: %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e)
+                )
                 logging.error("node_start_scenario injection failed!")
                 sys.exit(1)
 
     # Node scenario to stop the node
     def node_stop_scenario(self, instance_kill_count, node, timeout):
-        logging.info('stop scenario')
+        logging.info("stop scenario")
         for _ in range(instance_kill_count):
             try:
                 logging.info("Starting node_stop_scenario injection")
@@ -143,8 +136,7 @@ class gcp_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s is in stopped state" % instance_id)
                 nodeaction.wait_for_unknown_status(node, timeout)
             except Exception as e:
-                logging.error("Failed to stop node instance. Encountered following exception: %s. "
-                              "Test Failed" % (e))
+                logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e))
                 logging.error("node_stop_scenario injection failed!")
                 sys.exit(1)
 
@@ -166,8 +158,9 @@ class gcp_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s has been terminated" % instance_id)
                 logging.info("node_termination_scenario has been successfuly injected!")
             except Exception as e:
-                logging.error("Failed to terminate node instance. Encountered following exception:"
-                              " %s. Test Failed" % e)
+                logging.error(
+                    "Failed to terminate node instance. Encountered following exception:" " %s. Test Failed" % e
+                )
                 logging.error("node_termination_scenario injection failed!")
                 sys.exit(1)
 
@@ -183,7 +176,8 @@ class gcp_node_scenarios(abstract_node_scenarios):
                 logging.info("Node with instance ID: %s has been rebooted" % instance_id)
                 logging.info("node_reboot_scenario has been successfuly injected!")
             except Exception as e:
-                logging.error("Failed to reboot node instance. Encountered following exception:"
-                              " %s. Test Failed" % (e))
+                logging.error(
+                    "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e)
+                )
                 logging.error("node_reboot_scenario injection failed!")
                 sys.exit(1)
@@ -13,20 +13,16 @@ class general_node_scenarios(abstract_node_scenarios):
 
     # Node scenario to start the node
     def node_start_scenario(self, instance_kill_count, node, timeout):
-        logging.info("Node start is not set up yet for this cloud type, "
-                     "no action is going to be taken")
+        logging.info("Node start is not set up yet for this cloud type, " "no action is going to be taken")
 
     # Node scenario to stop the node
    def node_stop_scenario(self, instance_kill_count, node, timeout):
-        logging.info("Node stop is not set up yet for this cloud type,"
-                     " no action is going to be taken")
+        logging.info("Node stop is not set up yet for this cloud type," " no action is going to be taken")
 
     # Node scenario to terminate the node
     def node_termination_scenario(self, instance_kill_count, node, timeout):
-        logging.info("Node termination is not set up yet for this cloud type, "
-                     "no action is going to be taken")
+        logging.info("Node termination is not set up yet for this cloud type, " "no action is going to be taken")
 
     # Node scenario to reboot the node
     def node_reboot_scenario(self, instance_kill_count, node, timeout):
-        logging.info("Node reboot is not set up yet for this cloud type,"
-                     " no action is going to be taken")
+        logging.info("Node reboot is not set up yet for this cloud type," " no action is going to be taken")
@@ -39,12 +39,12 @@ class OPENSTACKCLOUD:
i = 0
sleeper = 1
while i <= timeout:
-instStatus = runcommand.invoke("openstack server show %s | tr -d ' ' |"
-"grep '^|status' |"
-"cut -d '|' -f3 | tr -d '\n'" % (node))
+instStatus = runcommand.invoke(
+"openstack server show %s | tr -d ' ' |" "grep '^|status' |" "cut -d '|' -f3 | tr -d '\n'" % (node)
+)
logging.info("instance status is %s" % (instStatus))
logging.info("expected status is %s" % (expected_status))
-if (instStatus.strip() == expected_status):
+if instStatus.strip() == expected_status:
logging.info("instance status has reached desired status %s" % (instStatus))
return True
time.sleep(sleeper)
@@ -53,7 +53,7 @@ class OPENSTACKCLOUD:
# Get the openstack instance name
def get_openstack_nodename(self, os_node_ip):
server_list = runcommand.invoke("openstack server list | grep %s" % (os_node_ip))
-list_of_servers = server_list.split('\n')
+list_of_servers = server_list.split("\n")
for item in list_of_servers:
items = item.split("|")
counter = 0
@@ -63,7 +63,7 @@ class OPENSTACKCLOUD:
logging.info("Openstack node name is %s " % (node_name))
counter += 1
continue
-item_list = i.split('=')
+item_list = i.split("=")
if len(item_list) == 2 and item_list[-1].strip() == os_node_ip:
return node_name
counter += 1
@@ -87,8 +87,9 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.info("Node with instance ID: %s is in running state" % (node))
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
-logging.error("Failed to start node instance. Encountered following "
-"exception: %s. Test Failed" % (e))
+logging.error(
+"Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e)
+)
logging.error("node_start_scenario injection failed!")
sys.exit(1)

@@ -105,8 +106,7 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.info("Node with instance name: %s is in stopped state" % (node))
nodeaction.wait_for_ready_status(node, timeout)
except Exception as e:
-logging.error("Failed to stop node instance. Encountered following exception: %s. "
-"Test Failed" % (e))
+logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e))
logging.error("node_stop_scenario injection failed!")
sys.exit(1)

@@ -124,8 +124,9 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.info("Node with instance name: %s has been rebooted" % (node))
logging.info("node_reboot_scenario has been successfuly injected!")
except Exception as e:
-logging.error("Failed to reboot node instance. Encountered following exception:"
-" %s. Test Failed" % (e))
+logging.error(
+"Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e)
+)
logging.error("node_reboot_scenario injection failed!")
sys.exit(1)

@@ -141,8 +142,9 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.info("Helper node with IP: %s is in running state" % (node_ip))
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
-logging.error("Failed to start node instance. Encountered following "
-"exception: %s. Test Failed" % (e))
+logging.error(
+"Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e)
+)
logging.error("helper_node_start_scenario injection failed!")
sys.exit(1)

@@ -157,8 +159,7 @@ class openstack_node_scenarios(abstract_node_scenarios):
self.openstackcloud.wait_until_stopped(openstack_node_name)
logging.info("Helper node with IP: %s is in stopped state" % (node_ip))
except Exception as e:
-logging.error("Failed to stop node instance. Encountered following exception: %s. "
-"Test Failed" % (e))
+logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e))
logging.error("helper_node_stop_scenario injection failed!")
sys.exit(1)

@@ -169,7 +170,6 @@ class openstack_node_scenarios(abstract_node_scenarios):
logging.info("Service status checked on %s" % (node_ip))
logging.info("Check service status is successfuly injected!")
except Exception as e:
-logging.error("Failed to check service status. Encountered following exception:"
-" %s. Test Failed" % (e))
+logging.error("Failed to check service status. Encountered following exception:" " %s. Test Failed" % (e))
logging.error("helper_node_service_status injection failed!")
sys.exit(1)
@@ -12,7 +12,7 @@ def setup(repo):
# delete repo to clone the latest copy if exists
subprocess.run(delete_repo, shell=True, universal_newlines=True, timeout=45)
# clone the repo
-git.Repo.clone_from(repo, '/tmp/performance-dashboards')
+git.Repo.clone_from(repo, "/tmp/performance-dashboards")
# deploy performance dashboards
subprocess.run(command, shell=True, universal_newlines=True)
except Exception as e:
@@ -23,52 +23,52 @@ def pod_exec(pod_name, command, namespace):


def node_debug(node_name, command):
-response = runcommand.invoke("oc debug node/" + node_name + ' -- chroot /host ' + command)
+response = runcommand.invoke("oc debug node/" + node_name + " -- chroot /host " + command)
return response


def skew_time(scenario):
skew_command = "date --set "
-if scenario['action'] == "skew_date":
+if scenario["action"] == "skew_date":
skewed_date = "00-01-01"
skew_command += skewed_date
-elif scenario['action'] == "skew_time":
+elif scenario["action"] == "skew_time":
skewed_time = "01:01:01"
skew_command += skewed_time
if "node" in scenario["object_type"]:
node_names = []
-if "object_name" in scenario.keys() and scenario['object_name']:
+if "object_name" in scenario.keys() and scenario["object_name"]:
-node_names = scenario['object_name']
+node_names = scenario["object_name"]
-elif "label_selector" in scenario.keys() and scenario['label_selector']:
+elif "label_selector" in scenario.keys() and scenario["label_selector"]:
-node_names = kubecli.list_nodes(scenario['label_selector'])
+node_names = kubecli.list_nodes(scenario["label_selector"])

for node in node_names:
node_debug(node, skew_command)
logging.info("Reset date/time on node " + str(node))
return "node", node_names

-elif "pod" in scenario['object_type']:
+elif "pod" in scenario["object_type"]:
pod_names = []
-if "object_name" in scenario.keys() and scenario['object_name']:
+if "object_name" in scenario.keys() and scenario["object_name"]:
-for name in scenario['object_name']:
+for name in scenario["object_name"]:
if "namespace" not in scenario.keys():
logging.error("Need to set namespace when using pod name")
sys.exit(1)
-pod_names.append([name, scenario['namespace']])
+pod_names.append([name, scenario["namespace"]])
-elif "label_selector" in scenario.keys() and scenario['label_selector']:
+elif "label_selector" in scenario.keys() and scenario["label_selector"]:
-pod_names = kubecli.get_all_pods(scenario['label_selector'])
+pod_names = kubecli.get_all_pods(scenario["label_selector"])
-elif "namespace" in scenario.keys() and scenario['namespace']:
+elif "namespace" in scenario.keys() and scenario["namespace"]:
-pod_names = kubecli.list_pods(scenario['namespace'])
+pod_names = kubecli.list_pods(scenario["namespace"])
counter = 0
for pod_name in pod_names:
-pod_names[counter] = [pod_name, scenario['namespace']]
+pod_names[counter] = [pod_name, scenario["namespace"]]
counter += 1

for pod in pod_names:
if len(pod) > 1:
pod_exec(pod[0], skew_command, pod[1])
else:
-pod_exec(pod, skew_command, scenario['namespace'])
+pod_exec(pod, skew_command, scenario["namespace"])
logging.info("Reset date/time on pod " + str(pod[0]))
return "pod", pod_names

@@ -76,8 +76,7 @@ def skew_time(scenario):
# From kubectl/oc command get time output
def parse_string_date(obj_datetime):
try:
-date_line = re.search(r'[a-zA-Z0-9_() .]*\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \w{3} '
-r'\d{4}\W*', obj_datetime)
+date_line = re.search(r"[a-zA-Z0-9_() .]*\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \w{3} " r"\d{4}\W*", obj_datetime)
return date_line.group().strip()
except Exception:
return ""
@@ -87,7 +86,7 @@ def parse_string_date(obj_datetime):
def string_to_date(obj_datetime):
obj_datetime = parse_string_date(obj_datetime)
try:
-date_time_obj = datetime.datetime.strptime(obj_datetime, '%a %b %d %H:%M:%S %Z %Y')
+date_time_obj = datetime.datetime.strptime(obj_datetime, "%a %b %d %H:%M:%S %Z %Y")
return date_time_obj
except Exception:
return datetime.datetime(datetime.MINYEAR, 1, 1)
@@ -105,14 +104,12 @@ def check_date_time(object_type, names):
counter = 0
while not first_date_time < node_datetime < datetime.datetime.utcnow():
time.sleep(10)
-logging.info("Date/time on node %s still not reset, waiting 10 seconds and retrying"
-% node_name)
+logging.info("Date/time on node %s still not reset, waiting 10 seconds and retrying" % node_name)
node_datetime_string = node_debug(node_name, skew_command)
node_datetime = string_to_date(node_datetime_string)
counter += 1
if counter > max_retries:
-logging.error("Date and time in node %s didn't reset properly"
-% node_name)
+logging.error("Date and time in node %s didn't reset properly" % node_name)
not_reset.append(node_name)
break
if counter < max_retries:
@@ -125,15 +122,13 @@ def check_date_time(object_type, names):
pod_datetime = string_to_date(pod_datetime_string)
while not first_date_time < pod_datetime < datetime.datetime.utcnow():
time.sleep(10)
-logging.info("Date/time on pod %s still not reset, waiting 10 seconds and retrying"
-% pod_name[0])
+logging.info("Date/time on pod %s still not reset, waiting 10 seconds and retrying" % pod_name[0])
first_date_time = datetime.datetime.utcnow()
pod_datetime = pod_exec(pod_name[0], skew_command, pod_name[1])
pod_datetime = string_to_date(pod_datetime)
counter += 1
if counter > max_retries:
-logging.error("Date and time in pod %s didn't reset properly"
-% pod_name[0])
+logging.error("Date and time in pod %s didn't reset properly" % pod_name[0])
not_reset.append(pod_name[0])
break
if counter < max_retries:
@@ -14,4 +14,3 @@ python-openstackclient
gitpython
paramiko
setuptools
-tox
184
run_kraken.py
@@ -25,22 +25,24 @@ node_general = False

# Get the node scenarios object of specfied cloud type
def get_node_scenario_object(node_scenario):
-if "cloud_type" not in node_scenario.keys() or node_scenario['cloud_type'] == "generic":
+if "cloud_type" not in node_scenario.keys() or node_scenario["cloud_type"] == "generic":
global node_general
node_general = True
return general_node_scenarios()
-if node_scenario['cloud_type'] == 'aws':
+if node_scenario["cloud_type"] == "aws":
return aws_node_scenarios()
-elif node_scenario['cloud_type'] == 'gcp':
+elif node_scenario["cloud_type"] == "gcp":
return gcp_node_scenarios()
-elif node_scenario['cloud_type'] == 'openstack':
+elif node_scenario["cloud_type"] == "openstack":
return openstack_node_scenarios()
-elif node_scenario['cloud_type'] == 'azure' or node_scenario['cloud_type'] == 'az':
+elif node_scenario["cloud_type"] == "azure" or node_scenario["cloud_type"] == "az":
return azure_node_scenarios()
else:
-logging.error("Cloud type " + node_scenario['cloud_type'] + " is not currently supported; "
+logging.error(
+"Cloud type " + node_scenario["cloud_type"] + " is not currently supported; "
"try using 'generic' if wanting to stop/start kubelet or fork bomb on any "
-"cluster")
+"cluster"
+)
sys.exit(1)


@@ -77,19 +79,23 @@ def inject_node_scenario(action, node_scenario, node_scenario_object):
elif action == "node_crash_scenario":
node_scenario_object.node_crash_scenario(instance_kill_count, node, timeout)
elif action == "stop_start_helper_node_scenario":
-if node_scenario['cloud_type'] != "openstack":
+if node_scenario["cloud_type"] != "openstack":
-logging.error("Scenario: " + action + " is not supported for "
-"cloud type " + node_scenario['cloud_type'] + ", skipping action")
+logging.error(
+"Scenario: " + action + " is not supported for "
+"cloud type " + node_scenario["cloud_type"] + ", skipping action"
+)
else:
-if not node_scenario['helper_node_ip']:
+if not node_scenario["helper_node_ip"]:
logging.error("Helper node IP address is not provided")
sys.exit(1)
node_scenario_object.helper_node_stop_start_scenario(
-instance_kill_count, node_scenario['helper_node_ip'], timeout)
+instance_kill_count, node_scenario["helper_node_ip"], timeout
+)
node_scenario_object.helper_node_service_status(
-node_scenario['helper_node_ip'], service, ssh_private_key, timeout)
+node_scenario["helper_node_ip"], service, ssh_private_key, timeout
+)
else:
-logging.info('There is no node action that matches %s, skipping scenario' % action)
+logging.info("There is no node action that matches %s, skipping scenario" % action)


# Get cerberus status
@@ -101,15 +107,16 @@ def cerberus_integration(config):
logging.error("url where Cerberus publishes True/False signal is not provided.")
sys.exit(1)
cerberus_status = requests.get(cerberus_url).content
-cerberus_status = True if cerberus_status == b'True' else False
+cerberus_status = True if cerberus_status == b"True" else False
if not cerberus_status:
-logging.error("Received a no-go signal from Cerberus, looks like "
+logging.error(
+"Received a no-go signal from Cerberus, looks like "
"the cluster is unhealthy. Please check the Cerberus "
-"report for more details. Test failed.")
+"report for more details. Test failed."
+)
sys.exit(1)
else:
-logging.info("Received a go signal from Ceberus, the cluster is healthy. "
-"Test passed.")
+logging.info("Received a go signal from Ceberus, the cluster is healthy. " "Test passed.")
return cerberus_status


@@ -118,35 +125,36 @@ def publish_kraken_status(config, failed_post_scenarios):
cerberus_status = cerberus_integration(config)
if not cerberus_status:
if failed_post_scenarios:
-if config['kraken']['exit_on_failure']:
+if config["kraken"]["exit_on_failure"]:
-logging.info("Cerberus status is not healthy and post action scenarios "
-"are still failing, exiting kraken run")
+logging.info(
+"Cerberus status is not healthy and post action scenarios " "are still failing, exiting kraken run"
+)
sys.exit(1)
else:
-logging.info("Cerberus status is not healthy and post action scenarios "
-"are still failing")
+logging.info("Cerberus status is not healthy and post action scenarios " "are still failing")
else:
if failed_post_scenarios:
-if config['kraken']['exit_on_failure']:
+if config["kraken"]["exit_on_failure"]:
-logging.info("Cerberus status is healthy but post action scenarios "
-"are still failing, exiting kraken run")
+logging.info(
+"Cerberus status is healthy but post action scenarios " "are still failing, exiting kraken run"
+)
sys.exit(1)
else:
-logging.info("Cerberus status is healthy but post action scenarios "
-"are still failing")
+logging.info("Cerberus status is healthy but post action scenarios " "are still failing")


def run_post_action(kubeconfig_path, scenario, pre_action_output=""):

if scenario.endswith(".yaml") or scenario.endswith(".yml"):
-action_output = runcommand.invoke("powerfulseal autonomous "
+action_output = runcommand.invoke(
+"powerfulseal autonomous "
"--use-pod-delete-instead-of-ssh-kill"
" --policy-file %s --kubeconfig %s --no-cloud"
-" --inventory-kubernetes --headless"
-% (scenario, kubeconfig_path))
+" --inventory-kubernetes --headless" % (scenario, kubeconfig_path)
+)
# read output to make sure no error
if "ERROR" in action_output:
-action_output.split("ERROR")[1].split('\n')[0]
+action_output.split("ERROR")[1].split("\n")[0]
if not pre_action_output:
logging.info("Powerful seal pre action check failed for " + str(scenario))
return False
@@ -159,7 +167,7 @@ def run_post_action(kubeconfig_path, scenario, pre_action_output=""):
if pre_action_output == action_output:
logging.info(scenario + " post action checks passed")
else:
-logging.info(scenario + ' post action response did not match pre check output')
+logging.info(scenario + " post action response did not match pre check output")
return False
elif scenario != "":
# invoke custom bash script
@@ -168,7 +176,7 @@ def run_post_action(kubeconfig_path, scenario, pre_action_output=""):
if pre_action_output == action_output:
logging.info(scenario + " post action checks passed")
else:
-logging.info(scenario + ' post action response did not match pre check output')
+logging.info(scenario + " post action response did not match pre check output")
return False

return action_output
@@ -178,12 +186,11 @@ def run_post_action(kubeconfig_path, scenario, pre_action_output=""):
def post_actions(kubeconfig_path, scenario, failed_post_scenarios, pre_action_output):

for failed_scenario in failed_post_scenarios:
-post_action_output = run_post_action(kubeconfig_path,
-failed_scenario[0], failed_scenario[1])
+post_action_output = run_post_action(kubeconfig_path, failed_scenario[0], failed_scenario[1])
if post_action_output is not False:
failed_post_scenarios.remove(failed_scenario)
else:
-logging.info('Post action scenario ' + str(failed_scenario) + "is still failing")
+logging.info("Post action scenario " + str(failed_scenario) + "is still failing")

# check post actions
if len(scenario) > 1:
@@ -201,11 +208,12 @@ def pod_scenarios(scenarios_list, config, failed_post_scenarios):
if len(pod_scenario) > 1:
pre_action_output = run_post_action(kubeconfig_path, pod_scenario[1])
else:
-pre_action_output = ''
+pre_action_output = ""
-scenario_logs = runcommand.invoke("powerfulseal autonomous --use-pod-delete-instead-"
+scenario_logs = runcommand.invoke(
+"powerfulseal autonomous --use-pod-delete-instead-"
"of-ssh-kill --policy-file %s --kubeconfig %s "
-"--no-cloud --inventory-kubernetes --headless"
-% (pod_scenario[0], kubeconfig_path))
+"--no-cloud --inventory-kubernetes --headless" % (pod_scenario[0], kubeconfig_path)
+)

# Display pod scenario logs/actions
print(scenario_logs)
@@ -214,23 +222,23 @@ def pod_scenarios(scenarios_list, config, failed_post_scenarios):
logging.info("Waiting for the specified duration: %s" % (wait_duration))
time.sleep(wait_duration)

-failed_post_scenarios = post_actions(kubeconfig_path, pod_scenario,
-failed_post_scenarios, pre_action_output)
+failed_post_scenarios = post_actions(
+kubeconfig_path, pod_scenario, failed_post_scenarios, pre_action_output
+)
publish_kraken_status(config, failed_post_scenarios)
except Exception as e:
-logging.error("Failed to run scenario: %s. Encountered the following "
-"exception: %s" % (pod_scenario[0], e))
+logging.error("Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e))
return failed_post_scenarios


def node_scenarios(scenarios_list, config):
for node_scenario_config in scenarios_list:
-with open(node_scenario_config, 'r') as f:
+with open(node_scenario_config, "r") as f:
node_scenario_config = yaml.full_load(f)
-for node_scenario in node_scenario_config['node_scenarios']:
+for node_scenario in node_scenario_config["node_scenarios"]:
node_scenario_object = get_node_scenario_object(node_scenario)
-if node_scenario['actions']:
+if node_scenario["actions"]:
-for action in node_scenario['actions']:
+for action in node_scenario["actions"]:
inject_node_scenario(action, node_scenario, node_scenario_object)
logging.info("Waiting for the specified duration: %s" % (wait_duration))
time.sleep(wait_duration)
@@ -240,13 +248,13 @@ def node_scenarios(scenarios_list, config):

def time_scenarios(scenarios_list, config):
for time_scenario_config in scenarios_list:
-with open(time_scenario_config, 'r') as f:
+with open(time_scenario_config, "r") as f:
scenario_config = yaml.full_load(f)
-for time_scenario in scenario_config['time_scenarios']:
+for time_scenario in scenario_config["time_scenarios"]:
object_type, object_names = time_actions.skew_time(time_scenario)
not_reset = time_actions.check_date_time(object_type, object_names)
if len(not_reset) > 0:
-logging.info('Object times were not reset')
+logging.info("Object times were not reset")
logging.info("Waiting for the specified duration: %s" % (wait_duration))
time.sleep(wait_duration)
publish_kraken_status(config, not_reset)
@@ -266,36 +274,31 @@ def litmus_scenarios(scenarios_list, config, litmus_namespaces, litmus_uninstall
logging.info("opened yaml" + str(item))
yaml_item = list(yaml.safe_load_all(f))[0]

-if yaml_item['kind'] == "ChaosEngine":
+if yaml_item["kind"] == "ChaosEngine":
-engine_name = yaml_item['metadata']['name']
+engine_name = yaml_item["metadata"]["name"]
-namespace = yaml_item['metadata']['namespace']
+namespace = yaml_item["metadata"]["namespace"]
litmus_namespaces.append(namespace)
-experiment_names = yaml_item['spec']['experiments']
+experiment_names = yaml_item["spec"]["experiments"]
for expr in experiment_names:
-expr_name = expr['name']
+expr_name = expr["name"]
-experiment_result = common_litmus.check_experiment(engine_name,
-expr_name,
-namespace)
+experiment_result = common_litmus.check_experiment(engine_name, expr_name, namespace)
if experiment_result:
-logging.info("Scenario: %s has been successfully injected!"
-% item)
+logging.info("Scenario: %s has been successfully injected!" % item)
else:
-logging.info("Scenario: %s was not successfully injected!"
-% item)
+logging.info("Scenario: %s was not successfully injected!" % item)
if litmus_uninstall:
for l_item in l_scenario:
-logging.info('item ' + str(l_item))
+logging.info("item " + str(l_item))
runcommand.invoke("kubectl delete -f %s" % l_item)
if litmus_uninstall:
for item in l_scenario:
-logging.info('item ' + str(item))
+logging.info("item " + str(item))
runcommand.invoke("kubectl delete -f %s" % item)
logging.info("Waiting for the specified duration: %s" % wait_duration)
time.sleep(wait_duration)
cerberus_integration(config)
except Exception as e:
-logging.error("Failed to run litmus scenario: %s. Encountered "
-"the following exception: %s" % (item, e))
+logging.error("Failed to run litmus scenario: %s. Encountered " "the following exception: %s" % (item, e))
return litmus_namespaces

@@ -307,18 +310,20 @@ def main(cfg):

# Parse and read the config
if os.path.isfile(cfg):
-with open(cfg, 'r') as f:
+with open(cfg, "r") as f:
config = yaml.full_load(f)
global kubeconfig_path, wait_duration
kubeconfig_path = config["kraken"].get("kubeconfig_path", "")
chaos_scenarios = config["kraken"].get("chaos_scenarios", [])
-litmus_version = config['kraken'].get("litmus_version", 'v1.9.1')
+litmus_version = config["kraken"].get("litmus_version", "v1.9.1")
-litmus_uninstall = config['kraken'].get("litmus_uninstall", False)
+litmus_uninstall = config["kraken"].get("litmus_uninstall", False)
wait_duration = config["tunings"].get("wait_duration", 60)
iterations = config["tunings"].get("iterations", 1)
daemon_mode = config["tunings"].get("daemon_mode", False)
deploy_performance_dashboards = config["performance_monitoring"].get("deploy_dashboards", False)
-dashboard_repo = config["performance_monitoring"].get("repo", "https://github.com/cloud-bulldozer/performance-dashboards.git") # noqa
+dashboard_repo = config["performance_monitoring"].get(
+"repo", "https://github.com/cloud-bulldozer/performance-dashboards.git"
+) # noqa

# Initialize clients
if not os.path.isfile(kubeconfig_path):
@@ -332,8 +337,9 @@ def main(cfg):
# Cluster info
logging.info("Fetching cluster info")
cluster_version = runcommand.invoke("kubectl get clusterversion")
-cluster_info = runcommand.invoke("kubectl cluster-info | awk 'NR==1' | sed -r "
-"'s/\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[mGK]//g'") # noqa
+cluster_info = runcommand.invoke(
+"kubectl cluster-info | awk 'NR==1' | sed -r " "'s/\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[mGK]//g'"
+) # noqa
logging.info("\n%s%s" % (cluster_version, cluster_info))

# Deploy performance dashboards
@@ -348,17 +354,16 @@ def main(cfg):
if daemon_mode:
logging.info("Daemon mode enabled, kraken will cause chaos forever\n")
logging.info("Ignoring the iterations set")
-iterations = float('inf')
+iterations = float("inf")
else:
-logging.info("Daemon mode not enabled, will run through %s iterations\n"
-% str(iterations))
+logging.info("Daemon mode not enabled, will run through %s iterations\n" % str(iterations))
iterations = int(iterations)

failed_post_scenarios = []
litmus_namespaces = []
litmus_installed = False
# Loop to run the chaos starts here
-while (int(iteration) < iterations):
+while int(iteration) < iterations:
# Inject chaos scenarios specified in the config
logging.info("Executing scenarios for iteration " + str(iteration))
if chaos_scenarios:
@@ -368,8 +373,7 @@ def main(cfg):
if scenarios_list:
# Inject pod chaos scenarios specified in the config
if scenario_type == "pod_scenarios":
-failed_post_scenarios = pod_scenarios(scenarios_list, config,
-failed_post_scenarios)
+failed_post_scenarios = pod_scenarios(scenarios_list, config, failed_post_scenarios)

# Inject node chaos scenarios specified in the config
elif scenario_type == "node_scenarios":
@@ -383,9 +387,9 @@ def main(cfg):
common_litmus.install_litmus(litmus_version)
common_litmus.deploy_all_experiments(litmus_version)
litmus_installed = True
-litmus_namespaces = litmus_scenarios(scenarios_list, config,
-litmus_namespaces,
-litmus_uninstall)
+litmus_namespaces = litmus_scenarios(
+scenarios_list, config, litmus_namespaces, litmus_uninstall
+)

iteration += 1
logging.info("")
@@ -407,21 +411,15 @@ if __name__ == "__main__":
# Initialize the parser to read the config
parser = optparse.OptionParser()
parser.add_option(
-"-c", "--config",
-dest="cfg",
-help="config location",
-default="config/config.yaml",
+"-c", "--config", dest="cfg", help="config location", default="config/config.yaml",
)
(options, args) = parser.parse_args()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
-handlers=[
-logging.FileHandler("kraken.report", mode='w'),
-logging.StreamHandler()
-]
+handlers=[logging.FileHandler("kraken.report", mode="w"), logging.StreamHandler()],
)
-if (options.cfg is None):
+if options.cfg is None:
logging.error("Please check if you have passed the config")
sys.exit(1)
else:
@@ -5,9 +5,9 @@ import logging

def run(cmd):
try:
-output = subprocess.Popen(cmd, shell=True,
-universal_newlines=True, stdout=subprocess.PIPE,
-stderr=subprocess.STDOUT)
+output = subprocess.Popen(
+cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+)
(out, err) = output.communicate()
logging.info("out " + str(out))
except Exception as e:

@@ -19,4 +19,3 @@ scenarios:
actions:
- checkPodCount:
count: 2
-
@@ -15,8 +15,11 @@ def list_namespaces():
cli = client.CoreV1Api()
ret = cli.list_namespace(pretty=True)
except ApiException as e:
-logging.error("Exception when calling \
-CoreV1Api->list_namespaced_pod: %s\n" % e)
+logging.error(
+"Exception when calling \
+CoreV1Api->list_namespaced_pod: %s\n"
+% e
+)
for namespace in ret.items:
namespaces.append(namespace.metadata.name)
return namespaces
@@ -47,9 +50,9 @@ def check_namespaces(namespaces):

def run(cmd):
try:
-output = subprocess.Popen(cmd, shell=True,
-universal_newlines=True, stdout=subprocess.PIPE,
-stderr=subprocess.STDOUT)
+output = subprocess.Popen(
+cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+)
(out, err) = output.communicate()
except Exception as e:
logging.error("Failed to run %s, error: %s" % (cmd, e))
@@ -36,11 +36,3 @@ dists = bdist_wheel
[bdist_wheel]
# Use this option if your package is pure-python
universal = 1
-
-[flake8]
-# Some sane defaults for the code style checker flake8
-exclude =
-.tox
-build
-dist
-.eggsse
2
setup.py
@@ -5,7 +5,7 @@ from pkg_resources import VersionConflict, require
from setuptools import setup

try:
-require('setuptools>=38.3')
+require("setuptools>=38.3")
except VersionConflict:
print("Error: version of setuptools is too old (<38.3)!")
sys.exit(1)
@@ -1,2 +0,0 @@
-setuptools
-flake8
24
tox.ini
@@ -1,24 +0,0 @@
-[tox]
-envlist = pep8
-
-[testenv]
-usedevelop = True
-setenv =
-VIRTUAL_ENV={envdir}
-deps = -r{toxinidir}/test-requirements.txt
-commands = python setup.py develop
-
-[testenv:pep8]
-basepython = python
-commands = flake8 {posargs}
-
-[testenv:venv]
-commands = {posargs}
-
-[flake8]
-# E123, E125 skipped as they are invalid PEP-8.
-show-source = True
-max-line-length = 120
-ignore = E123,E125
-builtins = _
-exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build