Compare commits

...

3 Commits

Author SHA1 Message Date
Naga Ravi Chaitanya Elluri
9cb701a616 Convert thresholds to float
Some checks failed
Functional & Unit Tests / Functional & Unit Tests (push) Failing after 9m22s
Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped
This is needed to avoid issues caused by comparing two different data types:
TypeError: Invalid comparison between dtype=float64 and str. This commit also
removes the default values for the thresholds, making it mandatory for users
to define them, as they play a key role in determining the outliers.

Signed-off-by: Naga Ravi Chaitanya Elluri <nelluri@redhat.com>
2025-01-13 15:47:33 -05:00
dependabot[bot]
0372013b67 Bump jinja2 from 3.1.4 to 3.1.5 (#745)
Some checks failed
Functional & Unit Tests / Functional & Unit Tests (push) Failing after 3m57s
Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped
Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5.
- [Release notes](https://github.com/pallets/jinja/releases)
- [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5)

---
updated-dependencies:
- dependency-name: jinja2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-08 09:54:26 +01:00
Tullio Sebastiani
4fea1a354d added krknctl types to krkn baseimage for global variables (#741)
Some checks failed
Functional & Unit Tests / Functional & Unit Tests (push) Failing after 7m55s
Functional & Unit Tests / Generate Coverage Badge (push) Has been skipped
* added krknctl types to krkn baseimage for global variables

fixed

* fixed dockerfile

* dockerfile compile script

fix
2025-01-07 10:12:37 -05:00
7 changed files with 420 additions and 12 deletions

View File

@@ -13,6 +13,7 @@ jobs:
- name: Build the Docker images
if: startsWith(github.ref, 'refs/tags')
run: |
./containers/compile_dockerfile.sh
docker build --no-cache -t quay.io/krkn-chaos/krkn containers/ --build-arg TAG=${GITHUB_REF#refs/tags/}
docker tag quay.io/krkn-chaos/krkn quay.io/redhat-chaos/krkn
docker tag quay.io/krkn-chaos/krkn quay.io/krkn-chaos/krkn:${GITHUB_REF#refs/tags/}
@@ -21,6 +22,7 @@ jobs:
- name: Test Build the Docker images
if: ${{ github.event_name == 'pull_request' }}
run: |
./containers/compile_dockerfile.sh
docker build --no-cache -t quay.io/krkn-chaos/krkn containers/ --build-arg PR_NUMBER=${{ github.event.pull_request.number }}
- name: Login in quay
if: startsWith(github.ref, 'refs/tags')

View File

@@ -49,6 +49,11 @@ RUN python3.9 -m ensurepip
RUN pip3.9 install -r requirements.txt
RUN pip3.9 install jsonschema
LABEL krknctl.title="Krkn Base Image"
LABEL krknctl.description="This is the krkn base image."
LABEL krknctl.input_fields='$KRKNCTL_INPUT'
RUN chown -R krkn:krkn /home/krkn && chmod 755 /home/krkn
USER krkn
ENTRYPOINT ["python3.9", "run_kraken.py"]

View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Render Dockerfile from Dockerfile.template by substituting ${KRKNCTL_INPUT}
# with the contents of krknctl-input.json flattened to a single line.
# All three files are resolved relative to this script's own directory, so the
# script can be invoked from any working directory.

# Abort on the first failing command, unset variable, or failing pipeline stage
# so that a missing input never yields a half-rendered Dockerfile.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Assign first, export second: `export VAR=$(cmd)` would report the exit status
# of `export` and hide a failure to read the JSON file.
KRKNCTL_INPUT="$(tr -d '\n' < krknctl-input.json)"
export KRKNCTL_INPUT

# The single-quoted argument is intentional: envsubst receives the literal
# variable name and replaces only that placeholder, leaving any other `$...`
# text in the template untouched.
envsubst '${KRKNCTL_INPUT}' < Dockerfile.template > Dockerfile

View File

@@ -0,0 +1,396 @@
[
{
"name": "cerberus-enabled",
"short_description": "Enable Cerberus",
"description": "Enables Cerberus Support",
"variable": "CERBERUS_ENABLED",
"type": "enum",
"default": "False",
"allowed_values": "True,False",
"separator": ",",
"required": "false"
},
{
"name": "cerberus-url",
"short_description": "Cerberus URL",
"description": "Cerberus http url",
"variable": "CERBERUS_URL",
"type": "string",
"default": "http://0.0.0.0:8080",
"validator": "^(http|https):\/\/.*",
"required": "false"
},
{
"name": "distribution",
"short_description": "Orchestrator distribution",
"description": "Selects the orchestrator distribution",
"variable": "DISTRIBUTION",
"type": "enum",
"default": "openshift",
"allowed_values": "openshift,kubernetes",
"separator": ",",
"required": "false"
},
{
"name": "krkn-kubeconfig",
"short_description": "Krkn kubeconfig path",
"description": "Sets the path where krkn will search for kubeconfig (in container)",
"variable": "KRKN_KUBE_CONFIG",
"type": "string",
"default": "/home/krkn/.kube/config",
"required": "false"
},
{
"name": "wait-duration",
"short_description": "Post chaos wait duration",
"description": "waits for a certain amount of time after the scenario",
"variable": "WAIT_DURATION",
"type": "number",
"default": "1"
},
{
"name": "iterations",
"short_description": "Chaos scenario iterations",
"description": "number of times the same chaos scenario will be executed",
"variable": "ITERATIONS",
"type": "number",
"default": "1"
},
{
"name": "daemon-mode",
"short_description": "Sets krkn daemon mode",
"description": "if set the scenario will execute forever",
"variable": "DAEMON_MODE",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "uuid",
"short_description": "Sets krkn run uuid",
"description": "sets krkn run uuid instead of generating it",
"variable": "UUID",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "capture-metrics",
"short_description": "Enables metrics capture",
"description": "Enables metrics capture",
"variable": "CAPTURE_METRICS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "enable-alerts",
"short_description": "Enables cluster alerts check",
"description": "Enables cluster alerts check",
"variable": "ENABLE_ALERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "alerts-path",
"short_description": "Cluster alerts path file (in container)",
"description": "Path of the cluster alerts file (in container)",
"variable": "ALERTS_PATH",
"type": "string",
"default": "config/alerts.yaml",
"required": "false"
},
{
"name": "enable-es",
"short_description": "Enables elastic search data collection",
"description": "Enables elastic search data collection",
"variable": "ENABLE_ES",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-server",
"short_description": "Elasticsearch instance URL",
"description": "Elasticsearch instance URL",
"variable": "ES_SERVER",
"type": "string",
"default": "http://0.0.0.0",
"validator": "^(http|https):\/\/.*",
"required": "false"
},
{
"name": "es-port",
"short_description": "Elasticsearch instance port",
"description": "Elasticsearch instance port",
"variable": "ES_PORT",
"type": "number",
"default": "443",
"required": "false"
},
{
"name": "es-username",
"short_description": "Elasticsearch instance username",
"description": "Elasticsearch instance username",
"variable": "ES_USERNAME",
"type": "string",
"default": "elastic",
"required": "false"
},
{
"name": "es-password",
"short_description": "Elasticsearch instance password",
"description": "Elasticsearch instance password",
"variable": "ES_PASSWORD",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "es-verify-certs",
"short_description": "Enables elasticsearch TLS certificate verification",
"description": "Enables elasticsearch TLS certificate verification",
"variable": "ES_VERIFY_CERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-collect-metrics",
"short_description": "Enables metrics collection on elastic search",
"description": "Enables metrics collection on elastic search",
"variable": "ES_COLLECT_METRICS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-collect-alerts",
"short_description": "Enables alerts collection on elastic search",
"description": "Enables alerts collection on elastic search",
"variable": "ES_COLLECT_ALERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "es-metrics-index",
"short_description": "Elasticsearch metrics index",
"description": "Index name for metrics in Elasticsearch",
"variable": "ES_METRICS_INDEX",
"type": "string",
"default": "krkn-metrics",
"required": "false"
},
{
"name": "es-alerts-index",
"short_description": "Elasticsearch alerts index",
"description": "Index name for alerts in Elasticsearch",
"variable": "ES_ALERTS_INDEX",
"type": "string",
"default": "krkn-alerts",
"required": "false"
},
{
"name": "es-telemetry-index",
"short_description": "Elasticsearch telemetry index",
"description": "Index name for telemetry in Elasticsearch",
"variable": "ES_TELEMETRY_INDEX",
"type": "string",
"default": "krkn-telemetry",
"required": "false"
},
{
"name": "check-critical-alerts",
"short_description": "Check critical alerts",
"description": "Enables checking for critical alerts",
"variable": "CHECK_CRITICAL_ALERTS",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-enabled",
"short_description": "Enable telemetry",
"description": "Enables telemetry support",
"variable": "TELEMETRY_ENABLED",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-api-url",
"short_description": "Telemetry API URL",
"description": "API endpoint for telemetry data",
"variable": "TELEMETRY_API_URL",
"type": "string",
"default": "https://ulnmf9xv7j.execute-api.us-west-2.amazonaws.com/production",
"validator": "^(http|https):\/\/.*",
"required": "false"
},
{
"name": "telemetry-username",
"short_description": "Telemetry username",
"description": "Username for telemetry authentication",
"variable": "TELEMETRY_USERNAME",
"type": "string",
"default": "redhat-chaos",
"required": "false"
},
{
"name": "telemetry-password",
"short_description": "Telemetry password",
"description": "Password for telemetry authentication",
"variable": "TELEMETRY_PASSWORD",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "telemetry-prometheus-backup",
"short_description": "Prometheus backup for telemetry",
"description": "Enables Prometheus backup for telemetry",
"variable": "TELEMETRY_PROMETHEUS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "True",
"required": "false"
},
{
"name": "telemetry-full-prometheus-backup",
"short_description": "Full Prometheus backup",
"description": "Enables full Prometheus backup for telemetry",
"variable": "TELEMETRY_FULL_PROMETHEUS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-backup-threads",
"short_description": "Telemetry backup threads",
"description": "Number of threads for telemetry backup",
"variable": "TELEMETRY_BACKUP_THREADS",
"type": "number",
"default": "5",
"required": "false"
},
{
"name": "telemetry-archive-path",
"short_description": "Telemetry archive path",
"description": "Path to save telemetry archive",
"variable": "TELEMETRY_ARCHIVE_PATH",
"type": "string",
"default": "/tmp",
"required": "false"
},
{
"name": "telemetry-max-retries",
"short_description": "Telemetry max retries",
"description": "Maximum retries for telemetry operations",
"variable": "TELEMETRY_MAX_RETRIES",
"type": "number",
"default": "0",
"required": "false"
},
{
"name": "telemetry-run-tag",
"short_description": "Telemetry run tag",
"description": "Tag for telemetry run",
"variable": "TELEMETRY_RUN_TAG",
"type": "string",
"default": "chaos",
"required": "false"
},
{
"name": "telemetry-group",
"short_description": "Telemetry group",
"description": "Group name for telemetry data",
"variable": "TELEMETRY_GROUP",
"type": "string",
"default": "default",
"required": "false"
},
{
"name": "telemetry-archive-size",
"short_description": "Telemetry archive size",
"description": "Maximum size for telemetry archives",
"variable": "TELEMETRY_ARCHIVE_SIZE",
"type": "number",
"default": "1000",
"required": "false"
},
{
"name": "telemetry-logs-backup",
"short_description": "Telemetry logs backup",
"description": "Enables logs backup for telemetry",
"variable": "TELEMETRY_LOGS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
},
{
"name": "telemetry-filter-pattern",
"short_description": "Telemetry filter pattern",
"description": "Filter pattern for telemetry logs",
"variable": "TELEMETRY_FILTER_PATTERN",
"type": "string",
"default": "[\"(\\\\w{3}\\\\s\\\\d{1,2}\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d+).+\",\"kinit (\\\\d+/\\\\d+/\\\\d+\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2})\\\\s+\",\"(\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d+Z).+\"]",
"required": "false"
},
{
"name": "telemetry-cli-path",
"short_description": "Telemetry CLI path (oc)",
"description": "Path to telemetry CLI tool (oc)",
"variable": "TELEMETRY_CLI_PATH",
"type": "string",
"default": "",
"required": "false"
},
{
"name": "telemetry-events-backup",
"short_description": "Telemetry events backup",
"description": "Enables events backup for telemetry",
"variable": "TELEMETRY_EVENTS_BACKUP",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "True",
"required": "false"
},
{
"name": "krkn-debug",
"short_description": "Krkn debug mode",
"description": "Enables debug mode for Krkn",
"variable": "KRKN_DEBUG",
"type": "enum",
"allowed_values": "True,False",
"separator": ",",
"default": "False",
"required": "false"
}
]

View File

@@ -29,9 +29,9 @@ def calculate_zscores(data):
def identify_outliers(data, threshold):
    """Return the services whose CPU, Memory or Network value exceeds a threshold.

    Args:
        data: Table indexable by the column names "CPU", "Memory", "Network"
            and "Service" that supports boolean-mask row selection
            (presumably a pandas DataFrame -- confirm against the caller).
        threshold: Cut-off applied to all three metric columns. May be a
            number or a numeric string; it is converted with ``float``.

    Returns:
        A tuple ``(outliers_cpu, outliers_memory, outliers_network)`` where
        each element is the list of "Service" values whose corresponding
        metric is strictly greater than the threshold.

    Raises:
        TypeError, ValueError: Propagated from ``float`` when ``threshold`` is
            missing (``None``) or is not a valid number.
    """
    # Convert once up front: comparing a float column with a string threshold
    # raises "TypeError: Invalid comparison between dtype=float64 and str".
    limit = float(threshold)

    outliers_cpu = data[data["CPU"] > limit]["Service"].tolist()
    outliers_memory = data[data["Memory"] > limit]["Service"].tolist()
    outliers_network = data[data["Network"] > limit]["Service"].tolist()

    return outliers_cpu, outliers_memory, outliers_network
@@ -39,13 +39,13 @@ def identify_outliers(data, threshold):
def get_services_above_heatmap_threshold(dataframe, cpu_threshold, mem_threshold):
# Filter the DataFrame based on CPU_HEATMAP and MEM_HEATMAP thresholds
filtered_df = dataframe[
((dataframe["CPU"] / dataframe["CPU_LIMITS"]) > cpu_threshold)
((dataframe["CPU"] / dataframe["CPU_LIMITS"]) > float(cpu_threshold))
]
# Get the lists of services
cpu_services = filtered_df["service"].tolist()
filtered_df = dataframe[
((dataframe["MEM"] / dataframe["MEM_LIMITS"]) > mem_threshold)
((dataframe["MEM"] / dataframe["MEM_LIMITS"]) > float(mem_threshold))
]
mem_services = filtered_df["service"].tolist()

View File

@@ -15,7 +15,7 @@ google-auth==2.37.0
google-cloud-compute==1.22.0
ibm_cloud_sdk_core==3.18.0
ibm_vpc==0.20.0
jinja2==3.1.4
jinja2==3.1.5
krkn-lib==4.0.4
lxml==5.1.0
kubernetes==28.1.0

View File

@@ -112,12 +112,12 @@ def parse_arguments(parser):
default=[],
help="Memory related chaos tests (space separated list)",
)
parser.add_argument("--threshold", action="store", default="", help="Threshold")
parser.add_argument("--threshold", action="store", help="Threshold")
parser.add_argument(
"--cpu-threshold", action="store", default="", help="CPU threshold"
"--cpu-threshold", action="store", help="CPU threshold"
)
parser.add_argument(
"--mem-threshold", action="store", default="", help="Memory threshold"
"--mem-threshold", action="store", help="Memory threshold"
)
return parser.parse_args()
@@ -141,9 +141,9 @@ def read_configuration(config_file_path):
prometheus_endpoint = config.get("prometheus_endpoint")
auth_token = config.get("auth_token")
scrape_duration = get_yaml_item_value(config, "scrape_duration", "10m")
threshold = get_yaml_item_value(config, "threshold", ".7")
heatmap_cpu_threshold = get_yaml_item_value(config, "cpu_threshold", ".5")
heatmap_mem_threshold = get_yaml_item_value(config, "mem_threshold", ".3")
threshold = get_yaml_item_value(config, "threshold")
heatmap_cpu_threshold = get_yaml_item_value(config, "cpu_threshold")
heatmap_mem_threshold = get_yaml_item_value(config, "mem_threshold")
output_file = config.get("json_output_file", False)
if output_file is True:
output_path = config.get("json_output_folder_path")