---
# Kraken chaos-run configuration.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks and indentation had been collapsed - verify it against the
# schema expected by the consumer.

kraken:
  # Distribution can be kubernetes or openshift.
  distribution: kubernetes
  # Path to kubeconfig.
  kubeconfig_path: ~/.kube/config
  # Exit when a post action scenario fails.
  exit_on_failure: False
  # Can be accessed at http://0.0.0.0:8081
  publish_kraken_status: True
  # Will wait for the RUN signal when set to PAUSE before running the
  # scenarios; refer to docs/signal.md for more details.
  signal_state: RUN
  # Signal listening address.
  signal_address: 0.0.0.0
  # Signal port.
  port: 8081
  # Enable auto rollback for scenarios.
  auto_rollback: True
  # Directory to store rollback version files.
  rollback_versions_directory: /tmp/kraken-rollback
  # List of policies/chaos scenarios to load.
  chaos_scenarios:
    # List of chaos pod scenarios to load.
    - $scenario_type:
        - $scenario_file

cerberus:
  # Enable it when cerberus is previously installed.
  cerberus_enabled: False
  # When cerberus_enabled is set to True, provide the url where cerberus
  # publishes the go/no-go signal.
  cerberus_url:

performance_monitoring:
  capture_metrics: False
  metrics_profile_path: config/metrics-aggregated.yaml
  # The prometheus url/route is automatically obtained in case of OpenShift;
  # please set it when the distribution is Kubernetes.
  prometheus_url:
  # The bearer token is automatically obtained in case of OpenShift; please set
  # it when the distribution is Kubernetes. This is needed to authenticate with
  # prometheus.
  prometheus_bearer_token:
  # uuid for the run is generated by default if not set.
  uuid:
  # Runs the queries specified in the alert profile and displays the info or
  # exits 1 when severity=error.
  enable_alerts: True
  enable_metrics: True
  # Path or URL to alert profile with the prometheus queries.
  alert_profile: config/alerts.yaml
  metrics_profile: config/metrics-report.yaml
  # Boolean toggle, not a path (the previous comment was copied from
  # alert_profile).
  # NOTE(review): presumably enables a check for critical alerts - confirm
  # the exact behavior against the project documentation.
  check_critical_alerts: True

tunings:
  # Duration to wait between each chaos scenario.
  wait_duration: 6
  # Number of times to execute the scenarios.
  iterations: 1
  # Daemon mode: iterations are set to infinity, which means that kraken will
  # cause chaos forever.
  daemon_mode: False

telemetry:
  # Enables/disables the telemetry collection feature.
  enabled: False
  # Telemetry service endpoint.
  api_url: https://yvnn4rfoi7.execute-api.us-west-2.amazonaws.com/test
  # Telemetry service username.
  username: $TELEMETRY_USERNAME
  # Telemetry service password.
  password: $TELEMETRY_PASSWORD
  # Prometheus namespace.
  prometheus_namespace: 'monitoring'
  # Prometheus pod name.
  prometheus_pod_name: 'prometheus-kind-prometheus-kube-prome-prometheus-0'
  prometheus_container_name: 'prometheus'
  # Enables/disables prometheus data collection.
  prometheus_backup: True
  # If set to False, only the /prometheus/wal folder will be downloaded.
  full_prometheus_backup: False
  # Number of telemetry download/upload threads.
  backup_threads: 5
  # Local path where the archive files will be temporarily stored.
  archive_path: /tmp
  # Maximum number of upload retries (if 0, will retry forever).
  max_retries: 0
  # If set, this will be appended to the run folder in the bucket (useful to
  # group the runs).
  run_tag: ''
  # Size of the prometheus data archive in KB.
  # NOTE(review): the original comment was cut off after "The lower the size
  # of archive is"; presumably a smaller size produces more archive files -
  # confirm and complete.
  archive_size: 10000
  logs_backup: True
  logs_filter_patterns:
    # Sep 9 11:20:36.123425532
    - "(\\w{3}\\s\\d{1,2}\\s\\d{2}:\\d{2}:\\d{2}\\.\\d+).+"
    # kinit 2023/09/15 11:20:36 log
    - "kinit (\\d+/\\d+/\\d+\\s\\d{2}:\\d{2}:\\d{2})\\s+"
    # 2023-09-15T11:20:36.123425532Z log
    - "(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z).+"
  # Optional; if not specified, it will be searched for in $PATH.
  oc_cli_path: /usr/bin/oc
  # Enables/disables cluster events collection.
  events_backup: True
  telemetry_group: "funtests"

elastic:
  enable_elastic: False
  verify_certs: False
  # To track results in elasticsearch, give the url to the server here; will
  # post telemetry details when url and index are not blank.
  elastic_url: "https://192.168.39.196"
  elastic_port: 32766
  # NOTE(review): these look like placeholder credentials - avoid committing
  # real secrets to this file.
  username: "elastic"
  password: "test"
  metrics_index: "krkn-metrics"
  alerts_index: "krkn-alerts"
  telemetry_index: "krkn-telemetry"

# Utilizing health check endpoints to observe application behavior during
# chaos injection.
health_checks:
  # Interval in seconds to perform health checks, default value is 2 seconds.
  interval:
  # Provide list of health check configurations for applications.
  config:
    # Provide application endpoint.
    - url:
      # Bearer token for authentication, if any.
      bearer_token:
      # Provide authentication credentials (username, password) in tuple
      # format, if any, ex: ("admin","secretpassword")
      auth:
      # If value is True, exits when the health check failed for the
      # application; values can be True/False.
      exit_on_failure: