diff --git a/html/index.html b/html/index.html index d38b53c..a5cdf80 100644 --- a/html/index.html +++ b/html/index.html @@ -179,10 +179,10 @@ @@ -191,6 +191,11 @@ + +
diff --git a/html/js/utils.js b/html/js/utils.js index 358c62a..3c0ee78 100644 --- a/html/js/utils.js +++ b/html/js/utils.js @@ -107,8 +107,14 @@ function getK8sToken() { } function getStoredK8sConnectionConfig() { + var endpointInput = document.getElementById('k8s_api_endpoint'); + var inputEndpoint = endpointInput ? endpointInput.value.trim() : ''; + var storedEndpoint = (localStorage.getItem('k8s_api_endpoint') || '').trim(); + var normalizedInputEndpoint = normalizeK8sEndpoint(inputEndpoint); + var normalizedStoredEndpoint = normalizeK8sEndpoint(storedEndpoint); + return { - target: (localStorage.getItem('k8s_api_endpoint') || '').trim(), + target: normalizedInputEndpoint || normalizedStoredEndpoint || '', token: getK8sToken().trim(), caCert: getK8sCaCert() }; diff --git a/nginx/KubeInvaders.conf b/nginx/KubeInvaders.conf index 78b0986..3199b68 100644 --- a/nginx/KubeInvaders.conf +++ b/nginx/KubeInvaders.conf @@ -79,6 +79,7 @@ server { local https = require "ssl.https" local ltn12 = require "ltn12" local json = require "lunajson" + local redis = require "resty.redis" ngx.header['Access-Control-Allow-Origin'] = '*' ngx.header['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS' @@ -130,6 +131,19 @@ server { end target = string.gsub(target, "/+$", "") + + -- Persist the user-configured endpoint so background loops can reuse it. + local red = redis:new() + local okredis, errredis = red:connect("unix:/tmp/redis.sock") + if okredis then + local okset, errset = red:set("k8s_api_endpoint", target) + if not okset then + ngx.log(ngx.WARN, "[kube/healthz] Unable to persist k8s_api_endpoint in Redis: ", tostring(errset)) + end + else + ngx.log(ngx.WARN, "[kube/healthz] Redis connection failed while storing k8s_api_endpoint: ", tostring(errredis)) + end + local healthz_url = target .. "/healthz" local disable_tls_env = string.lower(tostring(os.getenv("DISABLE_TLS") or "false")) diff --git a/scripts/logs_loop/start.py b/scripts/logs_loop/start.py index c3913ee..45f5b18 100644 --- a/scripts/logs_loop/start.py +++ b/scripts/logs_loop/start.py @@ -175,7 +175,14 @@ if os.environ.get("DEV"): logging.debug(r.get("logs_enabled:aaaa")) configuration = client.Configuration() -token = os.environ["TOKEN"] +token = os.environ.get("TOKEN", "") +if not token: + redis_token = r.get("x_k8s_token") + if redis_token: + token = redis_token if isinstance(redis_token, str) else redis_token.decode() + logging.debug("[k-inv][logs-loop] TOKEN not set in env, using X-K8S-Token from Redis") +if not token: + logging.warning("[k-inv][logs-loop] No token available (TOKEN env and Redis x_k8s_token both empty) — API calls may fail") configuration.api_key = {"authorization": f"Bearer {token}"} configuration.host = sys.argv[1] diff --git a/scripts/logs_loop/start.sh b/scripts/logs_loop/start.sh index fc90538..9d9cabc 100755 --- a/scripts/logs_loop/start.sh +++ b/scripts/logs_loop/start.sh @@ -9,10 +9,35 @@ else fi export PYTHONWARNINGS="ignore:Unverified HTTPS request" -python3 /opt/logs_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} & +RESOLVED_ENDPOINT="" + +# 1) Endpoint configured from the web console (persisted by /kube/healthz in Redis) +REDIS_ENDPOINT=$(python3 - <<'PY' +import redis +try: + r = redis.Redis(unix_socket_path='/tmp/redis.sock', decode_responses=True) + v = r.get('k8s_api_endpoint') + print(v.strip() if v else '') +except Exception: + print('') +PY +) + +if [ ! -z "$REDIS_ENDPOINT" ]; then + RESOLVED_ENDPOINT="$REDIS_ENDPOINT" + echo "Using k8s_api_endpoint from Redis: $RESOLVED_ENDPOINT" +elif [ ! -z "$ENDPOINT" ]; then + RESOLVED_ENDPOINT="$ENDPOINT" + echo "Using ENDPOINT env var: $RESOLVED_ENDPOINT" +else + RESOLVED_ENDPOINT="https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}" + echo "Using in-cluster endpoint: $RESOLVED_ENDPOINT" +fi + +python3 /opt/logs_loop/start.py "$RESOLVED_ENDPOINT" & while true do - pgrep -a -f -c "^python3.*logs_loop.*$" > /dev/null || ( python3 /opt/logs_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} & ) + pgrep -a -f -c "^python3.*logs_loop.*$" > /dev/null || ( python3 /opt/logs_loop/start.py "$RESOLVED_ENDPOINT" & ) sleep 2 done diff --git a/scripts/metrics_loop/start.py b/scripts/metrics_loop/start.py index 923ec16..1e7dd9b 100644 --- a/scripts/metrics_loop/start.py +++ b/scripts/metrics_loop/start.py @@ -4,6 +4,18 @@ import json import logging import os import sys + +logging.basicConfig( + level=logging.INFO, + stream=sys.stdout, + format="%(asctime)s %(levelname)s %(message)s", + force=True, +) +logging.getLogger().setLevel(logging.INFO) +_file_handler = logging.FileHandler("/tmp/kubeinvaders_metrics_loop.log") +_file_handler.setLevel(logging.INFO) +_file_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s")) +logging.getLogger().addHandler(_file_handler) from kubernetes import client, config from kubernetes.client.rest import ApiException import requests @@ -13,10 +25,28 @@ import random import redis import time import urllib3 +from urllib.parse import urlparse +from urllib3.exceptions import LocationValueError +from urllib3.exceptions import MaxRetryError import time import datetime urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +def normalize_k8s_host(raw_host): + if not raw_host: + return "" + host = str(raw_host).strip().strip('"').strip("'") + if not host: + return "" + if not host.startswith("http://") and not host.startswith("https://"): + host = "https://" + host + host = host.rstrip("/") + parsed = urlparse(host) + if not parsed.netloc: + return "" + return host + def join_string_to_array(project_list_string, separator): return project_list_string.split(separator) @@ -86,31 +116,112 @@ def create_job(job_name, pod_template): r = redis.Redis(unix_socket_path='/tmp/redis.sock') -logging.basicConfig( - level=os.environ.get("LOGLEVEL", "INFO"), - stream=sys.stdout, - format="%(asctime)s %(levelname)s %(message)s", - force=True, -) logging.getLogger('kubernetes').setLevel(logging.ERROR) logging.debug('Starting script for KubeInvaders metrics loop') configuration = client.Configuration() -token = os.environ["TOKEN"] -configuration.api_key = {"authorization": f"Bearer {token}"} -configuration.host = sys.argv[1] +token = os.environ.get("TOKEN", "") +if not token: + redis_token = r.get("x_k8s_token") + if redis_token: + token = redis_token.decode() + logging.info("[k-inv][metrics_loop] TOKEN not set in env, using X-K8S-Token from Redis") +token = token.strip() +if token.lower().startswith("bearer "): + token = token[7:].strip() +if not token: + logging.warning("[k-inv][metrics_loop] No token available (TOKEN env and Redis x_k8s_token both empty) — API calls may fail") + +configuration.api_key = {"authorization": token} +configuration.api_key_prefix = {"authorization": "Bearer"} + +arg_host = sys.argv[1] if len(sys.argv) > 1 else "" +resolved_host = normalize_k8s_host(arg_host) + +if not resolved_host: + redis_endpoint = r.get("k8s_api_endpoint") + if redis_endpoint: + redis_endpoint = redis_endpoint.decode() if isinstance(redis_endpoint, bytes) else redis_endpoint + resolved_host = normalize_k8s_host(redis_endpoint) + if resolved_host: + logging.info(f"[k-inv][metrics_loop] Using Kubernetes host from Redis k8s_api_endpoint: {resolved_host}") + +if not resolved_host: + resolved_host = normalize_k8s_host(os.environ.get("ENDPOINT", "")) + if resolved_host: + logging.info(f"[k-inv][metrics_loop] Using Kubernetes host from ENDPOINT: {resolved_host}") + +if not resolved_host: + svc_host = os.environ.get("KUBERNETES_SERVICE_HOST", "") + svc_port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS", "443") + resolved_host = normalize_k8s_host(f"https://{svc_host}:{svc_port}" if svc_host else "") + if resolved_host: + logging.info(f"[k-inv][metrics_loop] Using in-cluster Kubernetes host: {resolved_host}") + +if not resolved_host: + resolved_host = "https://kubernetes.default.svc" + logging.warning("[k-inv][metrics_loop] No valid Kubernetes host provided; defaulting to https://kubernetes.default.svc") + +configuration.host = resolved_host +logging.info(f"[k-inv][metrics_loop] Kubernetes API host resolved to: {configuration.host}") configuration.insecure_skip_tls_verify = True configuration.verify_ssl = False -client.Configuration.set_default(configuration) -client.Configuration.set_default(configuration) +api_client = client.ApiClient(configuration=configuration) +api_instance = client.CoreV1Api(api_client) +batch_api = client.BatchV1Api(api_client) -api_instance = client.CoreV1Api() -batch_api = client.BatchV1Api() + +def rebuild_api_client_from_fallbacks(current_configuration): + fallback_host = "" + + redis_endpoint = r.get("k8s_api_endpoint") + if redis_endpoint: + redis_endpoint = redis_endpoint.decode() if isinstance(redis_endpoint, bytes) else redis_endpoint + fallback_host = normalize_k8s_host(redis_endpoint) + + if not fallback_host: + fallback_host = normalize_k8s_host(os.environ.get("ENDPOINT", "")) + + if not fallback_host: + svc_host = os.environ.get("KUBERNETES_SERVICE_HOST", "") + svc_port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS", "443") + fallback_host = normalize_k8s_host(f"https://{svc_host}:{svc_port}" if svc_host else "") + + if not fallback_host: + fallback_host = "https://kubernetes.default.svc" + + current_configuration.host = fallback_host + logging.warning(f"[k-inv][metrics_loop] Rebuilding Kubernetes API client with host: {fallback_host}") + new_api_client = client.ApiClient(configuration=current_configuration) + return client.CoreV1Api(new_api_client), client.BatchV1Api(new_api_client) + + +def sync_api_client_from_redis(current_configuration): + redis_endpoint = r.get("k8s_api_endpoint") + if not redis_endpoint: + return None, None + + redis_endpoint = redis_endpoint.decode() if isinstance(redis_endpoint, bytes) else redis_endpoint + redis_host = normalize_k8s_host(redis_endpoint) + if not redis_host: + return None, None + + if current_configuration.host != redis_host: + current_configuration.host = redis_host + logging.info(f"[k-inv][metrics_loop] Switching Kubernetes API host from Redis k8s_api_endpoint to: {redis_host}") + new_api_client = client.ApiClient(configuration=current_configuration) + return client.CoreV1Api(new_api_client), client.BatchV1Api(new_api_client) + + return None, None while True: + maybe_api_instance, maybe_batch_api = sync_api_client_from_redis(configuration) + if maybe_api_instance and maybe_batch_api: + api_instance, batch_api = maybe_api_instance, maybe_batch_api + #logging.info(f"[k-inv][metrics_loop] Metrics loop is active - {r.exists('chaos_report_project_list')}") if r.exists('chaos_report_project_list'): logging.info("[k-inv][metrics_loop] Found chaos_report_project_list Redis Key") @@ -145,6 +256,18 @@ while True: if method not in ["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"]: method = "GET" + with open("/tmp/kubeinvaders_url_debug.log", "w") as dbg: + dbg.write(f"--- {datetime.datetime.now().isoformat()} ---\n") + dbg.write(f" project = {project!r}\n") + dbg.write(f" chaos_program_key= {chaos_program_key!r}\n") + dbg.write(f" raw redis value = {r.get(chaos_program_key)!r}\n") + dbg.write(f" chaos_report_program = {chaos_report_program}\n") + dbg.write(f" url (raw) = {chaos_report_program.get('chaosReportCheckSiteURL')!r}\n") + dbg.write(f" url (stripped) = {url!r}\n") + dbg.write(f" method = {method!r}\n") + dbg.write(f" headers = {headers}\n") + dbg.write(f" payload = {payload!r}\n") + if not url: logging.warning( f"[k-inv][metrics_loop][chaos_report] Empty chaosReportCheckSiteURL for project={project}. Raw program: {chaos_report_program}" @@ -152,6 +275,7 @@ while True: response = "Connection Error" else: response = do_http_request(url, method, headers, payload) + check_url_counter_key = f"{chaos_report_program['chaosReportProject']}_check_url_counter" check_url_status_code_key = f"{chaos_report_program['chaosReportProject']}_check_url_status_code" check_url_elapsed_time_key = f"{chaos_report_program['chaosReportProject']}_check_url_elapsed_time" @@ -173,15 +297,25 @@ while True: logging.info(f"[k-inv][metrics_loop][chaos_report] Status code {response.status_code} while checking {chaos_report_program['chaosReportCheckSiteURL']}") r.set(check_url_status_code_key, response.status_code) r.set(check_url_elapsed_time_key, float(response.elapsed.total_seconds())) + api_response_items = [] try: label_selector="chaos-controller=kubeinvaders" api_response = api_instance.list_pod_for_all_namespaces(label_selector=label_selector) + api_response_items = api_response.items or [] except ApiException as e: - logging.debug(e) + logging.warning(f"[k-inv][metrics_loop] Kubernetes API error while listing pods: {e}") + except LocationValueError as e: + logging.warning(f"[k-inv][metrics_loop] Invalid Kubernetes host while listing pods: {e}") + api_instance, batch_api = rebuild_api_client_from_fallbacks(configuration) + except MaxRetryError as e: + logging.warning(f"[k-inv][metrics_loop] Kubernetes host unreachable while listing pods: {e}") + api_instance, batch_api = rebuild_api_client_from_fallbacks(configuration) + except Exception as e: + logging.warning(f"[k-inv][metrics_loop] Unexpected error while listing pods: {type(e).__name__}: {e}") r.set("current_chaos_job_pod", 0) - for pod in api_response.items: + for pod in api_response_items: if pod.status.phase == "Pending" or pod.status.phase == "Running": logging.debug(f"[k-inv][metrics_loop] Found pod {pod.metadata.name}. It is in {pod.status.phase} phase. Incrementing current_chaos_job_pod Redis key") r.incr('current_chaos_job_pod') diff --git a/scripts/metrics_loop/start.sh b/scripts/metrics_loop/start.sh index 27477a4..b4cc37d 100755 --- a/scripts/metrics_loop/start.sh +++ b/scripts/metrics_loop/start.sh @@ -10,13 +10,36 @@ fi export PYTHONUNBUFFERED=1 -if python3 -c "import urllib.request; urllib.request.urlopen('https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}', timeout=5)" 2>/dev/null; then - python3 -u /opt/metrics_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} & - while true - do - pgrep -a -f -c "^python3.*metrics_loop.*$" > /dev/null || ( python3 -u /opt/metrics_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} & ) - sleep 2 - done +RESOLVED_ENDPOINT="" + +# 1) Endpoint configured from the web console (persisted by /kube/healthz in Redis) +REDIS_ENDPOINT=$(python3 - <<'PY' +import redis +try: + r = redis.Redis(unix_socket_path='/tmp/redis.sock', decode_responses=True) + v = r.get('k8s_api_endpoint') + print(v.strip() if v else '') +except Exception: + print('') +PY +) + +if [ ! -z "$REDIS_ENDPOINT" ]; then + RESOLVED_ENDPOINT="$REDIS_ENDPOINT" + echo "Using k8s_api_endpoint from Redis: $RESOLVED_ENDPOINT" +elif [ ! -z "$ENDPOINT" ]; then + RESOLVED_ENDPOINT="$ENDPOINT" + echo "Using ENDPOINT env var: $RESOLVED_ENDPOINT" +else + RESOLVED_ENDPOINT="https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}" + echo "Using in-cluster endpoint: $RESOLVED_ENDPOINT" fi +python3 -u /opt/metrics_loop/start.py "$RESOLVED_ENDPOINT" & +while true +do + pgrep -a -f -c "^python3.*metrics_loop.*$" > /dev/null || ( python3 -u /opt/metrics_loop/start.py "$RESOLVED_ENDPOINT" & ) + sleep 2 +done + diff --git a/scripts/pod.lua b/scripts/pod.lua index 9e269a7..0acca86 100644 --- a/scripts/pod.lua +++ b/scripts/pod.lua @@ -34,6 +34,12 @@ local arg = ngx.req.get_uri_args() local req_headers = ngx.req.get_headers() local target = arg['target'] or req_headers["x-k8s-target"] or req_headers["X-K8S-Target"] local header_token = req_headers["x-k8s-token"] or req_headers["X-K8S-Token"] + +if header_token and header_token ~= "" then + local red_tok = redis:new() + red_tok:connect("unix:/tmp/redis.sock") + red_tok:set("x_k8s_token", header_token) +end local ca_cert_b64 = req_headers["x-k8s-ca-cert-b64"] or req_headers["X-K8S-CA-CERT-B64"] local ca_cert = nil diff --git a/scripts/programming_mode/start.py b/scripts/programming_mode/start.py index b4b9584..811d886 100644 --- a/scripts/programming_mode/start.py +++ b/scripts/programming_mode/start.py @@ -3,6 +3,7 @@ import yaml import logging import os import sys +import base64 from kubernetes import client, config from kubernetes.client.rest import ApiException import requests @@ -69,7 +70,24 @@ with open(sys.argv[1], 'r') as stream: r = redis.Redis(unix_socket_path='/tmp/redis.sock') configuration = client.Configuration() -token = os.environ["TOKEN"] +token = "" +token_b64 = os.environ.get("K8S_TOKEN_B64", "") +if token_b64: + try: + token = base64.b64decode(token_b64).decode("utf-8").strip() + logging.info("[PROGRAMMING_MODE] Using token from K8S_TOKEN_B64") + except Exception as e: + logging.warning(f"[PROGRAMMING_MODE] Invalid K8S_TOKEN_B64: {e}") + +if not token: + token = os.environ.get("TOKEN", "") +if not token: + redis_token = r.get("x_k8s_token") + if redis_token: + token = redis_token.decode() if isinstance(redis_token, bytes) else redis_token + logging.info("[PROGRAMMING_MODE] TOKEN not set in env, using X-K8S-Token from Redis") +if not token: + logging.warning("[PROGRAMMING_MODE] No token available (TOKEN env and Redis x_k8s_token both empty) — API calls may fail") configuration.api_key = {"authorization": f"Bearer {token}"} configuration.host = sys.argv[2] configuration.insecure_skip_tls_verify = True