+ Before starting, configure the Kubernetes connection, click Test Connection, then Save Connection. After that, click Enable Game Mode in the sidebar.
+
+
diff --git a/html/js/utils.js b/html/js/utils.js
index 358c62a..3c0ee78 100644
--- a/html/js/utils.js
+++ b/html/js/utils.js
@@ -107,8 +107,14 @@ function getK8sToken() {
}
function getStoredK8sConnectionConfig() {
+ var endpointInput = document.getElementById('k8s_api_endpoint');
+ var inputEndpoint = endpointInput ? endpointInput.value.trim() : '';
+ var storedEndpoint = (localStorage.getItem('k8s_api_endpoint') || '').trim();
+ var normalizedInputEndpoint = normalizeK8sEndpoint(inputEndpoint);
+ var normalizedStoredEndpoint = normalizeK8sEndpoint(storedEndpoint);
+
return {
- target: (localStorage.getItem('k8s_api_endpoint') || '').trim(),
+ target: normalizedInputEndpoint || normalizedStoredEndpoint || '',
token: getK8sToken().trim(),
caCert: getK8sCaCert()
};
diff --git a/nginx/KubeInvaders.conf b/nginx/KubeInvaders.conf
index 78b0986..3199b68 100644
--- a/nginx/KubeInvaders.conf
+++ b/nginx/KubeInvaders.conf
@@ -79,6 +79,7 @@ server {
local https = require "ssl.https"
local ltn12 = require "ltn12"
local json = require "lunajson"
+ local redis = require "resty.redis"
ngx.header['Access-Control-Allow-Origin'] = '*'
ngx.header['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
@@ -130,6 +131,19 @@ server {
end
target = string.gsub(target, "/+$", "")
+
+ -- Persist the user-configured endpoint so background loops can reuse it.
+ local red = redis:new()
+ local okredis, errredis = red:connect("unix:/tmp/redis.sock")
+ if okredis then
+ local okset, errset = red:set("k8s_api_endpoint", target)
+ if not okset then
+ ngx.log(ngx.WARN, "[kube/healthz] Unable to persist k8s_api_endpoint in Redis: ", tostring(errset))
+ end
+ else
+ ngx.log(ngx.WARN, "[kube/healthz] Redis connection failed while storing k8s_api_endpoint: ", tostring(errredis))
+ end
+
local healthz_url = target .. "/healthz"
local disable_tls_env = string.lower(tostring(os.getenv("DISABLE_TLS") or "false"))
diff --git a/scripts/logs_loop/start.py b/scripts/logs_loop/start.py
index c3913ee..45f5b18 100644
--- a/scripts/logs_loop/start.py
+++ b/scripts/logs_loop/start.py
@@ -175,7 +175,14 @@ if os.environ.get("DEV"):
logging.debug(r.get("logs_enabled:aaaa"))
configuration = client.Configuration()
-token = os.environ["TOKEN"]
+token = os.environ.get("TOKEN", "")
+if not token:
+ redis_token = r.get("x_k8s_token")
+ if redis_token:
+ token = redis_token if isinstance(redis_token, str) else redis_token.decode()
+ logging.debug("[k-inv][logs-loop] TOKEN not set in env, using X-K8S-Token from Redis")
+if not token:
+ logging.warning("[k-inv][logs-loop] No token available (TOKEN env and Redis x_k8s_token both empty) — API calls may fail")
configuration.api_key = {"authorization": f"Bearer {token}"}
configuration.host = sys.argv[1]
diff --git a/scripts/logs_loop/start.sh b/scripts/logs_loop/start.sh
index fc90538..9d9cabc 100755
--- a/scripts/logs_loop/start.sh
+++ b/scripts/logs_loop/start.sh
@@ -9,10 +9,35 @@ else
fi
export PYTHONWARNINGS="ignore:Unverified HTTPS request"
-python3 /opt/logs_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} &
+RESOLVED_ENDPOINT=""
+
+# 1) Endpoint configured from the web console (persisted by /kube/healthz in Redis)
+REDIS_ENDPOINT=$(python3 - <<'PY'
+import redis
+try:
+ r = redis.Redis(unix_socket_path='/tmp/redis.sock', decode_responses=True)
+ v = r.get('k8s_api_endpoint')
+ print(v.strip() if v else '')
+except Exception:
+ print('')
+PY
+)
+
+if [ ! -z "$REDIS_ENDPOINT" ]; then
+ RESOLVED_ENDPOINT="$REDIS_ENDPOINT"
+ echo "Using k8s_api_endpoint from Redis: $RESOLVED_ENDPOINT"
+elif [ ! -z "$ENDPOINT" ]; then
+ RESOLVED_ENDPOINT="$ENDPOINT"
+ echo "Using ENDPOINT env var: $RESOLVED_ENDPOINT"
+else
+ RESOLVED_ENDPOINT="https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
+ echo "Using in-cluster endpoint: $RESOLVED_ENDPOINT"
+fi
+
+python3 /opt/logs_loop/start.py "$RESOLVED_ENDPOINT" &
while true
do
- pgrep -a -f -c "^python3.*logs_loop.*$" > /dev/null || ( python3 /opt/logs_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} & )
+ pgrep -a -f -c "^python3.*logs_loop.*$" > /dev/null || ( python3 /opt/logs_loop/start.py "$RESOLVED_ENDPOINT" & )
sleep 2
done
diff --git a/scripts/metrics_loop/start.py b/scripts/metrics_loop/start.py
index 923ec16..1e7dd9b 100644
--- a/scripts/metrics_loop/start.py
+++ b/scripts/metrics_loop/start.py
@@ -4,6 +4,18 @@ import json
import logging
import os
import sys
+
+logging.basicConfig(
+ level=logging.INFO,
+ stream=sys.stdout,
+ format="%(asctime)s %(levelname)s %(message)s",
+ force=True,
+)
+logging.getLogger().setLevel(logging.INFO)
+_file_handler = logging.FileHandler("/tmp/kubeinvaders_metrics_loop.log")
+_file_handler.setLevel(logging.INFO)
+_file_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
+logging.getLogger().addHandler(_file_handler)
from kubernetes import client, config
from kubernetes.client.rest import ApiException
import requests
@@ -13,10 +25,28 @@ import random
import redis
import time
import urllib3
+from urllib.parse import urlparse
+from urllib3.exceptions import LocationValueError
+from urllib3.exceptions import MaxRetryError
import time
import datetime
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+def normalize_k8s_host(raw_host):
+ if not raw_host:
+ return ""
+ host = str(raw_host).strip().strip('"').strip("'")
+ if not host:
+ return ""
+ if not host.startswith("http://") and not host.startswith("https://"):
+ host = "https://" + host
+ host = host.rstrip("/")
+ parsed = urlparse(host)
+ if not parsed.netloc:
+ return ""
+ return host
+
def join_string_to_array(project_list_string, separator):
return project_list_string.split(separator)
@@ -86,31 +116,112 @@ def create_job(job_name, pod_template):
r = redis.Redis(unix_socket_path='/tmp/redis.sock')
-logging.basicConfig(
- level=os.environ.get("LOGLEVEL", "INFO"),
- stream=sys.stdout,
- format="%(asctime)s %(levelname)s %(message)s",
- force=True,
-)
logging.getLogger('kubernetes').setLevel(logging.ERROR)
logging.debug('Starting script for KubeInvaders metrics loop')
configuration = client.Configuration()
-token = os.environ["TOKEN"]
-configuration.api_key = {"authorization": f"Bearer {token}"}
-configuration.host = sys.argv[1]
+token = os.environ.get("TOKEN", "")
+if not token:
+ redis_token = r.get("x_k8s_token")
+ if redis_token:
+ token = redis_token.decode()
+ logging.info("[k-inv][metrics_loop] TOKEN not set in env, using X-K8S-Token from Redis")
+token = token.strip()
+if token.lower().startswith("bearer "):
+ token = token[7:].strip()
+if not token:
+ logging.warning("[k-inv][metrics_loop] No token available (TOKEN env and Redis x_k8s_token both empty) — API calls may fail")
+
+configuration.api_key = {"authorization": token}
+configuration.api_key_prefix = {"authorization": "Bearer"}
+
+arg_host = sys.argv[1] if len(sys.argv) > 1 else ""
+resolved_host = normalize_k8s_host(arg_host)
+
+if not resolved_host:
+ redis_endpoint = r.get("k8s_api_endpoint")
+ if redis_endpoint:
+ redis_endpoint = redis_endpoint.decode() if isinstance(redis_endpoint, bytes) else redis_endpoint
+ resolved_host = normalize_k8s_host(redis_endpoint)
+ if resolved_host:
+ logging.info(f"[k-inv][metrics_loop] Using Kubernetes host from Redis k8s_api_endpoint: {resolved_host}")
+
+if not resolved_host:
+ resolved_host = normalize_k8s_host(os.environ.get("ENDPOINT", ""))
+ if resolved_host:
+ logging.info(f"[k-inv][metrics_loop] Using Kubernetes host from ENDPOINT: {resolved_host}")
+
+if not resolved_host:
+ svc_host = os.environ.get("KUBERNETES_SERVICE_HOST", "")
+ svc_port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS", "443")
+ resolved_host = normalize_k8s_host(f"https://{svc_host}:{svc_port}" if svc_host else "")
+ if resolved_host:
+ logging.info(f"[k-inv][metrics_loop] Using in-cluster Kubernetes host: {resolved_host}")
+
+if not resolved_host:
+ resolved_host = "https://kubernetes.default.svc"
+ logging.warning("[k-inv][metrics_loop] No valid Kubernetes host provided; defaulting to https://kubernetes.default.svc")
+
+configuration.host = resolved_host
+logging.info(f"[k-inv][metrics_loop] Kubernetes API host resolved to: {configuration.host}")
configuration.insecure_skip_tls_verify = True
configuration.verify_ssl = False
-client.Configuration.set_default(configuration)
-client.Configuration.set_default(configuration)
+api_client = client.ApiClient(configuration=configuration)
+api_instance = client.CoreV1Api(api_client)
+batch_api = client.BatchV1Api(api_client)
-api_instance = client.CoreV1Api()
-batch_api = client.BatchV1Api()
+
+def rebuild_api_client_from_fallbacks(current_configuration):
+ fallback_host = ""
+
+ redis_endpoint = r.get("k8s_api_endpoint")
+ if redis_endpoint:
+ redis_endpoint = redis_endpoint.decode() if isinstance(redis_endpoint, bytes) else redis_endpoint
+ fallback_host = normalize_k8s_host(redis_endpoint)
+
+ if not fallback_host:
+ fallback_host = normalize_k8s_host(os.environ.get("ENDPOINT", ""))
+
+ if not fallback_host:
+ svc_host = os.environ.get("KUBERNETES_SERVICE_HOST", "")
+ svc_port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS", "443")
+ fallback_host = normalize_k8s_host(f"https://{svc_host}:{svc_port}" if svc_host else "")
+
+ if not fallback_host:
+ fallback_host = "https://kubernetes.default.svc"
+
+ current_configuration.host = fallback_host
+ logging.warning(f"[k-inv][metrics_loop] Rebuilding Kubernetes API client with host: {fallback_host}")
+ new_api_client = client.ApiClient(configuration=current_configuration)
+ return client.CoreV1Api(new_api_client), client.BatchV1Api(new_api_client)
+
+
+def sync_api_client_from_redis(current_configuration):
+ redis_endpoint = r.get("k8s_api_endpoint")
+ if not redis_endpoint:
+ return None, None
+
+ redis_endpoint = redis_endpoint.decode() if isinstance(redis_endpoint, bytes) else redis_endpoint
+ redis_host = normalize_k8s_host(redis_endpoint)
+ if not redis_host:
+ return None, None
+
+ if current_configuration.host != redis_host:
+ current_configuration.host = redis_host
+ logging.info(f"[k-inv][metrics_loop] Switching Kubernetes API host from Redis k8s_api_endpoint to: {redis_host}")
+ new_api_client = client.ApiClient(configuration=current_configuration)
+ return client.CoreV1Api(new_api_client), client.BatchV1Api(new_api_client)
+
+ return None, None
while True:
+ maybe_api_instance, maybe_batch_api = sync_api_client_from_redis(configuration)
+ if maybe_api_instance and maybe_batch_api:
+ api_instance, batch_api = maybe_api_instance, maybe_batch_api
+
#logging.info(f"[k-inv][metrics_loop] Metrics loop is active - {r.exists('chaos_report_project_list')}")
if r.exists('chaos_report_project_list'):
logging.info("[k-inv][metrics_loop] Found chaos_report_project_list Redis Key")
@@ -145,6 +256,18 @@ while True:
if method not in ["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"]:
method = "GET"
+ with open("/tmp/kubeinvaders_url_debug.log", "w") as dbg:
+ dbg.write(f"--- {datetime.datetime.now().isoformat()} ---\n")
+ dbg.write(f" project = {project!r}\n")
+ dbg.write(f" chaos_program_key= {chaos_program_key!r}\n")
+ dbg.write(f" raw redis value = {r.get(chaos_program_key)!r}\n")
+ dbg.write(f" chaos_report_program = {chaos_report_program}\n")
+ dbg.write(f" url (raw) = {chaos_report_program.get('chaosReportCheckSiteURL')!r}\n")
+ dbg.write(f" url (stripped) = {url!r}\n")
+ dbg.write(f" method = {method!r}\n")
+ dbg.write(f" headers = {headers}\n")
+ dbg.write(f" payload = {payload!r}\n")
+
if not url:
logging.warning(
f"[k-inv][metrics_loop][chaos_report] Empty chaosReportCheckSiteURL for project={project}. Raw program: {chaos_report_program}"
@@ -152,6 +275,7 @@ while True:
response = "Connection Error"
else:
response = do_http_request(url, method, headers, payload)
+
check_url_counter_key = f"{chaos_report_program['chaosReportProject']}_check_url_counter"
check_url_status_code_key = f"{chaos_report_program['chaosReportProject']}_check_url_status_code"
check_url_elapsed_time_key = f"{chaos_report_program['chaosReportProject']}_check_url_elapsed_time"
@@ -173,15 +297,25 @@ while True:
logging.info(f"[k-inv][metrics_loop][chaos_report] Status code {response.status_code} while checking {chaos_report_program['chaosReportCheckSiteURL']}")
r.set(check_url_status_code_key, response.status_code)
r.set(check_url_elapsed_time_key, float(response.elapsed.total_seconds()))
+ api_response_items = []
try:
label_selector="chaos-controller=kubeinvaders"
api_response = api_instance.list_pod_for_all_namespaces(label_selector=label_selector)
+ api_response_items = api_response.items or []
except ApiException as e:
- logging.debug(e)
+ logging.warning(f"[k-inv][metrics_loop] Kubernetes API error while listing pods: {e}")
+ except LocationValueError as e:
+ logging.warning(f"[k-inv][metrics_loop] Invalid Kubernetes host while listing pods: {e}")
+ api_instance, batch_api = rebuild_api_client_from_fallbacks(configuration)
+ except MaxRetryError as e:
+ logging.warning(f"[k-inv][metrics_loop] Kubernetes host unreachable while listing pods: {e}")
+ api_instance, batch_api = rebuild_api_client_from_fallbacks(configuration)
+ except Exception as e:
+ logging.warning(f"[k-inv][metrics_loop] Unexpected error while listing pods: {type(e).__name__}: {e}")
r.set("current_chaos_job_pod", 0)
- for pod in api_response.items:
+ for pod in api_response_items:
if pod.status.phase == "Pending" or pod.status.phase == "Running":
logging.debug(f"[k-inv][metrics_loop] Found pod {pod.metadata.name}. It is in {pod.status.phase} phase. Incrementing current_chaos_job_pod Redis key")
r.incr('current_chaos_job_pod')
diff --git a/scripts/metrics_loop/start.sh b/scripts/metrics_loop/start.sh
index 27477a4..b4cc37d 100755
--- a/scripts/metrics_loop/start.sh
+++ b/scripts/metrics_loop/start.sh
@@ -10,13 +10,36 @@ fi
export PYTHONUNBUFFERED=1
-if python3 -c "import urllib.request; urllib.request.urlopen('https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}', timeout=5)" 2>/dev/null; then
- python3 -u /opt/metrics_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} &
- while true
- do
- pgrep -a -f -c "^python3.*metrics_loop.*$" > /dev/null || ( python3 -u /opt/metrics_loop/start.py https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS} & )
- sleep 2
- done
+RESOLVED_ENDPOINT=""
+
+# 1) Endpoint configured from the web console (persisted by /kube/healthz in Redis)
+REDIS_ENDPOINT=$(python3 - <<'PY'
+import redis
+try:
+ r = redis.Redis(unix_socket_path='/tmp/redis.sock', decode_responses=True)
+ v = r.get('k8s_api_endpoint')
+ print(v.strip() if v else '')
+except Exception:
+ print('')
+PY
+)
+
+if [ ! -z "$REDIS_ENDPOINT" ]; then
+ RESOLVED_ENDPOINT="$REDIS_ENDPOINT"
+ echo "Using k8s_api_endpoint from Redis: $RESOLVED_ENDPOINT"
+elif [ ! -z "$ENDPOINT" ]; then
+ RESOLVED_ENDPOINT="$ENDPOINT"
+ echo "Using ENDPOINT env var: $RESOLVED_ENDPOINT"
+else
+ RESOLVED_ENDPOINT="https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
+ echo "Using in-cluster endpoint: $RESOLVED_ENDPOINT"
fi
+python3 -u /opt/metrics_loop/start.py "$RESOLVED_ENDPOINT" &
+while true
+do
+ pgrep -a -f -c "^python3.*metrics_loop.*$" > /dev/null || ( python3 -u /opt/metrics_loop/start.py "$RESOLVED_ENDPOINT" & )
+ sleep 2
+done
+
diff --git a/scripts/pod.lua b/scripts/pod.lua
index 9e269a7..0acca86 100644
--- a/scripts/pod.lua
+++ b/scripts/pod.lua
@@ -34,6 +34,12 @@ local arg = ngx.req.get_uri_args()
local req_headers = ngx.req.get_headers()
local target = arg['target'] or req_headers["x-k8s-target"] or req_headers["X-K8S-Target"]
local header_token = req_headers["x-k8s-token"] or req_headers["X-K8S-Token"]
+
+if header_token and header_token ~= "" then
+ local red_tok = redis:new()
+ red_tok:connect("unix:/tmp/redis.sock")
+ red_tok:set("x_k8s_token", header_token)
+end
local ca_cert_b64 = req_headers["x-k8s-ca-cert-b64"] or req_headers["X-K8S-CA-CERT-B64"]
local ca_cert = nil
diff --git a/scripts/programming_mode/start.py b/scripts/programming_mode/start.py
index b4b9584..811d886 100644
--- a/scripts/programming_mode/start.py
+++ b/scripts/programming_mode/start.py
@@ -3,6 +3,7 @@ import yaml
import logging
import os
import sys
+import base64
from kubernetes import client, config
from kubernetes.client.rest import ApiException
import requests
@@ -69,7 +70,24 @@ with open(sys.argv[1], 'r') as stream:
r = redis.Redis(unix_socket_path='/tmp/redis.sock')
configuration = client.Configuration()
-token = os.environ["TOKEN"]
+token = ""
+token_b64 = os.environ.get("K8S_TOKEN_B64", "")
+if token_b64:
+ try:
+ token = base64.b64decode(token_b64).decode("utf-8").strip()
+ logging.info("[PROGRAMMING_MODE] Using token from K8S_TOKEN_B64")
+ except Exception as e:
+ logging.warning(f"[PROGRAMMING_MODE] Invalid K8S_TOKEN_B64: {e}")
+
+if not token:
+ token = os.environ.get("TOKEN", "")
+if not token:
+ redis_token = r.get("x_k8s_token")
+ if redis_token:
+ token = redis_token.decode() if isinstance(redis_token, bytes) else redis_token
+ logging.info("[PROGRAMMING_MODE] TOKEN not set in env, using X-K8S-Token from Redis")
+if not token:
+ logging.warning("[PROGRAMMING_MODE] No token available (TOKEN env and Redis x_k8s_token both empty) — API calls may fail")
configuration.api_key = {"authorization": f"Bearer {token}"}
configuration.host = sys.argv[2]
configuration.insecure_skip_tls_verify = True