mirror of
https://github.com/kubeshark/kubeshark.git
synced 2026-05-27 19:45:23 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9396e64b9b | ||
|
|
b5e59321e0 | ||
|
|
f97866f747 | ||
|
|
b2a0fb0cea | ||
|
|
2475f6e260 | ||
|
|
cd13d8f89e | ||
|
|
ad9dfbf5f9 | ||
|
|
ed1d2e1a4d | ||
|
|
7b5954ea00 | ||
|
|
8186b7891b |
24
.github/workflows/release-tag.yml
vendored
24
.github/workflows/release-tag.yml
vendored
@@ -1,24 +0,0 @@
|
||||
name: Auto-tag release
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [closed]
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
if: github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, 'release/v')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Create and push tag
|
||||
run: |
|
||||
VERSION="${GITHUB_HEAD_REF#release/}"
|
||||
echo "Creating tag $VERSION on master"
|
||||
git tag "$VERSION"
|
||||
git push origin "$VERSION"
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -66,4 +66,5 @@ scripts/
|
||||
kubeshark.yaml
|
||||
|
||||
# Claude Code
|
||||
CLAUDE.md
|
||||
CLAUDE.md
|
||||
.claude/
|
||||
12
Makefile
12
Makefile
@@ -268,8 +268,8 @@ release: ## Print release workflow instructions.
|
||||
@echo ""
|
||||
@echo " Shortcut: make release-pr VERSION=x.y.z runs 1 → 2 → 3."
|
||||
@echo ""
|
||||
@echo " After both PRs merge: tag is created automatically,"
|
||||
@echo " or run: make release-tag VERSION=x.y.z"
|
||||
@echo " After both PRs merge, create the release tag:"
|
||||
@echo " make release-tag VERSION=x.y.z"
|
||||
|
||||
# Internal: validate VERSION before any release-* target runs.
|
||||
_release-check-version:
|
||||
@@ -362,14 +362,18 @@ release-pr: release-siblings release-pr-kubeshark release-pr-helm ## Run release
|
||||
@echo " - kubeshark.github.io: Review and merge the helm chart PR."
|
||||
@echo "Tag will be created automatically, or run: make release-tag VERSION=$(VERSION)"
|
||||
|
||||
release-tag: ## Step 2 (fallback): Tag master after release PR is merged.
|
||||
release-tag: _release-check-version ## Step 2: Tag master after release PR is merged. Idempotent; re-run to retrigger the release build.
|
||||
@echo "Verifying release PR was merged..."
|
||||
@if ! gh pr list --state merged --head release/v$(VERSION) --json number --jq '.[0].number' | grep -q .; then \
|
||||
echo "Error: No merged PR found for release/v$(VERSION). Merge the PR first."; \
|
||||
exit 1; \
|
||||
fi
|
||||
@git checkout master && git pull
|
||||
@git tag -d v$(VERSION) 2>/dev/null; git tag v$(VERSION) && git push origin --tags
|
||||
@if git ls-remote --tags origin "refs/tags/v$(VERSION)" | grep -q .; then \
|
||||
echo "Tag v$(VERSION) already exists on origin — deleting to retrigger release..."; \
|
||||
git push origin :refs/tags/v$(VERSION); \
|
||||
fi
|
||||
@git tag -d v$(VERSION) 2>/dev/null; git tag v$(VERSION) && git push origin "refs/tags/v$(VERSION)"
|
||||
@echo ""
|
||||
@echo "Tagged v$(VERSION) on master. GitHub Actions will build the release."
|
||||
|
||||
|
||||
@@ -102,23 +102,21 @@ func CreateDefaultConfig() ConfigStruct {
|
||||
},
|
||||
},
|
||||
Auth: configStructs.AuthConfig{
|
||||
Saml: configStructs.SamlConfig{
|
||||
RoleAttribute: "role",
|
||||
Roles: map[string]configStructs.Role{
|
||||
"admin": {
|
||||
Filter: "",
|
||||
CanDownloadPCAP: true,
|
||||
CanUseScripting: true,
|
||||
ScriptingPermissions: configStructs.ScriptingPermissions{
|
||||
CanSave: true,
|
||||
CanActivate: true,
|
||||
CanDelete: true,
|
||||
},
|
||||
CanUpdateTargetedPods: true,
|
||||
CanStopTrafficCapturing: true,
|
||||
CanControlDissection: true,
|
||||
ShowAdminConsoleLink: true,
|
||||
RolesClaim: "role",
|
||||
Roles: map[string]configStructs.Role{
|
||||
"admin": {
|
||||
Filter: "",
|
||||
CanDownloadPCAP: true,
|
||||
CanUseScripting: true,
|
||||
ScriptingPermissions: configStructs.ScriptingPermissions{
|
||||
CanSave: true,
|
||||
CanActivate: true,
|
||||
CanDelete: true,
|
||||
},
|
||||
CanUpdateTargetedPods: true,
|
||||
CanStopTrafficCapturing: true,
|
||||
CanControlDissection: true,
|
||||
ShowAdminConsoleLink: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -137,7 +135,7 @@ func CreateDefaultConfig() ConfigStruct {
|
||||
// "tcp",
|
||||
// "udp",
|
||||
"ws",
|
||||
// "tlsx",
|
||||
"tlsx",
|
||||
"ldap",
|
||||
"radius",
|
||||
"diameter",
|
||||
|
||||
@@ -173,17 +173,29 @@ type Role struct {
|
||||
}
|
||||
|
||||
type SamlConfig struct {
|
||||
IdpMetadataUrl string `yaml:"idpMetadataUrl" json:"idpMetadataUrl"`
|
||||
X509crt string `yaml:"x509crt" json:"x509crt"`
|
||||
X509key string `yaml:"x509key" json:"x509key"`
|
||||
RoleAttribute string `yaml:"roleAttribute" json:"roleAttribute"`
|
||||
Roles map[string]Role `yaml:"roles" json:"roles"`
|
||||
IdpMetadataUrl string `yaml:"idpMetadataUrl" json:"idpMetadataUrl"`
|
||||
X509crt string `yaml:"x509crt" json:"x509crt"`
|
||||
X509key string `yaml:"x509key" json:"x509key"`
|
||||
}
|
||||
|
||||
type AuthConfig struct {
|
||||
Enabled bool `yaml:"enabled" json:"enabled" default:"false"`
|
||||
Type string `yaml:"type" json:"type" default:"saml"`
|
||||
Saml SamlConfig `yaml:"saml" json:"saml"`
|
||||
Enabled bool `yaml:"enabled" json:"enabled" default:"false"`
|
||||
// Type selects the authentication backend. Valid values:
|
||||
// saml — SAML 2.0 SSO
|
||||
// oidc — generic OIDC (Dex, Okta, Auth0, Keycloak, Azure AD, …)
|
||||
// dex — permanent alias of oidc (kept for back-compat)
|
||||
// descope — Descope SDK
|
||||
// default — also routes to Descope (kept, not deprecated)
|
||||
//
|
||||
// NOTE: prior releases routed `oidc` to Descope. If you were using `oidc`
|
||||
// to mean Descope, switch to `descope` (or `default`). The rename is a
|
||||
// breaking change documented in the release notes.
|
||||
Type string `yaml:"type" json:"type" default:"saml"`
|
||||
Roles map[string]Role `yaml:"roles" json:"roles"`
|
||||
RolesClaim string `yaml:"rolesClaim" json:"rolesClaim"`
|
||||
DefaultRole string `yaml:"defaultRole" json:"defaultRole"`
|
||||
DefaultFilter string `yaml:"defaultFilter" json:"defaultFilter"`
|
||||
Saml SamlConfig `yaml:"saml" json:"saml"`
|
||||
}
|
||||
|
||||
type IngressConfig struct {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
apiVersion: v2
|
||||
name: kubeshark
|
||||
version: "53.2.5"
|
||||
version: "53.3.0"
|
||||
description: The API Traffic Analyzer for Kubernetes
|
||||
home: https://kubeshark.com
|
||||
keywords:
|
||||
|
||||
@@ -212,14 +212,15 @@ Example for overriding image names:
|
||||
| `tap.tolerations.hub` | Tolerations for hub component | `[]` |
|
||||
| `tap.tolerations.front` | Tolerations for front-end component | `[]` |
|
||||
| `tap.auth.enabled` | Enable authentication | `false` |
|
||||
| `tap.auth.type` | Authentication type (1 option available: `saml`) | `saml` |
|
||||
| `tap.auth.type` | Authentication backend. Valid values: `saml`, `oidc` (generic OIDC — Dex, Okta, Auth0, Keycloak, Azure AD, Google, …), `dex` (permanent alias of `oidc`), `descope`, `default` (also routes to Descope). **Breaking**: prior releases routed `oidc` to Descope — if you were using it for Descope, switch to `descope` or `default`. | `saml` |
|
||||
| `tap.auth.approvedEmails` | List of approved email addresses for authentication | `[]` |
|
||||
| `tap.auth.approvedDomains` | List of approved email domains for authentication | `[]` |
|
||||
| `tap.auth.saml.idpMetadataUrl` | SAML IDP metadata URL <br/>(effective, if `tap.auth.type = saml`) | `` |
|
||||
| `tap.auth.rolesClaim` | Name of the JWT claim (OIDC) or SAML attribute carrying role memberships. | `role` |
|
||||
| `tap.auth.defaultRole` | Optional role name inside `tap.auth.roles` applied as fallback when an authenticated user has no matching role. Empty string = no fallback, zero-valued permissions. | `""` |
|
||||
| `tap.auth.roles` | Backend-neutral role map shared by SAML and OIDC. Each role's `namespaces` is a comma-separated list controlling which Kubernetes namespaces the role's users see traffic for: `""` = deny all, `"*"` = allow all, `"foo"` = literal namespace, `"foo,bar"` = OR over literals, `"foo-*"` = glob expansion against the cluster's known namespaces. Empty/unset `tap.auth.roles` grants nothing — admins opt into elevated access by populating this map. | `{"admin":{"namespaces":"*","canDownloadPCAP":true,"canUpdateTargetedPods":true,"canUseScripting":true,"scriptingPermissions":{"canSave":true,"canActivate":true,"canDelete":true},"canStopTrafficCapturing":true,"canControlDissection":true,"showAdminConsoleLink":true}}` |
|
||||
| `tap.auth.saml.idpMetadataUrl` | SAML IDP metadata URL <br/>(effective, if `tap.auth.type = saml`) | `` |
|
||||
| `tap.auth.saml.x509crt` | A self-signed X.509 `.cert` contents <br/>(effective, if `tap.auth.type = saml`) | `` |
|
||||
| `tap.auth.saml.x509key` | A self-signed X.509 `.key` contents <br/>(effective, if `tap.auth.type = saml`) | `` |
|
||||
| `tap.auth.saml.roleAttribute` | A SAML attribute name corresponding to user's authorization role <br/>(effective, if `tap.auth.type = saml`) | `role` |
|
||||
| `tap.auth.saml.roles` | A list of SAML authorization roles and their permissions <br/>(effective, if `tap.auth.type = saml`) | `{"admin":{"canDownloadPCAP":true,"canUpdateTargetedPods":true,"canUseScripting":true, "scriptingPermissions":{"canSave":true, "canActivate":true, "canDelete":true}, "canStopTrafficCapturing":true, "canControlDissection":true, "filter":"","showAdminConsoleLink":true}}` |
|
||||
| `tap.ingress.enabled` | Enable `Ingress` | `false` |
|
||||
| `tap.ingress.className` | Ingress class name | `""` |
|
||||
| `tap.ingress.host` | Host of the `Ingress` | `ks.svc.cluster.local` |
|
||||
@@ -377,8 +378,8 @@ Add these helm values to set up OIDC authentication powered by your Dex IdP:
|
||||
tap:
|
||||
auth:
|
||||
enabled: true
|
||||
type: dex
|
||||
dexOidc:
|
||||
type: oidc # canonical; `dex` is accepted as a permanent alias
|
||||
oidc:
|
||||
issuer: <put Dex IdP issuer URL here>
|
||||
clientId: kubeshark
|
||||
clientSecret: create your own client password
|
||||
@@ -390,7 +391,7 @@ tap:
|
||||
---
|
||||
|
||||
**Note:**<br/>
|
||||
Set `tap.auth.dexOidc.bypassSslCaCheck: true`
|
||||
Set `tap.auth.oidc.bypassSslCaCheck: true`
|
||||
to allow Kubeshark to communicate with a Dex IdP whose SSL certificate is signed by an unknown Certificate Authority.
|
||||
|
||||
This setting allows you to prevent such SSL CA-related errors:<br/>
|
||||
@@ -429,7 +430,7 @@ The following Dex settings will have these values:
|
||||
|
||||
| Setting | Value |
|
||||
|-------------------------------------------------------|----------------------------------------------|
|
||||
| `tap.auth.dexOidc.issuer` | `https://ks.example.com/dex` |
|
||||
| `tap.auth.oidc.issuer` | `https://ks.example.com/dex` |
|
||||
| `tap.auth.dexConfig.issuer` | `https://ks.example.com/dex` |
|
||||
| `tap.auth.dexConfig.staticClients -> redirectURIs` | `https://ks.example.com/api/oauth2/callback` |
|
||||
| `tap.auth.dexConfig.connectors -> config.redirectURI` | `https://ks.example.com/dex/callback` |
|
||||
@@ -447,16 +448,16 @@ Please, make sure to prepare the following things first.
|
||||
- You will need to specify storage settings in `tap.auth.dexConfig.storage`
|
||||
- default: `memory`
|
||||
3. Decide on the OAuth2 `?state=` param expiration time:
|
||||
- field: `tap.auth.dexOidc.oauth2StateParamExpiry`
|
||||
- field: `tap.auth.oidc.oauth2StateParamExpiry`
|
||||
- default: `10m` (10 minutes)
|
||||
- valid time units are `s`, `m`, `h`
|
||||
4. Decide on the refresh token expiration:
|
||||
- field 1: `tap.auth.dexOidc.expiry.refreshTokenLifetime`
|
||||
- field 1: `tap.auth.oidc.refreshTokenLifetime`
|
||||
- field 2: `tap.auth.dexConfig.expiry.refreshTokens.absoluteLifetime`
|
||||
- default: `3960h` (165 days)
|
||||
- valid time units are `s`, `m`, `h`
|
||||
5. Create a unique & secure password to set in these fields:
|
||||
- field 1: `tap.auth.dexOidc.clientSecret`
|
||||
- field 1: `tap.auth.oidc.clientSecret`
|
||||
- field 2: `tap.auth.dexConfig.staticClients -> secret`
|
||||
- password must be the same for these 2 fields
|
||||
6. Discover more possibilities of **[Dex Configuration](https://dexidp.io/docs/configuration/)**
|
||||
@@ -478,8 +479,8 @@ Helm `values.yaml`:
|
||||
tap:
|
||||
auth:
|
||||
enabled: true
|
||||
type: dex
|
||||
dexOidc:
|
||||
type: oidc # canonical; `dex` is accepted as a permanent alias
|
||||
oidc:
|
||||
issuer: https://<your-ingress-hostname>/dex
|
||||
|
||||
# Client ID/secret must be taken from `tap.auth.dexConfig.staticClients -> id/secret`
|
||||
|
||||
@@ -44,6 +44,12 @@ rules:
|
||||
- create
|
||||
- update
|
||||
- delete
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
|
||||
@@ -26,12 +26,12 @@ spec:
|
||||
- env:
|
||||
- name: REACT_APP_AUTH_ENABLED
|
||||
value: '{{- if or (and .Values.cloudLicenseEnabled (not (empty .Values.license))) (not .Values.internetConnectivity) -}}
|
||||
{{ (default false .Values.demoModeEnabled) | ternary true ((and .Values.tap.auth.enabled (eq .Values.tap.auth.type "dex")) | ternary true false) }}
|
||||
{{ (default false .Values.demoModeEnabled) | ternary true ((and .Values.tap.auth.enabled (or (eq .Values.tap.auth.type "oidc") (eq .Values.tap.auth.type "dex"))) | ternary true false) }}
|
||||
{{- else -}}
|
||||
{{ .Values.cloudLicenseEnabled | ternary "true" ((default false .Values.demoModeEnabled) | ternary "true" .Values.tap.auth.enabled) }}
|
||||
{{- end }}'
|
||||
- name: REACT_APP_AUTH_TYPE
|
||||
value: '{{- if and .Values.cloudLicenseEnabled (not (eq .Values.tap.auth.type "dex")) -}}
|
||||
value: '{{- if and .Values.cloudLicenseEnabled (not (or (eq .Values.tap.auth.type "oidc") (eq .Values.tap.auth.type "dex"))) -}}
|
||||
default
|
||||
{{- else -}}
|
||||
{{ (default false .Values.demoModeEnabled) | ternary "default" .Values.tap.auth.type }}
|
||||
|
||||
@@ -21,6 +21,7 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: worker
|
||||
kubeshark.io/internal-auth: "true"
|
||||
{{- include "kubeshark.labels" . | nindent 8 }}
|
||||
name: kubeshark-worker-daemon-set
|
||||
namespace: kubeshark
|
||||
|
||||
@@ -19,27 +19,28 @@ data:
|
||||
INGRESS_HOST: '{{ .Values.tap.ingress.host }}'
|
||||
PROXY_FRONT_PORT: '{{ .Values.tap.proxy.front.port }}'
|
||||
AUTH_ENABLED: '{{- if and .Values.cloudLicenseEnabled (not (empty .Values.license)) -}}
|
||||
{{ (default false .Values.demoModeEnabled) | ternary true ((and .Values.tap.auth.enabled (eq .Values.tap.auth.type "dex")) | ternary true false) }}
|
||||
{{ (default false .Values.demoModeEnabled) | ternary true ((and .Values.tap.auth.enabled (or (eq .Values.tap.auth.type "oidc") (eq .Values.tap.auth.type "dex"))) | ternary true false) }}
|
||||
{{- else -}}
|
||||
{{ .Values.cloudLicenseEnabled | ternary "true" ((default false .Values.demoModeEnabled) | ternary "true" .Values.tap.auth.enabled) }}
|
||||
{{- end }}'
|
||||
AUTH_TYPE: '{{- if and .Values.cloudLicenseEnabled (not (eq .Values.tap.auth.type "dex")) -}}
|
||||
AUTH_TYPE: '{{- if and .Values.cloudLicenseEnabled (not (or (eq .Values.tap.auth.type "oidc") (eq .Values.tap.auth.type "dex"))) -}}
|
||||
default
|
||||
{{- else -}}
|
||||
{{ (default false .Values.demoModeEnabled) | ternary "default" .Values.tap.auth.type }}
|
||||
{{- end }}'
|
||||
AUTH_SAML_IDP_METADATA_URL: '{{ .Values.tap.auth.saml.idpMetadataUrl }}'
|
||||
AUTH_SAML_ROLE_ATTRIBUTE: '{{ .Values.tap.auth.saml.roleAttribute }}'
|
||||
AUTH_SAML_ROLES: '{{ .Values.tap.auth.saml.roles | toJson }}'
|
||||
AUTH_OIDC_ISSUER: '{{ default "not set" (((.Values.tap).auth).dexOidc).issuer }}'
|
||||
AUTH_OIDC_REFRESH_TOKEN_LIFETIME: '{{ default "3960h" (((.Values.tap).auth).dexOidc).refreshTokenLifetime }}'
|
||||
AUTH_OIDC_STATE_PARAM_EXPIRY: '{{ default "10m" (((.Values.tap).auth).dexOidc).oauth2StateParamExpiry }}'
|
||||
AUTH_ROLES: '{{ .Values.tap.auth.roles | toJson }}'
|
||||
AUTH_ROLES_CLAIM: '{{ .Values.tap.auth.rolesClaim }}'
|
||||
AUTH_DEFAULT_ROLE: '{{ default "" .Values.tap.auth.defaultRole }}'
|
||||
AUTH_OIDC_ISSUER: '{{ default "not set" (((.Values.tap).auth).oidc).issuer }}'
|
||||
AUTH_OIDC_REFRESH_TOKEN_LIFETIME: '{{ default "3960h" (((.Values.tap).auth).oidc).refreshTokenLifetime }}'
|
||||
AUTH_OIDC_STATE_PARAM_EXPIRY: '{{ default "10m" (((.Values.tap).auth).oidc).oauth2StateParamExpiry }}'
|
||||
AUTH_OIDC_BYPASS_SSL_CA_CHECK: '{{- if and
|
||||
(hasKey .Values.tap "auth")
|
||||
(hasKey .Values.tap.auth "dexOidc")
|
||||
(hasKey .Values.tap.auth.dexOidc "bypassSslCaCheck")
|
||||
(hasKey .Values.tap.auth "oidc")
|
||||
(hasKey .Values.tap.auth.oidc "bypassSslCaCheck")
|
||||
-}}
|
||||
{{ eq .Values.tap.auth.dexOidc.bypassSslCaCheck true | ternary "true" "false" }}
|
||||
{{ eq .Values.tap.auth.oidc.bypassSslCaCheck true | ternary "true" "false" }}
|
||||
{{- else -}}
|
||||
false
|
||||
{{- end }}'
|
||||
|
||||
@@ -9,8 +9,8 @@ metadata:
|
||||
stringData:
|
||||
LICENSE: '{{ .Values.license }}'
|
||||
SCRIPTING_ENV: '{{ .Values.scripting.env | toJson }}'
|
||||
OIDC_CLIENT_ID: '{{ default "not set" (((.Values.tap).auth).dexOidc).clientId }}'
|
||||
OIDC_CLIENT_SECRET: '{{ default "not set" (((.Values.tap).auth).dexOidc).clientSecret }}'
|
||||
OIDC_CLIENT_ID: '{{ default "not set" (((.Values.tap).auth).oidc).clientId }}'
|
||||
OIDC_CLIENT_SECRET: '{{ default "not set" (((.Values.tap).auth).oidc).clientSecret }}'
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -153,24 +153,26 @@ tap:
|
||||
auth:
|
||||
enabled: false
|
||||
type: saml
|
||||
roles:
|
||||
admin:
|
||||
filter: ""
|
||||
canDownloadPCAP: true
|
||||
canUseScripting: true
|
||||
scriptingPermissions:
|
||||
canSave: true
|
||||
canActivate: true
|
||||
canDelete: true
|
||||
canUpdateTargetedPods: true
|
||||
canStopTrafficCapturing: true
|
||||
canControlDissection: true
|
||||
showAdminConsoleLink: true
|
||||
rolesClaim: role
|
||||
defaultRole: ""
|
||||
defaultFilter: ""
|
||||
saml:
|
||||
idpMetadataUrl: ""
|
||||
x509crt: ""
|
||||
x509key: ""
|
||||
roleAttribute: role
|
||||
roles:
|
||||
admin:
|
||||
filter: ""
|
||||
canDownloadPCAP: true
|
||||
canUseScripting: true
|
||||
scriptingPermissions:
|
||||
canSave: true
|
||||
canActivate: true
|
||||
canDelete: true
|
||||
canUpdateTargetedPods: true
|
||||
canStopTrafficCapturing: true
|
||||
canControlDissection: true
|
||||
showAdminConsoleLink: true
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
@@ -213,6 +215,7 @@ tap:
|
||||
- postgresql
|
||||
- redis
|
||||
- ws
|
||||
- tlsx
|
||||
- ldap
|
||||
- radius
|
||||
- diameter
|
||||
|
||||
@@ -4,10 +4,10 @@ apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-hub-network-policy
|
||||
namespace: default
|
||||
@@ -33,10 +33,10 @@ apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
name: kubeshark-front-network-policy
|
||||
@@ -60,10 +60,10 @@ apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
name: kubeshark-dex-network-policy
|
||||
@@ -87,10 +87,10 @@ apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
name: kubeshark-worker-network-policy
|
||||
@@ -116,10 +116,10 @@ apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-service-account
|
||||
namespace: default
|
||||
@@ -132,10 +132,10 @@ metadata:
|
||||
namespace: default
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
stringData:
|
||||
LICENSE: ''
|
||||
@@ -151,10 +151,10 @@ metadata:
|
||||
namespace: default
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
stringData:
|
||||
AUTH_SAML_X509_CRT: |
|
||||
@@ -167,10 +167,10 @@ metadata:
|
||||
namespace: default
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
stringData:
|
||||
AUTH_SAML_X509_KEY: |
|
||||
@@ -182,10 +182,10 @@ metadata:
|
||||
name: kubeshark-nginx-config-map
|
||||
namespace: default
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
default.conf: |
|
||||
@@ -252,10 +252,10 @@ metadata:
|
||||
namespace: default
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
POD_REGEX: '.*'
|
||||
@@ -272,8 +272,9 @@ data:
|
||||
AUTH_ENABLED: 'true'
|
||||
AUTH_TYPE: 'default'
|
||||
AUTH_SAML_IDP_METADATA_URL: ''
|
||||
AUTH_SAML_ROLE_ATTRIBUTE: 'role'
|
||||
AUTH_SAML_ROLES: '{"admin":{"canControlDissection":true,"canDownloadPCAP":true,"canStopTrafficCapturing":true,"canUpdateTargetedPods":true,"canUseScripting":true,"filter":"","scriptingPermissions":{"canActivate":true,"canDelete":true,"canSave":true},"showAdminConsoleLink":true}}'
|
||||
AUTH_ROLES: '{"admin":{"canControlDissection":true,"canDownloadPCAP":true,"canStopTrafficCapturing":true,"canUpdateTargetedPods":true,"canUseScripting":true,"filter":"","scriptingPermissions":{"canActivate":true,"canDelete":true,"canSave":true},"showAdminConsoleLink":true}}'
|
||||
AUTH_ROLES_CLAIM: 'role'
|
||||
AUTH_DEFAULT_ROLE: ''
|
||||
AUTH_OIDC_ISSUER: 'not set'
|
||||
AUTH_OIDC_REFRESH_TOKEN_LIFETIME: '3960h'
|
||||
AUTH_OIDC_STATE_PARAM_EXPIRY: '10m'
|
||||
@@ -293,7 +294,7 @@ data:
|
||||
TIMEZONE: ' '
|
||||
CLOUD_LICENSE_ENABLED: 'true'
|
||||
DUPLICATE_TIMEFRAME: '200ms'
|
||||
ENABLED_DISSECTORS: 'amqp,dns,http,icmp,kafka,mongodb,mysql,postgresql,redis,ws,ldap,radius,diameter,udp-flow,tcp-flow,udp-conn,tcp-conn'
|
||||
ENABLED_DISSECTORS: 'amqp,dns,http,icmp,kafka,mongodb,mysql,postgresql,redis,ws,tlsx,ldap,radius,diameter,udp-flow,tcp-flow,udp-conn,tcp-conn'
|
||||
CUSTOM_MACROS: '{"https":"tls and (http or http2)"}'
|
||||
DISSECTORS_UPDATING_ENABLED: 'true'
|
||||
SNAPSHOTS_UPDATING_ENABLED: 'true'
|
||||
@@ -312,10 +313,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-cluster-role-default
|
||||
namespace: default
|
||||
@@ -353,16 +354,22 @@ rules:
|
||||
- create
|
||||
- update
|
||||
- delete
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
---
|
||||
# Source: kubeshark/templates/03-cluster-role-binding.yaml
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-cluster-role-binding-default
|
||||
namespace: default
|
||||
@@ -380,10 +387,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
name: kubeshark-self-config-role
|
||||
@@ -439,10 +446,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
name: kubeshark-self-config-role-binding
|
||||
@@ -462,10 +469,10 @@ kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-hub
|
||||
namespace: default
|
||||
@@ -483,10 +490,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-front
|
||||
namespace: default
|
||||
@@ -504,10 +511,10 @@ kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
@@ -517,10 +524,10 @@ metadata:
|
||||
spec:
|
||||
selector:
|
||||
app.kubeshark.com/app: worker
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
ports:
|
||||
- name: metrics
|
||||
@@ -533,10 +540,10 @@ kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
@@ -546,10 +553,10 @@ metadata:
|
||||
spec:
|
||||
selector:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
ports:
|
||||
- name: metrics
|
||||
@@ -564,10 +571,10 @@ metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: worker
|
||||
sidecar.istio.io/inject: "false"
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-worker-daemon-set
|
||||
namespace: default
|
||||
@@ -581,10 +588,11 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: worker
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
kubeshark.io/internal-auth: "true"
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-worker-daemon-set
|
||||
namespace: kubeshark
|
||||
@@ -594,7 +602,7 @@ spec:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- mkdir -p /sys/fs/bpf && mount | grep -q '/sys/fs/bpf' || mount -t bpf bpf /sys/fs/bpf
|
||||
image: 'docker.io/kubeshark/worker:v53.2'
|
||||
image: 'docker.io/kubeshark/worker:v53.3'
|
||||
imagePullPolicy: Always
|
||||
name: mount-bpf
|
||||
securityContext:
|
||||
@@ -633,7 +641,7 @@ spec:
|
||||
- '500Mi'
|
||||
- -cloud-api-url
|
||||
- 'https://api.kubeshark.com'
|
||||
image: 'docker.io/kubeshark/worker:v53.2'
|
||||
image: 'docker.io/kubeshark/worker:v53.3'
|
||||
imagePullPolicy: Always
|
||||
name: sniffer
|
||||
ports:
|
||||
@@ -709,7 +717,7 @@ spec:
|
||||
- -disable-tls-log
|
||||
- -loglevel
|
||||
- 'warning'
|
||||
image: 'docker.io/kubeshark/worker:v53.2'
|
||||
image: 'docker.io/kubeshark/worker:v53.3'
|
||||
imagePullPolicy: Always
|
||||
name: tracer
|
||||
env:
|
||||
@@ -805,10 +813,10 @@ kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-hub
|
||||
namespace: default
|
||||
@@ -823,10 +831,10 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: hub
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
@@ -844,7 +852,7 @@ spec:
|
||||
- -snapshot-size-limit
|
||||
- '20Gi'
|
||||
- -dissector-image
|
||||
- 'docker.io/kubeshark/worker:v53.2'
|
||||
- 'docker.io/kubeshark/worker:v53.3'
|
||||
- -dissector-cpu
|
||||
- '1'
|
||||
- -dissector-memory
|
||||
@@ -868,7 +876,7 @@ spec:
|
||||
value: 'production'
|
||||
- name: PROFILING_ENABLED
|
||||
value: 'false'
|
||||
image: 'docker.io/kubeshark/hub:v53.2'
|
||||
image: 'docker.io/kubeshark/hub:v53.3'
|
||||
imagePullPolicy: Always
|
||||
readinessProbe:
|
||||
periodSeconds: 5
|
||||
@@ -936,10 +944,10 @@ kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: front
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: kubeshark-front
|
||||
namespace: default
|
||||
@@ -954,10 +962,10 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubeshark.com/app: front
|
||||
helm.sh/chart: kubeshark-53.2.5
|
||||
helm.sh/chart: kubeshark-53.3.0
|
||||
app.kubernetes.io/name: kubeshark
|
||||
app.kubernetes.io/instance: kubeshark
|
||||
app.kubernetes.io/version: "53.2.5"
|
||||
app.kubernetes.io/version: "53.3.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
containers:
|
||||
@@ -1012,7 +1020,7 @@ spec:
|
||||
value: 'false'
|
||||
- name: REACT_APP_SENTRY_ENVIRONMENT
|
||||
value: 'production'
|
||||
image: 'docker.io/kubeshark/front:v53.2'
|
||||
image: 'docker.io/kubeshark/front:v53.3'
|
||||
imagePullPolicy: Always
|
||||
name: kubeshark-front
|
||||
livenessProbe:
|
||||
|
||||
@@ -14,6 +14,7 @@ compatible agents.
|
||||
|-------|-------------|
|
||||
| [`network-rca`](network-rca/) | Network Root Cause Analysis. Retrospective traffic analysis via snapshots, with two investigation routes: PCAP (for Wireshark/compliance) and Dissection (for AI-driven API-level investigation). |
|
||||
| [`kfl`](kfl/) | KFL2 (Kubeshark Filter Language) expert. Complete reference for writing, debugging, and optimizing CEL-based traffic filters across all supported protocols. |
|
||||
| [`security-audit`](security-audit/) | Network Security Audit. Systematic 8-phase threat detection across MITRE ATT&CK tactics — C2, exfiltration, lateral movement, credential theft, cryptomining, protocol abuse — using snapshot-based traffic analysis. |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
|
||||
489
skills/install/SKILL.md
Normal file
489
skills/install/SKILL.md
Normal file
@@ -0,0 +1,489 @@
|
||||
---
|
||||
name: install
|
||||
user-invocable: true
|
||||
description: >
|
||||
Kubeshark installation and deployment skill. Use this skill whenever the user wants
|
||||
to install Kubeshark, deploy Kubeshark to a Kubernetes cluster, set up Kubeshark,
|
||||
configure Kubeshark helm values, generate a Kubeshark config file, customize
|
||||
Kubeshark deployment, troubleshoot Kubeshark installation, upgrade Kubeshark,
|
||||
uninstall Kubeshark, or manage the Kubeshark Helm release. Also trigger when
|
||||
the user mentions "kubeshark tap", "kubeshark clean", "helm install kubeshark",
|
||||
"get kubeshark running", "set up traffic capture", "deploy kubeshark",
|
||||
"kubeshark not starting", "kubeshark pods not ready", "configure namespaces",
|
||||
"persistent storage", "cloud storage for snapshots", "kubeshark ingress",
|
||||
"kubeshark auth", "kubeshark SAML", "kubeshark license", "kubeshark config",
|
||||
"custom helm values", "kubeshark on EKS/GKE/AKS", "kubeshark on OpenShift",
|
||||
"kubeshark on KinD/minikube/k3s", "air-gapped", "offline install",
|
||||
or any request related to getting Kubeshark installed, configured, and running
|
||||
in a Kubernetes cluster.
|
||||
---
|
||||
|
||||
# Kubeshark Installation & Deployment
|
||||
|
||||
You are a Kubeshark deployment specialist. Your job is to help users install,
|
||||
configure, and deploy Kubeshark to their Kubernetes cluster — tailoring the
|
||||
configuration to their specific environment, requirements, and use case.
|
||||
|
||||
Kubeshark deploys via Helm. The CLI (`kubeshark tap`) is a thin wrapper that
|
||||
installs a basic Helm chart and establishes a port-forward — nothing more.
|
||||
For larger or production clusters, use Helm directly with a custom values file.
|
||||
|
||||
## Decision: CLI or Helm?
|
||||
|
||||
**Use the CLI** when:
|
||||
- Quick install on a dev/test cluster (minikube, KinD, k3s)
|
||||
- Personal environment, single user
|
||||
- Just want to try Kubeshark quickly
|
||||
|
||||
**Use Helm directly** when:
|
||||
- Larger cluster (staging, production)
|
||||
- Need custom configuration (ingress, auth, storage, namespaces)
|
||||
- GitOps / infrastructure-as-code workflows
|
||||
- Team environment
|
||||
|
||||
## Path A: CLI (Dev/Test Clusters)
|
||||
|
||||
### Step 1 — Install the CLI
|
||||
|
||||
Check if Kubeshark is already installed:
|
||||
|
||||
```bash
|
||||
kubeshark version
|
||||
```
|
||||
|
||||
If not installed, offer one of these methods:
|
||||
|
||||
**Homebrew (easiest, where available):**
|
||||
|
||||
```bash
|
||||
brew tap kubeshark/kubeshark
|
||||
brew install kubeshark
|
||||
```
|
||||
|
||||
**Binary download:**
|
||||
|
||||
For the full list of platforms and architectures, see https://docs.kubeshark.com/en/install
|
||||
|
||||
```bash
|
||||
# Linux (amd64)
|
||||
curl -Lo kubeshark https://github.com/kubeshark/kubeshark/releases/latest/download/kubeshark_linux_amd64
|
||||
chmod +x kubeshark
|
||||
sudo mv kubeshark /usr/local/bin/
|
||||
|
||||
# Linux (arm64)
|
||||
curl -Lo kubeshark https://github.com/kubeshark/kubeshark/releases/latest/download/kubeshark_linux_arm64
|
||||
chmod +x kubeshark
|
||||
sudo mv kubeshark /usr/local/bin/
|
||||
|
||||
# macOS (Apple Silicon)
|
||||
curl -Lo kubeshark https://github.com/kubeshark/kubeshark/releases/latest/download/kubeshark_darwin_arm64
|
||||
chmod +x kubeshark
|
||||
sudo mv kubeshark /usr/local/bin/
|
||||
|
||||
# macOS (Intel)
|
||||
curl -Lo kubeshark https://github.com/kubeshark/kubeshark/releases/latest/download/kubeshark_darwin_amd64
|
||||
chmod +x kubeshark
|
||||
sudo mv kubeshark /usr/local/bin/
|
||||
```
|
||||
|
||||
### Step 2 — Check for Updates
|
||||
|
||||
**Always check for updates before using the CLI.** This is critical — Kubeshark
|
||||
releases frequently and running an outdated version can cause issues.
|
||||
|
||||
```bash
|
||||
# Homebrew
|
||||
brew upgrade kubeshark
|
||||
|
||||
# Binary — check the latest release and re-download if newer
|
||||
kubeshark version
|
||||
# Compare with https://github.com/kubeshark/kubeshark/releases/latest
|
||||
```
|
||||
|
||||
### Step 3 — Deploy with `kubeshark tap`
|
||||
|
||||
```bash
|
||||
kubeshark tap
|
||||
```
|
||||
|
||||
This installs the Helm chart with defaults and opens the dashboard in your browser.
|
||||
That's it for dev/test clusters.
|
||||
|
||||
### Step 4 — Reconnect if Connection Breaks
|
||||
|
||||
If the port-forward drops (laptop sleep, network change, terminal closed):
|
||||
|
||||
```bash
|
||||
kubeshark proxy
|
||||
```
|
||||
|
||||
This re-establishes the port-forward and reopens the dashboard. It does **not**
|
||||
reinstall — Kubeshark is still running in the cluster.
|
||||
|
||||
### Step 5 — Clean Up After Use
|
||||
|
||||
**Always clean up when done.** Kubeshark runs eBPF probes and DaemonSet workers
|
||||
on every node — leaving it running wastes cluster resources.
|
||||
|
||||
```bash
|
||||
kubeshark clean
|
||||
```
|
||||
|
||||
Always remind the user to run `kubeshark clean` when they're finished. This is
|
||||
easy to forget and important.
|
||||
|
||||
## Path B: Helm (Larger / Production Clusters)
|
||||
|
||||
### Step 1 — Add and Update the Helm Repo
|
||||
|
||||
**Always update the Helm repo first.** This is the most important first step —
|
||||
running an outdated chart can cause issues.
|
||||
|
||||
```bash
|
||||
helm repo add kubeshark https://helm.kubeshark.com
|
||||
helm repo update
|
||||
```
|
||||
|
||||
### Step 2 — Create a Config Directory
|
||||
|
||||
Store all configuration files in `~/.kubeshark/`:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.kubeshark
|
||||
```
|
||||
|
||||
**Before writing any file to `~/.kubeshark/`, check if it already exists.**
|
||||
If `~/.kubeshark/values.yaml` (or any target filename) already exists, **ask the
|
||||
user** before overwriting. Either:
|
||||
1. Back up the existing file first: `cp ~/.kubeshark/values.yaml ~/.kubeshark/values.yaml.bak.$(date +%s)`
|
||||
2. Use a descriptive name for the new file (e.g., `values-production.yaml`, `values-staging.yaml`)
|
||||
|
||||
The user may have multiple values files for different clusters or environments.
|
||||
|
||||
### Step 3 — Build the Values File
|
||||
|
||||
Walk through the following configuration areas with the user. Each section
|
||||
explains what the value does and what to recommend.
|
||||
|
||||
#### Pod Targeting (CRITICAL)
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
regex: .*
|
||||
namespaces: []
|
||||
excludedNamespaces: []
|
||||
```
|
||||
|
||||
**This is one of the most important configuration decisions.** By default,
|
||||
Kubeshark monitors the entire cluster's traffic. On a large cluster this is a
|
||||
huge undertaking that consumes significant CPU and memory on every node.
|
||||
|
||||
**Always set namespace targeting.** Ask the user which namespaces contain the
|
||||
workloads they care about, and set those explicitly:
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
namespaces:
|
||||
- production
|
||||
- staging
|
||||
```
|
||||
|
||||
Alternatively, use `excludedNamespaces` to monitor everything except specific
|
||||
namespaces:
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
excludedNamespaces:
|
||||
- kube-system
|
||||
- monitoring
|
||||
- kubeshark
|
||||
```
|
||||
|
||||
The `regex` field filters by pod name within the targeted namespaces. Leave as
|
||||
`.*` unless the user wants to focus on specific pods.
|
||||
|
||||
Setting pod targeting rules causes Kubeshark to focus only on specific workloads,
|
||||
which significantly reduces compute consumption.
|
||||
|
||||
#### Docker Registry (Air-Gapped Environments)
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
docker:
|
||||
registry: docker.io/kubeshark
|
||||
tag: ""
|
||||
```
|
||||
|
||||
- `tap.docker.registry` — Change this for air-gapped environments where there's
|
||||
no access to `docker.io`. Point to your internal registry. Additional config
|
||||
may be needed (pull secrets, registry credentials).
|
||||
- `tap.docker.tag` — Set a specific version. If a patch version is missing, the
|
||||
latest patch in that minor version is used. **Leave empty (recommended)** to
|
||||
use the version matching the Helm chart.
|
||||
|
||||
For air-gapped clusters, also set:
|
||||
|
||||
```yaml
|
||||
internetConnectivity: false
|
||||
```
|
||||
|
||||
This is the **most important setting for air-gapped clusters** — it disables all
|
||||
outbound connectivity checks (license validation, telemetry, update checks).
|
||||
|
||||
#### Capture & Dissection
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
capture:
|
||||
dissection:
|
||||
enabled: true
|
||||
stopAfter: 5m
|
||||
raw:
|
||||
enabled: true
|
||||
storageSize: 1Gi
|
||||
dbMaxSize: 500Mi
|
||||
```
|
||||
|
||||
**`tap.capture.dissection.enabled`** — Controls real-time dissection (L7 protocol
|
||||
parsing on production nodes). Real-time dissection consumes significant compute
|
||||
resources from production nodes. **Recommend starting with `false` (disabled).**
|
||||
This can be toggled on-demand from the dashboard when needed, so it's used only
|
||||
when necessary and doesn't consume resources the rest of the time.
|
||||
|
||||
Dissection is independent from raw capture + snapshots. Raw capture is lightweight
|
||||
and runs continuously; dissection is the heavy operation.
|
||||
|
||||
**`tap.capture.dissection.stopAfter`** — Time after which dissection automatically
|
||||
disables once all client connections end. Set to `0` to never auto-disable (manual
|
||||
control only).
|
||||
|
||||
**`tap.capture.raw.enabled`** — Keep this `true`. Raw capture consumes very few
|
||||
production resources yet captures all traffic. This is what powers snapshots and
|
||||
retrospective analysis.
|
||||
|
||||
**`tap.capture.raw.storageSize`** — The FIFO buffer for raw capture per node.
|
||||
**Recommend 100Gi** for production. The larger this is, the further back in time
|
||||
snapshots can reach.
|
||||
|
||||
**`tap.capture.dbMaxSize`** — Size of the database holding dissected API calls.
|
||||
Bigger = more history kept. Adjust based on how much queryable history the user needs.
|
||||
|
||||
**`tap.capture.captureSelf`** — Debug option. Ignore during installation.
|
||||
|
||||
**`tap.bpfOverride`** — Debug option. Ignore during installation.
|
||||
|
||||
#### Delayed Dissection
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
delayedDissection:
|
||||
cpu: "1"
|
||||
memory: 4Gi
|
||||
```
|
||||
|
||||
Delayed dissection is the process on the Hub that dissects raw capture data within
|
||||
a snapshot. It runs on the Hub node (not production nodes) and is triggered when
|
||||
a delayed dissection operation is requested on a snapshot.
|
||||
|
||||
**Give this as many resources as possible.** Recommend `cpu: "5"` and `memory: 5Gi`.
|
||||
This speeds up snapshot analysis significantly.
|
||||
|
||||
#### Snapshot Storage (Local)
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
local:
|
||||
storageClass: ""
|
||||
storageSize: 20Gi
|
||||
```
|
||||
|
||||
This is where snapshots are stored locally. **Be very generous with this.**
|
||||
**Recommend 2Ti (2 TiB)** for production environments that will accumulate snapshots.
|
||||
|
||||
**`storageClass`** — Must match a valid storage class in the cluster. Suggest
|
||||
based on the cloud provider:
|
||||
|
||||
| Provider | Recommended Storage Class |
|
||||
|----------|-------------------------|
|
||||
| EKS (AWS) | `gp2` or `gp3` |
|
||||
| GKE (Google) | `standard` or `premium-rwo` |
|
||||
| AKS (Azure) | `managed-csi` or `managed-premium` |
|
||||
| OpenShift | Check `kubectl get sc` — varies by provider |
|
||||
| KinD / minikube | `standard` (default) |
|
||||
| Private / bare metal | Ask the user for their storage class |
|
||||
|
||||
Always verify available storage classes with `kubectl get sc`.
|
||||
|
||||
#### Cloud Storage (Long-Term Retention)
|
||||
|
||||
Cloud storage enables uploading snapshots to S3, GCS, or Azure Blob for long-term
|
||||
retention, cross-cluster sharing, and backup/restore.
|
||||
|
||||
For detailed configuration per provider (including IRSA, Workload Identity, static
|
||||
credentials, and ConfigMap/Secret setup), see `references/cloud-storage.md`.
|
||||
|
||||
Summary of provider values:
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
cloud:
|
||||
provider: "" # "s3", "azblob", or "gcs" (empty = disabled)
|
||||
prefix: "" # Key prefix in bucket
|
||||
configMaps: [] # Pre-existing ConfigMaps with cloud config
|
||||
secrets: [] # Pre-existing Secrets with cloud credentials
|
||||
```
|
||||
|
||||
Help the user select the right provider based on where their cluster runs and
|
||||
walk them through the authentication setup.
|
||||
|
||||
#### Resources
|
||||
|
||||
For a first installation, **do not change the resource defaults.** Let the user
|
||||
run Kubeshark with defaults first and tune based on actual usage patterns later.
|
||||
|
||||
The defaults are reasonable starting points. Resource consumption depends heavily
|
||||
on how much traffic is processed, which is controlled by pod targeting rules.
|
||||
|
||||
#### Node Selectors
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
nodeSelectorTerms:
|
||||
workers:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/os
|
||||
operator: In
|
||||
values: [linux]
|
||||
```
|
||||
|
||||
Use `nodeSelectorTerms` when the user wants to focus on specific nodes. The less
|
||||
workload processed by Kubeshark, the less CPU and memory it consumes. The goal is
|
||||
to process workloads of interest, not the entire cluster.
|
||||
|
||||
#### Ingress (STRONGLY RECOMMENDED)
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
host: ks.svc.cluster.local
|
||||
path: /
|
||||
tls: []
|
||||
annotations: {}
|
||||
```
|
||||
|
||||
**Ingress is the strongly preferred access method.** While port-forward is available,
|
||||
it is **strongly discouraged** for anything beyond quick local testing. Port-forward
|
||||
is fragile, drops connections, and doesn't scale for team use.
|
||||
|
||||
**Always help the user configure ingress.** Ask them about their ingress controller
|
||||
(nginx, ALB, Traefik, etc.) and build the ingress config:
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
host: kubeshark.example.com
|
||||
tls:
|
||||
- secretName: kubeshark-tls
|
||||
hosts:
|
||||
- kubeshark.example.com
|
||||
annotations: {}
|
||||
```
|
||||
|
||||
For ALB on AWS:
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
ingress:
|
||||
enabled: true
|
||||
className: alb
|
||||
host: kubeshark.example.com
|
||||
annotations:
|
||||
alb.ingress.kubernetes.io/scheme: internal
|
||||
alb.ingress.kubernetes.io/target-type: ip
|
||||
```
|
||||
|
||||
#### Air-Gapped Clusters
|
||||
|
||||
For air-gapped environments, two settings are essential:
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
docker:
|
||||
registry: your-internal-registry.example.com/kubeshark
|
||||
internetConnectivity: false
|
||||
```
|
||||
|
||||
`internetConnectivity: false` is the **single most important option** for
|
||||
air-gapped clusters. Without it, Kubeshark will attempt outbound connections
|
||||
that will fail and cause issues.
|
||||
|
||||
### Step 4 — Install
|
||||
|
||||
```bash
|
||||
helm install kubeshark kubeshark/kubeshark \
|
||||
-f ~/.kubeshark/values.yaml \
|
||||
-n kubeshark --create-namespace
|
||||
```
|
||||
|
||||
### Step 5 — Upgrade
|
||||
|
||||
When upgrading, **always update the Helm repo first**:
|
||||
|
||||
```bash
|
||||
helm repo update
|
||||
helm upgrade kubeshark kubeshark/kubeshark \
|
||||
-f ~/.kubeshark/values.yaml \
|
||||
-n kubeshark
|
||||
```
|
||||
|
||||
## Uninstalling
|
||||
|
||||
**Via CLI:**
|
||||
|
||||
```bash
|
||||
kubeshark clean
|
||||
kubeshark clean -s kubeshark # Specific namespace
|
||||
```
|
||||
|
||||
**Via Helm:**
|
||||
|
||||
```bash
|
||||
helm uninstall kubeshark -n kubeshark
|
||||
```
|
||||
|
||||
PersistentVolumeClaims are not deleted by default. Remove manually if needed:
|
||||
|
||||
```bash
|
||||
kubectl delete pvc -l app.kubernetes.io/name=kubeshark -n kubeshark
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **Pods not starting**: Check `kubectl get pods -l app.kubernetes.io/name=kubeshark -n <ns>`
|
||||
and `kubectl describe pod`. Common: ImagePullBackOff (registry), Pending (storage/resources),
|
||||
CrashLoopBackOff (check `kubectl logs`).
|
||||
- **No traffic**: Verify namespaces have running pods, check pod regex, ensure eBPF is supported
|
||||
(kernel 4.14+, 5.4+ recommended).
|
||||
- **Permissions**: Requires privileged containers with NET_RAW, NET_ADMIN, SYS_ADMIN,
|
||||
SYS_PTRACE, SYS_RESOURCE, IPC_LOCK capabilities.
|
||||
- **Storage**: Verify storage class exists (`kubectl get sc`), PVC is bound (`kubectl get pvc`).
|
||||
|
||||
## Setup Reference
|
||||
|
||||
### Kubeshark MCP for AI Agents
|
||||
|
||||
After installation, connect the Kubeshark MCP so AI agents can interact with Kubeshark:
|
||||
|
||||
```bash
|
||||
# Claude Code
|
||||
claude mcp add kubeshark -- kubeshark mcp
|
||||
|
||||
# Direct URL (no kubectl needed)
|
||||
claude mcp add kubeshark -- kubeshark mcp --url https://kubeshark.example.com
|
||||
```
|
||||
96
skills/install/references/cloud-storage.md
Normal file
96
skills/install/references/cloud-storage.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# Cloud Storage for Snapshots
|
||||
|
||||
This is a pointer to the authoritative cloud storage documentation maintained in
|
||||
the Helm chart:
|
||||
|
||||
**Source of truth**: `helm-chart/docs/snapshots_cloud_storage.md`
|
||||
|
||||
Always read that file for the latest configuration details, including:
|
||||
|
||||
- Amazon S3 (static credentials, IRSA, cross-account AssumeRole)
|
||||
- Azure Blob Storage (storage key, Workload Identity / DefaultAzureCredential)
|
||||
- Google Cloud Storage (service account JSON, GKE Workload Identity)
|
||||
- IAM permissions and trust policy examples
|
||||
- ConfigMap and Secret setup patterns
|
||||
- Inline values vs. external ConfigMap/Secret approaches
|
||||
|
||||
## Quick Reference
|
||||
|
||||
### Helm Values Structure
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
cloud:
|
||||
provider: "" # "s3", "azblob", or "gcs" (empty = disabled)
|
||||
prefix: "" # Key prefix in the bucket/container
|
||||
configMaps: [] # Pre-existing ConfigMaps with cloud config env vars
|
||||
secrets: [] # Pre-existing Secrets with cloud credentials
|
||||
s3:
|
||||
bucket: ""
|
||||
region: ""
|
||||
accessKey: ""
|
||||
secretKey: ""
|
||||
roleArn: ""
|
||||
externalId: ""
|
||||
azblob:
|
||||
storageAccount: ""
|
||||
container: ""
|
||||
storageKey: ""
|
||||
gcs:
|
||||
bucket: ""
|
||||
project: ""
|
||||
credentialsJson: ""
|
||||
```
|
||||
|
||||
### Recommended Auth Per Provider
|
||||
|
||||
| Provider | Production Recommendation |
|
||||
|----------|-------------------------|
|
||||
| S3 (EKS) | IRSA (IAM Roles for Service Accounts) — no static credentials |
|
||||
| S3 (non-EKS) | Static credentials via Secret, or default AWS credential chain |
|
||||
| Azure Blob (AKS) | Workload Identity / Managed Identity |
|
||||
| Azure Blob (non-AKS) | Storage account key via Secret |
|
||||
| GCS (GKE) | GKE Workload Identity — no JSON key file |
|
||||
| GCS (non-GKE) | Service account JSON key via Secret |
|
||||
|
||||
### Inline Values (Simplest Approach)
|
||||
|
||||
Set the provider settings (and static credentials, if used) directly in values.yaml. The Helm chart creates the necessary
|
||||
ConfigMap/Secret resources automatically.
|
||||
|
||||
**S3:**
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
cloud:
|
||||
provider: "s3"
|
||||
s3:
|
||||
bucket: my-kubeshark-snapshots
|
||||
region: us-east-1
|
||||
```
|
||||
|
||||
**GCS:**
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
cloud:
|
||||
provider: "gcs"
|
||||
gcs:
|
||||
bucket: my-kubeshark-snapshots
|
||||
project: my-gcp-project
|
||||
```
|
||||
|
||||
**Azure Blob:**
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
cloud:
|
||||
provider: "azblob"
|
||||
azblob:
|
||||
storageAccount: mykubesharksa
|
||||
container: snapshots
|
||||
```
|
||||
|
||||
For production setups with proper IAM integration, see the full documentation
|
||||
in `helm-chart/docs/snapshots_cloud_storage.md`.
|
||||
376
skills/install/references/helm-values.md
Normal file
376
skills/install/references/helm-values.md
Normal file
@@ -0,0 +1,376 @@
|
||||
# Kubeshark Helm Values Reference
|
||||
|
||||
Complete reference for all Kubeshark Helm chart values. Use this when building
|
||||
custom `values.yaml` files or `--set` flags.
|
||||
|
||||
## Docker Images
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
docker:
|
||||
registry: docker.io/kubeshark # Docker registry
|
||||
tag: "" # Image tag (empty = chart appVersion)
|
||||
tagLocked: true # Lock to specific tag
|
||||
imagePullPolicy: Always # Always, IfNotPresent, Never
|
||||
imagePullSecrets: [] # Registry pull secrets
|
||||
overrideImage: # Override individual component images
|
||||
worker: ""
|
||||
hub: ""
|
||||
front: ""
|
||||
overrideTag: # Override individual component tags
|
||||
worker: ""
|
||||
hub: ""
|
||||
front: ""
|
||||
```
|
||||
|
||||
## Proxy / Port-Forward
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
proxy:
|
||||
worker:
|
||||
srvPort: 48999
|
||||
hub:
|
||||
srvPort: 8898
|
||||
front:
|
||||
port: 8899 # Local port for port-forward
|
||||
host: 127.0.0.1 # Bind address
|
||||
```
|
||||
|
||||
## Pod Targeting
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
regex: .* # Pod name regex filter
|
||||
namespaces: [] # Target namespaces (empty = all)
|
||||
excludedNamespaces: [] # Namespaces to exclude
|
||||
bpfOverride: "" # Custom BPF filter override
|
||||
```
|
||||
|
||||
## Capture & Dissection
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
capture:
|
||||
dissection:
|
||||
enabled: true # Enable L7 dissection
|
||||
stopAfter: 5m # Auto-stop dissection after duration
|
||||
captureSelf: false # Capture Kubeshark's own traffic
|
||||
raw:
|
||||
enabled: true # Enable raw packet capture (needed for snapshots)
|
||||
storageSize: 1Gi # FIFO buffer size per node
|
||||
dbMaxSize: 500Mi # Max L7 database size per node
|
||||
delayedDissection:
|
||||
cpu: "1" # CPU for delayed dissection jobs
|
||||
memory: 4Gi # Memory for delayed dissection jobs
|
||||
storageSize: "" # Storage for delayed dissection
|
||||
storageClass: "" # Storage class for delayed dissection
|
||||
```
|
||||
|
||||
## Snapshots
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
snapshots:
|
||||
local:
|
||||
storageClass: "" # Storage class for local snapshots
|
||||
storageSize: 20Gi # PVC size for local snapshots
|
||||
cloud:
|
||||
provider: "" # s3, gcs, or azblob
|
||||
prefix: "" # Path prefix in bucket
|
||||
configMaps: [] # Additional ConfigMaps to mount
|
||||
secrets: [] # Additional Secrets to mount
|
||||
s3:
|
||||
bucket: ""
|
||||
region: ""
|
||||
accessKey: ""
|
||||
secretKey: ""
|
||||
roleArn: "" # IAM role ARN (IRSA)
|
||||
externalId: "" # STS external ID
|
||||
azblob:
|
||||
storageAccount: ""
|
||||
container: ""
|
||||
storageKey: ""
|
||||
gcs:
|
||||
bucket: ""
|
||||
project: ""
|
||||
credentialsJson: "" # Service account JSON
|
||||
```
|
||||
|
||||
## Helm Release
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
release:
|
||||
repo: https://helm.kubeshark.com # Helm chart repository
|
||||
name: kubeshark # Release name
|
||||
namespace: default # Release namespace
|
||||
helmChartPath: "" # Path to local chart (overrides repo)
|
||||
```
|
||||
|
||||
## Storage
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
persistentStorage: false # Enable PVC for worker data
|
||||
persistentStorageStatic: false # Static provisioning
|
||||
persistentStoragePvcVolumeMode: FileSystem # FileSystem or Block
|
||||
efsFileSytemIdAndPath: "" # EFS file system ID (EKS)
|
||||
secrets: [] # Additional secrets to mount
|
||||
storageLimit: 10Gi # Max storage per node
|
||||
storageClass: standard # Default storage class
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
resources:
|
||||
hub:
|
||||
limits:
|
||||
cpu: "0" # 0 = no limit
|
||||
memory: 5Gi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 50Mi
|
||||
sniffer:
|
||||
limits:
|
||||
cpu: "0"
|
||||
memory: 5Gi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 50Mi
|
||||
tracer:
|
||||
limits:
|
||||
cpu: "0"
|
||||
memory: 5Gi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 50Mi
|
||||
```
|
||||
|
||||
## Health Probes
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
probes:
|
||||
hub:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
sniffer:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
```
|
||||
|
||||
## TLS & Service Mesh
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
serviceMesh: true # Capture mTLS traffic (service mesh)
|
||||
tls: true # Capture OpenSSL/Go TLS traffic
|
||||
disableTlsLog: true # Suppress TLS debug logging
|
||||
packetCapture: best # Capture method: best, af_packet, pcap
|
||||
```
|
||||
|
||||
## Labels, Annotations & Scheduling
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
labels: {} # Additional labels for all pods
|
||||
annotations: {} # Additional annotations for all pods
|
||||
nodeSelectorTerms:
|
||||
hub: # Hub pod node selector
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/os
|
||||
operator: In
|
||||
values: [linux]
|
||||
workers: # Worker DaemonSet node selector
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/os
|
||||
operator: In
|
||||
values: [linux]
|
||||
front: # Frontend pod node selector
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/os
|
||||
operator: In
|
||||
values: [linux]
|
||||
tolerations:
|
||||
hub: []
|
||||
workers:
|
||||
- operator: Exists
|
||||
effect: NoExecute # Workers tolerate NoExecute by default
|
||||
front: []
|
||||
priorityClass: "" # PriorityClassName for pods
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
auth:
|
||||
enabled: false
|
||||
type: saml # Only SAML supported currently
|
||||
roles:
|
||||
admin:
|
||||
filter: "" # KFL filter restricting visible traffic
|
||||
canDownloadPCAP: true
|
||||
canUseScripting: true
|
||||
scriptingPermissions:
|
||||
canSave: true
|
||||
canActivate: true
|
||||
canDelete: true
|
||||
canUpdateTargetedPods: true
|
||||
canStopTrafficCapturing: true
|
||||
canControlDissection: true
|
||||
showAdminConsoleLink: true
|
||||
rolesClaim: role # SAML attribute for role mapping
|
||||
defaultRole: "" # Role for users without a role claim
|
||||
defaultFilter: "" # Default KFL filter for all users
|
||||
saml:
|
||||
idpMetadataUrl: "" # SAML IdP metadata URL
|
||||
x509crt: "" # SP certificate
|
||||
x509key: "" # SP private key
|
||||
```
|
||||
|
||||
## Ingress
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
ingress:
|
||||
enabled: false
|
||||
className: "" # nginx, alb, traefik, etc.
|
||||
host: ks.svc.cluster.local
|
||||
path: /
|
||||
tls: [] # TLS configuration
|
||||
annotations: {} # Ingress annotations
|
||||
```
|
||||
|
||||
## Protocol Dissectors
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
enabledDissectors:
|
||||
- amqp
|
||||
- dns
|
||||
- http
|
||||
- icmp
|
||||
- kafka
|
||||
- mongodb
|
||||
- mysql
|
||||
- postgresql
|
||||
- redis
|
||||
- ws
|
||||
- ldap
|
||||
- radius
|
||||
- diameter
|
||||
- udp-flow
|
||||
- tcp-flow
|
||||
- udp-conn
|
||||
- tcp-conn
|
||||
portMapping: # Default port-to-protocol mappings
|
||||
http: [80, 443, 8080]
|
||||
amqp: [5671, 5672]
|
||||
kafka: [9092]
|
||||
mongodb: [27017]
|
||||
mysql: [3306]
|
||||
postgresql: [5432]
|
||||
redis: [6379]
|
||||
ldap: [389]
|
||||
diameter: [3868]
|
||||
customMacros:
|
||||
https: "tls and (http or http2)"
|
||||
```
|
||||
|
||||
## Networking & Security
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
hostNetwork: true # Use host network (required for capture)
|
||||
ipv6: true # Enable IPv6 support
|
||||
mountBpf: true # Mount BPF filesystem
|
||||
securityContext:
|
||||
privileged: true
|
||||
appArmorProfile:
|
||||
type: ""
|
||||
localhostProfile: ""
|
||||
seLinuxOptions:
|
||||
level: ""
|
||||
role: ""
|
||||
type: ""
|
||||
user: ""
|
||||
capabilities:
|
||||
networkCapture: [NET_RAW, NET_ADMIN]
|
||||
serviceMeshCapture: [SYS_ADMIN, SYS_PTRACE, DAC_OVERRIDE]
|
||||
ebpfCapture: [SYS_ADMIN, SYS_PTRACE, SYS_RESOURCE, IPC_LOCK]
|
||||
```
|
||||
|
||||
## Dashboard
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
dashboard:
|
||||
streamingType: connect-rpc
|
||||
completeStreamingEnabled: true
|
||||
clusterWideMapEnabled: false
|
||||
entriesLimit: "300000"
|
||||
routing:
|
||||
front:
|
||||
basePath: "" # Base path for reverse proxy
|
||||
```
|
||||
|
||||
## Scripting
|
||||
|
||||
```yaml
|
||||
scripting:
|
||||
enabled: false
|
||||
env: {} # Environment variables for scripts
|
||||
source: "" # Git repo for scripts
|
||||
sources: [] # Multiple script sources
|
||||
watchScripts: true # Watch for script changes
|
||||
active: [] # Active scripts
|
||||
console: true # Enable script console
|
||||
```
|
||||
|
||||
## Misc
|
||||
|
||||
```yaml
|
||||
tap:
|
||||
dryRun: false # Preview targeted pods without deploying
|
||||
debug: false # Enable debug mode
|
||||
telemetry:
|
||||
enabled: true # Anonymous usage telemetry
|
||||
resourceGuard:
|
||||
enabled: false # Resource usage guard
|
||||
watchdog:
|
||||
enabled: false # Watchdog process
|
||||
gitops:
|
||||
enabled: false # GitOps mode
|
||||
defaultFilter: "" # Default KFL display filter
|
||||
globalFilter: "" # Global KFL filter (cannot be overridden)
|
||||
dns:
|
||||
nameservers: [] # Custom DNS nameservers
|
||||
searches: [] # Custom DNS search domains
|
||||
options: [] # Custom DNS options
|
||||
misc:
|
||||
jsonTTL: 5m # TTL for JSON entries
|
||||
pcapTTL: "0" # TTL for PCAP files (0 = no TTL)
|
||||
trafficSampleRate: 100 # Traffic sampling rate (1-100)
|
||||
resolutionStrategy: auto # IP resolution: auto, dns, k8s
|
||||
detectDuplicates: false # Detect duplicate packets
|
||||
staleTimeoutSeconds: 30 # Timeout for stale connections
|
||||
tcpFlowTimeout: 1200 # TCP flow idle timeout (seconds)
|
||||
udpFlowTimeout: 1200 # UDP flow idle timeout (seconds)
|
||||
|
||||
headless: false # Suppress browser auto-open
|
||||
license: "" # Kubeshark Pro license key
|
||||
timezone: "" # Override timezone
|
||||
logLevel: warning # Log level: debug, info, warning, error
|
||||
|
||||
kube:
|
||||
configPath: "" # Custom kubeconfig path
|
||||
context: "" # Kubernetes context name
|
||||
```
|
||||
@@ -14,6 +14,7 @@ description: >
|
||||
or any request to slice/search/narrow network traffic in Kubeshark. Also trigger
|
||||
when other skills need to construct filters — KFL is the query language for all
|
||||
Kubeshark traffic analysis.
|
||||
last-updated: 2026-05-08
|
||||
---
|
||||
|
||||
# KFL2 — Kubeshark Filter Language
|
||||
@@ -94,7 +95,8 @@ filter term — they're fast and narrow the search space immediately.
|
||||
| `sctp` | SCTP | `gql` | GraphQL (v1+v2) |
|
||||
| `icmp` | ICMP | `gqlv1` / `gqlv2` | GraphQL version-specific |
|
||||
| `grpc` | gRPC (HTTP/2 sub-protocol) | `mongodb` | MongoDB |
|
||||
| `mysql` | MySQL | `radius` | RADIUS |
|
||||
| `mysql` | MySQL | `postgresql` | PostgreSQL |
|
||||
| `radius` | RADIUS | | |
|
||||
| `diameter` | Diameter | `conn` / `flow` | L4 connection/flow tracking |
|
||||
| | | `tcp_conn` / `udp_conn` | Transport-specific connections |
|
||||
|
||||
@@ -276,6 +278,21 @@ mysql && mysql_error_code != 0 // Error code filtering
|
||||
mysql && mysql_total_size > 10000 // Large queries
|
||||
```
|
||||
|
||||
### PostgreSQL
|
||||
|
||||
```
|
||||
postgresql && postgresql_command == "SELECT" // Filter by command tag
|
||||
postgresql && postgresql_query.contains("SELECT") // SELECT statements
|
||||
postgresql && postgresql_database == "orders_db" // Database filtering
|
||||
postgresql && postgresql_user == "admin" // User filtering
|
||||
postgresql && !postgresql_success // Failed queries
|
||||
postgresql && postgresql_error_code != "" // Error code filtering (SQLSTATE string)
|
||||
postgresql && postgresql_total_size > 10000 // Large queries
|
||||
```
|
||||
|
||||
> **Note**: `postgresql_error_code` is a **string** (SQLSTATE code like `"23505"`),
|
||||
> not an int. This differs from MySQL's `mysql_error_code` which is an int.
|
||||
|
||||
### gRPC
|
||||
|
||||
gRPC is a sub-protocol of HTTP/2. All HTTP variables are also available on gRPC entries.
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# KFL2 Complete Variable and Field Reference
|
||||
|
||||
> Last synced with [kfl2 repo](https://github.com/kubeshark/kfl2): 2026-05-08
|
||||
|
||||
This is the exhaustive reference for every variable available in KFL2 filters.
|
||||
KFL2 is built on Google's CEL (Common Expression Language) and evaluates against
|
||||
Kubeshark's protobuf-based `BaseEntry` structure.
|
||||
@@ -74,7 +76,8 @@ Boolean variables indicating detected protocol. Use as first filter term for per
|
||||
| `icmp` | ICMP | `gqlv1` | GraphQL v1 only |
|
||||
| `grpc` | gRPC (HTTP/2 sub-protocol) | `gqlv2` | GraphQL v2 only |
|
||||
| `mongodb` | MongoDB | `mysql` | MySQL |
|
||||
| `radius` | RADIUS auth | `diameter` | Diameter |
|
||||
| `postgresql` | PostgreSQL | `diameter` | Diameter |
|
||||
| `radius` | RADIUS auth | | |
|
||||
| | | `conn` | L4 connection tracking |
|
||||
| `flow` | L4 flow tracking | `tcp_conn` | TCP connection tracking |
|
||||
| `tcp_flow` | TCP flow tracking | `udp_conn` | UDP connection tracking |
|
||||
@@ -302,6 +305,27 @@ Supported question types: A, AAAA, NS, CNAME, SOA, MX, TXT, SRV, PTR, ANY.
|
||||
|
||||
**Example**: `mysql && mysql_query.contains("SELECT") && !mysql_success`
|
||||
|
||||
## PostgreSQL Variables
|
||||
|
||||
| Variable | Type | Description | Example |
|
||||
|----------|------|-------------|---------|
|
||||
| `postgresql` | bool | PostgreSQL payload detected | |
|
||||
| `postgresql_command` | string | Command tag | `"SELECT"`, `"INSERT"`, `"UPDATE"` |
|
||||
| `postgresql_query` | string | Full SQL query text | `"SELECT * FROM users WHERE id = 1"` |
|
||||
| `postgresql_database` | string | Active database name | `"orders_db"` |
|
||||
| `postgresql_user` | string | Authenticated user name | `"app_service"` |
|
||||
| `postgresql_request_size` | int | Request payload size in bytes | |
|
||||
| `postgresql_response_size` | int | Response payload size in bytes | |
|
||||
| `postgresql_total_size` | int | Combined request + response size | |
|
||||
| `postgresql_success` | bool | Response OK status | |
|
||||
| `postgresql_error_code` | **string** | SQLSTATE error code (NOT int) | `"23505"` (unique violation), `"42P01"` (undefined table) |
|
||||
| `postgresql_error_message` | string | Error description | |
|
||||
|
||||
**Important**: Unlike MySQL's `mysql_error_code` (int), `postgresql_error_code` is a
|
||||
**string** because PostgreSQL uses 5-character SQLSTATE codes.
|
||||
|
||||
**Example**: `postgresql && postgresql_query.contains("SELECT") && !postgresql_success`
|
||||
|
||||
## gRPC Variables
|
||||
|
||||
gRPC is a sub-protocol of HTTP/2. When `grpc` is true, all HTTP variables are also available.
|
||||
|
||||
@@ -109,10 +109,17 @@ Every investigation starts with a snapshot. After that, you choose one of two
|
||||
investigation routes depending on your goal:
|
||||
|
||||
1. **Determine time window** — When did the issue occur? Use `get_data_boundaries`
|
||||
to see what raw capture data is available.
|
||||
2. **Create or locate a snapshot** — Either take a new snapshot covering the
|
||||
to see what raw capture data (L4) is available.
|
||||
2. **Check the L7 (dissected) window** — Before any KFL query on *live* data,
|
||||
call `get_l7_data_boundaries`. It returns the per-node + cluster-wide range
|
||||
of dissected API call data plus a `dissection_enabled` flag. Treat L4
|
||||
(`get_data_boundaries`) as the snapshot/PCAP window and L7
|
||||
(`get_l7_data_boundaries`) as the KFL-query window — they can differ
|
||||
significantly because L7 only starts producing entries once dissection is
|
||||
enabled (existing raw capture is **not** retroactively dissected).
|
||||
3. **Create or locate a snapshot** — Either take a new snapshot covering the
|
||||
incident window, or find an existing one with `list_snapshots`.
|
||||
3. **Choose your investigation route** — PCAP or Dissection (see below).
|
||||
4. **Choose your investigation route** — PCAP or Dissection (see below).
|
||||
|
||||
### Choosing the Right Route
|
||||
|
||||
@@ -164,6 +171,34 @@ Per node:
|
||||
If the incident falls outside the available window, the data has been rotated
|
||||
out. Suggest increasing `storageSize` for future coverage.
|
||||
|
||||
### Check L7 (Dissected) Data Boundaries
|
||||
|
||||
**Tool**: `get_l7_data_boundaries`
|
||||
|
||||
Check what *dissected* L7 entries exist across the cluster. This is the
|
||||
pre-flight check before any KFL query against live data. The response
|
||||
contains:
|
||||
|
||||
- `dissection_enabled`: if `false`, KFL queries on live data will return
|
||||
empty regardless of L4 boundaries. Enabling dissection only captures
|
||||
*forward* — raw capture is **not** retroactively dissected.
|
||||
- `cluster.oldest_ts` / `cluster.newest_ts`: cluster-wide window where KFL
|
||||
on live data has any chance of returning results.
|
||||
- `nodes[].oldest_ts` / `nodes[].newest_ts`: per-node windows for narrowing
|
||||
queries.
|
||||
|
||||
**Key distinction:**
|
||||
|
||||
| | L4 (`get_data_boundaries`) | L7 (`get_l7_data_boundaries`) |
|
||||
|---|---|---|
|
||||
| Data | Raw PCAP capture | Dissected API call entries |
|
||||
| Useful for | Snapshots, PCAP extraction | KFL queries |
|
||||
| Backfill | Comes from FIFO ring buffer | Only forward from dissection-enable |
|
||||
|
||||
If the user is asking an API-level question and `dissection_enabled` is
|
||||
`false`, enable it first — but tell the user they will only see entries
|
||||
captured *after* enabling, never the historical window.
|
||||
|
||||
### Create a Snapshot
|
||||
|
||||
**Tool**: `create_snapshot`
|
||||
@@ -421,11 +456,16 @@ The two routes are complementary. A common pattern:
|
||||
### Post-Incident RCA
|
||||
|
||||
1. Identify the incident time window from alerts, logs, or user reports
|
||||
2. Check `get_data_boundaries` — is the window still in raw capture?
|
||||
3. `create_snapshot` covering the incident window (add 15 minutes buffer)
|
||||
4. **Dissection route**: `start_snapshot_dissection` → `get_api_stats` →
|
||||
2. Check `get_data_boundaries` — is the window still in raw capture (L4)?
|
||||
3. Check `get_l7_data_boundaries` — was dissection enabled at that time, and
|
||||
does the window overlap with the L7 entry range? If `dissection_enabled`
|
||||
is `false` or the window predates the L7 range, the Dissection route is
|
||||
limited to whatever entries exist now — falling back to the PCAP route
|
||||
is often the right call.
|
||||
4. `create_snapshot` covering the incident window (add 15 minutes buffer)
|
||||
5. **Dissection route**: `start_snapshot_dissection` → `get_api_stats` →
|
||||
`list_api_calls` → `get_api_call` → follow the dependency chain
|
||||
5. **PCAP route**: `list_workloads` → `export_snapshot_pcap` with BPF →
|
||||
6. **PCAP route**: `list_workloads` → `export_snapshot_pcap` with BPF →
|
||||
hand off to Wireshark or archive
|
||||
|
||||
### Other Use Cases
|
||||
|
||||
689
skills/security-audit/SKILL.md
Normal file
689
skills/security-audit/SKILL.md
Normal file
@@ -0,0 +1,689 @@
|
||||
---
|
||||
name: security-audit
|
||||
description: >
|
||||
Kubernetes network security audit skill powered by Kubeshark MCP. Use this skill
|
||||
whenever the user wants to audit a cluster for security threats, detect compromised
|
||||
workloads, find malicious traffic patterns, hunt for indicators of compromise (IOCs),
|
||||
check for data exfiltration, identify C2 (command and control) communication,
|
||||
detect cryptomining, find lateral movement, discover credential theft attempts,
|
||||
assess network security posture, or perform threat hunting in Kubernetes.
|
||||
Also trigger when the user mentions security audit, threat detection, compromise
|
||||
assessment, vulnerability scan, "is my cluster compromised", "find malicious traffic",
|
||||
"check for threats", DNS exfiltration, DNS tunneling, port scanning, IMDS access,
|
||||
reverse shell, crypto miner, MITRE ATT&CK, IOC detection, anomaly detection,
|
||||
suspicious traffic, rogue workloads, unauthorized access, or any request to
|
||||
evaluate cluster security through network traffic analysis.
|
||||
---
|
||||
|
||||
# Kubernetes Network Security Audit with Kubeshark MCP
|
||||
|
||||
You are a Kubernetes network security specialist. Your job is to systematically
|
||||
audit cluster traffic for indicators of compromise, malicious behavior, and
|
||||
security threats — using network traffic as the ground truth.
|
||||
|
||||
Network traffic cannot lie. Logs can be tampered with, metrics can be spoofed,
|
||||
but packets on the wire reveal what workloads actually do — what they connect to,
|
||||
what protocols they speak, what data they send. Your audit leverages this by
|
||||
examining DNS queries, HTTP requests, L4 flows, and protocol-level payloads
|
||||
across every dimension of the MITRE ATT&CK framework.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before starting any audit, verify the environment is ready.
|
||||
|
||||
**Tool**: `check_kubeshark_status`
|
||||
|
||||
Confirm Kubeshark is deployed and tools are available. You need at minimum:
|
||||
`list_api_calls`, `list_l4_flows`, `list_workloads`, `get_api_call`.
|
||||
|
||||
**KFL requirement**: This skill uses KFL filters for all queries. Before
|
||||
constructing any filter, load the KFL skill (`skills/kfl/`). KFL is statically
|
||||
typed — incorrect field names will fail silently. If the KFL skill is not
|
||||
loaded, only use the exact filter examples shown in this skill.
|
||||
|
||||
**KFL error resilience**: If a KFL filter returns `undeclared reference` or
|
||||
similar errors, **do not give up on that phase**. Fall back to:
|
||||
1. Port-based filtering: `dst.port == 5432` instead of protocol flags
|
||||
2. Name-based filtering: `dst.name.contains("db")` or `src.name.contains("pod-name")`
|
||||
3. Browsing entries with `get_api_call` on IDs from `list_l4_flows`
|
||||
A KFL error means the filter syntax is wrong, not that the data doesn't exist.
|
||||
|
||||
## Audit Methodology
|
||||
|
||||
A security audit is NOT an incident investigation. You are not responding to
|
||||
a known event — you are proactively searching for threats that may be hiding
|
||||
in normal traffic. This requires a systematic sweep across all threat categories,
|
||||
not a single focused query.
|
||||
|
||||
The audit has **two sections** that run in sequence:
|
||||
|
||||
```
|
||||
SECTION A: Real-Time Analysis → Instant, uses live dissected traffic
|
||||
SECTION B: Snapshot Deep Dive → Immutable evidence, protocol-level inspection
|
||||
```
|
||||
|
||||
### Why Two Sections?
|
||||
|
||||
Kubeshark has two modes of data access:
|
||||
|
||||
1. **Real-time dissection** — traffic is dissected as it flows through the
|
||||
cluster. Provides instant access to L7 data (DNS, HTTP, etc.) that is
|
||||
already captured and indexed. However, real-time dissection is
|
||||
resource-intensive and may not be enabled, or may have gaps in coverage.
|
||||
|
||||
2. **Snapshots** — immutable captures of raw traffic within a time window.
|
||||
Must be created explicitly, then dissected separately. Guarantees complete
|
||||
coverage of all packets in the window, but takes time to create and index.
|
||||
|
||||
Section A uses whatever is already available — fast, immediate, but possibly
|
||||
incomplete. Section B creates snapshots for thorough, evidence-grade analysis.
|
||||
|
||||
### Severity Classification
|
||||
|
||||
Classify every finding using this framework:
|
||||
|
||||
| Severity | Criteria | Examples |
|
||||
|----------|----------|---------|
|
||||
| **CRITICAL** | Active data exfiltration, credential theft in progress, confirmed C2 | DNS tunneling, IMDS credential harvest, mining pool connections |
|
||||
| **HIGH** | Reconnaissance with cluster-wide scope, confirmed unauthorized access | K8s API secret enumeration, port scanning, cluster-admin abuse |
|
||||
| **MEDIUM** | Suspicious patterns requiring investigation, limited-scope recon | Cross-namespace probes, outdated User-Agents, unusual external connections |
|
||||
| **LOW** | Anomalies that may be benign, single-instance events | Unknown workloads, new external destinations, noisy but not malicious |
|
||||
|
||||
### Timezone
|
||||
|
||||
Kubeshark returns timestamps in UTC. Always convert to local time before
|
||||
presenting to the user. Detect the local timezone at the start (e.g.,
|
||||
`date +%Z`). Present local time as primary, with UTC in parentheses:
|
||||
`15:03:22 IDT (12:03:22 UTC)`.
|
||||
|
||||
**Conversion**: Kubeshark timestamps are Unix milliseconds. To convert:
|
||||
`ms / 1000` → Unix seconds → datetime → format with timezone offset.
|
||||
Example: `1778534735974` → `2026-05-11 14:25:35 PDT (21:25:35 UTC)`.
|
||||
|
||||
---
|
||||
|
||||
## SECTION A: Real-Time Analysis
|
||||
|
||||
**Goal**: Fast initial sweep using live data that's already available. No
|
||||
waiting for snapshot creation or dissection.
|
||||
|
||||
### Step 1: Check What's Available
|
||||
|
||||
**Tool**: `check_kubeshark_status`
|
||||
|
||||
Confirm Kubeshark is running and which tools are available.
|
||||
|
||||
### Step 2: Query Live Traffic
|
||||
|
||||
**Tool**: `get_l7_data_boundaries`
|
||||
|
||||
Check the time boundaries of dissected API calls in the real-time database.
|
||||
This tells you how far back L7 data is available — use it to understand
|
||||
the scope of your real-time queries before running them.
|
||||
|
||||
Then query the real-time dissected traffic across key dimensions.
|
||||
Use `list_api_calls` and `list_l4_flows` **without** a `snapshot_id` to
|
||||
hit the live data.
|
||||
|
||||
Run these queries simultaneously:
|
||||
|
||||
| Query | KFL Filter | What You're Looking For |
|
||||
|-------|-----------|------------------------|
|
||||
| DNS traffic | `dns` | Mining domains, high-entropy subdomains, external resolution, NXDOMAIN flood |
|
||||
| HTTP traffic | `http` | C2 beaconing, suspicious URLs, external destinations, anomalous headers |
|
||||
| L4 flows | (via `list_l4_flows`) | External IPs, suspicious ports (3333, 4444), IMDS (169.254.169.254), fan-out patterns |
|
||||
| PostgreSQL | `postgresql` | SQL injection patterns, sensitive table access |
|
||||
| Redis | `redis` | Dangerous commands (CONFIG, KEYS, CLIENT LIST) |
|
||||
|
||||
Filter by namespace if the user specified one (e.g., `dns && src.pod.namespace == "k8s-mule"`).
|
||||
|
||||
**Important**: Real-time dissection may have incomplete data — traffic that
|
||||
arrived before dissection was enabled, or during gaps in coverage, won't
|
||||
appear. Treat Section A findings as a fast first pass, not the final word.
|
||||
|
||||
### Step 3: Create Snapshots (Sequential — One at a Time)
|
||||
|
||||
While analyzing real-time data, begin creating snapshots for Section B.
|
||||
|
||||
**Tool**: `get_data_boundaries`
|
||||
|
||||
Check how far back raw capture data exists. Raw capture is the FIFO buffer
|
||||
that feeds snapshot creation — this tells you the time window available
|
||||
for snapshots (which is different from the L7 boundaries in Step 2).
|
||||
|
||||
**CRITICAL: Create snapshots ONE AT A TIME, sequentially.** Kubeshark only
|
||||
supports one concurrent snapshot download. Parallel creation will cause
|
||||
failures and data loss. The pattern is:
|
||||
|
||||
1. Create snapshot → wait for completion → start dissection → move to next
|
||||
2. Snapshot creation is fast (seconds). Dissection is slow (minutes).
|
||||
3. You do NOT need to wait for dissection before creating the next snapshot.
|
||||
Create the next snapshot while the previous one dissects.
|
||||
|
||||
Use `get_data_boundaries` to calculate how many snapshots are needed:
|
||||
|
||||
```
|
||||
total_range_ms = newest_timestamp - oldest_timestamp
|
||||
window_ms = 240000 # 4 minutes
|
||||
num_snapshots = ceil(total_range_ms / window_ms)
|
||||
```
|
||||
|
||||
Then create snapshots in **4-minute increments**, starting from the most
|
||||
recent:
|
||||
|
||||
```
|
||||
Step 1: create_snapshot (now - 4min → now)
|
||||
→ poll get_snapshot until status == "completed"
|
||||
→ start_snapshot_dissection
|
||||
Step 2: create_snapshot (now - 8min → now - 4min)
|
||||
→ poll get_snapshot until status == "completed"
|
||||
→ start_snapshot_dissection
|
||||
Step 3: create_snapshot (now - 12min → now - 8min)
|
||||
→ poll get_snapshot until status == "completed"
|
||||
→ start_snapshot_dissection
|
||||
```
|
||||
|
||||
**Polling pattern**: After `create_snapshot`, call `get_snapshot` with the
|
||||
returned snapshot ID to check status. Repeat until `status == "completed"`.
|
||||
After `start_snapshot_dissection`, call `get_snapshot_dissection_status`
|
||||
and check until `progress == 100`.
|
||||
|
||||
4-minute windows balance snapshot size (fast to create and dissect) against
|
||||
coverage (captures threats with sleep cycles up to ~3 minutes). Most attack
|
||||
patterns in the wild repeat within 30-120 seconds.
|
||||
|
||||
**Do not skip this step.** A single short snapshot will miss threats with
|
||||
longer sleep cycles. The 4-minute windows ensure full coverage.
|
||||
|
||||
**Note**: Small snapshots (under ~15 minutes of traffic) often dissect in
|
||||
seconds rather than minutes. If dissection completes quickly, you can
|
||||
collapse the phased approach (immediate data first, L7 after) into a
|
||||
single pass through all phases.
|
||||
|
||||
### Step 4: Present Intermediate Results
|
||||
|
||||
Present Section A findings to the user as **intermediate results** — clearly
|
||||
labeled as preliminary:
|
||||
|
||||
```
|
||||
## Intermediate Results (Real-Time Analysis)
|
||||
|
||||
⚠️ These findings are based on live dissected traffic, which may have
|
||||
gaps in coverage. Snapshot analysis is in progress and will provide
|
||||
the complete, evidence-grade audit.
|
||||
|
||||
[findings table and details]
|
||||
|
||||
Snapshots are being created and dissected. Full report to follow.
|
||||
```
|
||||
|
||||
This gives the user immediate value while snapshots process. But be explicit:
|
||||
**the audit is not complete until Section B finishes.**
|
||||
|
||||
---
|
||||
|
||||
## SECTION B: Snapshot Deep Dive
|
||||
|
||||
**Goal**: Systematic, thorough analysis against immutable snapshot data.
|
||||
This is the evidence-grade section — complete coverage, reproducible results.
|
||||
|
||||
**The audit is NOT done until this section completes.** Snapshots must be
|
||||
created, dissected, and analyzed at L7 before the final report is generated.
|
||||
Section A may miss traffic that wasn't being dissected in real-time — Section B
|
||||
captures everything in the raw PCAP buffer, including traffic that real-time
|
||||
dissection dropped or never saw. Do not skip this section or treat Section A
|
||||
results as the final word.
|
||||
|
||||
### What a Snapshot Gives You
|
||||
|
||||
A completed snapshot provides **three independent data sources** — do not
|
||||
wait for dissection to use the first two:
|
||||
|
||||
| Source | Available | Tool | What It Provides |
|
||||
|--------|-----------|------|-----------------|
|
||||
| **Workloads & IPs** | Immediately | `list_workloads` with `snapshot_id` | Pod names, namespaces, IPs at capture time |
|
||||
| **PCAP Export** | Immediately | `export_snapshot_pcap` | Raw packets filtered by BPF expression |
|
||||
| **L7 Dissection** | After indexing | `list_api_calls`, `get_api_call`, `get_api_stats` | DNS queries, HTTP requests, SQL statements, Redis commands, gRPC methods |
|
||||
|
||||
### Audit Flow Per Snapshot
|
||||
|
||||
For each 4-minute snapshot, run the full 7-phase sweep. Start with immediate
|
||||
data while dissection completes:
|
||||
|
||||
```
|
||||
Snapshot ready
|
||||
├── Start dissection (background)
|
||||
├── Phase 1: list_workloads (immediate) — workload inventory + IPs
|
||||
│ export_snapshot_pcap (immediate) — raw packet evidence
|
||||
│
|
||||
├── [dissection completes]
|
||||
│
|
||||
├── Phase 2: list_api_calls — DNS threat analysis
|
||||
├── Phase 3: list_api_calls — external HTTP communication
|
||||
├── Phase 4: list_api_calls — lateral movement, K8s API access
|
||||
├── Phase 5: list_api_calls — protocol abuse (PG, Redis, gRPC)
|
||||
├── Phase 6: list_api_calls — credential access (IMDS, cloud APIs)
|
||||
└── Phase 7: correlate all findings
|
||||
```
|
||||
|
||||
Process snapshots in reverse chronological order (most recent first). If the
|
||||
first snapshot reveals enough threats, you may not need to analyze all of them.
|
||||
|
||||
### PCAP for Deep Inspection
|
||||
|
||||
PCAP export happens in Phase 1b (immediately after snapshot creation). In
|
||||
later phases, if a new finding needs deeper packet-level analysis beyond
|
||||
what `list_api_calls` provides, export additional PCAPs using the workload
|
||||
IPs collected in Phase 1a:
|
||||
|
||||
```
|
||||
export_snapshot_pcap(snapshot_id, bpf_filter="host <workload_ip>")
|
||||
```
|
||||
|
||||
### Merging Findings Across Snapshots
|
||||
|
||||
Threats that appear in multiple snapshots are confirmed persistent. One-time
|
||||
events in a single snapshot may be transient. Note which findings repeat
|
||||
across snapshots — persistence is a strong signal of real compromise vs.
|
||||
a single anomalous event.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Workload Inventory & PCAP Evidence
|
||||
|
||||
**Goal**: Identify all active workloads, collect their IPs, and export raw
|
||||
PCAP evidence — all before dissection completes.
|
||||
**Data source**: Immediate (no dissection needed).
|
||||
|
||||
### 1a: Workload Inventory
|
||||
|
||||
**Tool**: `list_workloads` with `snapshot_id`
|
||||
|
||||
Query with the target namespace (or all namespaces). The response includes
|
||||
pod names, namespaces, and **IP addresses at capture time** — these IPs are
|
||||
critical for building BPF filters in later phases and for correlating L4
|
||||
flows to workload identities.
|
||||
|
||||
For each workload, note:
|
||||
- Pod name and namespace
|
||||
- IP address (save these — you'll need them for PCAP export and L4 analysis)
|
||||
- Whether it's expected (matches known deployments)
|
||||
|
||||
**What to flag**:
|
||||
- Workloads not matching any known Deployment/DaemonSet/StatefulSet
|
||||
- Pods with names that mimic system components (e.g., `kube-proxy-debug`)
|
||||
- Unexpected number of replicas or pods in the namespace
|
||||
|
||||
### 1b: PCAP Export (Immediate — No Dissection Needed)
|
||||
|
||||
**Tool**: `export_snapshot_pcap` with `snapshot_id`
|
||||
|
||||
PCAP export is available immediately after snapshot creation — it reads raw
|
||||
packets, not dissected data. Use it now to preserve evidence and get raw
|
||||
packet-level visibility before L7 dissection completes.
|
||||
|
||||
**Export PCAP for every CRITICAL finding** from Section A's real-time analysis.
|
||||
Use the workload IPs from 1a to build BPF filters:
|
||||
|
||||
```
|
||||
export_snapshot_pcap(snapshot_id, bpf_filter="host <workload_ip>")
|
||||
```
|
||||
|
||||
This is especially useful for:
|
||||
- Verifying encrypted C2 (TLS ClientHello SNI inspection)
|
||||
- Confirming Stratum mining protocol content
|
||||
- Extracting DNS tunnel payloads at packet level
|
||||
- Preserving forensic evidence before cluster changes
|
||||
|
||||
If Section A identified no CRITICAL findings yet, export a broad PCAP for
|
||||
the most suspicious workloads based on the L4 flow analysis from Section A (Step 2).
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: DNS Threat Analysis
|
||||
|
||||
**Goal**: DNS is the single most reliable indicator of compromise. Every attack
|
||||
that communicates externally needs DNS resolution. Sweep DNS traffic for all
|
||||
known threat patterns.
|
||||
|
||||
### 2a: External DNS (Non-Cluster Queries)
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `dns`
|
||||
|
||||
Examine all DNS queries. Flag anything that is NOT `*.cluster.local` or
|
||||
`*.svc.cluster.local` — these are external resolutions that reveal what
|
||||
workloads are reaching out to.
|
||||
|
||||
**What to flag**:
|
||||
|
||||
| Pattern | Threat | KFL Filter |
|
||||
|---------|--------|------------|
|
||||
| Mining pool domains (minexmr, nanopool, mining-pool) | Cryptojacking | `dns && dns_questions.exists(q, q.contains("minexmr"))` |
|
||||
| High-entropy subdomains (base64-like, >30 chars) | DNS tunneling / exfiltration | `dns` — then inspect subdomain length and entropy |
|
||||
| DGA patterns (random .com/.net with NXDOMAIN) | C2 beaconing | `dns && dns_response && size(dns_answers) == 0` |
|
||||
| DoH resolver domains (cloudflare-dns.com, dns.google) | DNS bypass / C2 channel | `dns && dns_questions.exists(q, q.contains("cloudflare-dns"))` |
|
||||
| Cloud API domains (sts.amazonaws.com, s3.amazonaws.com) | Stolen credential usage | `dns && dns_questions.exists(q, q.contains("amazonaws.com"))` |
|
||||
| C2/attacker domains (attacker, c2, darknet, exfil) | Command & Control | `dns && dns_questions.exists(q, q.contains("c2"))` |
|
||||
|
||||
### 2b: DNS Query Volume and Types
|
||||
|
||||
High query volume from a single pod is suspicious. Also check for unusual
|
||||
record types:
|
||||
|
||||
- **TXT queries** to external domains → data exfiltration
|
||||
- **NULL queries** → DNS tunneling (iodine, dnscat2)
|
||||
- **AXFR queries** → zone transfer attempts (reconnaissance)
|
||||
- **SRV queries** to many namespaces → service enumeration
|
||||
|
||||
### 2c: NXDOMAIN Ratio
|
||||
|
||||
A high NXDOMAIN ratio (>20% of queries) from a single source suggests DGA
|
||||
beaconing — the malware tries many generated domains, most of which don't exist.
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `dns && dns_response && size(dns_answers) == 0`
|
||||
|
||||
Compare the count of failed queries to total queries per source pod.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: External Communication
|
||||
|
||||
**Goal**: Identify all traffic leaving the cluster. Any pod connecting to
|
||||
external IPs or domains needs justification.
|
||||
**Data source**: L7 dissection (after indexing).
|
||||
|
||||
**Note**: L4 flow analysis for external communication is covered in
|
||||
Section A (Step 2) using `list_l4_flows` against real-time data. In
|
||||
Section B, use `list_api_calls` against dissected snapshot data for
|
||||
deeper L7 inspection of external traffic.
|
||||
|
||||
### 3a: HTTP External Requests
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `http && !dst.pod.namespace.startsWith("kube")`
|
||||
|
||||
Inspect outbound HTTP requests for:
|
||||
|
||||
- **Beaconing patterns**: Regular-interval requests to the same external URL
|
||||
- **Suspicious User-Agents**: `Mozilla/4.0`, `curl/`, empty, or malware-like
|
||||
- **Suspicious paths**: `/check?s=`, `/beacon`, `/heartbeat`, `/proxy?coin=`
|
||||
- **Base64 in headers**: Oversized Cookie or custom X-* headers with encoded data
|
||||
- **gRPC to external**: `Content-Type: application/grpc` to non-cluster destinations
|
||||
- **WebSocket upgrades**: `Upgrade: websocket` to external hosts (potential mining)
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: Lateral Movement
|
||||
|
||||
**Goal**: Identify pods communicating with services they shouldn't — crossing
|
||||
namespace boundaries, probing infrastructure, or scanning the network.
|
||||
**Data source**: L7 dissection (after indexing) for cross-namespace HTTP
|
||||
and API server analysis.
|
||||
|
||||
**Note**: Port scanning detection via `list_l4_flows` is covered in
|
||||
Section A (Step 2) against real-time data.
|
||||
|
||||
### 4a: Cross-Namespace Traffic
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `src.pod.namespace != dst.pod.namespace`
|
||||
|
||||
Most pods should only talk within their namespace (and to kube-system services).
|
||||
Cross-namespace traffic to unexpected destinations is a lateral movement indicator.
|
||||
|
||||
### 4b: Kubernetes API Server Access
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `http && dst.port == 443 && path.startsWith("/api")`
|
||||
|
||||
Check what pods are querying the K8s API server and what they're requesting:
|
||||
|
||||
| API Path | Threat | Severity |
|
||||
|----------|--------|----------|
|
||||
| `/api/v1/secrets` | Secret enumeration | CRITICAL |
|
||||
| `/api/v1/pods` | Workload discovery | HIGH |
|
||||
| `/apis/rbac.authorization.k8s.io` | RBAC reconnaissance | HIGH |
|
||||
| `/api/v1/configmaps` | Config enumeration | MEDIUM |
|
||||
| `/api/v1/namespaces` | Namespace discovery | MEDIUM |
|
||||
|
||||
A pod hitting **multiple** of these paths is performing systematic enumeration,
|
||||
not legitimate API access. Legitimate workloads typically access 1-2 specific
|
||||
resources rather than sweeping across resource types.
|
||||
|
||||
### 4c: Service Fingerprinting
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `http && (path == "/.env" || path == "/actuator/info" || path == "/server-info" || path == "/version")`
|
||||
|
||||
These paths are used for service fingerprinting — mapping what software is
|
||||
running on internal endpoints. A pod probing multiple services with these
|
||||
paths is performing reconnaissance.
|
||||
|
||||
### 4d: Service Account Permission Audit via Traffic
|
||||
|
||||
Cross-reference Phase 4b findings (K8s API traffic) with the source pod's
|
||||
actual service account to determine if permissions are excessive.
|
||||
|
||||
For each pod making API server calls:
|
||||
|
||||
1. **Identify the service account**: From the workload inventory or via
|
||||
`kubectl get pod <name> -n <ns> -o jsonpath='{.spec.serviceAccountName}'`
|
||||
2. **Check what it accessed**: The API paths from Phase 4b reveal what the
|
||||
pod actually queried (secrets, pods, RBAC, configmaps)
|
||||
3. **Compare against expected access**: A `frontend` pod should never hit
|
||||
`/api/v1/secrets`. A `batch-processor` has no reason to query
|
||||
`/apis/rbac.authorization.k8s.io/v1/clusterrolebindings`.
|
||||
|
||||
**What to flag**:
|
||||
|
||||
| Pattern | Threat | Severity |
|
||||
|---------|--------|----------|
|
||||
| Pod queries secrets but its SA only needs pod read | Over-privileged SA or stolen token | HIGH |
|
||||
| Pod hits cluster-wide endpoints (`--all-namespaces` style queries) | Cluster-admin binding | CRITICAL |
|
||||
| Pod's SA is `default` but makes authenticated API calls | Token mounted unnecessarily | MEDIUM |
|
||||
| Multiple pods share the same over-privileged SA | Lateral blast radius | HIGH |
|
||||
|
||||
This converts a network finding (API traffic volume) into an actionable RBAC
|
||||
recommendation — telling the user exactly which ClusterRoleBinding to revoke.
|
||||
|
||||
### 4e: Cross-Namespace Threat Correlation
|
||||
|
||||
When port scanning or lateral movement targets IPs outside the audited
|
||||
namespace (e.g., IPs in the pod CIDR `10.244.x.x` that don't belong to
|
||||
any workload in the target namespace), resolve them to identify the
|
||||
cross-namespace blast radius:
|
||||
|
||||
1. Use `list_workloads` (all namespaces) to map destination IPs to pods
|
||||
2. Identify which namespaces are being probed
|
||||
3. Flag the scope: "port scan from `k8s-mule/network-diagnostics` is
|
||||
targeting pods in `default`, `monitoring`, and `kube-system`"
|
||||
|
||||
This turns a single-namespace finding into a cluster-wide risk assessment.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Protocol Abuse
|
||||
|
||||
**Goal**: Inspect L7 payload content for attack patterns within supported
|
||||
protocols. This is the phase most often skipped — and where subtle threats hide.
|
||||
|
||||
### 5a: PostgreSQL Wire Protocol
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `postgresql`
|
||||
|
||||
The `postgresql_query` variable contains the full SQL text. Use it to detect:
|
||||
|
||||
| KFL Filter | Threat | Severity |
|
||||
|------------|--------|----------|
|
||||
| `postgresql && postgresql_query.contains("UNION SELECT")` | SQL injection | HIGH |
|
||||
| `postgresql && postgresql_query.contains("pg_shadow")` | Password hash theft | CRITICAL |
|
||||
| `postgresql && postgresql_query.contains("information_schema")` | Schema enumeration | MEDIUM |
|
||||
| `postgresql && postgresql_query.contains("TRUNCATE")` | Data destruction | CRITICAL |
|
||||
| `postgresql && postgresql_query.contains("DROP TABLE")` | Data destruction | CRITICAL |
|
||||
| `postgresql && !postgresql_success` | Failed queries (may indicate probing) | MEDIUM |
|
||||
|
||||
Use `get_api_call` to inspect the full SQL content. Also check `postgresql_user`
|
||||
— queries from unexpected users are suspicious.
|
||||
|
||||
### 5b: Redis Protocol
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `redis`
|
||||
|
||||
Use `redis_type` (command verb) and `redis_command` (full command line) to detect:
|
||||
|
||||
| KFL Filter | Threat | Severity |
|
||||
|------------|--------|----------|
|
||||
| `redis && redis_type == "CONFIG"` | Server config dump/write | HIGH |
|
||||
| `redis && redis_type == "KEYS"` | Full key enumeration | HIGH |
|
||||
| `redis && redis_type == "CLIENT"` | Connection enumeration | MEDIUM |
|
||||
| `redis && redis_type == "DEBUG"` | Debug access | MEDIUM |
|
||||
| `redis && redis_command.contains("CONFIG SET dir")` | Arbitrary file write (RCE) | CRITICAL |
|
||||
| `redis && redis_type == "FLUSHALL"` | Data destruction | CRITICAL |
|
||||
|
||||
### 5c: gRPC Endpoints
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `grpc`
|
||||
|
||||
Use `grpc_method` to inspect method names:
|
||||
|
||||
| KFL Filter | Threat | Severity |
|
||||
|------------|--------|----------|
|
||||
| `grpc && grpc_method.contains("Reflection")` | API surface enumeration | MEDIUM |
|
||||
| `grpc && dst.name.contains("attacker")` | Data exfiltration | HIGH |
|
||||
| `grpc && grpc_status != 0` | Failed gRPC calls (may indicate probing) | LOW |
|
||||
|
||||
### 5d: HTTP Request Anomalies
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `http`
|
||||
|
||||
Check for:
|
||||
- **WebSocket upgrades to external hosts**: `Upgrade: websocket` header — potential
|
||||
mining proxy or persistent C2 channel
|
||||
- **DNS-over-HTTPS requests**: `accept: application/dns-json` header — DNS bypass
|
||||
- **AWS Signature headers**: `Authorization: AWS4-HMAC-SHA256` — stolen cloud creds
|
||||
- **IMDS-specific headers**: `X-aws-ec2-metadata-token-ttl-seconds` — token request
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: Credential Access
|
||||
|
||||
**Goal**: Detect active credential theft — IMDS access, service account abuse,
|
||||
cloud API exploitation.
|
||||
|
||||
### 6a: Instance Metadata Service (IMDS)
|
||||
|
||||
**Tool**: `list_api_calls` with KFL: `dst.ip == "169.254.169.254"`
|
||||
|
||||
Any pod connecting to this IP can obtain the node's cloud credentials — treat it as attempted credential theft unless the workload has a documented need for IMDS.
|
||||
Check the HTTP paths:
|
||||
|
||||
| Path | What's Being Stolen |
|
||||
|------|-------------------|
|
||||
| `/latest/meta-data/iam/security-credentials/` | IAM role name |
|
||||
| `/latest/meta-data/iam/security-credentials/<role>` | Actual AWS credentials |
|
||||
| `/latest/dynamic/instance-identity/document` | Instance identity (account ID, region) |
|
||||
| `/latest/user-data` | Instance bootstrap scripts (may contain secrets) |
|
||||
| `/latest/api/token` (PUT) | IMDSv2 session token |
|
||||
|
||||
### 6b: Service Account Token Exfiltration
|
||||
|
||||
Look for HTTP requests where the body or headers contain JWT tokens
|
||||
(strings starting with `eyJ`). These may be service account tokens being
|
||||
sent to external endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Phase 7: Attack Chain Correlation
|
||||
|
||||
**Goal**: Connect individual findings into a coherent attack narrative.
|
||||
|
||||
After completing phases 1-6, synthesize findings into an attack chain. Real
|
||||
attacks follow a progression:
|
||||
|
||||
```
|
||||
1. INITIAL ACCESS → How did the attacker get in?
|
||||
2. RECONNAISSANCE → Port scanning, DNS enumeration, API discovery
|
||||
3. CREDENTIAL ACCESS → IMDS theft, secret enumeration, token exfil
|
||||
4. LATERAL MOVEMENT → Cross-namespace probing, SSRF, service scanning
|
||||
5. EXFILTRATION → DNS tunneling, HTTP exfil, gRPC streaming
|
||||
6. PERSISTENCE → C2 beaconing, cryptomining (monetization)
|
||||
```
|
||||
|
||||
Map each finding to a stage. If you see findings across multiple stages from
|
||||
the same namespace or related workloads, you've found a coordinated attack.
|
||||
|
||||
### Output Format
|
||||
|
||||
Present the audit results as:
|
||||
|
||||
1. **Workload inventory** — table of all observed workloads with threat level
|
||||
2. **Detailed findings** — one section per finding, ordered by severity
|
||||
3. **Attack chain summary** — if findings correlate, map the kill chain
|
||||
4. **Immediate actions** — prioritized remediation steps
|
||||
|
||||
---
|
||||
|
||||
## Audit Report — Two-Stage Delivery
|
||||
|
||||
The audit produces **two outputs** — an intermediate report after Section A,
|
||||
and a final PDF report after Section B completes.
|
||||
|
||||
### Stage 1: Intermediate Report (after Section A)
|
||||
|
||||
Present findings from real-time analysis directly in the conversation. Clearly
|
||||
label as preliminary. This gives the user immediate value while snapshots
|
||||
are being created and dissected.
|
||||
|
||||
### Stage 2: Final PDF Report (after Section B)
|
||||
|
||||
This is the primary deliverable. It is generated **only after all snapshots
|
||||
have been dissected and analyzed at L7**. Do not generate the final report
|
||||
based on Section A alone — that would miss protocol-level threats (SQL
|
||||
injection, Redis abuse, gRPC exfil) that only appear after dissection.
|
||||
|
||||
1. **Write** the report as markdown: `security-audit-<namespace>-<date>.md`
|
||||
Follow the template in `references/report-template.md` — it defines
|
||||
the full structure: executive summary, threat table, detailed findings
|
||||
with evidence, attack chain analysis, detection coverage, and remediation.
|
||||
|
||||
2. **Convert to PDF** (in preference order):
|
||||
```bash
|
||||
npx md-to-pdf security-audit-<namespace>-<date>.md # Best quality
|
||||
pandoc security-audit-<namespace>-<date>.md -o security-audit-<namespace>-<date>.pdf # Fallback if md-to-pdf is unavailable
|
||||
```
|
||||
If neither tool is available, leave the markdown as the deliverable.
|
||||
|
||||
3. **The final report must include findings from both sections** — Section A
|
||||
(real-time) and Section B (snapshot dissection). Findings confirmed by
|
||||
both sections are marked with higher confidence. Findings only in
|
||||
Section B (missed by real-time) should be noted — this reveals gaps
|
||||
in real-time dissection coverage.
|
||||
|
||||
### Key Report Requirements
|
||||
|
||||
- **Quote raw evidence** — actual DNS queries, HTTP URLs, SQL statements,
|
||||
Redis commands. The reader must be able to verify without re-running.
|
||||
- **Timestamp every finding** — snapshot ID + local time (UTC in parentheses).
|
||||
- **Specific recommendations** — not "fix RBAC" but "revoke ClusterRoleBinding
|
||||
`mule-recon-cluster-admin`".
|
||||
- **Include MITRE ATT&CK IDs** for each finding.
|
||||
- **Evidence preservation** — list snapshot IDs, recommend cloud storage upload.
|
||||
|
||||
---
|
||||
|
||||
## What Network Auditing Cannot Detect
|
||||
|
||||
Be transparent about blind spots. Network traffic analysis **cannot** detect:
|
||||
|
||||
- **Configuration vulnerabilities**: Privileged containers, missing resource
|
||||
limits, permissive RBAC, hostPath mounts — these are YAML-level issues with
|
||||
no traffic signature
|
||||
- **Secrets in environment variables**: Hardcoded credentials don't generate
|
||||
network traffic until used
|
||||
- **Image vulnerabilities**: CVEs in container images are not visible on the wire
|
||||
- **Idle threats**: A malicious pod that hasn't started communicating yet
|
||||
|
||||
Recommend `kubectl`-based configuration auditing for these gaps. Network
|
||||
auditing is the complement, not the replacement, for config-level security
|
||||
scanning.
|
||||
|
||||
## Threat Intelligence Reference
|
||||
|
||||
For detailed descriptions of all 22 network-observable threat scenarios with
|
||||
MITRE ATT&CK mappings and detection guidance, see `references/threat-catalog.md`.
|
||||
64
skills/security-audit/references/kfl-security-filters.md
Normal file
64
skills/security-audit/references/kfl-security-filters.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# KFL Quick Reference: Security Audit Filters
|
||||
|
||||
## DNS Threat Hunting
|
||||
```
|
||||
dns // All DNS traffic
|
||||
dns && dns_response && size(dns_answers) == 0 // Failed or empty lookups (no answers — NXDOMAIN, NODATA, or SERVFAIL)
|
||||
dns && dns_questions.exists(q, q.contains("minexmr")) // Mining pool DNS
|
||||
dns && dns_questions.exists(q, q.contains("nanopool")) // Mining pool DNS
|
||||
dns && dns_questions.exists(q, q.contains("amazonaws")) // Cloud API resolution
|
||||
dns && dns_questions.exists(q, q.contains("cloudflare-dns")) // DoH bypass
|
||||
dns && dns_questions.exists(q, q.contains("dns.google")) // DoH bypass
|
||||
```
|
||||
|
||||
## External Communication
|
||||
```
|
||||
http && dst.name.contains("attacker") // Known-bad destinations
|
||||
http && map_get(request.headers, "user-agent", "").contains("Mozilla/4.0") // Suspicious UA
|
||||
http && map_get(request.headers, "accept", "").contains("dns-json") // DoH requests
|
||||
http && map_get(request.headers, "upgrade", "") == "websocket" // WebSocket (potential mining)
|
||||
```
|
||||
|
||||
## Lateral Movement
|
||||
```
|
||||
src.pod.namespace != dst.pod.namespace // Cross-namespace traffic
|
||||
http && path.startsWith("/api/v1/secrets") // Secret enumeration
|
||||
http && path == "/.env" // Service fingerprinting
|
||||
http && path == "/actuator/info" // Spring Boot fingerprinting
|
||||
http && path == "/version" // Version fingerprinting
|
||||
```
|
||||
|
||||
## Protocol Inspection
|
||||
```
|
||||
postgresql // PostgreSQL wire protocol
|
||||
postgresql && postgresql_query.contains("UNION SELECT") // SQL injection patterns
|
||||
postgresql && !postgresql_success // Failed PostgreSQL queries
|
||||
redis // Redis protocol
|
||||
grpc // gRPC calls (native detection)
|
||||
grpc && grpc_method.contains("Reflection") // gRPC reflection enumeration
|
||||
```
|
||||
|
||||
## Credential Theft
|
||||
```
|
||||
dst.ip == "169.254.169.254" // IMDS access
|
||||
http && path.contains("/meta-data/iam") // IAM credential paths
|
||||
http && map_get(request.headers, "authorization", "").startsWith("AWS4-HMAC-SHA256") // Stolen AWS creds
|
||||
http && "x-aws-ec2-metadata-token-ttl-seconds" in request.headers // IMDSv2 token request
|
||||
```
|
||||
|
||||
## Resource Hijacking
|
||||
```
|
||||
dst.port == 3333 // Stratum mining (standard)
|
||||
dst.port == 14433 // Stratum mining (alt)
|
||||
dst.port == 45700 // Stratum mining (alt)
|
||||
dst.port == 4444 // Reverse shell / backdoor
|
||||
```
|
||||
|
||||
## Per-Namespace Scoping
|
||||
|
||||
Add namespace filters to any query above:
|
||||
```
|
||||
dns && src.pod.namespace == "k8s-mule" // DNS from specific namespace
|
||||
http && src.pod.namespace == "k8s-mule" // HTTP from specific namespace
|
||||
redis && src.pod.namespace == "k8s-mule" // Redis from specific namespace
|
||||
```
|
||||
102
skills/security-audit/references/report-template.md
Normal file
102
skills/security-audit/references/report-template.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Security Audit Report Template
|
||||
|
||||
Use this template for the markdown report. Fill in all sections, then convert
|
||||
to PDF.
|
||||
|
||||
```markdown
|
||||
# Kubernetes Network Security Audit Report
|
||||
|
||||
**Cluster**: <cluster name/context>
|
||||
**Namespace**: <target namespace>
|
||||
**Date**: <audit date and time, local timezone>
|
||||
**Audit window**: <start time> — <end time> (<duration>)
|
||||
**Snapshots analyzed**: <count and IDs>
|
||||
**Audited by**: Claude Code + Kubeshark MCP
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
<2-3 sentence summary: how many threats found, highest severity,
|
||||
whether an active attack chain was identified, top recommendation>
|
||||
|
||||
## Threat Summary
|
||||
|
||||
| # | Severity | Workload | Threat | MITRE ATT&CK |
|
||||
|---|----------|----------|--------|---------------|
|
||||
| 1 | CRITICAL | log-shipper | DNS Tunneling | T1048.003 |
|
||||
| 2 | CRITICAL | cloud-health-monitor | IMDS Credential Theft | T1552.005 |
|
||||
| ... | | | | |
|
||||
|
||||
## Detailed Findings
|
||||
|
||||
### Finding 1: <Title> (CRITICAL)
|
||||
|
||||
**Workload**: <pod name>
|
||||
**MITRE ATT&CK**: <technique ID and name>
|
||||
**Snapshot**: <snapshot ID>
|
||||
**Detection method**: <which phase and tool detected this>
|
||||
|
||||
**Evidence**:
|
||||
<Specific traffic data — DNS queries, HTTP requests, L4 flows,
|
||||
protocol payloads. Include timestamps, source/dest, and relevant
|
||||
content. Quote actual query names, URLs, SQL statements, or
|
||||
Redis commands observed.>
|
||||
|
||||
**Impact**:
|
||||
<What this means — data at risk, credentials exposed, scope of access>
|
||||
|
||||
**Recommendation**:
|
||||
<Specific remediation — NetworkPolicy, RBAC change, pod deletion, credential rotation>
|
||||
|
||||
---
|
||||
|
||||
(repeat for each finding)
|
||||
|
||||
## Attack Chain Analysis
|
||||
|
||||
<If findings correlate, map the kill chain:
|
||||
Initial Access → Reconnaissance → Credential Access → Lateral Movement →
|
||||
Exfiltration → Persistence. Identify which workloads participate in each stage.>
|
||||
|
||||
## Detection Coverage
|
||||
|
||||
| Phase | Checked | Findings |
|
||||
|-------|---------|----------|
|
||||
| Workload Inventory | Yes | <count> |
|
||||
| DNS Threat Analysis | Yes | <count> |
|
||||
| External Communication | Yes | <count> |
|
||||
| Lateral Movement | Yes | <count> |
|
||||
| Protocol Abuse | Yes | <count> |
|
||||
| Credential Access | Yes | <count> |
|
||||
|
||||
## Limitations
|
||||
|
||||
<What this audit cannot detect — config-level vulnerabilities,
|
||||
image CVEs, idle threats. Recommend complementary tools.>
|
||||
|
||||
## Immediate Actions
|
||||
|
||||
1. <Highest priority action>
|
||||
2. <Second priority>
|
||||
3. ...
|
||||
|
||||
## Evidence Preservation
|
||||
|
||||
<List snapshot IDs created during this audit. Recommend uploading
|
||||
to cloud storage for long-term retention. Include PCAP export
|
||||
commands for key findings.>
|
||||
```
|
||||
|
||||
## Quality Guidelines
|
||||
|
||||
- **Include raw evidence** — quote actual DNS queries, HTTP URLs, SQL
|
||||
statements, Redis commands. The reader should be able to verify findings
|
||||
without re-running the audit.
|
||||
- **Timestamp everything** — every finding should reference the snapshot ID
|
||||
and timestamp (local time with UTC in parentheses).
|
||||
- **Be specific in recommendations** — not "fix RBAC" but "revoke
|
||||
ClusterRoleBinding `mule-recon-cluster-admin` and replace with a
|
||||
namespace-scoped Role granting only `get` on `pods`".
|
||||
- **Include MITRE ATT&CK IDs** — makes the report actionable for security
|
||||
teams that track coverage against the framework.
|
||||
190
skills/security-audit/references/threat-catalog.md
Normal file
190
skills/security-audit/references/threat-catalog.md
Normal file
@@ -0,0 +1,190 @@
|
||||
# Network Threat Catalog
|
||||
|
||||
22 network-observable threat patterns organized by MITRE ATT&CK tactic.
|
||||
Each entry describes the attack, what it looks like on the wire, and how
|
||||
to detect it with Kubeshark.
|
||||
|
||||
## Command & Control (TA0011)
|
||||
|
||||
### DGA Beaconing (T1568.002)
|
||||
- **What**: Malware generates pseudo-random domain names daily and queries DNS
|
||||
for each. The C2 operator registers a few; most resolve to NXDOMAIN.
|
||||
- **Wire signature**: Burst of DNS queries for high-entropy .com/.net domains
|
||||
with >80% NXDOMAIN response rate.
|
||||
- **KFL**: `dns && dns_response && size(dns_answers) == 0` — then check for entropy in queried names.
|
||||
- **Difficulty**: Medium. NXDOMAIN flood is distinctive but low-rate DGA can
|
||||
blend with legitimate DNS failures.
|
||||
|
||||
### HTTP C2 Beaconing (T1071.001)
|
||||
- **What**: Implant calls home via HTTP GET at regular intervals, receiving
|
||||
tasking in the response body. Cobalt Strike, Meterpreter pattern.
|
||||
- **Wire signature**: Periodic HTTP GET to fixed external URL at suspiciously
|
||||
regular intervals (30-60s). Outdated User-Agent (Mozilla/4.0). Session
|
||||
identifiers in URL path.
|
||||
- **KFL**: `http && dst.name.contains("attacker")` or check for User-Agent anomalies.
|
||||
- **Difficulty**: Medium. Regularity is the key anomaly.
|
||||
|
||||
### Encrypted C2 (T1573.002)
|
||||
- **What**: C2 over HTTPS. Content is encrypted but TLS SNI reveals suspicious
|
||||
domain names.
|
||||
- **Wire signature**: Outbound TLS to non-standard domains (darknet, cdn-mirror).
|
||||
DNS queries preceding the connection reveal the target.
|
||||
- **KFL**: `dns && (dns_questions.exists(q, q.contains("darknet")) || dns_questions.exists(q, q.contains("cdn-mirror")))`.
|
||||
- **Difficulty**: Hard. Encrypted, uses standard port 443.
|
||||
|
||||
### DNS-over-HTTPS C2 (T1572)
|
||||
- **What**: Bypasses cluster DNS by sending queries as HTTPS to public DoH
|
||||
resolvers (cloudflare-dns.com, dns.google). C2 commands embedded in TXT
|
||||
responses.
|
||||
- **Wire signature**: HTTP requests to DoH endpoints with `accept: application/dns-json`
|
||||
header. No corresponding queries on port 53.
|
||||
- **KFL**: `http && (dst.name.contains("cloudflare-dns") || dst.name.contains("dns.google"))`.
|
||||
- **Difficulty**: Hard. Looks like regular HTTPS to trusted providers.
|
||||
|
||||
## Exfiltration (TA0010)
|
||||
|
||||
### DNS Tunneling (T1048.003)
|
||||
- **What**: Full bidirectional data channel over DNS using tools like iodine,
|
||||
dnscat2. Data encoded in long subdomain labels.
|
||||
- **Wire signature**: High-frequency DNS queries (20+/burst) with subdomain
|
||||
labels near 63-byte limit. Mix of A, TXT, NULL query types.
|
||||
- **KFL**: `dns && dns_questions.exists(q, q.contains("data-relay"))` or look for
|
||||
high query rates per source.
|
||||
- **Difficulty**: Medium. Volume and long subdomains are distinctive.
|
||||
|
||||
### HTTP Header Exfiltration (T1048.001)
|
||||
- **What**: Data exfiltrated in HTTP headers (Cookie, X-Trace-ID) disguised
|
||||
as analytics tracking. Low volume to evade detection.
|
||||
- **Wire signature**: HTTP GET to analytics-looking URL with oversized Cookie
|
||||
or custom headers containing base64-encoded data.
|
||||
- **KFL**: `http && dst.name.contains("cdn-provider")`.
|
||||
- **Difficulty**: Hard. Low volume, standard HTTP, looks like analytics.
|
||||
|
||||
### DNS Credential Exfiltration (T1048.003)
|
||||
- **What**: Stolen JWT tokens or credentials encoded in DNS TXT queries to
|
||||
attacker-controlled authoritative nameserver.
|
||||
- **Wire signature**: DNS TXT queries with structured multi-label subdomains
|
||||
containing base64-like encoded data.
|
||||
- **KFL**: `dns && dns_questions.exists(q, q.contains("steal-creds"))`.
|
||||
- **Difficulty**: Medium. Multi-label structure is distinctive.
|
||||
|
||||
### gRPC Stream Exfiltration (T1048.001)
|
||||
- **What**: Data exfiltration via gRPC (HTTP/2) POST to external endpoint.
|
||||
Blends with normal microservice traffic.
|
||||
- **Wire signature**: HTTP/2 POST with `Content-Type: application/grpc` to
|
||||
external destination with exfil-related method names.
|
||||
- **KFL**: `grpc && dst.name.contains("attacker")`.
|
||||
- **Difficulty**: Hard. gRPC is normal in K8s. External destination is the signal.
|
||||
|
||||
## Lateral Movement (TA0008)
|
||||
|
||||
### K8s API Enumeration (T1613)
|
||||
- **What**: Compromised pod uses mounted service account token to enumerate
|
||||
secrets, pods, RBAC bindings across all namespaces.
|
||||
- **Wire signature**: HTTPS to kubernetes.default.svc with broad GET requests
|
||||
across /api/v1/secrets, /pods, /configmaps, /clusterrolebindings.
|
||||
- **KFL**: `http && dst.port == 443 && path.contains("/api/v1/secrets")`.
|
||||
- **Difficulty**: Medium. The fanout across resource types is the anomaly.
|
||||
|
||||
### SSRF to Internal Services (T1090)
|
||||
- **What**: Pod probes cross-namespace internal services it shouldn't talk to —
|
||||
kube-dns metrics, Prometheus, Grafana, dashboards.
|
||||
- **Wire signature**: HTTP to multiple ClusterIP services across namespaces
|
||||
from a single source pod.
|
||||
- **KFL**: `http && src.pod.namespace == "k8s-mule" && dst.pod.namespace != "k8s-mule"`.
|
||||
- **Difficulty**: Medium. Cross-namespace breadth is the signal.
|
||||
|
||||
### Port Scanning (T1046)
|
||||
- **What**: Sweep of common ports across pod CIDR after initial access.
|
||||
- **Wire signature**: Rapid TCP SYN from single source to many IPs on ports
|
||||
80, 443, 3306, 5432, 6379, 8080, 9090, 27017. High RST/timeout rate.
|
||||
- **KFL**: `tcp && src.name == "network-diagnostics"`.
|
||||
- **Difficulty**: Easy. Classic scan pattern — high fan-out, high failure rate.
|
||||
|
||||
### Service Fingerprinting (T1046)
|
||||
- **What**: HTTP probes to discovery paths across multiple services to identify
|
||||
running software.
|
||||
- **Wire signature**: HTTP GET to /version, /healthz, /.env, /actuator/info,
|
||||
/server-info. HEAD and OPTIONS methods. Multiple targets from one source.
|
||||
- **KFL**: `http && (path == "/.env" || path == "/actuator/info")`.
|
||||
- **Difficulty**: Medium. Path patterns are distinctive.
|
||||
|
||||
## Credential Access (TA0006)
|
||||
|
||||
### IMDS Metadata Theft (T1552.005)
|
||||
- **What**: Query AWS/GCP instance metadata to steal IAM role credentials.
|
||||
The Capital One breach vector.
|
||||
- **Wire signature**: HTTP to 169.254.169.254 with paths /latest/meta-data/iam/,
|
||||
/latest/user-data, /latest/api/token (PUT for IMDSv2).
|
||||
- **KFL**: `dst.ip == "169.254.169.254"`.
|
||||
- **Difficulty**: Easy. Destination IP is unique and unmistakable.
|
||||
|
||||
### Cloud API Abuse (T1078.004)
|
||||
- **What**: Direct calls to AWS APIs (STS, S3, EC2) with stolen credentials
|
||||
from a workload pod.
|
||||
- **Wire signature**: DNS for sts.amazonaws.com, s3.amazonaws.com. HTTPS
|
||||
requests with AWS Signature V4 Authorization headers.
|
||||
- **KFL**: `dns && dns_questions.exists(q, q.contains("amazonaws.com"))`.
|
||||
- **Difficulty**: Medium. Cloud API DNS from a non-controller pod is suspicious.
|
||||
|
||||
## Impact: Resource Hijacking (TA0040)
|
||||
|
||||
### Stratum Mining Protocol (T1496)
|
||||
- **What**: XMRig/miner connecting to mining pool via Stratum JSON-RPC over TCP.
|
||||
- **Wire signature**: TCP connection to port 3333/14433/45700 with JSON-RPC
|
||||
messages: mining.subscribe, mining.authorize, mining.submit.
|
||||
- **KFL**: `dst.port == 3333`.
|
||||
- **Difficulty**: Medium. Port 3333 is a well-known mining indicator.
|
||||
|
||||
### Mining Pool DNS (T1496)
|
||||
- **What**: DNS resolution of known mining pool domains before connecting.
|
||||
- **Wire signature**: DNS queries for domains containing minexmr, nanopool,
|
||||
mining-pool, hashvault, supportxmr.
|
||||
- **KFL**: `dns && (dns_questions.exists(q, q.contains("minexmr")) || dns_questions.exists(q, q.contains("mining-pool")))`.
|
||||
- **Difficulty**: Easy. Mining domain names are unmistakable.
|
||||
|
||||
### WebSocket Mining (T1496)
|
||||
- **What**: Browser-based miner communicating via WebSocket on standard ports.
|
||||
- **Wire signature**: HTTP Upgrade: websocket request to external host with
|
||||
mining-related URL path (/proxy?coin=, ?algo=randomx).
|
||||
- **KFL**: `http && map_get(request.headers, "upgrade", "") == "websocket"`.
|
||||
- **Difficulty**: Hard. WebSocket on port 80/443 looks normal. Only the URL reveals intent.
|
||||
|
||||
## Protocol Abuse
|
||||
|
||||
### SQL Injection via PG Wire (T1190)
|
||||
- **What**: SQL injection payloads sent through PostgreSQL wire protocol.
|
||||
- **Wire signature**: PG protocol carrying UNION SELECT, information_schema,
|
||||
pg_shadow queries.
|
||||
- **KFL**: `postgresql`.
|
||||
- **Difficulty**: Medium. PG dissection reveals the SQL content directly.
|
||||
|
||||
### Redis Unauthorized Access (T1190)
|
||||
- **What**: Unauthenticated Redis instance probed with dangerous commands.
|
||||
- **Wire signature**: Redis protocol: CONFIG GET *, KEYS *, CLIENT LIST, DEBUG.
|
||||
- **KFL**: `redis`.
|
||||
- **Difficulty**: Easy. Redis command names are directly visible.
|
||||
|
||||
### Database Destruction (T1485)
|
||||
- **What**: Ransomware pattern — SELECT * (data theft) then TRUNCATE/DROP (destruction).
|
||||
- **Wire signature**: PG protocol showing SELECT followed by TRUNCATE on same table.
|
||||
- **KFL**: `postgresql`.
|
||||
- **Difficulty**: Medium. DDL commands in PG protocol are visible with dissection.
|
||||
|
||||
## Reconnaissance (TA0043)
|
||||
|
||||
### DNS Zone Enumeration (T1018)
|
||||
- **What**: Brute-force DNS queries across namespaces to discover services.
|
||||
Includes SRV lookups and AXFR zone transfer attempts.
|
||||
- **Wire signature**: High volume of DNS queries for *.svc.cluster.local patterns
|
||||
across many namespaces. Many NXDOMAIN responses.
|
||||
- **KFL**: `dns && src.name == "service-discovery"`.
|
||||
- **Difficulty**: Easy. The volume and cross-namespace pattern are obvious.
|
||||
|
||||
### gRPC Reflection Enumeration (T1046)
|
||||
- **What**: Probing gRPC server reflection to discover API surfaces without
|
||||
needing proto files.
|
||||
- **Wire signature**: HTTP/2 POST to /grpc.reflection.v1alpha.ServerReflection/
|
||||
ServerReflectionInfo across multiple services.
|
||||
- **KFL**: `grpc && grpc_method.contains("Reflection")` or `http && path.contains("grpc.reflection")`.
|
||||
- **Difficulty**: Medium. Reflection path is a known enumeration vector.
|
||||
Reference in New Issue
Block a user