From 71067edb41e8941a8075eddcfb76f1e017f8fd61 Mon Sep 17 00:00:00 2001 From: Noah Campbell Date: Wed, 17 Sep 2025 10:38:11 -0500 Subject: [PATCH] Updated yaml spec (#1863) * v1beta3 spec can be read by preflight * added test files for ease of testing * v1beta3 renderer fixes --- cmd/preflight/cli/docs.go | 2 + docs/v1beta3-guide.md | 280 ++++++++---- go.mod | 2 +- pkg/preflight/read_specs.go | 7 + pkg/preflight/template.go | 6 + sample-preflight-templated.yaml | 227 ---------- sample-troubleshoot.yaml | 54 --- v1beta3-all-analyzers.yaml | 697 ++++++++++++++++++++++++++++++ values-all-analyzers-small.yaml | 4 + values-sample-full.yaml | 63 --- values-sample-minimal.yaml | 40 -- values-v1beta3-1.yaml | 8 +- values-v1beta3-all-analyzers.yaml | 229 ++++++++++ values-v1beta3-full.yaml | 2 +- values-v1beta3-minimal.yaml | 44 -- 15 files changed, 1144 insertions(+), 521 deletions(-) delete mode 100644 sample-preflight-templated.yaml delete mode 100644 sample-troubleshoot.yaml create mode 100644 v1beta3-all-analyzers.yaml create mode 100644 values-all-analyzers-small.yaml delete mode 100644 values-sample-full.yaml delete mode 100644 values-sample-minimal.yaml create mode 100644 values-v1beta3-all-analyzers.yaml delete mode 100644 values-v1beta3-minimal.yaml diff --git a/cmd/preflight/cli/docs.go b/cmd/preflight/cli/docs.go index a88bdbef..ddd7a75d 100644 --- a/cmd/preflight/cli/docs.go +++ b/cmd/preflight/cli/docs.go @@ -111,6 +111,8 @@ func extractDocs(templateFiles []string, valuesFiles []string, setValues []strin useHelm := shouldUseHelmEngine(string(templateContent)) var rendered string if useHelm { + // Seed default-false for referenced boolean values to avoid nil map errors + preflight.SeedDefaultBooleans(string(templateContent), values) rendered, err = preflight.RenderWithHelmTemplate(string(templateContent), values) if err != nil { execValues := legacyContext(values) diff --git a/docs/v1beta3-guide.md b/docs/v1beta3-guide.md index 14afe7a2..8c4dae8a 100644 --- 
a/docs/v1beta3-guide.md +++ b/docs/v1beta3-guide.md @@ -21,15 +21,15 @@ The examples use Go templates with the standard Sprig function set. Values can b - **Toggling sections**: wrap analyzer blocks in conditionals tied to values. ```yaml - {{- if .Values.storage.enabled }} + {{- if .Values.storageClass.enabled }} - docString: | Title: Default StorageClass Requirements Requirement: - - A StorageClass named "{{ .Values.storage.className }}" must exist - ... + - A StorageClass named "{{ .Values.storageClass.className }}" must exist + Default StorageClass enables dynamic PVC provisioning without manual intervention. storageClass: checkName: Default StorageClass - storageClassName: '{{ .Values.storage.className }}' + storageClassName: '{{ .Values.storageClass.className }}' outcomes: - fail: message: Default StorageClass not found @@ -40,25 +40,25 @@ The examples use Go templates with the standard Sprig function set. Values can b - **Values**: template expressions directly use values from your values files. ```yaml - {{ .Values.kubernetes.minVersion }} + {{ .Values.clusterVersion.minVersion }} ``` -- **Nested conditionals**: further constrain checks (e.g., only when a specific ingress type is used). +- **Nested conditionals**: further constrain checks (e.g., only when a specific CRD is required). ```yaml - {{- if .Values.ingress.enabled }} - {{- if eq .Values.ingress.type "Contour" }} + {{- if .Values.crd.enabled }} - docString: | - Title: Required CRDs and Ingress Capabilities - ... + Title: Required CRD Presence + Requirement: + - CRD must exist: {{ .Values.crd.name }} + The application depends on this CRD for controllers to reconcile desired state. 
customResourceDefinition: - checkName: Contour IngressRoute CRD - customResourceDefinitionName: ingressroutes.contour.heptio.com + checkName: Required CRD + customResourceDefinitionName: '{{ .Values.crd.name }}' outcomes: - fail: - message: Contour IngressRoute CRD not found; required for ingress routing + message: Required CRD not found - pass: - message: Contour IngressRoute CRD present - {{- end }} + message: Required CRD present {{- end }} ``` @@ -100,26 +100,26 @@ Use the analyzer that matches the requirement, and enumerate `outcomes` with cle checkName: Kubernetes version outcomes: - fail: - when: '< {{ .Values.kubernetes.minVersion }}' - message: This application requires at least Kubernetes {{ .Values.kubernetes.minVersion }}. + when: '< {{ .Values.clusterVersion.minVersion }}' + message: Requires at least Kubernetes {{ .Values.clusterVersion.minVersion }}. - warn: - when: '< {{ .Values.kubernetes.recommendedVersion }}' - message: Recommended version is {{ .Values.kubernetes.recommendedVersion }} or later. + when: '< {{ .Values.clusterVersion.recommendedVersion }}' + message: Recommended to use Kubernetes {{ .Values.clusterVersion.recommendedVersion }} or later. - pass: - when: '>= {{ .Values.kubernetes.recommendedVersion }}' - message: Your cluster meets the recommended and required versions of Kubernetes. + when: '>= {{ .Values.clusterVersion.recommendedVersion }}' + message: Meets recommended and required Kubernetes versions. 
``` - **customResourceDefinition**: ensure a CRD exists ```yaml customResourceDefinition: - checkName: Contour IngressRoute CRD - customResourceDefinitionName: ingressroutes.contour.heptio.com + checkName: Required CRD + customResourceDefinitionName: '{{ .Values.crd.name }}' outcomes: - fail: - message: Contour IngressRoute CRD not found; required for ingress routing + message: Required CRD not found - pass: - message: Contour IngressRoute CRD present + message: Required CRD present ``` - **containerRuntime**: verify container runtime @@ -137,7 +137,7 @@ Use the analyzer that matches the requirement, and enumerate `outcomes` with cle ```yaml storageClass: checkName: Default StorageClass - storageClassName: '{{ .Values.storage.className }}' + storageClassName: '{{ .Values.storageClass.className }}' outcomes: - fail: message: Default StorageClass not found @@ -148,15 +148,20 @@ Use the analyzer that matches the requirement, and enumerate `outcomes` with cle - **distribution**: whitelist/blacklist distributions ```yaml distribution: + checkName: Supported distribution outcomes: + {{- range $d := .Values.distribution.unsupported }} - fail: - when: '== docker-desktop' - message: The application does not support Docker Desktop Clusters + when: '== {{ $d }}' + message: '{{ $d }} is not supported' + {{- end }} + {{- range $d := .Values.distribution.supported }} - pass: - when: '== eks' - message: EKS is a supported distribution + when: '== {{ $d }}' + message: '{{ $d }} is a supported distribution' + {{- end }} - warn: - message: Unable to determine the distribution of Kubernetes + message: Unable to determine the distribution ``` - **nodeResources**: aggregate across nodes; common patterns include count, CPU, memory, and ephemeral storage @@ -166,55 +171,106 @@ Use the analyzer that matches the requirement, and enumerate `outcomes` with cle checkName: Node count outcomes: - fail: - when: 'count() < {{ .Values.cluster.minNodes }}' - message: This application
requires at least {{ .Values.cluster.minNodes }} nodes. + when: 'count() < {{ .Values.nodeResources.count.min }}' + message: Requires at least {{ .Values.nodeResources.count.min }} nodes - warn: - when: 'count() < {{ .Values.cluster.recommendedNodes }}' - message: This application recommends at least {{ .Values.cluster.recommendedNodes }} nodes. + when: 'count() < {{ .Values.nodeResources.count.recommended }}' + message: Recommended at least {{ .Values.nodeResources.count.recommended }} nodes - pass: - message: This cluster has enough nodes. + message: Cluster has sufficient nodes # Cluster CPU total nodeResources: checkName: Cluster CPU total outcomes: - fail: - when: 'sum(cpuCapacity) < {{ .Values.cluster.minCPU }}' - message: The cluster must contain at least {{ .Values.cluster.minCPU }} cores + when: 'sum(cpuCapacity) < {{ .Values.nodeResources.cpu.min }}' + message: Requires at least {{ .Values.nodeResources.cpu.min }} cores - pass: - message: There are at least {{ .Values.cluster.minCPU }} cores in the cluster + message: Cluster CPU capacity meets requirement # Per-node memory (Gi) nodeResources: - checkName: Per-node memory requirement + checkName: Per-node memory outcomes: - fail: - when: 'min(memoryCapacity) < {{ .Values.node.minMemoryGi }}Gi' - message: All nodes must have at least {{ .Values.node.minMemoryGi }} GiB of memory. + when: 'min(memoryCapacity) < {{ .Values.nodeResources.memory.minGi }}Gi' + message: All nodes must have at least {{ .Values.nodeResources.memory.minGi }} GiB - warn: - when: 'min(memoryCapacity) < {{ .Values.node.recommendedMemoryGi }}Gi' - message: All nodes are recommended to have at least {{ .Values.node.recommendedMemoryGi }} GiB of memory. + when: 'min(memoryCapacity) < {{ .Values.nodeResources.memory.recommendedGi }}Gi' + message: Recommended {{ .Values.nodeResources.memory.recommendedGi }} GiB per node - pass: - message: All nodes have at least {{ .Values.node.recommendedMemoryGi }} GiB of memory. 
+ message: All nodes meet recommended memory # Per-node ephemeral storage (Gi) nodeResources: - checkName: Per-node ephemeral storage requirement + checkName: Per-node ephemeral storage outcomes: - fail: - when: 'min(ephemeralStorageCapacity) < {{ .Values.node.minEphemeralGi }}Gi' - message: All nodes must have at least {{ .Values.node.minEphemeralGi }} GiB of ephemeral storage. + when: 'min(ephemeralStorageCapacity) < {{ .Values.nodeResources.ephemeral.minGi }}Gi' + message: All nodes must have at least {{ .Values.nodeResources.ephemeral.minGi }} GiB - warn: - when: 'min(ephemeralStorageCapacity) < {{ .Values.node.recommendedEphemeralGi }}Gi' - message: All nodes are recommended to have at least {{ .Values.node.recommendedEphemeralGi }} GiB of ephemeral storage. + when: 'min(ephemeralStorageCapacity) < {{ .Values.nodeResources.ephemeral.recommendedGi }}Gi' + message: Recommended {{ .Values.nodeResources.ephemeral.recommendedGi }} GiB per node - pass: - message: All nodes have at least {{ .Values.node.recommendedEphemeralGi }} GiB of ephemeral storage. 
+ message: All nodes meet recommended ephemeral storage + ``` + +- **deploymentStatus**: verify workload deployment status + ```yaml + deploymentStatus: + checkName: Deployment ready + namespace: '{{ .Values.workloads.deployments.namespace }}' + name: '{{ .Values.workloads.deployments.name }}' + outcomes: + - fail: + when: absent + message: Deployment not found + - fail: + when: '< {{ .Values.workloads.deployments.minReady }}' + message: Deployment has insufficient ready replicas + - pass: + when: '>= {{ .Values.workloads.deployments.minReady }}' + message: Deployment has sufficient ready replicas + ``` + +- **postgres/mysql/redis**: database connectivity (requires collectors) + ```yaml + # Collector section + - postgres: + collectorName: '{{ .Values.databases.postgres.collectorName }}' + uri: '{{ .Values.databases.postgres.uri }}' + + # Analyzer section + postgres: + checkName: Postgres checks + collectorName: '{{ .Values.databases.postgres.collectorName }}' + outcomes: + - fail: + message: Postgres checks failed + - pass: + message: Postgres checks passed + ``` + +- **textAnalyze/yamlCompare/jsonCompare**: analyze collected data + ```yaml + textAnalyze: + checkName: Text analyze + collectorName: 'cluster-resources' + fileName: '{{ .Values.textAnalyze.fileName }}' + regex: '{{ .Values.textAnalyze.regex }}' + outcomes: + - fail: + message: Pattern matched in files + - pass: + message: Pattern not found ``` ### Design conventions for maintainability - **Guard every optional analyzer** with a values toggle, so consumers can enable only what they need. +- **Always include collectors section** when analyzers require them (databases, http, registryImages, etc.). - **Use `checkName`** to provide a stable, user-facing label for each check. - **Prefer `fail` for unmet hard requirements**, `warn` for soft requirements, and `pass` with a direct, affirmative message. - **Attach `uri`** to outcomes when helpful for remediation. 
@@ -231,46 +287,60 @@ Provide a values schema that mirrors your toggles and thresholds. Example full a Typical structure: ```yaml -kubernetes: - enabled: false - minVersion: "1.22.0" - recommendedVersion: "1.29.0" - -storage: +clusterVersion: enabled: true - className: "default" + minVersion: "1.24.0" + recommendedVersion: "1.28.0" -cluster: - minNodes: 3 - recommendedNodes: 5 - minCPU: 4 - -node: - minMemoryGi: 8 - recommendedMemoryGi: 32 - minEphemeralGi: 40 - recommendedEphemeralGi: 100 - -ingress: +storageClass: enabled: true - type: "Contour" + className: "standard" -runtime: +crd: + enabled: true + name: "samples.mycompany.com" + +containerRuntime: enabled: true distribution: enabled: true + supported: ["eks", "gke", "aks", "kubeadm"] + unsupported: [] -nodeChecks: - enabled: true +nodeResources: count: enabled: true + min: 1 + recommended: 3 cpu: enabled: true + min: "4" memory: enabled: true + minGi: 8 + recommendedGi: 16 ephemeral: enabled: true + minGi: 20 + recommendedGi: 50 + +workloads: + deployments: + enabled: true + namespace: "default" + name: "example-deploy" + minReady: 1 + +databases: + postgres: + enabled: true + collectorName: "postgres" + uri: "postgres://user:pass@postgres:5432/db?sslmode=disable" + mysql: + enabled: true + collectorName: "mysql" + uri: "mysql://user:pass@tcp(mysql:3306)/db" ``` @@ -310,7 +380,7 @@ Notes: ### Authoring checklist - Add `docString` with Title, Requirement bullets, rationale, and links. -- Gate optional analyzers with `{{- if .Values..enabled }}`. +- Gate optional analyzers with `{{- if .Values.<analyzer>.enabled }}`. - Parameterize thresholds and names with `.Values` expressions. - Ensure all required values are present in your values files since there are no fallback defaults. - Use precise, user-actionable `message` text for each outcome; add `uri` where helpful.
@@ -326,49 +396,79 @@ kind: Preflight metadata: name: your-product-preflight spec: + {{- /* Determine if we need explicit collectors beyond always-on clusterResources */}} + {{- $needExtraCollectors := or .Values.databases.postgres.enabled .Values.http.enabled }} + + collectors: + # Always collect cluster resources to support core analyzers + - clusterResources: {} + + {{- if .Values.databases.postgres.enabled }} + - postgres: + collectorName: '{{ .Values.databases.postgres.collectorName }}' + uri: '{{ .Values.databases.postgres.uri }}' + {{- end }} + analyzers: - {{- if .Values.kubernetes.enabled }} + {{- if .Values.clusterVersion.enabled }} - docString: | Title: Kubernetes Control Plane Requirements Requirement: - Version: - - Minimum: {{ .Values.kubernetes.minVersion }} - - Recommended: {{ .Values.kubernetes.recommendedVersion }} - Running below minimum may remove GA APIs and critical fixes. + - Minimum: {{ .Values.clusterVersion.minVersion }} + - Recommended: {{ .Values.clusterVersion.recommendedVersion }} + - Docs: https://kubernetes.io + These version targets ensure required APIs and defaults are available and patched. clusterVersion: checkName: Kubernetes version outcomes: - fail: - when: '< {{ .Values.kubernetes.minVersion }}' - message: Requires Kubernetes >= {{ .Values.kubernetes.minVersion }}. + when: '< {{ .Values.clusterVersion.minVersion }}' + message: Requires at least Kubernetes {{ .Values.clusterVersion.minVersion }}. - warn: - when: '< {{ .Values.kubernetes.recommendedVersion }}' - message: Recommended {{ .Values.kubernetes.recommendedVersion }} or later. + when: '< {{ .Values.clusterVersion.recommendedVersion }}' + message: Recommended to use Kubernetes {{ .Values.clusterVersion.recommendedVersion }} or later. - pass: - when: '>= {{ .Values.kubernetes.recommendedVersion }}' - message: Meets recommended and required versions. + when: '>= {{ .Values.clusterVersion.recommendedVersion }}' + message: Meets recommended and required Kubernetes versions. 
{{- end }} - {{- if .Values.storage.enabled }} + {{- if .Values.storageClass.enabled }} - docString: | Title: Default StorageClass Requirements Requirement: - - A StorageClass named "{{ .Values.storage.className }}" must exist + - A StorageClass named "{{ .Values.storageClass.className }}" must exist + A default StorageClass enables dynamic PVC provisioning without manual intervention. storageClass: checkName: Default StorageClass - storageClassName: '{{ .Values.storage.className }}' + storageClassName: '{{ .Values.storageClass.className }}' outcomes: - fail: message: Default StorageClass not found - pass: message: Default StorageClass present {{- end }} + + {{- if .Values.databases.postgres.enabled }} + - docString: | + Title: Postgres Connectivity + Requirement: + - Postgres checks collected by '{{ .Values.databases.postgres.collectorName }}' must pass + postgres: + checkName: Postgres checks + collectorName: '{{ .Values.databases.postgres.collectorName }}' + outcomes: + - fail: + message: Postgres checks failed + - pass: + message: Postgres checks passed + {{- end }} ``` ### References -- Example template in this repo: `v1beta3.yaml` -- Values examples: `values-v1beta3-full.yaml`, `values-v1beta3-minimal.yaml` +- Example template in this repo: `v1beta3-all-analyzers.yaml` +- Values example: `values-v1beta3-all-analyzers.yaml` diff --git a/go.mod b/go.mod index 3dfe260c..b910a1d2 100644 --- a/go.mod +++ b/go.mod @@ -250,7 +250,7 @@ require ( github.com/opencontainers/selinux v1.12.0 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.62.0 // indirect diff --git a/pkg/preflight/read_specs.go 
b/pkg/preflight/read_specs.go index 17aa1c4e..0167e048 100644 --- a/pkg/preflight/read_specs.go +++ b/pkg/preflight/read_specs.go @@ -150,6 +150,10 @@ func preprocessV1Beta3Specs(args []string) ([]string, []string, error) { if strings.Contains(contentStr, "apiVersion: troubleshoot.sh/v1beta3") && strings.Contains(contentStr, "{{") && strings.Contains(contentStr, "}}") { // It's a v1beta3 template, render it + // Seed default-false for referenced boolean flags and create parent maps for any + // .Values.* paths so missing values behave as empty and blocks can be omitted. + SeedDefaultBooleans(contentStr, values) + SeedParentMapsForValueRefs(contentStr, values) rendered, err := RenderWithHelmTemplate(contentStr, values) if err != nil { return nil, nil, errors.Wrapf(err, "failed to render v1beta3 template %s", arg) @@ -176,6 +180,9 @@ func preprocessV1Beta3Specs(args []string) ([]string, []string, error) { contentStr := string(content) if strings.Contains(contentStr, "{{") && strings.Contains(contentStr, "}}") { // It's a v1beta3 template, render it + // Seed default-false for referenced boolean flags and create parent maps for .Values.* paths + SeedDefaultBooleans(contentStr, values) + SeedParentMapsForValueRefs(contentStr, values) rendered, err := RenderWithHelmTemplate(contentStr, values) if err != nil { return nil, nil, errors.Wrapf(err, "failed to render v1beta3 template %s", arg) diff --git a/pkg/preflight/template.go b/pkg/preflight/template.go index e11c97e5..95579d80 100644 --- a/pkg/preflight/template.go +++ b/pkg/preflight/template.go @@ -47,6 +47,12 @@ func RunTemplate(templateFile string, valuesFiles []string, setValues []string, apiVersion := detectAPIVersion(string(templateContent)) var rendered string if strings.HasSuffix(apiVersion, "/v1beta3") || apiVersion == "v1beta3" { + // For v1beta3 templates, pre-seed default false values for any referenced + // .Values.*.(enabled|create) booleans to avoid nil pointer dereferences. 
+ SeedDefaultBooleans(string(templateContent), values) + // Also ensure parent maps exist for all .Values. references so nested lookups + // don't panic when optional maps are omitted from values files. + SeedParentMapsForValueRefs(string(templateContent), values) // Helm for v1beta3 rendered, err = RenderWithHelmTemplate(string(templateContent), values) if err != nil { diff --git a/sample-preflight-templated.yaml b/sample-preflight-templated.yaml deleted file mode 100644 index 6c8daba9..00000000 --- a/sample-preflight-templated.yaml +++ /dev/null @@ -1,227 +0,0 @@ -apiVersion: troubleshoot.sh/v1beta3 -kind: Preflight -metadata: - name: templated-requirements-example -spec: - analyzers: - - docString: | - Title: Kubernetes Control Plane Requirements - Requirement: - - Version: - - Minimum: {{ .Values.kubernetes.minVersion | default "v1.22.0" }} - - Supported: v1.22.x – v1.29.x (stable releases only) - - APIs required (must be enabled, GA): - - admissionregistration.k8s.io/v1 - - apiextensions.k8s.io/v1 - - apps/v1 - - batch/v1 - - networking.k8s.io/v1 - - policy/v1 - - rbac.authorization.k8s.io/v1 - - storage.k8s.io/v1 - clusterVersion: - checkName: Kubernetes version - outcomes: - - fail: - when: '< {{ .Values.kubernetes.minVersion | default "1.22.0" }}' - message: Requires Kubernetes >= {{ .Values.kubernetes.minVersion | default "1.22.0" }} - - pass: - when: '>= {{ .Values.kubernetes.minVersion | default "1.22.0" }}' - message: Kubernetes version is supported - - - docString: | - Title: Container Runtime Requirements - Requirement: - - Runtime: containerd (CRI) version ≥ 1.5 - - Kubelet cgroup driver: systemd - - CRI socket path: /run/containerd/containerd.sock - - Security hardening: - - Seccomp: enabled (default profiles permitted) - - AppArmor: enabled where supported - containerRuntime: - outcomes: - - pass: - when: '== containerd' - message: containerd runtime detected - - fail: - message: Unsupported container runtime; containerd required - - {{- if 
.Values.storage.enabled }} - - docString: | - Title: Default StorageClass Requirements - Requirement: - - A StorageClass named "{{ .Values.storage.className | default "default" }}" must exist and be annotated as cluster default - - AccessMode: ReadWriteOnce (RWO) required (RWX optional) - - VolumeBindingMode: WaitForFirstConsumer preferred - - allowVolumeExpansion: true recommended - - Baseline performance per volume: - - Minimum: {{ .Values.storage.minIOPS | default "1000" }} write IOPS, {{ .Values.storage.minReadIOPS | default "3000" }} read IOPS - - Recommended: 3000+ write IOPS, 6000+ read IOPS, 250+ MB/s throughput - - Encryption at rest: {{ if .Values.storage.encryption }}enabled{{ else }}optional{{ end }} - storageClass: - checkName: Default StorageClass - storageClassName: '{{ .Values.storage.className | default "default" }}' - outcomes: - - fail: - message: Default StorageClass not found - - pass: - message: Default StorageClass present - {{- end }} - - - docString: | - Title: Cluster Size and Aggregate Capacity - Requirement: - - Node count: Minimum {{ .Values.cluster.minNodes | default "3" }} nodes (HA baseline), Recommended {{ .Values.cluster.recommendedNodes | default "5" }} nodes - - Total CPU: Minimum {{ .Values.cluster.minCPU | default "4" }} vCPU, Recommended 8+ vCPU - - Total Memory: Minimum {{ .Values.cluster.minMemory | default "16" }} GiB, Recommended 32+ GiB - - Control plane sizing: - - Managed control planes supported (EKS/GKE/AKS) - - Self-managed: 3 control-plane nodes recommended - nodeResources: - checkName: Cluster capacity - outcomes: - - fail: - when: 'count() < {{ .Values.cluster.minNodes | default "3" }}' - message: Requires at least {{ .Values.cluster.minNodes | default "3" }} nodes - - warn: - when: 'count() < {{ .Values.cluster.recommendedNodes | default "5" }}' - message: {{ .Values.cluster.recommendedNodes | default "5" }} nodes recommended for headroom - - pass: - message: Node count is sufficient - nodeResources: - checkName: 
Cluster CPU total - outcomes: - - fail: - when: 'sum(cpuCapacity) < {{ .Values.cluster.minCPU | default "4" }}' - message: Requires at least {{ .Values.cluster.minCPU | default "4" }} vCPU total - - pass: - message: CPU total is sufficient - nodeResources: - checkName: Cluster memory total - outcomes: - - fail: - when: 'sum(memoryCapacity) < {{ .Values.cluster.minMemory | default "16" }}Gi' - message: Requires at least {{ .Values.cluster.minMemory | default "16" }} GiB total memory - - pass: - message: Memory total is sufficient - - {{- if .Values.postgres.enabled }} - - docString: | - Title: Postgres Platform Requirements - Requirement: - - Database: PostgreSQL {{ .Values.postgres.version | default "14+" }} - - Connection: {{ .Values.postgres.uri | default "postgresql://postgres@postgres:5432/postgres" }} - - StorageClass: {{ .Values.postgres.storageClass | default "default" }} with: - - Latency p99 ≤ 5 ms - - ≥ 3000 read IOPS, ≥ 1000 write IOPS - - allowVolumeExpansion: true - - Memory per node: Minimum {{ .Values.postgres.minMemory | default "8" }} GiB; Recommended 32 GiB - - CPU per node: Minimum {{ .Values.postgres.minCPU | default "2" }} vCPU; Recommended 4+ vCPU - storageClass: - checkName: Postgres storage class - storageClassName: '{{ .Values.postgres.storageClass | default "default" }}' - outcomes: - - fail: - message: Postgres StorageClass not found - - pass: - message: Postgres StorageClass present - nodeResources: - checkName: Postgres memory guidance - outcomes: - - fail: - when: 'min(memoryCapacity) < {{ .Values.postgres.minMemory | default "8" }}Gi' - message: All nodes must have at least {{ .Values.postgres.minMemory | default "8" }} GiB of memory for Postgres - - warn: - when: 'min(memoryCapacity) < 32Gi' - message: Nodes are recommended to have at least 32 GiB of memory for Postgres - - pass: - message: Nodes have sufficient memory for Postgres - {{- end }} - - {{- if .Values.redis.enabled }} - - docString: | - Title: Redis Platform Requirements 
- Requirement: - - Database: Redis {{ .Values.redis.version | default "6.2+" }} - - Connection: {{ .Values.redis.uri | default "redis://default:@redis:6379" }} - - Ephemeral storage per node: Minimum 40 GiB; Recommended 100 GiB - - If persistence enabled: SSD-backed StorageClass with low-latency reads/writes - - Memory per node: Baseline {{ .Values.redis.minMemory | default "4" }} GiB; Recommended sized to dataset with 30% headroom - nodeResources: - checkName: Redis ephemeral storage - outcomes: - - fail: - when: 'min(ephemeralStorageCapacity) < 40Gi' - message: Each node must have at least 40 GiB ephemeral storage for Redis - - warn: - when: 'min(ephemeralStorageCapacity) < 100Gi' - message: 100 GiB per node recommended for Redis - - pass: - message: Nodes have sufficient ephemeral storage for Redis - {{- end }} - - {{- if .Values.ingress.enabled }} - - docString: | - Title: Required CRDs and Ingress Capabilities - Requirement: - - Ingress Controller: {{ .Values.ingress.type | default "Contour" }} - {{- if eq (.Values.ingress.type | default "Contour") "Contour" }} - - CRD must be present: - - Group: heptio.com - - Kind: IngressRoute - - Version: v1beta1 or later served version - {{- end }} - - Ingress capability: - - Layer-7 HTTP/HTTPS routing with TLS termination supported - - Wildcard certificates permitted (optional) - {{- if .Values.ingress.customDomain }} - - Custom domain: {{ .Values.ingress.customDomain }} - {{- end }} - {{- if eq (.Values.ingress.type | default "Contour") "Contour" }} - customResourceDefinition: - checkName: Contour IngressRoute CRD - customResourceDefinitionName: ingressroutes.contour.heptio.com - outcomes: - - fail: - message: Contour IngressRoute CRD not found; required for ingress routing - - pass: - message: Contour IngressRoute CRD present - {{- end }} - {{- end }} - - {{- if .Values.monitoring.enabled }} - - docString: | - Title: Monitoring and Observability Requirements - Requirement: - - Monitoring: {{ .Values.monitoring.type | 
default "Prometheus" }} - - Metrics retention: {{ .Values.monitoring.retention | default "15 days" }} - - Storage required: {{ .Values.monitoring.storageSize | default "50Gi" }} - - Components: - {{- if .Values.monitoring.prometheus }} - - Prometheus for metrics collection - {{- end }} - {{- if .Values.monitoring.grafana }} - - Grafana for visualization - {{- end }} - {{- if .Values.monitoring.alertmanager }} - - AlertManager for alerting - {{- end }} - storageClass: - checkName: Monitoring storage - storageClassName: '{{ .Values.monitoring.storageClass | default "default" }}' - outcomes: - - fail: - message: Monitoring StorageClass not found - - pass: - message: Monitoring StorageClass present - {{- end }} - - - docString: | - Title: OS and Kernel Requirements - Requirement: - - Nodes: Linux x86_64 (amd64) or arm64 on supported distributions - - Supported OS: {{ range $i, $v := .Values.os.supported }}{{ if $i }}, {{ end }}{{ $v }}{{ end }} - - Kernel: ≥ {{ .Values.os.minKernel | default "5.4" }} with cgroups v1 or v2 (v2 preferred) - - Time sync: chrony or systemd-timesyncd active; clock drift < 500 ms - - Filesystems: ext4 or xfs for container layers and volumes - - SELinux/AppArmor: enforcing/permissive accepted \ No newline at end of file diff --git a/sample-troubleshoot.yaml b/sample-troubleshoot.yaml deleted file mode 100644 index f6020ced..00000000 --- a/sample-troubleshoot.yaml +++ /dev/null @@ -1,54 +0,0 @@ -apiVersion: troubleshoot.sh/v1beta2 -kind: SupportBundle -metadata: - name: my-application-name -spec: - collectors: - - clusterInfo: - collectorName: my-cluster-info - - clusterResources: - collectorName: my-cluster-resources - - http: - name: healthz - get: - url: http://api:3000/healthz - - data: - collectorName: my-password-dump - name: data - data: | - my super secret password is abc123 - another redaction will go here - - data: - collectorName: yaml-data.yaml - name: data - data: | - abc: - xyz: - - hello - - world: "these are removed" - bcd: - 
abc: - xyz: - these - remain ---- -apiVersion: troubleshoot.sh/v1beta2 -kind: Redactor -metadata: - name: my-application-name -spec: - redactors: - - name: replace password # names are not used internally, but are useful for recordkeeping - fileSelector: - file: data/my-password-dump # this targets a single file - removals: - values: - - abc123 # this is a very good password, and I don't want it to be exposed - - name: all files # as no file is specified, this redactor will run against all files - removals: - regex: - - redactor: (another)(?P<mask>.*)(here) # this will replace anything between the strings `another` and `here` with `***HIDDEN***` - - selector: 'S3_ENDPOINT' # remove the value in lines following those that contain the string S3_ENDPOINT - redactor: '("value": ").*(")' - yamlPath: - - "abc.xyz.*" # redact all items in the array at key xyz within key abc in yaml documents diff --git a/v1beta3-all-analyzers.yaml b/v1beta3-all-analyzers.yaml new file mode 100644 index 00000000..177e9292 --- /dev/null +++ b/v1beta3-all-analyzers.yaml @@ -0,0 +1,697 @@ +apiVersion: troubleshoot.sh/v1beta3 +kind: Preflight +metadata: + name: all-analyzers +spec: + {{- /* Determine if we need explicit collectors beyond always-on clusterResources */}} + {{- $needExtraCollectors := or (or (or .Values.databases.postgres.enabled .Values.databases.mssql.enabled) (or .Values.databases.mysql.enabled .Values.databases.redis.enabled)) (or (or (or .Values.registryImages.enabled .Values.http.enabled) (or .Values.nodeMetrics.enabled (or .Values.sysctl.enabled .Values.certificates.enabled))) (or (or .Values.goldpinger.enabled .Values.cephStatus.enabled) .Values.longhorn.enabled)) }} + + collectors: + # Always collect cluster resources to support core analyzers (deployments, secrets, pods, events, etc.)
+ - clusterResources: {} + + {{- if .Values.databases.postgres.enabled }} + - postgres: + collectorName: '{{ .Values.databases.postgres.collectorName }}' + uri: '{{ .Values.databases.postgres.uri }}' + {{- if .Values.databases.postgres.tls }} + tls: + skipVerify: {{ .Values.databases.postgres.tls.skipVerify | default false }} + {{- if .Values.databases.postgres.tls.secret }} + secret: + name: '{{ .Values.databases.postgres.tls.secret.name }}' + namespace: '{{ .Values.databases.postgres.tls.secret.namespace }}' + {{- end }} + {{- end }} + {{- end }} + + {{- if .Values.databases.mssql.enabled }} + - mssql: + collectorName: '{{ .Values.databases.mssql.collectorName }}' + uri: '{{ .Values.databases.mssql.uri }}' + {{- end }} + + {{- if .Values.databases.mysql.enabled }} + - mysql: + collectorName: '{{ .Values.databases.mysql.collectorName }}' + uri: '{{ .Values.databases.mysql.uri }}' + {{- end }} + + {{- if .Values.databases.redis.enabled }} + - redis: + collectorName: '{{ .Values.databases.redis.collectorName }}' + uri: '{{ .Values.databases.redis.uri }}' + {{- end }} + + {{- if .Values.registryImages.enabled }} + - registryImages: + collectorName: '{{ .Values.registryImages.collectorName }}' + namespace: '{{ .Values.registryImages.namespace }}' + {{- if .Values.registryImages.imagePullSecret }} + imagePullSecret: + name: '{{ .Values.registryImages.imagePullSecret.name }}' + {{- if .Values.registryImages.imagePullSecret.data }} + data: + {{- range $k, $v := .Values.registryImages.imagePullSecret.data }} + {{ $k }}: '{{ $v }}' + {{- end }} + {{- end }} + {{- end }} + images: + {{- range .Values.registryImages.images }} + - '{{ . 
}}' + {{- end }} + {{- end }} + + {{- if .Values.http.enabled }} + - http: + collectorName: '{{ .Values.http.collectorName }}' + {{- if .Values.http.get }} + get: + url: '{{ .Values.http.get.url }}' + {{- if .Values.http.get.timeout }} + timeout: '{{ .Values.http.get.timeout }}' + {{- end }} + {{- if .Values.http.get.insecureSkipVerify }} + insecureSkipVerify: {{ .Values.http.get.insecureSkipVerify }} + {{- end }} + {{- if .Values.http.get.headers }} + headers: + {{- range $k, $v := .Values.http.get.headers }} + {{ $k }}: '{{ $v }}' + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.http.post }} + post: + url: '{{ .Values.http.post.url }}' + {{- if .Values.http.post.timeout }} + timeout: '{{ .Values.http.post.timeout }}' + {{- end }} + {{- if .Values.http.post.insecureSkipVerify }} + insecureSkipVerify: {{ .Values.http.post.insecureSkipVerify }} + {{- end }} + {{- if .Values.http.post.headers }} + headers: + {{- range $k, $v := .Values.http.post.headers }} + {{ $k }}: '{{ $v }}' + {{- end }} + {{- end }} + {{- if .Values.http.post.body }} + body: '{{ .Values.http.post.body }}' + {{- end }} + {{- end }} + {{- end }} + + {{- if .Values.nodeMetrics.enabled }} + - nodeMetrics: + collectorName: '{{ .Values.nodeMetrics.collectorName }}' + {{- if .Values.nodeMetrics.nodeNames }} + nodeNames: + {{- range .Values.nodeMetrics.nodeNames }} + - '{{ . }}' + {{- end }} + {{- end }} + {{- if .Values.nodeMetrics.selector }} + selector: + {{- range .Values.nodeMetrics.selector }} + - '{{ . 
}}' + {{- end }} + {{- end }} + {{- end }} + + {{- if .Values.sysctl.enabled }} + - sysctl: + collectorName: 'sysctl' + namespace: '{{ .Values.sysctl.namespace }}' + image: '{{ .Values.sysctl.image }}' + {{- if .Values.sysctl.imagePullPolicy }} + imagePullPolicy: '{{ .Values.sysctl.imagePullPolicy }}' + {{- end }} + {{- end }} + + {{- if .Values.certificates.enabled }} + - certificates: + collectorName: 'certs' + {{- if .Values.certificates.secrets }} + secrets: + {{- range .Values.certificates.secrets }} + - name: '{{ .name }}' + namespaces: + {{- range .namespaces }} + - '{{ . }}' + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.certificates.configMaps }} + configMaps: + {{- range .Values.certificates.configMaps }} + - name: '{{ .name }}' + namespaces: + {{- range .namespaces }} + - '{{ . }}' + {{- end }} + {{- end }} + {{- end }} + {{- end }} + + {{- if .Values.longhorn.enabled }} + - longhorn: + collectorName: 'longhorn' + namespace: '{{ .Values.longhorn.namespace }}' + {{- if .Values.longhorn.timeout }} + timeout: '{{ .Values.longhorn.timeout }}' + {{- end }} + {{- end }} + + {{- if .Values.cephStatus.enabled }} + - ceph: + collectorName: 'ceph' + namespace: '{{ .Values.cephStatus.namespace }}' + {{- if .Values.cephStatus.timeout }} + timeout: '{{ .Values.cephStatus.timeout }}' + {{- end }} + {{- end }} + + {{- if .Values.goldpinger.enabled }} + - goldpinger: + collectorName: '{{ .Values.goldpinger.collectorName }}' + namespace: '{{ .Values.goldpinger.namespace }}' + {{- if .Values.goldpinger.collectDelay }} + collectDelay: '{{ .Values.goldpinger.collectDelay }}' + {{- end }} + {{- if .Values.goldpinger.podLaunch }} + podLaunchOptions: + {{- if .Values.goldpinger.podLaunch.namespace }} + namespace: '{{ .Values.goldpinger.podLaunch.namespace }}' + {{- end }} + {{- if .Values.goldpinger.podLaunch.image }} + image: '{{ .Values.goldpinger.podLaunch.image }}' + {{- end }} + {{- if .Values.goldpinger.podLaunch.imagePullSecret }} + imagePullSecret: + name: '{{ 
.Values.goldpinger.podLaunch.imagePullSecret.name }}' + {{- end }} + {{- if .Values.goldpinger.podLaunch.serviceAccountName }} + serviceAccountName: '{{ .Values.goldpinger.podLaunch.serviceAccountName }}' + {{- end }} + {{- end }} + {{- end }} + + analyzers: + {{- if .Values.clusterVersion.enabled }} + - clusterVersion: + checkName: Kubernetes version + outcomes: + - fail: + when: '< {{ .Values.clusterVersion.minVersion }}' + message: Requires at least Kubernetes {{ .Values.clusterVersion.minVersion }}. + - warn: + when: '< {{ .Values.clusterVersion.recommendedVersion }}' + message: Recommended to use Kubernetes {{ .Values.clusterVersion.recommendedVersion }} or later. + - pass: + when: '>= {{ .Values.clusterVersion.recommendedVersion }}' + message: Meets recommended and required Kubernetes versions. + {{- end }} + + {{- if .Values.storageClass.enabled }} + - storageClass: + checkName: Default StorageClass + storageClassName: '{{ .Values.storageClass.className }}' + outcomes: + - fail: + message: Default StorageClass not found + - pass: + message: Default StorageClass present + {{- end }} + + {{- if .Values.crd.enabled }} + - customResourceDefinition: + checkName: Required CRD + customResourceDefinitionName: '{{ .Values.crd.name }}' + outcomes: + - fail: + message: Required CRD not found + - pass: + message: Required CRD present + {{- end }} + + {{- if .Values.ingress.enabled }} + - ingress: + checkName: Ingress exists + namespace: '{{ .Values.ingress.namespace }}' + ingressName: '{{ .Values.ingress.name }}' + outcomes: + - fail: + message: Expected ingress not found + - pass: + message: Expected ingress present + {{- end }} + + {{- if .Values.secret.enabled }} + - secret: + checkName: Required secret + namespace: '{{ .Values.secret.namespace }}' + secretName: '{{ .Values.secret.name }}' + {{- if .Values.secret.key }} + key: '{{ .Values.secret.key }}' + {{- end }} + outcomes: + - fail: + message: Required secret not found + - pass: + message: Required secret 
present + {{- end }} + + {{- if .Values.configMap.enabled }} + - configMap: + checkName: Required ConfigMap + namespace: '{{ .Values.configMap.namespace }}' + configMapName: '{{ .Values.configMap.name }}' + {{- if .Values.configMap.key }} + key: '{{ .Values.configMap.key }}' + {{- end }} + outcomes: + - fail: + message: Required ConfigMap not found + - pass: + message: Required ConfigMap present + {{- end }} + + {{- if .Values.imagePullSecret.enabled }} + - imagePullSecret: + checkName: Registry credentials + registryName: '{{ .Values.imagePullSecret.registry }}' + outcomes: + - fail: + message: Cannot pull from registry; credentials missing + - pass: + message: Found credentials for registry + {{- end }} + + {{- if .Values.workloads.deployments.enabled }} + - deploymentStatus: + checkName: Deployment ready + namespace: '{{ .Values.workloads.deployments.namespace }}' + name: '{{ .Values.workloads.deployments.name }}' + outcomes: + - fail: + when: absent + message: Deployment not found + - fail: + when: '< {{ .Values.workloads.deployments.minReady }}' + message: Deployment has insufficient ready replicas + - pass: + when: '>= {{ .Values.workloads.deployments.minReady }}' + message: Deployment has sufficient ready replicas + {{- end }} + + {{- if .Values.workloads.statefulsets.enabled }} + - statefulsetStatus: + checkName: StatefulSet ready + namespace: '{{ .Values.workloads.statefulsets.namespace }}' + name: '{{ .Values.workloads.statefulsets.name }}' + outcomes: + - fail: + when: absent + message: StatefulSet not found + - fail: + when: '< {{ .Values.workloads.statefulsets.minReady }}' + message: StatefulSet has insufficient ready replicas + - pass: + when: '>= {{ .Values.workloads.statefulsets.minReady }}' + message: StatefulSet has sufficient ready replicas + {{- end }} + + {{- if .Values.workloads.jobs.enabled }} + - jobStatus: + checkName: Job completed + namespace: '{{ .Values.workloads.jobs.namespace }}' + name: '{{ .Values.workloads.jobs.name }}' + outcomes: 
+ - fail: + when: absent + message: Job not found + - fail: + when: '= 0' + message: Job has no successful completions + - pass: + when: '> 0' + message: Job completed successfully + {{- end }} + + {{- if .Values.workloads.replicasets.enabled }} + - replicasetStatus: + checkName: ReplicaSet ready + namespace: '{{ .Values.workloads.replicasets.namespace }}' + name: '{{ .Values.workloads.replicasets.name }}' + outcomes: + - fail: + message: ReplicaSet is not ready + - pass: + when: '>= {{ .Values.workloads.replicasets.minReady }}' + message: ReplicaSet has sufficient ready replicas + {{- end }} + + {{- if .Values.clusterPodStatuses.enabled }} + - clusterPodStatuses: + checkName: Pod statuses + namespaces: {{ toYaml .Values.clusterPodStatuses.namespaces | nindent 8 }} + outcomes: + - warn: + message: Some pods are not ready + - pass: + message: All pods are ready + {{- end }} + + {{- if .Values.clusterContainerStatuses.enabled }} + - clusterContainerStatuses: + checkName: Container restarts + namespaces: {{ toYaml .Values.clusterContainerStatuses.namespaces | nindent 8 }} + restartCount: {{ .Values.clusterContainerStatuses.restartCount }} + outcomes: + - warn: + message: One or more containers exceed restart threshold + - pass: + message: Container restarts are within thresholds + {{- end }} + + {{- if .Values.containerRuntime.enabled }} + - containerRuntime: + checkName: Runtime must be containerd + outcomes: + - pass: + when: '== containerd' + message: containerd runtime detected + - fail: + message: Unsupported container runtime; containerd required + {{- end }} + + {{- if .Values.distribution.enabled }} + - distribution: + checkName: Supported distribution + outcomes: + {{- range $d := .Values.distribution.unsupported }} + - fail: + when: '== {{ $d }}' + message: '{{ $d }} is not supported' + {{- end }} + {{- range $d := .Values.distribution.supported }} + - pass: + when: '== {{ $d }}' + message: '{{ $d }} is a supported distribution' + {{- end }} + - warn: + 
message: Unable to determine the distribution + {{- end }} + + {{- if .Values.nodeResources.count.enabled }} + - nodeResources: + checkName: Node count + outcomes: + - fail: + when: 'count() < {{ .Values.nodeResources.count.min }}' + message: Requires at least {{ .Values.nodeResources.count.min }} nodes + - warn: + when: 'count() < {{ .Values.nodeResources.count.recommended }}' + message: Recommended at least {{ .Values.nodeResources.count.recommended }} nodes + - pass: + message: Cluster has sufficient nodes + {{- end }} + + {{- if .Values.nodeResources.cpu.enabled }} + - nodeResources: + checkName: Cluster CPU total + outcomes: + - fail: + when: 'sum(cpuCapacity) < {{ .Values.nodeResources.cpu.min }}' + message: Requires at least {{ .Values.nodeResources.cpu.min }} cores + - pass: + message: Cluster CPU capacity meets requirement + {{- end }} + + {{- if .Values.nodeResources.memory.enabled }} + - nodeResources: + checkName: Per-node memory + outcomes: + - fail: + when: 'min(memoryCapacity) < {{ .Values.nodeResources.memory.minGi }}Gi' + message: All nodes must have at least {{ .Values.nodeResources.memory.minGi }} GiB + - warn: + when: 'min(memoryCapacity) < {{ .Values.nodeResources.memory.recommendedGi }}Gi' + message: Recommended {{ .Values.nodeResources.memory.recommendedGi }} GiB per node + - pass: + message: All nodes meet recommended memory + {{- end }} + + {{- if .Values.nodeResources.ephemeral.enabled }} + - nodeResources: + checkName: Per-node ephemeral storage + outcomes: + - fail: + when: 'min(ephemeralStorageCapacity) < {{ .Values.nodeResources.ephemeral.minGi }}Gi' + message: All nodes must have at least {{ .Values.nodeResources.ephemeral.minGi }} GiB + - warn: + when: 'min(ephemeralStorageCapacity) < {{ .Values.nodeResources.ephemeral.recommendedGi }}Gi' + message: Recommended {{ .Values.nodeResources.ephemeral.recommendedGi }} GiB per node + - pass: + message: All nodes meet recommended ephemeral storage + {{- end }} + + {{- if 
.Values.textAnalyze.enabled }} + - textAnalyze: + checkName: Text analyze + collectorName: 'cluster-resources' + fileName: '{{ .Values.textAnalyze.fileName }}' + regex: '{{ .Values.textAnalyze.regex }}' + ignoreIfNoFiles: true + outcomes: + - fail: + message: Pattern matched in files + - pass: + message: Pattern not found + {{- end }} + + {{- if .Values.yamlCompare.enabled }} + - yamlCompare: + checkName: YAML compare + collectorName: 'cluster-resources' + fileName: '{{ .Values.yamlCompare.fileName }}' + path: '{{ .Values.yamlCompare.path }}' + value: '{{ .Values.yamlCompare.value }}' + outcomes: + - fail: + message: YAML value does not match expected + - pass: + message: YAML value matches expected + {{- end }} + + {{- if .Values.jsonCompare.enabled }} + - jsonCompare: + checkName: JSON compare + collectorName: 'cluster-resources' + fileName: '{{ .Values.jsonCompare.fileName }}' + jsonPath: '{{ .Values.jsonCompare.jsonPath }}' + value: '{{ .Values.jsonCompare.value }}' + outcomes: + - fail: + message: JSON value does not match expected + - pass: + message: JSON value matches expected + {{- end }} + + {{- if .Values.databases.postgres.enabled }} + - postgres: + checkName: Postgres checks + collectorName: '{{ .Values.databases.postgres.collectorName }}' + outcomes: + - fail: + message: Postgres checks failed + - pass: + message: Postgres checks passed + {{- end }} + + {{- if .Values.databases.mssql.enabled }} + - mssql: + checkName: MSSQL checks + collectorName: '{{ .Values.databases.mssql.collectorName }}' + outcomes: + - fail: + message: MSSQL checks failed + - pass: + message: MSSQL checks passed + {{- end }} + + {{- if .Values.databases.mysql.enabled }} + - mysql: + checkName: MySQL checks + collectorName: '{{ .Values.databases.mysql.collectorName }}' + outcomes: + - fail: + message: MySQL checks failed + - pass: + message: MySQL checks passed + {{- end }} + + {{- if .Values.databases.redis.enabled }} + - redis: + checkName: Redis checks + collectorName: '{{ 
.Values.databases.redis.collectorName }}' + outcomes: + - fail: + message: Redis checks failed + - pass: + message: Redis checks passed + {{- end }} + + {{- if .Values.cephStatus.enabled }} + - cephStatus: + checkName: Ceph cluster health + namespace: '{{ .Values.cephStatus.namespace }}' + outcomes: + - fail: + message: Ceph is not healthy + - pass: + message: Ceph is healthy + {{- end }} + + {{- if .Values.velero.enabled }} + - velero: + checkName: Velero installed + {{- end }} + + {{- if .Values.longhorn.enabled }} + - longhorn: + checkName: Longhorn health + namespace: '{{ .Values.longhorn.namespace }}' + outcomes: + - fail: + message: Longhorn is not healthy + - pass: + message: Longhorn is healthy + {{- end }} + + {{- if .Values.registryImages.enabled }} + - registryImages: + checkName: Registry image availability + collectorName: '{{ .Values.registryImages.collectorName }}' + outcomes: + - fail: + message: One or more images are not available + - pass: + message: All images are available + {{- end }} + + {{- if .Values.weaveReport.enabled }} + - weaveReport: + checkName: Weave report + reportFileGlob: '{{ .Values.weaveReport.reportFileGlob }}' + {{- end }} + + {{- if .Values.sysctl.enabled }} + - sysctl: + checkName: Sysctl settings + outcomes: + - warn: + message: One or more sysctl values do not meet recommendations + - pass: + message: Sysctl values meet recommendations + {{- end }} + + {{- if .Values.clusterResource.enabled }} + - clusterResource: + checkName: Cluster resource value + kind: '{{ .Values.clusterResource.kind }}' + clusterScoped: {{ .Values.clusterResource.clusterScoped }} + {{- if not .Values.clusterResource.clusterScoped }} + namespace: '{{ .Values.clusterResource.namespace }}' + {{- end }} + name: '{{ .Values.clusterResource.name }}' + yamlPath: '{{ .Values.clusterResource.yamlPath }}' + {{- if .Values.clusterResource.expectedValue }} + expectedValue: '{{ .Values.clusterResource.expectedValue }}' + {{- end }} + {{- if 
.Values.clusterResource.regex }} + regex: '{{ .Values.clusterResource.regex }}' + {{- end }} + outcomes: + - fail: + message: Cluster resource field does not match expected value + - pass: + message: Cluster resource field matches expected value + {{- end }} + + {{- if .Values.certificates.enabled }} + - certificates: + checkName: Certificates validity + outcomes: + - warn: + message: One or more certificates may be invalid or expiring soon + - pass: + message: Certificates are valid + {{- end }} + + {{- if .Values.goldpinger.enabled }} + - goldpinger: + checkName: Goldpinger report + collectorName: '{{ .Values.goldpinger.collectorName }}' + filePath: '{{ .Values.goldpinger.filePath }}' + outcomes: + - fail: + message: Goldpinger indicates network issues + - pass: + message: Goldpinger indicates healthy networking + {{- end }} + + {{- if .Values.event.enabled }} + - event: + checkName: Events + collectorName: '{{ .Values.event.collectorName }}' + namespace: '{{ .Values.event.namespace }}' + {{- if .Values.event.kind }} + kind: '{{ .Values.event.kind }}' + {{- end }} + reason: '{{ .Values.event.reason }}' + {{- if .Values.event.regex }} + regex: '{{ .Values.event.regex }}' + {{- end }} + outcomes: + - fail: + when: 'true' + message: Critical events detected + - pass: + when: 'false' + message: No critical events detected + {{- end }} + + {{- if .Values.nodeMetrics.enabled }} + - nodeMetrics: + checkName: Node metrics thresholds + collectorName: '{{ .Values.nodeMetrics.collectorName }}' + {{- if .Values.nodeMetrics.filters.pvc.nameRegex }} + filters: + pvc: + nameRegex: '{{ .Values.nodeMetrics.filters.pvc.nameRegex }}' + {{- if .Values.nodeMetrics.filters.pvc.namespace }} + namespace: '{{ .Values.nodeMetrics.filters.pvc.namespace }}' + {{- end }} + {{- end }} + outcomes: + - warn: + message: Node metrics exceed warning thresholds + - pass: + message: Node metrics within thresholds + {{- end }} + + {{- if .Values.http.enabled }} + - http: + checkName: HTTP checks + 
collectorName: '{{ .Values.http.collectorName }}' + outcomes: + - fail: + message: One or more HTTP checks failed + - pass: + message: All HTTP checks passed + {{- end }} + + diff --git a/values-all-analyzers-small.yaml b/values-all-analyzers-small.yaml new file mode 100644 index 00000000..cf2f6cd7 --- /dev/null +++ b/values-all-analyzers-small.yaml @@ -0,0 +1,4 @@ +clusterVersion: + enabled: true + minVersion: "1.24.0" + recommendedVersion: "1.28.0" \ No newline at end of file diff --git a/values-sample-full.yaml b/values-sample-full.yaml deleted file mode 100644 index 82c2c840..00000000 --- a/values-sample-full.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# Full Configuration for sample-preflight-templated.yaml -# All features enabled with production-grade settings - -# Kubernetes cluster requirements -kubernetes: - minVersion: "v1.27.0" - -# Storage configuration -storage: - enabled: true - className: "fast-ssd" - minIOPS: 5000 - minReadIOPS: 10000 - encryption: true - -# Cluster sizing -cluster: - minNodes: 5 - recommendedNodes: 7 - minCPU: 8 - minMemory: 32 - -# PostgreSQL database -postgres: - enabled: true - version: "15+" - uri: "postgresql://postgres@postgres-primary.database.svc.cluster.local:5432/production" - storageClass: "fast-ssd" - minMemory: 16 - minCPU: 4 - -# Redis cache -redis: - enabled: true - version: "7.2+" - uri: "redis://default:@redis-sentinel.cache.svc.cluster.local:26379" - minMemory: 8 - -# Ingress configuration -ingress: - enabled: true - type: "Contour" - customDomain: "*.apps.production.example.com" - -# Monitoring stack -monitoring: - enabled: true - type: "Prometheus" - retention: "30 days" - storageSize: "100Gi" - storageClass: "fast-ssd" - prometheus: true - grafana: true - alertmanager: true - -# Operating system -os: - minKernel: "5.15" - supported: - - "Ubuntu 22.04 LTS" - - "RHEL 9" - - "Rocky Linux 9" - - "Amazon Linux 2023" \ No newline at end of file diff --git a/values-sample-minimal.yaml b/values-sample-minimal.yaml deleted 
file mode 100644 index d0890924..00000000 --- a/values-sample-minimal.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Minimal Configuration for sample-preflight-templated.yaml -# Only essential features enabled - -# Kubernetes cluster requirements -kubernetes: - minVersion: "v1.25.0" - -# Storage disabled -storage: - enabled: false - -# Minimal cluster sizing -cluster: - minNodes: 3 - recommendedNodes: 3 - minCPU: 4 - minMemory: 16 - -# PostgreSQL disabled -postgres: - enabled: false - -# Redis disabled -redis: - enabled: false - -# Ingress disabled -ingress: - enabled: false - -# Monitoring disabled -monitoring: - enabled: false - -# Operating system -os: - minKernel: "5.4" - supported: - - "Ubuntu 20.04+" - - "RHEL 8+" \ No newline at end of file diff --git a/values-v1beta3-1.yaml b/values-v1beta3-1.yaml index 4352c3ae..baf1abd4 100644 --- a/values-v1beta3-1.yaml +++ b/values-v1beta3-1.yaml @@ -7,4 +7,10 @@ kubernetes: storage: enabled: true - className: "default" \ No newline at end of file + className: "default" + + nodeChecks: + cpu: + enabled: false + ephemeral: + enabled: false \ No newline at end of file diff --git a/values-v1beta3-all-analyzers.yaml b/values-v1beta3-all-analyzers.yaml new file mode 100644 index 00000000..aa23ad41 --- /dev/null +++ b/values-v1beta3-all-analyzers.yaml @@ -0,0 +1,229 @@ +clusterVersion: + enabled: true + minVersion: "1.24.0" + recommendedVersion: "1.28.0" + +crd: + enabled: true + name: "samples.mycompany.com" + +ingress: + enabled: true + namespace: "default" + name: "example" + +secret: + enabled: true + namespace: "default" + name: "my-secret" + key: "" + +configMap: + enabled: true + namespace: "kube-public" + name: "cluster-info" + key: "" + +imagePullSecret: + enabled: true + registry: "registry.example.com" + +workloads: + deployments: + enabled: true + namespace: "default" + name: "example-deploy" + minReady: 1 + statefulsets: + enabled: true + namespace: "default" + name: "example-sts" + minReady: 1 + jobs: + enabled: true + 
namespace: "default" + name: "example-job" + replicasets: + enabled: true + namespace: "default" + name: "example-rs" + minReady: 1 + +clusterPodStatuses: + enabled: true + namespaces: + - "default" + - "kube-system" + +clusterContainerStatuses: + enabled: true + namespaces: + - "default" + - "kube-system" + restartCount: 3 + +containerRuntime: + enabled: true + +distribution: + enabled: true + supported: ["eks", "gke", "aks", "kubeadm"] + unsupported: [] + +nodeResources: + count: + enabled: true + min: 1 + recommended: 3 + cpu: + enabled: true + min: "4" + memory: + enabled: true + minGi: 8 + recommendedGi: 16 + ephemeral: + enabled: true + minGi: 20 + recommendedGi: 50 + +textAnalyze: + enabled: true + fileName: "logs/*.log" + regex: "error" + +yamlCompare: + enabled: true + fileName: "kube-system/sample.yaml" + path: "spec.replicas" + value: "3" + +jsonCompare: + enabled: true + fileName: "custom/sample.json" + jsonPath: "$.items[0].status" + value: "Running" + +databases: + postgres: + enabled: true + collectorName: "postgres" + uri: "postgres://user:pass@postgres:5432/db?sslmode=disable" + tls: + skipVerify: true + secret: + name: "" + namespace: "" + mssql: + enabled: true + collectorName: "mssql" + uri: "sqlserver://user:pass@mssql:1433?database=db" + mysql: + enabled: true + collectorName: "mysql" + uri: "mysql://user:pass@tcp(mysql:3306)/db" + redis: + enabled: true + collectorName: "redis" + uri: "redis://redis:6379" + +cephStatus: + enabled: true + namespace: "rook-ceph" + timeout: "30s" + +velero: + enabled: true + +longhorn: + enabled: true + namespace: "longhorn-system" + timeout: "30s" + +registryImages: + enabled: true + collectorName: "images" + namespace: "default" + imagePullSecret: + name: "" + data: {} + images: + - "alpine:3.19" + - "busybox:1.36" + +http: + enabled: true + collectorName: "http" + get: + url: "https://example.com/healthz" + timeout: "10s" + insecureSkipVerify: true + headers: {} + post: + url: "" + timeout: "" + 
insecureSkipVerify: true + headers: {} + body: "" + +weaveReport: + enabled: true + reportFileGlob: "weave/*.json" + +sysctl: + enabled: true + namespace: "default" + image: "busybox:1.36" + imagePullPolicy: "IfNotPresent" + +clusterResource: + enabled: true + kind: "Deployment" + clusterScoped: true + namespace: "default" + name: "example-deploy" + yamlPath: "spec.replicas" + expectedValue: "3" + regex: "" + +certificates: + enabled: true + secrets: + - name: "" + namespaces: [] + configMaps: + - name: "" + namespaces: [] + +goldpinger: + enabled: true + collectorName: "goldpinger" + filePath: "goldpinger/check-all.json" + namespace: "default" + collectDelay: "30s" + podLaunch: + namespace: "" + image: "" + imagePullSecret: + name: "" + serviceAccountName: "" + +event: + enabled: true + collectorName: "events" + namespace: "default" + kind: "Pod" + reason: "Unhealthy" + regex: "" + +nodeMetrics: + enabled: true + collectorName: "node-metrics" + filters: + pvc: + nameRegex: "" + namespace: "" + nodeNames: [] + selector: [] + + diff --git a/values-v1beta3-full.yaml b/values-v1beta3-full.yaml index bbd34704..e06a6f72 100644 --- a/values-v1beta3-full.yaml +++ b/values-v1beta3-full.yaml @@ -1,7 +1,7 @@ # Values for v1beta3-templated-from-v1beta2.yaml kubernetes: - enabled: false + enabled: true minVersion: "1.22.0" recommendedVersion: "1.29.0" diff --git a/values-v1beta3-minimal.yaml b/values-v1beta3-minimal.yaml deleted file mode 100644 index 1f510494..00000000 --- a/values-v1beta3-minimal.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Minimal values for v1beta3-templated-from-v1beta2.yaml - -kubernetes: - enabled: false - minVersion: "1.22.0" - recommendedVersion: "1.29.0" - -storage: - enabled: false - className: "default" - -cluster: - minNodes: 3 - recommendedNodes: 3 - minCPU: 4 - -node: - minMemoryGi: 8 - recommendedMemoryGi: 16 - minEphemeralGi: 40 - recommendedEphemeralGi: 40 - -ingress: - enabled: false - type: "Contour" - -runtime: - enabled: false - -distribution: 
- enabled: false - -nodeChecks: - enabled: false - count: - enabled: false - cpu: - enabled: false - memory: - enabled: false - ephemeral: - enabled: false - -