feat: node metrics analyser (#1520)

* feat: node metrics analyser

  The analyser only checks PVC usage at the moment. More analysers can be added on a need-to-have basis.
* Add tests
* Fix flaky test by waiting for goldpinger pods to start
* Fix how outcomes get checked
* Fix catch-all outcome condition
* Fix test
* feat: node metrics analyser

  The analyser only checks PVC usage at the moment. More analysers can be added on a need-to-have basis.
* Add tests
* Fix flaky test by waiting for goldpinger pods to start
* Fix how outcomes get checked
* Fix catch-all outcome condition
* Fix test
* Regenerate schemas
* Fix failing test

---------

Co-authored-by: Dexter Yan <yanshaocong@gmail.com>
2026-02-14 10:19:54 +00:00 · 2024-04-09 12:14:10 +01:00
parent dc4403811b
commit db871e6889
17 changed files with 1303 additions and 12 deletions
--- a/config/crds/troubleshoot.sh_analyzers.yaml
+++ b/config/crds/troubleshoot.sh_analyzers.yaml
@@ -1045,6 +1045,66 @@ spec:
                      - collectorName
                      - outcomes
                      type: object
+                    nodeMetrics:
+                      properties:
+                        annotations:
+                          additionalProperties:
+                            type: string
+                          type: object
+                        checkName:
+                          type: string
+                        collectorName:
+                          type: string
+                        exclude:
+                          type: BoolString
+                        filters:
+                          properties:
+                            pvc:
+                              properties:
+                                nameRegex:
+                                  type: string
+                                namespace:
+                                  type: string
+                              type: object
+                          type: object
+                        outcomes:
+                          items:
+                            properties:
+                              fail:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                              pass:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                              warn:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                            type: object
+                          type: array
+                        strict:
+                          type: BoolString
+                      required:
+                      - collectorName
+                      - outcomes
+                      type: object
                    nodeResources:
                      properties:
                        annotations:
--- a/config/crds/troubleshoot.sh_preflights.yaml
+++ b/config/crds/troubleshoot.sh_preflights.yaml
@@ -1045,6 +1045,66 @@ spec:
                      - collectorName
                      - outcomes
                      type: object
+                    nodeMetrics:
+                      properties:
+                        annotations:
+                          additionalProperties:
+                            type: string
+                          type: object
+                        checkName:
+                          type: string
+                        collectorName:
+                          type: string
+                        exclude:
+                          type: BoolString
+                        filters:
+                          properties:
+                            pvc:
+                              properties:
+                                nameRegex:
+                                  type: string
+                                namespace:
+                                  type: string
+                              type: object
+                          type: object
+                        outcomes:
+                          items:
+                            properties:
+                              fail:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                              pass:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                              warn:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                            type: object
+                          type: array
+                        strict:
+                          type: BoolString
+                      required:
+                      - collectorName
+                      - outcomes
+                      type: object
                    nodeResources:
                      properties:
                        annotations:
--- a/config/crds/troubleshoot.sh_supportbundles.yaml
+++ b/config/crds/troubleshoot.sh_supportbundles.yaml
@@ -1076,6 +1076,66 @@ spec:
                      - collectorName
                      - outcomes
                      type: object
+                    nodeMetrics:
+                      properties:
+                        annotations:
+                          additionalProperties:
+                            type: string
+                          type: object
+                        checkName:
+                          type: string
+                        collectorName:
+                          type: string
+                        exclude:
+                          type: BoolString
+                        filters:
+                          properties:
+                            pvc:
+                              properties:
+                                nameRegex:
+                                  type: string
+                                namespace:
+                                  type: string
+                              type: object
+                          type: object
+                        outcomes:
+                          items:
+                            properties:
+                              fail:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                              pass:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                              warn:
+                                properties:
+                                  message:
+                                    type: string
+                                  uri:
+                                    type: string
+                                  when:
+                                    type: string
+                                type: object
+                            type: object
+                          type: array
+                        strict:
+                          type: BoolString
+                      required:
+                      - collectorName
+                      - outcomes
+                      type: object
                    nodeResources:
                      properties:
                        annotations:
--- a/go.mod
+++ b/go.mod
@@ -252,6 +252,7 @@ require (
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	helm.sh/helm/v3 v3.14.3
 	k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
+	k8s.io/kubelet v0.29.3
 	k8s.io/metrics v0.29.3
 	k8s.io/utils v0.0.0-20230726121419-3b25d923346b
 	periph.io/x/host/v3 v3.8.2
--- a/go.sum
+++ b/go.sum
@@ -1573,6 +1573,8 @@ k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/A
 k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA=
 k8s.io/kubectl v0.29.0 h1:Oqi48gXjikDhrBF67AYuZRTcJV4lg2l42GmvsP7FmYI=
 k8s.io/kubectl v0.29.0/go.mod h1:0jMjGWIcMIQzmUaMgAzhSELv5WtHo2a8pq67DtviAJs=
+k8s.io/kubelet v0.29.3 h1:X9h0ZHzc+eUeNTaksbN0ItHyvGhQ7Z0HPjnQD2oHdwU=
+k8s.io/kubelet v0.29.3/go.mod h1:jDiGuTkFOUynyBKzOoC1xRSWlgAZ9UPcTYeFyjr6vas=
 k8s.io/metrics v0.29.3 h1:nN+eavbMQ7Kuif2tIdTr2/F2ec2E/SIAWSruTZ+Ye6U=
 k8s.io/metrics v0.29.3/go.mod h1:kb3tGGC4ZcIDIuvXyUE291RwJ5WmDu0tB4wAVZM6h2I=
 k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI=
--- a/pkg/analyze/analyzer.go
+++ b/pkg/analyze/analyzer.go
@@ -248,6 +248,8 @@ func getAnalyzer(analyzer *troubleshootv1beta2.Analyze) Analyzer {
 		return &AnalyzeGoldpinger{analyzer: analyzer.Goldpinger}
 	case analyzer.Event != nil:
 		return &AnalyzeEvent{analyzer: analyzer.Event}
+	case analyzer.NodeMetrics != nil:
+		return &AnalyzeNodeMetrics{analyzer: analyzer.NodeMetrics}
 	default:
 		return nil
 	}
--- a/pkg/analyze/comparison.go
+++ b/pkg/analyze/comparison.go
@@ -0,0 +1,34 @@
+package analyzer
+
+import "fmt"
+
+// ComparisonOperator identifies the relational operator used in an analyzer
+// conditional such as "pvcUsedPercentage >= 50".
+type ComparisonOperator int
+
+const (
+	// Unknown is the zero value. ParseComparisonOperator returns it together
+	// with an error when the operator string is not recognised.
+	Unknown ComparisonOperator = iota
+	// Equal is parsed from "=", "==" or "===".
+	Equal
+	// NotEqual is parsed from "!=" or "!==".
+	NotEqual
+	// GreaterThan is parsed from ">".
+	GreaterThan
+	// GreaterThanOrEqual is parsed from ">=".
+	GreaterThanOrEqual
+	// LessThan is parsed from "<".
+	LessThan
+	// LessThanOrEqual is parsed from "<=".
+	LessThanOrEqual
+)
+
+// ParseComparisonOperator converts the textual operator s into a
+// ComparisonOperator. The match is exact: surrounding whitespace is not
+// trimmed. Unrecognised input yields Unknown and a non-nil error; the input
+// is quoted in the error so that an empty operator is still visible.
+func ParseComparisonOperator(s string) (ComparisonOperator, error) {
+	switch s {
+	case "=", "==", "===":
+		return Equal, nil
+	case "!=", "!==":
+		return NotEqual, nil
+	case "<":
+		return LessThan, nil
+	case ">":
+		return GreaterThan, nil
+	case "<=":
+		return LessThanOrEqual, nil
+	case ">=":
+		return GreaterThanOrEqual, nil
+	}
+
+	return Unknown, fmt.Errorf("unknown operator: %q", s)
+}
--- a/pkg/analyze/comparison_test.go
+++ b/pkg/analyze/comparison_test.go
@@ -0,0 +1,79 @@
+package analyzer
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestParseComparisonOperator verifies that every supported operator spelling
+// maps to its ComparisonOperator and that unsupported input is rejected.
+// Subtest names are unique so a failure points at exactly one table entry.
+func TestParseComparisonOperator(t *testing.T) {
+	tests := []struct {
+		name    string
+		input   string
+		want    ComparisonOperator
+		wantErr bool
+	}{
+		{name: "equal single", input: "=", want: Equal},
+		{name: "equal double", input: "==", want: Equal},
+		{name: "equal triple", input: "===", want: Equal},
+		{name: "not equal", input: "!=", want: NotEqual},
+		{name: "not equal strict", input: "!==", want: NotEqual},
+		{name: "less than", input: "<", want: LessThan},
+		{name: "greater than", input: ">", want: GreaterThan},
+		{name: "less than or equal", input: "<=", want: LessThanOrEqual},
+		{name: "greater than or equal", input: ">=", want: GreaterThanOrEqual},
+		// For invalid input the parser returns the zero value (Unknown) and an error.
+		{name: "invalid operator empty", input: "", wantErr: true},
+		{name: "invalid operator gibberish", input: "gibberish", wantErr: true},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := ParseComparisonOperator(tt.input)
+			assert.Equalf(t, tt.want, got, "ParseComparisonOperator() = %v, want %v", got, tt.want)
+			assert.Equalf(t, tt.wantErr, err != nil, "ParseComparisonOperator() error = %v, wantErr %v", err, tt.wantErr)
+		})
+	}
+}
--- a/pkg/analyze/k8s_node_metrics.go
+++ b/pkg/analyze/k8s_node_metrics.go
@@ -0,0 +1,316 @@
+package analyzer
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"text/template"
+
+	"github.com/pkg/errors"
+	troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
+	"k8s.io/klog/v2"
+	kubeletv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+)
+
+// AnalyzeNodeMetrics evaluates the outcomes of a nodeMetrics analyzer spec
+// against the node-metrics summaries found in the collected files.
+type AnalyzeNodeMetrics struct {
+	// analyzer is the user-supplied spec: check name, filters and outcomes.
+	analyzer *troubleshootv1beta2.NodeMetricsAnalyze
+}
+
+// nodeMetricsComparisonResults is the data handed to outcome message
+// templates, e.g. "{{ .PVC.ConcatenatedNames }}".
+type nodeMetricsComparisonResults struct {
+	// PVC describes the PVCs that matched the evaluated conditional.
+	PVC pvcTemplateData
+}
+
+// pvcTemplateData holds the PVC-related values exposed to message templates.
+type pvcTemplateData struct {
+	// UsedPercentage is not populated anywhere in this file, so it renders
+	// as 0 in templates.
+	UsedPercentage float64
+	// ConcatenatedNames is Names joined with ", ".
+	ConcatenatedNames string
+	// Names lists the matched PVCs as "<namespace>/<name>".
+	Names []string
+}
+
+// pvcUsageStats is the usage of a single PVC-backed volume as reported in a
+// kubelet summary.
+type pvcUsageStats struct {
+	// PvcName is formatted as "<namespace>/<name>".
+	PvcName string
+	// Used is the used space as a percentage of capacity (used/capacity*100).
+	Used float64
+}
+
+// Title returns the display name of this check: the spec's check name when
+// set, otherwise its collector name, otherwise the literal "Node Metrics".
+func (a *AnalyzeNodeMetrics) Title() string {
+	switch {
+	case a.analyzer.CheckName != "":
+		return a.analyzer.CheckName
+	case a.analyzer.CollectorName != "":
+		return a.analyzer.CollectorName
+	default:
+		return "Node Metrics"
+	}
+}
+
+// IsExcluded reports whether the spec's exclude setting disables this
+// analyzer; the decision is delegated to the package-level isExcluded helper.
+func (a *AnalyzeNodeMetrics) IsExcluded() (bool, error) {
+	excluded, err := isExcluded(a.analyzer.Exclude)
+	return excluded, err
+}
+
+// Analyze loads every collected node-metrics summary, evaluates the configured
+// outcomes against them and returns at most one result.
+//
+// getFile is unused. findFiles is called with the glob "node-metrics/*.json"
+// and every returned file is decoded as a kubelet stats Summary. An error is
+// returned when the files cannot be listed, when any file fails to unmarshal,
+// or when an outcome conditional cannot be evaluated. When no outcome
+// matches, an empty non-nil slice is returned.
+func (a *AnalyzeNodeMetrics) Analyze(getFile getCollectedFileContents, findFiles getChildCollectedFileContents) ([]*AnalyzeResult, error) {
+	// Gather all collected node-metrics files
+	collected, err := findFiles(filepath.Join("node-metrics", "*.json"), nil)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to read collected node metrics")
+	}
+
+	// Unmarshal all collected node-metrics files, naming the offending file on
+	// failure so a corrupt bundle entry can be located.
+	summaries := make([]kubeletv1alpha1.Summary, 0, len(collected))
+	for name, fileContent := range collected {
+		summary := kubeletv1alpha1.Summary{}
+		if err := json.Unmarshal(fileContent, &summary); err != nil {
+			return nil, errors.Wrapf(err, "failed to unmarshal node metrics from %s", name)
+		}
+
+		summaries = append(summaries, summary)
+	}
+
+	// Run through all outcomes to generate results
+	result, err := a.compareCollectedMetricsWithOutcomes(summaries)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to compare node metrics with outcomes")
+	}
+	if result == nil {
+		// No outcome applied: report nothing rather than a nil slice.
+		return []*AnalyzeResult{}, nil
+	}
+	result.Strict = a.analyzer.Strict.BoolOrDefaultFalse()
+
+	return []*AnalyzeResult{result}, nil
+}
+
+// compareCollectedMetricsWithOutcomes walks the spec's outcomes in order and
+// returns the result for the first one that applies.
+//
+// For each outcome the first non-nil of Fail, Warn, Pass is considered. An
+// outcome with an empty "when" always applies and its message is used as-is;
+// otherwise the conditional is evaluated against summaries and, on a match,
+// the message is rendered as a template with the comparison data. It returns
+// (nil, nil) when no outcome applies and an error when a conditional cannot
+// be evaluated.
+func (a *AnalyzeNodeMetrics) compareCollectedMetricsWithOutcomes(summaries []kubeletv1alpha1.Summary) (*AnalyzeResult, error) {
+	// evaluate reports whether a single outcome applies and, if so, the
+	// message to attach to the result.
+	evaluate := func(single *troubleshootv1beta2.SingleOutcome) (bool, string, error) {
+		if single.When == "" {
+			// Catch-all outcome: no conditional, no template data.
+			return true, single.Message, nil
+		}
+
+		matched, data, err := a.compareNodeMetricConditionalsToStats(single.When, summaries)
+		if err != nil {
+			return false, "", errors.Wrap(err, "failed to compare node metrics conditional with summary stats")
+		}
+		if !matched {
+			return false, "", nil
+		}
+
+		return true, renderTemplate(single.Message, data), nil
+	}
+
+	for _, outcome := range a.analyzer.Outcomes {
+		verdict := &AnalyzeResult{
+			Title: a.Title(),
+		}
+
+		// Pick the first populated severity; the flag only matters if the
+		// outcome ends up applying, since verdict is discarded otherwise.
+		var single *troubleshootv1beta2.SingleOutcome
+		switch {
+		case outcome.Fail != nil:
+			single, verdict.IsFail = outcome.Fail, true
+		case outcome.Warn != nil:
+			single, verdict.IsWarn = outcome.Warn, true
+		case outcome.Pass != nil:
+			single, verdict.IsPass = outcome.Pass, true
+		default:
+			continue
+		}
+
+		applies, message, err := evaluate(single)
+		if err != nil {
+			return nil, err
+		}
+		if !applies {
+			continue
+		}
+
+		verdict.Message = message
+		verdict.URI = single.URI
+
+		return verdict, nil
+	}
+
+	return nil, nil
+}
+
+// findPVCUsageStats returns the usage percentage of every PVC-backed volume
+// found in summaries, after applying the analyzer's PVC filters.
+//
+// Pods whose namespace differs from the filter namespace are skipped, as are
+// PVCs whose name does not match the filter regex. Volumes without a PVC
+// reference, with missing used or capacity bytes, or with a zero capacity are
+// skipped as well. A PVC reported by several pods or summaries yields one
+// entry per occurrence. The returned slice is never nil on success; an error
+// is returned only when the name regex does not compile.
+func (a *AnalyzeNodeMetrics) findPVCUsageStats(summaries []kubeletv1alpha1.Summary) ([]pvcUsageStats, error) {
+	// We just collect usage percentages for now. If other stats are needed, we can add them.
+	stats := []pvcUsageStats{}
+	var nameRegex *regexp.Regexp
+	var ns string
+
+	if pvcFilter := a.analyzer.Filters.PVC; pvcFilter != nil {
+		if pvcFilter.NameRegex != "" {
+			re, err := regexp.Compile(pvcFilter.NameRegex)
+			if err != nil {
+				return nil, errors.Wrap(err, "failed to compile PVC name regex")
+			}
+			nameRegex = re
+		}
+
+		ns = pvcFilter.Namespace
+	}
+
+	// Analyze PVCs
+	for _, summary := range summaries {
+		for i := range summary.Pods {
+			pod := summary.Pods[i]
+			if ns != "" && ns != pod.PodRef.Namespace {
+				klog.V(2).Infof("Skipping pvcs in %s/%s pod due to namespace filter", pod.PodRef.Namespace, pod.PodRef.Name)
+				continue
+			}
+
+			for j := range pod.VolumeStats {
+				volume := pod.VolumeStats[j]
+
+				// Only persistent volumes carry a PVC reference.
+				if volume.PVCRef == nil {
+					continue
+				}
+
+				if nameRegex != nil && !nameRegex.MatchString(volume.PVCRef.Name) {
+					klog.V(2).Infof("Skipping pvc %s/%s due to name regex filter", volume.PVCRef.Namespace, volume.PVCRef.Name)
+					continue
+				}
+
+				pvcName := fmt.Sprintf("%s/%s", volume.PVCRef.Namespace, volume.PVCRef.Name)
+
+				used := volume.UsedBytes
+				capacity := volume.CapacityBytes
+				if used == nil || capacity == nil {
+					klog.V(2).Infof("Missing capacity or used bytes for PVC %s", pvcName)
+					continue
+				}
+
+				// Float division by zero does not panic: it yields +Inf (or NaN
+				// for 0/0), which would spuriously satisfy ">" or "!="
+				// conditionals. Treat a zero capacity as "no usable data".
+				if *capacity == 0 {
+					klog.V(2).Infof("Skipping PVC %s because its reported capacity is zero", pvcName)
+					continue
+				}
+
+				pvcUsedPercentage := float64(*used) / float64(*capacity) * 100
+				stats = append(stats, pvcUsageStats{
+					PvcName: pvcName,
+					Used:    pvcUsedPercentage,
+				})
+				klog.V(2).Infof("PVC usage for %s: %0.2f%%", pvcName, pvcUsedPercentage)
+			}
+		}
+	}
+
+	return stats, nil
+}
+
+// compareNodeMetricConditionalsToStats evaluates conditional against the
+// collected node metrics. At the moment only PVC usage is supported.
+//
+// conditional must consist of exactly three whitespace-separated tokens:
+// "<metric> <operator> <number>", e.g. "pvcUsedPercentage >= 50.4". It
+// returns true when at least one PVC (after filters) satisfies the
+// comparison, together with template data naming every matching PVC. An
+// error is returned when the conditional is malformed, the metric or operator
+// is unknown, the number does not parse, or the PVC stats cannot be gathered.
+func (a *AnalyzeNodeMetrics) compareNodeMetricConditionalsToStats(conditional string, summaries []kubeletv1alpha1.Summary) (bool, nodeMetricsComparisonResults, error) {
+	klog.V(2).Infof("Comparing node metrics with conditional: %s", conditional)
+	// Fields tolerates repeated spaces and tabs between tokens, which a plain
+	// split on " " would turn into empty tokens and reject.
+	parts := strings.Fields(conditional)
+	out := nodeMetricsComparisonResults{}
+
+	if len(parts) != 3 {
+		return false, out, errors.Errorf("unable to parse conditional %q", conditional)
+	}
+
+	switch parts[0] {
+	case "pvcUsedPercentage":
+		// e.g pvcUsedPercentage >= 50.4
+
+		klog.V(2).Infof("Analyzing volume usage stats for PVCs")
+
+		op, err := ParseComparisonOperator(parts[1])
+		if err != nil {
+			return false, out, errors.Wrap(err, "failed to parse comparison operator")
+		}
+
+		expected, err := strconv.ParseFloat(parts[2], 64)
+		if err != nil {
+			return false, out, errors.Wrap(err, "failed to parse float")
+		}
+
+		// Pick all PVCs from all summaries. Filters will be applied here
+		usages, err := a.findPVCUsageStats(summaries)
+		if err != nil {
+			return false, out, errors.Wrap(err, "failed to find PVC usage stats")
+		}
+
+		// satisfies applies the parsed operator to one usage value. Any
+		// operator not listed never matches.
+		satisfies := func(value float64) bool {
+			switch op {
+			case Equal:
+				return value == expected
+			case NotEqual:
+				return value != expected
+			case LessThan:
+				return value < expected
+			case GreaterThan:
+				return value > expected
+			case LessThanOrEqual:
+				return value <= expected
+			case GreaterThanOrEqual:
+				return value >= expected
+			}
+			return false
+		}
+
+		matchedPVCs := []string{}
+		for _, usage := range usages {
+			if satisfies(usage.Used) {
+				matchedPVCs = append(matchedPVCs, usage.PvcName)
+			}
+		}
+
+		// Concatenate all matched PVC names
+		out.PVC = pvcTemplateData{
+			Names:             matchedPVCs,
+			ConcatenatedNames: strings.Join(matchedPVCs, ", "),
+		}
+		return len(matchedPVCs) > 0, out, nil
+	}
+
+	return false, out, errors.New("unknown node metric conditional")
+}
+
+// renderTemplate renders tmpMsg as a text/template using data and returns the
+// output. It is best-effort: when data is nil, or when parsing or executing
+// the template fails, the failure is logged at verbosity 2 and tmpMsg is
+// returned unchanged.
+func renderTemplate(tmpMsg string, data any) string {
+	if data == nil {
+		return tmpMsg
+	}
+
+	tmpl, parseErr := template.New("msg").Parse(tmpMsg)
+	if parseErr != nil {
+		klog.V(2).Infof("Failed to parse template: %s", parseErr)
+		return tmpMsg
+	}
+
+	rendered := new(bytes.Buffer)
+	if execErr := tmpl.Execute(rendered, data); execErr != nil {
+		// Discard any partial output and fall back to the raw message.
+		klog.V(2).Infof("Failed to execute template: %s", execErr)
+		return tmpMsg
+	}
+
+	return rendered.String()
+}
--- a/pkg/analyze/k8s_node_metrics_test.go
+++ b/pkg/analyze/k8s_node_metrics_test.go
@@ -0,0 +1,288 @@
+package analyzer
+
+import (
+	"testing"
+
+	troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
+	"github.com/stretchr/testify/assert"
+	kubeletv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+	utilptr "k8s.io/utils/ptr"
+)
+
+// TestAnalyzeNodeMetrics_findPVCUsageStats is a table-driven test of
+// findPVCUsageStats covering empty input, an unfiltered summary, the
+// namespace filter and the name regex filter.
+func TestAnalyzeNodeMetrics_findPVCUsageStats(t *testing.T) {
+	tests := []struct {
+		name      string
+		analyzer  troubleshootv1beta2.NodeMetricsAnalyze
+		summaries []kubeletv1alpha1.Summary
+		want      []pvcUsageStats
+		wantErr   bool
+	}{
+		// No input: the result is an empty, non-nil slice.
+		{
+			name:      "no summaries",
+			summaries: []kubeletv1alpha1.Summary{},
+			want:      []pvcUsageStats{},
+		},
+		// No filters: 80 used of 100 capacity is reported as 80 percent.
+		{
+			name: "one summary",
+			summaries: []kubeletv1alpha1.Summary{
+				{
+					Pods: []kubeletv1alpha1.PodStats{
+						{
+							PodRef: kubeletv1alpha1.PodReference{
+								Namespace: "default",
+								Name:      "my-pod",
+							},
+							VolumeStats: []kubeletv1alpha1.VolumeStats{
+								{
+									Name: "volume-1",
+									PVCRef: &kubeletv1alpha1.PVCReference{
+										Namespace: "default",
+										Name:      "my-pvc",
+									},
+									FsStats: kubeletv1alpha1.FsStats{
+										AvailableBytes: utilptr.To(uint64(20)),
+										UsedBytes:      utilptr.To(uint64(80)),
+										CapacityBytes:  utilptr.To(uint64(100)),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			want: []pvcUsageStats{
+				{
+					Used:    80,
+					PvcName: "default/my-pvc",
+				},
+			},
+		},
+		// The pod lives in "default", so filtering on another namespace drops it.
+		{
+			name: "one summary with namespace filter",
+			analyzer: troubleshootv1beta2.NodeMetricsAnalyze{
+				Filters: troubleshootv1beta2.NodeMetricsAnalyzeFilters{
+					PVC: &troubleshootv1beta2.PVCRef{
+						Namespace: "another-namespace",
+					},
+				},
+			},
+			summaries: []kubeletv1alpha1.Summary{
+				{
+					Pods: []kubeletv1alpha1.PodStats{
+						{
+							PodRef: kubeletv1alpha1.PodReference{
+								Namespace: "default",
+								Name:      "my-pod",
+							},
+							VolumeStats: []kubeletv1alpha1.VolumeStats{
+								{
+									Name: "volume-1",
+									PVCRef: &kubeletv1alpha1.PVCReference{
+										Namespace: "default",
+										Name:      "my-pvc",
+									},
+									FsStats: kubeletv1alpha1.FsStats{
+										AvailableBytes: utilptr.To(uint64(20)),
+										UsedBytes:      utilptr.To(uint64(80)),
+										CapacityBytes:  utilptr.To(uint64(100)),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			want: []pvcUsageStats{},
+		},
+		// Only "my-other-pvc" matches ".*other.*"; "my-pvc" is filtered out.
+		{
+			name: "one summary with name regex filter",
+			analyzer: troubleshootv1beta2.NodeMetricsAnalyze{
+				Filters: troubleshootv1beta2.NodeMetricsAnalyzeFilters{
+					PVC: &troubleshootv1beta2.PVCRef{
+						NameRegex: ".*other.*",
+					},
+				},
+			},
+			summaries: []kubeletv1alpha1.Summary{
+				{
+					Pods: []kubeletv1alpha1.PodStats{
+						{
+							PodRef: kubeletv1alpha1.PodReference{
+								Namespace: "default",
+								Name:      "my-pod",
+							},
+							VolumeStats: []kubeletv1alpha1.VolumeStats{
+								{
+									Name: "volume-1",
+									PVCRef: &kubeletv1alpha1.PVCReference{
+										Namespace: "default",
+										Name:      "my-pvc",
+									},
+									FsStats: kubeletv1alpha1.FsStats{
+										AvailableBytes: utilptr.To(uint64(20)),
+										UsedBytes:      utilptr.To(uint64(80)),
+										CapacityBytes:  utilptr.To(uint64(100)),
+									},
+								},
+								{
+									Name: "volume-1",
+									PVCRef: &kubeletv1alpha1.PVCReference{
+										Namespace: "default",
+										Name:      "my-other-pvc",
+									},
+									FsStats: kubeletv1alpha1.FsStats{
+										AvailableBytes: utilptr.To(uint64(25)),
+										UsedBytes:      utilptr.To(uint64(75)),
+										CapacityBytes:  utilptr.To(uint64(100)),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			want: []pvcUsageStats{
+				{
+					Used:    75,
+					PvcName: "default/my-other-pvc",
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			a := &AnalyzeNodeMetrics{
+				analyzer: &tt.analyzer,
+			}
+			got, err := a.findPVCUsageStats(tt.summaries)
+			assert.Equalf(t, tt.wantErr, err != nil, "AnalyzeNodeMetrics.findPVCUsageStats() error = %v, wantErr %v", err, tt.wantErr)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestAnalyzeNodeMetrics_Analyze is a table-driven test of Analyze. Each case
+// supplies the raw content of a single collected file through a stubbed
+// file-finder and checks the returned results and error state.
+func TestAnalyzeNodeMetrics_Analyze(t *testing.T) {
+	tests := []struct {
+		name        string
+		analyzer    troubleshootv1beta2.NodeMetricsAnalyze
+		nodeMetrics string
+		want        []*AnalyzeResult
+		wantErr     bool
+	}{
+		// Empty file content cannot be unmarshalled, so Analyze must fail.
+		{
+			name: "no node metrics",
+			analyzer: troubleshootv1beta2.NodeMetricsAnalyze{
+				Filters: troubleshootv1beta2.NodeMetricsAnalyzeFilters{},
+			},
+			nodeMetrics: "",
+			wantErr:     true,
+		},
+		// Content that is not JSON must fail as well.
+		{
+			name: "invalid node metrics",
+			analyzer: troubleshootv1beta2.NodeMetricsAnalyze{
+				Filters: troubleshootv1beta2.NodeMetricsAnalyzeFilters{},
+			},
+			nodeMetrics: "invalid",
+			wantErr:     true,
+		},
+		// A valid but empty summary with no outcomes configured yields an
+		// empty, non-nil result slice.
+		{
+			name: "no summaries",
+			analyzer: troubleshootv1beta2.NodeMetricsAnalyze{
+				Filters: troubleshootv1beta2.NodeMetricsAnalyzeFilters{},
+			},
+			nodeMetrics: "{}",
+			want:        []*AnalyzeResult{},
+		},
+		// The regex ".*other.*" keeps another-pvc (75%), the-other-pvc (80%)
+		// and to-other-pvc (65%) and drops backup-pvc. Of those, only the
+		// first two satisfy ">= 75", so the fail outcome fires and names them.
+		{
+			name: "one summary with name regex filter",
+			analyzer: troubleshootv1beta2.NodeMetricsAnalyze{
+				Outcomes: []*troubleshootv1beta2.Outcome{
+					{
+						Fail: &troubleshootv1beta2.SingleOutcome{
+							When:    "pvcUsedPercentage >= 75",
+							Message: "PVC space usage is too high for pvcs [{{ .PVC.ConcatenatedNames }}]",
+						},
+					},
+					{
+						Pass: &troubleshootv1beta2.SingleOutcome{
+							Message: "No PVCs are using more than 80% of storage",
+						},
+					},
+				},
+				Filters: troubleshootv1beta2.NodeMetricsAnalyzeFilters{
+					PVC: &troubleshootv1beta2.PVCRef{
+						NameRegex: ".*other.*",
+					},
+				},
+			},
+			nodeMetrics: `{
+				"pods": [
+				  {
+					"podRef": {
+					  "name": "my-pod",
+					  "namespace": "my-namespace"
+					},
+					"volume": [
+					  {
+						"capacityBytes": 100,
+						"usedBytes": 80,
+						"pvcRef": {
+						  "name": "backup-pvc",
+						  "namespace": "my-namespace"
+						}
+					  },
+					  {
+						"capacityBytes": 100,
+						"usedBytes": 75,
+						"pvcRef": {
+						  "name": "another-pvc",
+						  "namespace": "my-namespace"
+						}
+					  },
+					  {
+						"capacityBytes": 100,
+						"usedBytes": 80,
+						"pvcRef": {
+						  "name": "the-other-pvc",
+						  "namespace": "my-namespace"
+						}
+					  },
+					  {
+						"capacityBytes": 100,
+						"usedBytes": 65,
+						"pvcRef": {
+						  "name": "to-other-pvc",
+						  "namespace": "my-namespace"
+						}
+					  }
+					]
+				  }
+				]
+			  }`,
+			want: []*AnalyzeResult{
+				{
+					Title:   "Node Metrics",
+					IsFail:  true,
+					Message: "PVC space usage is too high for pvcs [my-namespace/another-pvc, my-namespace/the-other-pvc]",
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			a := &AnalyzeNodeMetrics{
+				analyzer: &tt.analyzer,
+			}
+			// The stub ignores the requested glob and always returns one file
+			// holding this case's content.
+			filesFn := func(string, []string) (map[string][]byte, error) {
+				return map[string][]byte{
+					"node-metrics.json": []byte(tt.nodeMetrics),
+				}, nil
+			}
+
+			got, err := a.Analyze(nil, filesFn)
+			assert.Equalf(t, tt.wantErr, err != nil, "AnalyzeNodeMetrics.Analyze() error = %v, wantErr %v", err, tt.wantErr)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
--- a/pkg/apis/troubleshoot/v1beta2/analyzer_shared.go
+++ b/pkg/apis/troubleshoot/v1beta2/analyzer_shared.go
@@ -242,6 +242,22 @@ type EventAnalyze struct {
 	Outcomes      []*Outcome `json:"outcomes" yaml:"outcomes"`
 }

+// NodeMetricsAnalyze is the spec of the nodeMetrics analyzer, which evaluates
+// outcomes against collected node metrics.
+type NodeMetricsAnalyze struct {
+	AnalyzeMeta `json:",inline" yaml:",inline"`
+	// CollectorName is used as the result title when no check name is set.
+	CollectorName string `json:"collectorName" yaml:"collectorName"`
+	// Filters narrows which metrics the outcomes are evaluated against.
+	Filters NodeMetricsAnalyzeFilters `json:"filters,omitempty" yaml:"filters,omitempty"`
+	// Outcomes are evaluated in order; the first one that applies is reported.
+	Outcomes []*Outcome `json:"outcomes" yaml:"outcomes"`
+}
+
+// NodeMetricsAnalyzeFilters groups the optional filters of the nodeMetrics
+// analyzer.
+type NodeMetricsAnalyzeFilters struct {
+	// PVC restricts which PVCs are considered; nil means no PVC filtering.
+	PVC *PVCRef `json:"pvc,omitempty" yaml:"pvc,omitempty"`
+}
+
+// PVCRef selects PVCs by name pattern and/or namespace. Empty fields do not
+// filter.
+type PVCRef struct {
+	// NameRegex is a regular expression matched against the PVC name.
+	NameRegex string `json:"nameRegex,omitempty" yaml:"nameRegex,omitempty"`
+	// Namespace is compared for equality with the namespace of the pod
+	// reporting the volume.
+	Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
+}
+
 type Analyze struct {
 	ClusterVersion           *ClusterVersion           `json:"clusterVersion,omitempty" yaml:"clusterVersion,omitempty"`
 	StorageClass             *StorageClass             `json:"storageClass,omitempty" yaml:"storageClass,omitempty"`
@@ -275,4 +291,5 @@ type Analyze struct {
 	Certificates             *CertificatesAnalyze      `json:"certificates,omitempty" yaml:"certificates,omitempty"`
 	Goldpinger               *GoldpingerAnalyze        `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
 	Event                    *EventAnalyze             `json:"event,omitempty" yaml:"event,omitempty"`
+	NodeMetrics              *NodeMetricsAnalyze       `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
 }
--- a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go
+++ b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go
@@ -213,6 +213,11 @@ func (in *Analyze) DeepCopyInto(out *Analyze) {
 		*out = new(EventAnalyze)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.NodeMetrics != nil {
+		in, out := &in.NodeMetrics, &out.NodeMetrics
+		*out = new(NodeMetricsAnalyze)
+		(*in).DeepCopyInto(*out)
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Analyze.
@@ -3015,6 +3020,54 @@ func (in *NodeMetrics) DeepCopy() *NodeMetrics {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *NodeMetricsAnalyze) DeepCopyInto(out *NodeMetricsAnalyze) {
+	*out = *in
+	in.AnalyzeMeta.DeepCopyInto(&out.AnalyzeMeta)
+	in.Filters.DeepCopyInto(&out.Filters)
+	if in.Outcomes != nil {
+		in, out := &in.Outcomes, &out.Outcomes
+		*out = make([]*Outcome, len(*in))
+		for i := range *in {
+			if (*in)[i] != nil {
+				in, out := &(*in)[i], &(*out)[i]
+				*out = new(Outcome)
+				(*in).DeepCopyInto(*out)
+			}
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeMetricsAnalyze.
+func (in *NodeMetricsAnalyze) DeepCopy() *NodeMetricsAnalyze {
+	if in == nil {
+		return nil
+	}
+	out := new(NodeMetricsAnalyze)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *NodeMetricsAnalyzeFilters) DeepCopyInto(out *NodeMetricsAnalyzeFilters) {
+	*out = *in
+	if in.PVC != nil {
+		in, out := &in.PVC, &out.PVC
+		*out = new(PVCRef)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeMetricsAnalyzeFilters.
+func (in *NodeMetricsAnalyzeFilters) DeepCopy() *NodeMetricsAnalyzeFilters {
+	if in == nil {
+		return nil
+	}
+	out := new(NodeMetricsAnalyzeFilters)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NodeResourceFilters) DeepCopyInto(out *NodeResourceFilters) {
 	*out = *in
@@ -3119,6 +3172,21 @@ func (in *Outcome) DeepCopy() *Outcome {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PVCRef) DeepCopyInto(out *PVCRef) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PVCRef.
+func (in *PVCRef) DeepCopy() *PVCRef {
+	if in == nil {
+		return nil
+	}
+	out := new(PVCRef)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *PodLaunchOptions) DeepCopyInto(out *PodLaunchOptions) {
 	*out = *in
--- a/pkg/preflight/flags.go
+++ b/pkg/preflight/flags.go
@@ -2,7 +2,7 @@ package preflight

 import (
 	flag "github.com/spf13/pflag"
-	utilpointer "k8s.io/utils/pointer"
+	utilpointer "k8s.io/utils/ptr"
 )

 const (
@@ -35,16 +35,17 @@ var preflightFlags *PreflightFlags
 
+// NewPreflightFlags returns a PreflightFlags in which every field set below is a non-nil pointer to its initial value.
 func NewPreflightFlags() *PreflightFlags {
 	return &PreflightFlags{
-		Interactive:               utilpointer.Bool(true),
-		Format:                    utilpointer.String("human"),
-		CollectorImage:            utilpointer.String(""),
-		CollectorPullPolicy:       utilpointer.String(""),
-		CollectWithoutPermissions: utilpointer.Bool(true),
-		Selector:                  utilpointer.String(""),
-		SinceTime:                 utilpointer.String(""),
-		Since:                     utilpointer.String(""),
-		Output:                    utilpointer.String("o"),
-		Debug:                     utilpointer.Bool(false),
+		Interactive:               utilpointer.To(true),
+		Format:                    utilpointer.To("human"),
+		CollectorImage:            utilpointer.To(""),
+		CollectorPullPolicy:       utilpointer.To(""),
+		CollectWithoutPermissions: utilpointer.To(true),
+		Selector:                  utilpointer.To(""),
+		SinceTime:                 utilpointer.To(""),
+		Since:                     utilpointer.To(""),
+		Output:                    utilpointer.To("o"),
+		Debug:                     utilpointer.To(false),
 	}
 }

--- a/schemas/analyzer-troubleshoot-v1beta2.json
+++ b/schemas/analyzer-troubleshoot-v1beta2.json
@@ -1565,6 +1565,99 @@
                  }
                }
              },
+              "nodeMetrics": {
+                "type": "object",
+                "required": [
+                  "collectorName",
+                  "outcomes"
+                ],
+                "properties": {
+                  "annotations": {
+                    "type": "object",
+                    "additionalProperties": {
+                      "type": "string"
+                    }
+                  },
+                  "checkName": {
+                    "type": "string"
+                  },
+                  "collectorName": {
+                    "type": "string"
+                  },
+                  "exclude": {
+                    "oneOf": [{"type": "string"},{"type": "boolean"}]
+                  },
+                  "filters": {
+                    "type": "object",
+                    "properties": {
+                      "pvc": {
+                        "type": "object",
+                        "properties": {
+                          "nameRegex": {
+                            "type": "string"
+                          },
+                          "namespace": {
+                            "type": "string"
+                          }
+                        }
+                      }
+                    }
+                  },
+                  "outcomes": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "fail": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        },
+                        "pass": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        },
+                        "warn": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        }
+                      }
+                    }
+                  },
+                  "strict": {
+                    "oneOf": [{"type": "string"},{"type": "boolean"}]
+                  }
+                }
+              },
              "nodeResources": {
                "type": "object",
                "required": [
--- a/schemas/preflight-troubleshoot-v1beta2.json
+++ b/schemas/preflight-troubleshoot-v1beta2.json
@@ -1565,6 +1565,99 @@
                  }
                }
              },
+              "nodeMetrics": {
+                "type": "object",
+                "required": [
+                  "collectorName",
+                  "outcomes"
+                ],
+                "properties": {
+                  "annotations": {
+                    "type": "object",
+                    "additionalProperties": {
+                      "type": "string"
+                    }
+                  },
+                  "checkName": {
+                    "type": "string"
+                  },
+                  "collectorName": {
+                    "type": "string"
+                  },
+                  "exclude": {
+                    "oneOf": [{"type": "string"},{"type": "boolean"}]
+                  },
+                  "filters": {
+                    "type": "object",
+                    "properties": {
+                      "pvc": {
+                        "type": "object",
+                        "properties": {
+                          "nameRegex": {
+                            "type": "string"
+                          },
+                          "namespace": {
+                            "type": "string"
+                          }
+                        }
+                      }
+                    }
+                  },
+                  "outcomes": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "fail": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        },
+                        "pass": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        },
+                        "warn": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        }
+                      }
+                    }
+                  },
+                  "strict": {
+                    "oneOf": [{"type": "string"},{"type": "boolean"}]
+                  }
+                }
+              },
              "nodeResources": {
                "type": "object",
                "required": [
--- a/schemas/supportbundle-troubleshoot-v1beta2.json
+++ b/schemas/supportbundle-troubleshoot-v1beta2.json
@@ -1611,6 +1611,99 @@
                  }
                }
              },
+              "nodeMetrics": {
+                "type": "object",
+                "required": [
+                  "collectorName",
+                  "outcomes"
+                ],
+                "properties": {
+                  "annotations": {
+                    "type": "object",
+                    "additionalProperties": {
+                      "type": "string"
+                    }
+                  },
+                  "checkName": {
+                    "type": "string"
+                  },
+                  "collectorName": {
+                    "type": "string"
+                  },
+                  "exclude": {
+                    "oneOf": [{"type": "string"},{"type": "boolean"}]
+                  },
+                  "filters": {
+                    "type": "object",
+                    "properties": {
+                      "pvc": {
+                        "type": "object",
+                        "properties": {
+                          "nameRegex": {
+                            "type": "string"
+                          },
+                          "namespace": {
+                            "type": "string"
+                          }
+                        }
+                      }
+                    }
+                  },
+                  "outcomes": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "fail": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        },
+                        "pass": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        },
+                        "warn": {
+                          "type": "object",
+                          "properties": {
+                            "message": {
+                              "type": "string"
+                            },
+                            "uri": {
+                              "type": "string"
+                            },
+                            "when": {
+                              "type": "string"
+                            }
+                          }
+                        }
+                      }
+                    }
+                  },
+                  "strict": {
+                    "oneOf": [{"type": "string"},{"type": "boolean"}]
+                  }
+                }
+              },
              "nodeResources": {
                "type": "object",
                "required": [
--- a/test/e2e/support-bundle/goldpinger_collector_e2e_test.go
+++ b/test/e2e/support-bundle/goldpinger_collector_e2e_test.go
@@ -10,11 +10,16 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"

 	"github.com/replicatedhq/troubleshoot/internal/testutils"
 	"github.com/replicatedhq/troubleshoot/pkg/convert"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	v1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/e2e-framework/klient/k8s/resources"
+	"sigs.k8s.io/e2e-framework/klient/wait"
+	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
 	"sigs.k8s.io/e2e-framework/pkg/envconf"
 	"sigs.k8s.io/e2e-framework/pkg/features"
 	"sigs.k8s.io/e2e-framework/third_party/helm"
@@ -27,6 +32,10 @@ metadata:
  name: goldpinger
 spec:
  collectors:
+    - clusterResources:
+        exclude: true
+    - clusterInfo:
+        exclude: true
    - goldpinger:
        namespace: $NAMESPACE
  analyzers:
@@ -48,6 +57,22 @@ func Test_GoldpingerCollector(t *testing.T) {
 				helm.WithTimeout("2m"),
 			)
 			require.NoError(t, err)
+			client, err := c.NewClient()
+			require.NoError(t, err)
+			pods := &v1.PodList{}
+
+			// Let's wait for the goldpinger pods to be running
+			err = client.Resources().WithNamespace(c.Namespace()).List(ctx, pods,
+				resources.WithLabelSelector("app.kubernetes.io/name=goldpinger"),
+			)
+			require.NoError(t, err)
+			require.Len(t, pods.Items, 1)
+
+			err = wait.For(
+				conditions.New(client.Resources()).PodRunning(&pods.Items[0]),
+				wait.WithTimeout(time.Second*30),
+			)
+			require.NoError(t, err)
 			return ctx
 		}).
 		Assess("collect and analyse goldpinger pings", func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
@@ -83,13 +108,14 @@ func Test_GoldpingerCollector(t *testing.T) {
 			// Check that we analysed collected goldpinger results.
 			// We should expect a single analysis result for goldpinger.
 			assert.Equal(t, 1, len(analysisResults))
+			// assert.Equal does not stop the test, so guard the index to avoid a panic on an empty result set.
+			assert.True(t, len(analysisResults) > 0 && strings.HasPrefix(analysisResults[0].Name, "missing.ping.results.for.goldpinger."))
 			if t.Failed() {
 				t.Logf("Analysis results: %s\n", analysisJSON)
 				t.Logf("Stdout: %s\n", out.String())
 				t.Logf("Stderr: %s\n", stdErr.String())
 				t.FailNow()
 			}
-			assert.True(t, strings.HasPrefix(analysisResults[0].Name, "missing.ping.results.for.goldpinger."))

 			return ctx
 		}).