From 8d237a6c7cffbf01aecd200ea938dbde5ada8331 Mon Sep 17 00:00:00 2001
From: Nick Parker
Date: Tue, 4 Feb 2025 14:43:25 +1300
Subject: [PATCH] feat(k8sExporter): Options to allow disabling Events or Node Conditions

Both outputs are currently hardcoded to enabled; this change allows
disabling one or the other. Both default to enabled to retain current
behavior.

Larger clusters can save some etcd I/O by skipping an output that isn't
being consumed. In our case we aren't consuming the Events, so writing
them just creates more churn.
---
 cmd/options/options.go                    |  6 ++++++
 pkg/exporters/k8sexporter/k8s_exporter.go | 16 ++++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/cmd/options/options.go b/cmd/options/options.go
index fa4fb65a..3d90bfde 100644
--- a/cmd/options/options.go
+++ b/cmd/options/options.go
@@ -65,6 +65,10 @@ type NodeProblemDetectorOptions struct {
 	APIServerWaitInterval time.Duration
 	// K8sExporterHeartbeatPeriod is the period at which the k8s exporter does forcibly sync with apiserver.
 	K8sExporterHeartbeatPeriod time.Duration
+	// K8sExporterWriteEvents determines whether to write Kubernetes Events for problems.
+	K8sExporterWriteEvents bool
+	// K8sExporterUpdateNodeConditions determines whether to update Kubernetes Node Conditions for problems.
+	K8sExporterUpdateNodeConditions bool
 
 	// prometheusExporter options
 	// PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable.
@@ -117,6 +121,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
 	fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
 	fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
 	fs.DurationVar(&npdo.K8sExporterHeartbeatPeriod, "k8s-exporter-heartbeat-period", 5*time.Minute, "The period at which k8s-exporter does forcibly sync with apiserver.")
+	fs.BoolVar(&npdo.K8sExporterWriteEvents, "k8s-exporter-write-events", true, "Whether to write Kubernetes Event objects with event details.")
+	fs.BoolVar(&npdo.K8sExporterUpdateNodeConditions, "k8s-exporter-update-node-conditions", true, "Whether to update Kubernetes Node conditions with event details.")
 	fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
 	fs.StringVar(&npdo.HostnameOverride, "hostname-override", "", "Custom node name used to override hostname")
 
diff --git a/pkg/exporters/k8sexporter/k8s_exporter.go b/pkg/exporters/k8sexporter/k8s_exporter.go
index 31ec9c79..f033886e 100644
--- a/pkg/exporters/k8sexporter/k8s_exporter.go
+++ b/pkg/exporters/k8sexporter/k8s_exporter.go
@@ -38,6 +38,8 @@ import (
 type k8sExporter struct {
 	client           problemclient.Client
 	conditionManager condition.ConditionManager
+	writeEvents      bool
+	updateConditions bool
 }
 
 // NewExporterOrDie creates a exporter for Kubernetes apiserver exporting,
@@ -60,6 +62,8 @@ func NewExporterOrDie(ctx context.Context, npdo *options.NodeProblemDetectorOpti
 	ke := k8sExporter{
 		client:           c,
 		conditionManager: condition.NewConditionManager(c, clock.RealClock{}, npdo.K8sExporterHeartbeatPeriod),
+		writeEvents:      npdo.K8sExporterWriteEvents,
+		updateConditions: npdo.K8sExporterUpdateNodeConditions,
 	}
 
 	ke.startHTTPReporting(npdo)
@@ -69,11 +73,15 @@ func NewExporterOrDie(ctx context.Context, npdo *options.NodeProblemDetectorOpti
 }
 
 func (ke *k8sExporter) ExportProblems(status *types.Status) {
-	for _, event := range status.Events {
-		ke.client.Eventf(util.ConvertToAPIEventType(event.Severity), status.Source, event.Reason, event.Message)
+	if ke.writeEvents {
+		for _, event := range status.Events {
+			ke.client.Eventf(util.ConvertToAPIEventType(event.Severity), status.Source, event.Reason, event.Message)
+		}
 	}
-	for _, cdt := range status.Conditions {
-		ke.conditionManager.UpdateCondition(cdt)
+	if ke.updateConditions {
+		for _, cdt := range status.Conditions {
+			ke.conditionManager.UpdateCondition(cdt)
+		}
 	}
 }
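
Not part of the patch: a minimal sketch of how the new flags behave once
this change is applied, assuming the patched
k8s.io/node-problem-detector/cmd/options package and github.com/spf13/pflag.
AddFlags seeds both fields with their defaults (true) at flag registration,
so only the output being disabled needs an explicit flag:

package main

import (
	"fmt"

	"github.com/spf13/pflag"

	"k8s.io/node-problem-detector/cmd/options"
)

func main() {
	// Registering the flags binds the struct fields and applies the
	// defaults: both outputs start enabled.
	fs := pflag.NewFlagSet("node-problem-detector", pflag.ExitOnError)
	npdo := &options.NodeProblemDetectorOptions{}
	npdo.AddFlags(fs)

	// Disable Event writes only; Node Condition updates keep their default.
	if err := fs.Parse([]string{"--k8s-exporter-write-events=false"}); err != nil {
		panic(err)
	}

	fmt.Println(npdo.K8sExporterWriteEvents)          // false
	fmt.Println(npdo.K8sExporterUpdateNodeConditions) // true
}

In a running cluster the equivalent is adding --k8s-exporter-write-events=false
(or --k8s-exporter-update-node-conditions=false) to the node-problem-detector
container args.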