feat(k8sExporter): Options to allow disabling Events or Node Conditions

Both outputs are currently hardcoded as enabled; this change allows either one to be disabled independently. Both options default to enabled to retain the current behavior.

Larger clusters can save some etcd I/O by skipping one of these outputs if it isn't being consumed. In our case, we aren't consuming the Events, so writing them just creates more churn.
This commit is contained in:
Nick Parker
2025-02-04 14:43:25 +13:00
parent 12a8f5578c
commit 8d237a6c7c
2 changed files with 18 additions and 4 deletions

View File

@@ -65,6 +65,10 @@ type NodeProblemDetectorOptions struct {
APIServerWaitInterval time.Duration APIServerWaitInterval time.Duration
// K8sExporterHeartbeatPeriod is the period at which the k8s exporter does forcibly sync with apiserver. // K8sExporterHeartbeatPeriod is the period at which the k8s exporter does forcibly sync with apiserver.
K8sExporterHeartbeatPeriod time.Duration K8sExporterHeartbeatPeriod time.Duration
// K8sExporterWriteEvents determines whether to write Kubernetes Events for problems.
K8sExporterWriteEvents bool
// K8sExporterUpdateNodeConditions determines whether to update Kubernetes Node Conditions for problems.
K8sExporterUpdateNodeConditions bool
// prometheusExporter options // prometheusExporter options
// PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable. // PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable.
@@ -117,6 +121,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.") fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.") fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.K8sExporterHeartbeatPeriod, "k8s-exporter-heartbeat-period", 5*time.Minute, "The period at which k8s-exporter does forcibly sync with apiserver.") fs.DurationVar(&npdo.K8sExporterHeartbeatPeriod, "k8s-exporter-heartbeat-period", 5*time.Minute, "The period at which k8s-exporter does forcibly sync with apiserver.")
fs.BoolVar(&npdo.K8sExporterWriteEvents, "k8s-exporter-write-events", true, "Whether to write Kubernetes Event objects with event details.")
fs.BoolVar(&npdo.K8sExporterUpdateNodeConditions, "k8s-exporter-update-node-conditions", true, "Whether to update Kubernetes Node conditions with event details.")
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit") fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
fs.StringVar(&npdo.HostnameOverride, "hostname-override", fs.StringVar(&npdo.HostnameOverride, "hostname-override",
"", "Custom node name used to override hostname") "", "Custom node name used to override hostname")

View File

@@ -38,6 +38,8 @@ import (
type k8sExporter struct { type k8sExporter struct {
client problemclient.Client client problemclient.Client
conditionManager condition.ConditionManager conditionManager condition.ConditionManager
writeEvents bool
updateConditions bool
} }
// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting, // NewExporterOrDie creates a exporter for Kubernetes apiserver exporting,
@@ -60,6 +62,8 @@ func NewExporterOrDie(ctx context.Context, npdo *options.NodeProblemDetectorOpti
ke := k8sExporter{ ke := k8sExporter{
client: c, client: c,
conditionManager: condition.NewConditionManager(c, clock.RealClock{}, npdo.K8sExporterHeartbeatPeriod), conditionManager: condition.NewConditionManager(c, clock.RealClock{}, npdo.K8sExporterHeartbeatPeriod),
writeEvents: npdo.K8sExporterWriteEvents,
updateConditions: npdo.K8sExporterUpdateNodeConditions,
} }
ke.startHTTPReporting(npdo) ke.startHTTPReporting(npdo)
@@ -69,11 +73,15 @@ func NewExporterOrDie(ctx context.Context, npdo *options.NodeProblemDetectorOpti
} }
func (ke *k8sExporter) ExportProblems(status *types.Status) { func (ke *k8sExporter) ExportProblems(status *types.Status) {
for _, event := range status.Events { if ke.writeEvents {
ke.client.Eventf(util.ConvertToAPIEventType(event.Severity), status.Source, event.Reason, event.Message) for _, event := range status.Events {
ke.client.Eventf(util.ConvertToAPIEventType(event.Severity), status.Source, event.Reason, event.Message)
}
} }
for _, cdt := range status.Conditions { if ke.updateConditions {
ke.conditionManager.UpdateCondition(cdt) for _, cdt := range status.Conditions {
ke.conditionManager.UpdateCondition(cdt)
}
} }
} }