diff --git a/alertmanager/metrics.go b/alertmanager/metrics.go
index fc409c25f..f014888ee 100644
--- a/alertmanager/metrics.go
+++ b/alertmanager/metrics.go
@@ -24,10 +24,20 @@ var (
 		},
 		[]string{"alertmanager", "endpoint"},
 	)
+	// metricCollectRuns counts alert collection cycles per alertmanager. It is
+	// only ever incremented, so it is a counter (as its _total suffix implies).
+	metricCollectRuns = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "unsee_collect_cycles_total",
+			Help: "Total number of alert collection cycles run",
+		},
+		[]string{"alertmanager"},
+	)
 )
 
 func init() {
 	prometheus.MustRegister(metricAlerts)
 	prometheus.MustRegister(metricAlertGroups)
 	prometheus.MustRegister(metricAlertmanagerErrors)
+	prometheus.MustRegister(metricCollectRuns)
 }
diff --git a/alertmanager/models.go b/alertmanager/models.go
index 322a927fa..6ccae8364 100644
--- a/alertmanager/models.go
+++ b/alertmanager/models.go
@@ -68,6 +68,24 @@ func (am *Alertmanager) clearData() {
 	am.colors = models.LabelsColorMap{}
 	am.autocomplete = []models.Autocomplete{}
 	am.lock.Unlock()
+	// reset metrics to 0 since we don't store anything anymore
+	am.resetMetrics()
+}
+
+// resetMetrics zeroes the per-state alert metrics and the alert group metric
+// for this alertmanager.
+func (am *Alertmanager) resetMetrics() {
+	// reset alert state/instance counters
+	for _, state := range models.AlertStateList {
+		metricAlerts.With(prometheus.Labels{
+			"alertmanager": am.Name,
+			"state":        state,
+		}).Set(0)
+	}
+	// reset alert group counters
+	metricAlertGroups.With(prometheus.Labels{
+		"alertmanager": am.Name,
+	}).Set(0)
 }
 
 func (am *Alertmanager) pullSilences(version string) error {
@@ -137,6 +155,13 @@ func (am *Alertmanager) pullAlerts(version string) error {
 	dedupedGroups := []models.AlertGroup{}
 	colors := models.LabelsColorMap{}
 	autocompleteMap := map[string]models.Autocomplete{}
+
+	// we'll use this to update alert counter metrics (per state/instance)
+	alertMetrics := map[string]float64{}
+	for _, state := range models.AlertStateList {
+		alertMetrics[state] = 0
+	}
+
 	log.Infof("[%s] Processing unique alert groups (%d)", am.Name, len(uniqueGroups))
 	for _, ag := range uniqueGroups {
 		alerts := models.AlertList{}
@@ -172,11 +197,7 @@ func (am *Alertmanager) pullAlerts(version string) error {
 			alert.UpdateFingerprints()
 			alerts = append(alerts, alert)
 
-			// update internal metrics
-			metricAlerts.With(prometheus.Labels{
-				"alertmanager": am.Name,
-				"state":        alert.State,
-			}).Inc()
+			alertMetrics[alert.State]++
 		}
 
 		for _, hint := range transform.BuildAutocomplete(alerts) {
@@ -192,6 +213,14 @@ func (am *Alertmanager) pullAlerts(version string) error {
 		dedupedGroups = append(dedupedGroups, ag)
 	}
 
+	// update internal metrics with new computed values
+	for state, val := range alertMetrics {
+		metricAlerts.With(prometheus.Labels{
+			"alertmanager": am.Name,
+			"state":        state,
+		}).Set(val)
+	}
+
 	log.Infof("[%s] Merging autocomplete data (%d)", am.Name, len(autocompleteMap))
 	autocomplete := []models.Autocomplete{}
 	for _, hint := range autocompleteMap {
@@ -237,6 +266,10 @@ func (am *Alertmanager) Pull() error {
 		return err
 	}
 
+	metricCollectRuns.With(prometheus.Labels{
+		"alertmanager": am.Name,
+	}).Inc()
+
 	am.lastError = ""
 	return nil
 }