diff --git a/alerts.go b/alerts.go index b8da47b76..358cde79b 100644 --- a/alerts.go +++ b/alerts.go @@ -36,10 +36,12 @@ func getUpstreams() models.AlertmanagerAPISummary { upstreams := alertmanager.GetAlertmanagers() for _, upstream := range upstreams { u := models.AlertmanagerAPIStatus{ - Name: upstream.Name, - URI: upstream.SanitizedURI(), - PublicURI: upstream.PublicURI(), - Error: upstream.Error(), + Name: upstream.Name, + URI: upstream.SanitizedURI(), + PublicURI: upstream.PublicURI(), + Error: upstream.Error(), + Version: upstream.Version(), + ClusterMembers: upstream.ClusterMemberNames(), } summary.Instances = append(summary.Instances, u) diff --git a/internal/alertmanager/models.go b/internal/alertmanager/models.go index 21f5d8a1d..3cc9081c1 100644 --- a/internal/alertmanager/models.go +++ b/internal/alertmanager/models.go @@ -14,6 +14,7 @@ import ( "github.com/prymitive/karma/internal/filters" "github.com/prymitive/karma/internal/mapper" "github.com/prymitive/karma/internal/models" + "github.com/prymitive/karma/internal/slices" "github.com/prymitive/karma/internal/transform" "github.com/prymitive/karma/internal/uri" @@ -30,6 +31,12 @@ type alertmanagerMetrics struct { errors map[string]float64 } +type alertmanagerStatus struct { + version string + amID string + peerIDs []string +} + // Alertmanager represents Alertmanager upstream instance type Alertmanager struct { URI string `json:"uri"` @@ -51,49 +58,68 @@ type Alertmanager struct { autocomplete []models.Autocomplete knownLabels []string lastError string + status alertmanagerStatus // metrics tracked per alertmanager instance metrics alertmanagerMetrics } -func (am *Alertmanager) detectVersion() string { - // if everything fails assume Alertmanager is at latest possible version - defaultVersion := "999.0.0" +func (am *Alertmanager) fetchStatus() alertmanagerStatus { + status := alertmanagerStatus{ + // if everything fails assume Alertmanager is at latest possible version + version: "999.0.0", + amID: "", + peerIDs: []string{}, + } url, err := uri.JoinURL(am.URI, "api/v1/status") if err != nil { log.Errorf("Failed to join url '%s' and path 'api/v1/status': %s", am.SanitizedURI(), err) - return defaultVersion + return status } - ver := alertmanagerVersion{} + resp := alertmanagerStatusResponse{} // read raw body from the source source, err := am.reader.Read(url) if err != nil { log.Errorf("[%s] %s request failed: %s", am.Name, uri.SanitizeURI(url), err) - return defaultVersion + return status } defer source.Close() // decode body as JSON - err = json.NewDecoder(source).Decode(&ver) + err = json.NewDecoder(source).Decode(&resp) if err != nil { log.Errorf("[%s] %s failed to decode as JSON: %s", am.Name, uri.SanitizeURI(url), err) - return defaultVersion + return status } - if ver.Status != "success" { - log.Errorf("[%s] Request to %s returned status %s", am.Name, uri.SanitizeURI(url), ver.Status) - return defaultVersion + if resp.Status != "success" { + log.Errorf("[%s] Request to %s returned status %s", am.Name, uri.SanitizeURI(url), resp.Status) + return status } - if ver.Data.VersionInfo.Version == "" { + if resp.Data.VersionInfo.Version == "" { log.Errorf("[%s] No version information in Alertmanager API at %s", am.Name, uri.SanitizeURI(url)) - return defaultVersion + return status } - log.Infof("[%s] Remote Alertmanager version: %s", am.Name, ver.Data.VersionInfo.Version) - return ver.Data.VersionInfo.Version + status.version = resp.Data.VersionInfo.Version + log.Infof("[%s] Remote Alertmanager version: %s", am.Name, status.version) + + if resp.Data.ClusterStatus.Name != "" { + status.amID = resp.Data.ClusterStatus.Name + for _, peer := range resp.Data.ClusterStatus.Peers { + status.peerIDs = append(status.peerIDs, peer.Name) + } + } else if resp.Data.MeshStatus.Name != "" { + status.amID = resp.Data.MeshStatus.Name + for _, peer := range resp.Data.MeshStatus.Peers { + status.peerIDs = append(status.peerIDs, peer.Name) + } + } + + return status } func (am *Alertmanager) clearData() { @@ -103,6 +129,11 @@ func (am *Alertmanager) clearData() { am.colors = models.LabelsColorMap{} am.autocomplete = []models.Autocomplete{} am.knownLabels = []string{} + am.status = alertmanagerStatus{ + version: "", + amID: "", + peerIDs: []string{}, + } am.lock.Unlock() } @@ -309,9 +340,9 @@ func (am *Alertmanager) pullAlerts(version string) error { func (am *Alertmanager) Pull() error { am.metrics.cycles++ - version := am.detectVersion() + status := am.fetchStatus() - err := am.pullSilences(version) + err := am.pullSilences(status.version) if err != nil { am.clearData() am.setError(err.Error()) @@ -319,7 +350,7 @@ func (am *Alertmanager) Pull() error { return err } - err = am.pullAlerts(version) + err = am.pullAlerts(status.version) if err != nil { am.clearData() am.setError(err.Error()) @@ -327,7 +358,11 @@ func (am *Alertmanager) Pull() error { return err } + am.lock.Lock() + am.status = status am.lastError = "" + am.lock.Unlock() + return nil } @@ -418,5 +453,52 @@ func (am *Alertmanager) Error() string { // SanitizedURI returns a copy of Alertmanager.URI with password replaced by // "xxx" func (am *Alertmanager) SanitizedURI() string { + am.lock.RLock() + defer am.lock.RUnlock() + return uri.SanitizeURI(am.URI) } + +// Version returns last known version of this Alertmanager instance +func (am *Alertmanager) Version() string { + am.lock.RLock() + defer am.lock.RUnlock() + + return am.status.version +} + +// ClusterPeers returns a list of IDs of all peers this instance +// is connected to. +// IDs are the same as in Alertmanager API. +func (am *Alertmanager) ClusterPeers() []string { + am.lock.RLock() + defer am.lock.RUnlock() + + return am.status.peerIDs +} + +// ClusterMemberNames returns a list of names of all Alertmanager instances +// that are in the same cluster as this instance (including self). +// Names are the same as in karma configuration. +func (am *Alertmanager) ClusterMemberNames() []string { + am.lock.RLock() + defer am.lock.RUnlock() + + members := []string{am.Name} + + upstreams := GetAlertmanagers() + for _, upstream := range upstreams { + if upstream.Name == am.Name { + continue + } + for _, peerID := range upstream.ClusterPeers() { + if slices.StringInSlice(am.status.peerIDs, peerID) { + if !slices.StringInSlice(members, upstream.Name) { + members = append(members, upstream.Name) + } + } + } + } + + return members +} diff --git a/internal/alertmanager/status.go b/internal/alertmanager/status.go new file mode 100644 index 000000000..0ee0a5c2d --- /dev/null +++ b/internal/alertmanager/status.go @@ -0,0 +1,34 @@ +package alertmanager + +type v06MeshPeer struct { + Name string `json:"name"` + NickName string `json:"nickName"` +} + +type v06CMeshStatus struct { + Name string `json:"name"` + NickName string `json:"nickName"` + Peers []v06MeshPeer `json:"peers"` +} + +type v015ClusterPeer struct { + Address string `json:"address"` + Name string `json:"name"` +} + +type v015ClusterStatus struct { + Name string `json:"name"` + Peers []v015ClusterPeer `json:"peers"` + Status string `json:"status"` +} + +type alertmanagerStatusResponse struct { + Status string `json:"status"` + Data struct { + VersionInfo struct { + Version string `json:"version"` + } `json:"versionInfo"` + MeshStatus v06CMeshStatus `json:"meshStatus"` + ClusterStatus v015ClusterStatus `json:"clusterStatus"` + } `json:"data"` +} diff --git a/internal/alertmanager/upstream.go b/internal/alertmanager/upstream.go index c7e202b1b..01dff5254 100644 --- a/internal/alertmanager/upstream.go +++ b/internal/alertmanager/upstream.go @@ -37,6 +37,7 @@ func NewAlertmanager(name, upstreamURI string, opts ...Option) (*Alertmanager, e labelValueErrorsSilences: 0, }, }, + status: alertmanagerStatus{}, } for _, opt := range opts { diff --git a/internal/alertmanager/version.go b/internal/alertmanager/version.go deleted file mode 100644 index b001493e0..000000000 --- a/internal/alertmanager/version.go +++ /dev/null @@ -1,12 +0,0 @@ -package alertmanager - -// AlertmanagerVersion is what api/v1/status returns, we only use it to check -// version, so we skip all other keys (except for status) -type alertmanagerVersion struct { - Status string `json:"status"` - Data struct { - VersionInfo struct { - Version string `json:"version"` - } `json:"versionInfo"` - } `json:"data"` -} diff --git a/internal/models/alertmanager.go b/internal/models/alertmanager.go index 545160d95..d40087a91 100644 --- a/internal/models/alertmanager.go +++ b/internal/models/alertmanager.go @@ -29,8 +29,10 @@ type AlertmanagerAPIStatus struct { URI string `json:"uri"` // this is URI client should use to talk to this Alertmanager, it might be // same as real or proxied URI - PublicURI string `json:"publicURI"` - Error string `json:"error"` + PublicURI string `json:"publicURI"` + Error string `json:"error"` + Version string `json:"version"` + ClusterMembers []string `json:"clusterMembers"` } // AlertmanagerAPICounters returns number of Alertmanager instances in each