mirror of
https://github.com/prymitive/karma
synced 2026-05-05 03:16:51 +00:00
feat(api): expose Alertmanager instance version and cluster members
Store the version of each Alertmanager instance and the list of instances that belong to the same cluster, and expose both via the API.
This commit is contained in:
10
alerts.go
10
alerts.go
@@ -36,10 +36,12 @@ func getUpstreams() models.AlertmanagerAPISummary {
|
||||
upstreams := alertmanager.GetAlertmanagers()
|
||||
for _, upstream := range upstreams {
|
||||
u := models.AlertmanagerAPIStatus{
|
||||
Name: upstream.Name,
|
||||
URI: upstream.SanitizedURI(),
|
||||
PublicURI: upstream.PublicURI(),
|
||||
Error: upstream.Error(),
|
||||
Name: upstream.Name,
|
||||
URI: upstream.SanitizedURI(),
|
||||
PublicURI: upstream.PublicURI(),
|
||||
Error: upstream.Error(),
|
||||
Version: upstream.Version(),
|
||||
ClusterMembers: upstream.ClusterMemberNames(),
|
||||
}
|
||||
summary.Instances = append(summary.Instances, u)
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/prymitive/karma/internal/filters"
|
||||
"github.com/prymitive/karma/internal/mapper"
|
||||
"github.com/prymitive/karma/internal/models"
|
||||
"github.com/prymitive/karma/internal/slices"
|
||||
"github.com/prymitive/karma/internal/transform"
|
||||
"github.com/prymitive/karma/internal/uri"
|
||||
|
||||
@@ -30,6 +31,12 @@ type alertmanagerMetrics struct {
|
||||
errors map[string]float64
|
||||
}
|
||||
|
||||
// alertmanagerStatus is the subset of the Alertmanager status API response
// that is kept for each upstream instance.
type alertmanagerStatus struct {
|
||||
// version is the Alertmanager version string; fetchStatus leaves it at
// "999.0.0" when the real version cannot be read.
version string
|
||||
// amID is the name the instance reports for itself in its cluster status
// (or mesh status); it stays empty when neither section carries a name.
amID string
|
||||
// peerIDs holds the names of the peers listed in that same status section.
peerIDs []string
|
||||
}
|
||||
|
||||
// Alertmanager represents Alertmanager upstream instance
|
||||
type Alertmanager struct {
|
||||
URI string `json:"uri"`
|
||||
@@ -51,49 +58,68 @@ type Alertmanager struct {
|
||||
autocomplete []models.Autocomplete
|
||||
knownLabels []string
|
||||
lastError string
|
||||
status alertmanagerStatus
|
||||
// metrics tracked per alertmanager instance
|
||||
metrics alertmanagerMetrics
|
||||
}
|
||||
|
||||
func (am *Alertmanager) detectVersion() string {
|
||||
// if everything fails assume Alertmanager is at latest possible version
|
||||
defaultVersion := "999.0.0"
|
||||
func (am *Alertmanager) fetchStatus() alertmanagerStatus {
|
||||
status := alertmanagerStatus{
|
||||
// if everything fails assume Alertmanager is at latest possible version
|
||||
version: "999.0.0",
|
||||
amID: "",
|
||||
peerIDs: []string{},
|
||||
}
|
||||
|
||||
url, err := uri.JoinURL(am.URI, "api/v1/status")
|
||||
if err != nil {
|
||||
log.Errorf("Failed to join url '%s' and path 'api/v1/status': %s", am.SanitizedURI(), err)
|
||||
return defaultVersion
|
||||
return status
|
||||
}
|
||||
|
||||
ver := alertmanagerVersion{}
|
||||
resp := alertmanagerStatusResponse{}
|
||||
|
||||
// read raw body from the source
|
||||
source, err := am.reader.Read(url)
|
||||
if err != nil {
|
||||
log.Errorf("[%s] %s request failed: %s", am.Name, uri.SanitizeURI(url), err)
|
||||
return defaultVersion
|
||||
return status
|
||||
}
|
||||
defer source.Close()
|
||||
|
||||
// decode body as JSON
|
||||
err = json.NewDecoder(source).Decode(&ver)
|
||||
err = json.NewDecoder(source).Decode(&resp)
|
||||
if err != nil {
|
||||
log.Errorf("[%s] %s failed to decode as JSON: %s", am.Name, uri.SanitizeURI(url), err)
|
||||
return defaultVersion
|
||||
return status
|
||||
}
|
||||
|
||||
if ver.Status != "success" {
|
||||
log.Errorf("[%s] Request to %s returned status %s", am.Name, uri.SanitizeURI(url), ver.Status)
|
||||
return defaultVersion
|
||||
if resp.Status != "success" {
|
||||
log.Errorf("[%s] Request to %s returned status %s", am.Name, uri.SanitizeURI(url), resp.Status)
|
||||
return status
|
||||
}
|
||||
|
||||
if ver.Data.VersionInfo.Version == "" {
|
||||
if resp.Data.VersionInfo.Version == "" {
|
||||
log.Errorf("[%s] No version information in Alertmanager API at %s", am.Name, uri.SanitizeURI(url))
|
||||
return defaultVersion
|
||||
return status
|
||||
}
|
||||
|
||||
log.Infof("[%s] Remote Alertmanager version: %s", am.Name, ver.Data.VersionInfo.Version)
|
||||
return ver.Data.VersionInfo.Version
|
||||
status.version = resp.Data.VersionInfo.Version
|
||||
log.Infof("[%s] Remote Alertmanager version: %s", am.Name, status.version)
|
||||
|
||||
if resp.Data.ClusterStatus.Name != "" {
|
||||
status.amID = resp.Data.ClusterStatus.Name
|
||||
for _, peer := range resp.Data.ClusterStatus.Peers {
|
||||
status.peerIDs = append(status.peerIDs, peer.Name)
|
||||
}
|
||||
} else if resp.Data.MeshStatus.Name != "" {
|
||||
status.amID = resp.Data.MeshStatus.Name
|
||||
for _, peer := range resp.Data.MeshStatus.Peers {
|
||||
status.peerIDs = append(status.peerIDs, peer.Name)
|
||||
}
|
||||
}
|
||||
|
||||
return status
|
||||
}
|
||||
|
||||
func (am *Alertmanager) clearData() {
|
||||
@@ -103,6 +129,11 @@ func (am *Alertmanager) clearData() {
|
||||
am.colors = models.LabelsColorMap{}
|
||||
am.autocomplete = []models.Autocomplete{}
|
||||
am.knownLabels = []string{}
|
||||
am.status = alertmanagerStatus{
|
||||
version: "",
|
||||
amID: "",
|
||||
peerIDs: []string{},
|
||||
}
|
||||
am.lock.Unlock()
|
||||
}
|
||||
|
||||
@@ -309,9 +340,9 @@ func (am *Alertmanager) pullAlerts(version string) error {
|
||||
func (am *Alertmanager) Pull() error {
|
||||
am.metrics.cycles++
|
||||
|
||||
version := am.detectVersion()
|
||||
status := am.fetchStatus()
|
||||
|
||||
err := am.pullSilences(version)
|
||||
err := am.pullSilences(status.version)
|
||||
if err != nil {
|
||||
am.clearData()
|
||||
am.setError(err.Error())
|
||||
@@ -319,7 +350,7 @@ func (am *Alertmanager) Pull() error {
|
||||
return err
|
||||
}
|
||||
|
||||
err = am.pullAlerts(version)
|
||||
err = am.pullAlerts(status.version)
|
||||
if err != nil {
|
||||
am.clearData()
|
||||
am.setError(err.Error())
|
||||
@@ -327,7 +358,11 @@ func (am *Alertmanager) Pull() error {
|
||||
return err
|
||||
}
|
||||
|
||||
am.lock.Lock()
|
||||
am.status = status
|
||||
am.lastError = ""
|
||||
am.lock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -418,5 +453,52 @@ func (am *Alertmanager) Error() string {
|
||||
// SanitizedURI returns a copy of Alertmanager.URI with password replaced by
|
||||
// "xxx"
|
||||
func (am *Alertmanager) SanitizedURI() string {
|
||||
// am.URI is read while holding the instance read lock
am.lock.RLock()
|
||||
defer am.lock.RUnlock()
|
||||
|
||||
// the masking itself is delegated to uri.SanitizeURI
return uri.SanitizeURI(am.URI)
|
||||
}
|
||||
|
||||
// Version returns last known version of this Alertmanager instance.
// The value is read from am.status.version under the instance read lock;
// it is an empty string once clearData has reset the stored status.
|
||||
func (am *Alertmanager) Version() string {
|
||||
am.lock.RLock()
|
||||
defer am.lock.RUnlock()
|
||||
|
||||
return am.status.version
|
||||
}
|
||||
|
||||
// ClusterPeers returns a list of IDs of all peers this instance
|
||||
// is connected to.
|
||||
// IDs are the same as in Alertmanager API.
// The returned slice is the stored am.status.peerIDs itself, not a copy, so
// callers should treat it as read-only.
|
||||
func (am *Alertmanager) ClusterPeers() []string {
|
||||
// only the read of am.status is covered by the lock; the slice is handed
// out after the deferred unlock runs
am.lock.RLock()
|
||||
defer am.lock.RUnlock()
|
||||
|
||||
return am.status.peerIDs
|
||||
}
|
||||
|
||||
// ClusterMemberNames returns a list of names of all Alertmanager instances
|
||||
// that are in the same cluster as this instance (including self).
|
||||
// Names are the same as in karma configuration.
|
||||
func (am *Alertmanager) ClusterMemberNames() []string {
|
||||
am.lock.RLock()
|
||||
defer am.lock.RUnlock()
|
||||
|
||||
members := []string{am.Name}
|
||||
|
||||
upstreams := GetAlertmanagers()
|
||||
for _, upstream := range upstreams {
|
||||
if upstream.Name == am.Name {
|
||||
continue
|
||||
}
|
||||
for _, peerID := range upstream.ClusterPeers() {
|
||||
if slices.StringInSlice(am.status.peerIDs, peerID) {
|
||||
if !slices.StringInSlice(members, upstream.Name) {
|
||||
members = append(members, upstream.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return members
|
||||
}
|
||||
|
||||
34
internal/alertmanager/status.go
Normal file
34
internal/alertmanager/status.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package alertmanager
|
||||
|
||||
// v06MeshPeer is a single entry of the "peers" list found inside the
// "meshStatus" section of the status response.
// NOTE(review): the v06 prefix presumably refers to the Alertmanager 0.6
// response format - confirm.
type v06MeshPeer struct {
|
||||
Name string `json:"name"`
|
||||
NickName string `json:"nickName"`
|
||||
}
|
||||
|
||||
// v06CMeshStatus maps the "meshStatus" section of the status response: the
// name of the responding instance plus the list of its peers.
type v06CMeshStatus struct {
|
||||
// Name is used as the instance ID when no cluster status name is present.
Name string `json:"name"`
|
||||
NickName string `json:"nickName"`
|
||||
Peers []v06MeshPeer `json:"peers"`
|
||||
}
|
||||
|
||||
// v015ClusterPeer is a single entry of the "peers" list found inside the
// "clusterStatus" section of the status response.
// NOTE(review): the v015 prefix presumably refers to the Alertmanager 0.15
// response format - confirm.
type v015ClusterPeer struct {
|
||||
Address string `json:"address"`
|
||||
// Name is the value collected as a peer ID.
Name string `json:"name"`
|
||||
}
|
||||
|
||||
// v015ClusterStatus maps the "clusterStatus" section of the status response:
// the name of the responding instance, its peers and a status string.
type v015ClusterStatus struct {
|
||||
// Name is used as the instance ID; when it is empty the mesh status is
// consulted instead.
Name string `json:"name"`
|
||||
Peers []v015ClusterPeer `json:"peers"`
|
||||
Status string `json:"status"`
|
||||
}
|
||||
|
||||
// alertmanagerStatusResponse is the JSON document decoded from the
// api/v1/status endpoint. Only the keys needed for version detection and
// cluster membership are mapped.
type alertmanagerStatusResponse struct {
|
||||
// Status is expected to be "success"; any other value makes fetchStatus
// fall back to its default status.
Status string `json:"status"`
|
||||
Data struct {
|
||||
VersionInfo struct {
|
||||
Version string `json:"version"`
|
||||
} `json:"versionInfo"`
|
||||
// MeshStatus is only consulted when ClusterStatus carries no name.
MeshStatus v06CMeshStatus `json:"meshStatus"`
|
||||
ClusterStatus v015ClusterStatus `json:"clusterStatus"`
|
||||
} `json:"data"`
|
||||
}
|
||||
@@ -37,6 +37,7 @@ func NewAlertmanager(name, upstreamURI string, opts ...Option) (*Alertmanager, e
|
||||
labelValueErrorsSilences: 0,
|
||||
},
|
||||
},
|
||||
status: alertmanagerStatus{},
|
||||
}
|
||||
|
||||
for _, opt := range opts {
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
package alertmanager
|
||||
|
||||
// alertmanagerVersion is what api/v1/status returns, we only use it to check
|
||||
// version, so we skip all other keys (except for status)
|
||||
type alertmanagerVersion struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
VersionInfo struct {
|
||||
Version string `json:"version"`
|
||||
} `json:"versionInfo"`
|
||||
} `json:"data"`
|
||||
}
|
||||
@@ -29,8 +29,10 @@ type AlertmanagerAPIStatus struct {
|
||||
URI string `json:"uri"`
|
||||
// this is URI client should use to talk to this Alertmanager, it might be
|
||||
// same as real or proxied URI
|
||||
PublicURI string `json:"publicURI"`
|
||||
Error string `json:"error"`
|
||||
PublicURI string `json:"publicURI"`
|
||||
Error string `json:"error"`
|
||||
Version string `json:"version"`
|
||||
ClusterMembers []string `json:"clusterMembers"`
|
||||
}
|
||||
|
||||
// AlertmanagerAPICounters returns number of Alertmanager instances in each
|
||||
|
||||
Reference in New Issue
Block a user