feat(backend): improve alertmanager cluster handling

Always use the passed cluster name; generate errors if the cluster is in an unhealthy state.
This commit is contained in:
Łukasz Mierzwa
2020-09-08 17:29:40 +01:00
committed by Łukasz Mierzwa
parent 782d8bf1b3
commit 757f3478cf
4 changed files with 35 additions and 25 deletions

View File

@@ -122,7 +122,7 @@ func getUpstreams() models.AlertmanagerAPISummary {
summary.Instances = append(summary.Instances, u)
summary.Counters.Total++
if u.Error == "" {
if upstream.IsHealthy() {
summary.Counters.Healthy++
} else {
summary.Counters.Failed++

View File

@@ -1464,9 +1464,9 @@ func TestUpstreamStatus(t *testing.T) {
ReadOnly: false,
Headers: map[string]string{},
CORSCredentials: "omit",
Error: "",
Error: "missing cluster peers: ha2",
Version: "0.20.0",
Cluster: "ha1",
Cluster: "Broken HA",
ClusterMembers: []string{"ha1"},
},
{
@@ -1476,15 +1476,14 @@ func TestUpstreamStatus(t *testing.T) {
ReadOnly: true,
Headers: map[string]string{},
CORSCredentials: "omit",
Error: "",
Error: "missing cluster peers: ha1",
Version: "0.19.0",
Cluster: "ha2",
Cluster: "Broken HA",
ClusterMembers: []string{"ha2"},
},
},
Clusters: map[string][]string{
"ha1": {"ha1"},
"ha2": {"ha2"},
"Broken HA": {"ha1"},
},
},
},
@@ -1598,7 +1597,7 @@ func TestUpstreamStatus(t *testing.T) {
ReadOnly: false,
Headers: map[string]string{},
CORSCredentials: "omit",
Error: "",
Error: "missing cluster peers: ha2",
Version: "0.20.0",
Cluster: "ha1",
ClusterMembers: []string{"ha1"},
@@ -1610,7 +1609,7 @@ func TestUpstreamStatus(t *testing.T) {
ReadOnly: true,
Headers: map[string]string{},
CORSCredentials: "omit",
Error: "",
Error: "missing cluster peers: ha1",
Version: "0.19.0",
Cluster: "ha2",
ClusterMembers: []string{"ha2"},
@@ -1742,9 +1741,9 @@ func TestUpstreamStatus(t *testing.T) {
ReadOnly: false,
Headers: map[string]string{},
CORSCredentials: "omit",
Error: "",
Error: "missing cluster peers: ha2",
Version: "0.20.0",
Cluster: "ha1",
Cluster: "Errors",
ClusterMembers: []string{"ha1"},
},
{
@@ -1756,13 +1755,12 @@ func TestUpstreamStatus(t *testing.T) {
CORSCredentials: "omit",
Error: "json: cannot unmarshal array into Go value of type string",
Version: "",
Cluster: "ha2",
Cluster: "Errors",
ClusterMembers: []string{"ha2"},
},
},
Clusters: map[string][]string{
"ha1": {"ha1"},
"ha2": {"ha2"},
"Errors": {"ha1"},
},
},
},

View File

@@ -3,6 +3,7 @@ package alertmanager_test
import (
"fmt"
"os"
"strings"
"testing"
"time"
@@ -230,7 +231,7 @@ func TestClearData(t *testing.T) {
if am.Version() == "" {
t.Errorf("[%s] Got empty version string", am.Name)
}
if am.Error() != "" {
if !strings.HasPrefix(am.Error(), "missing cluster peers:") {
t.Errorf("[%s] Got non-empty error string: %s", am.Name, am.Error())
}
if len(am.Silences()) == 0 {

View File

@@ -425,7 +425,19 @@ func (am *Alertmanager) Error() string {
am.lock.RLock()
defer am.lock.RUnlock()
return am.lastError
if am.lastError != "" {
return am.lastError
}
configPeers := clusterMembersFromConfig(am)
apiPeers := clusterMembersFromAPI(am)
missing, _ := slices.StringSliceDiff(configPeers, apiPeers)
if len(missing) > 0 {
return fmt.Sprintf("missing cluster peers: %s", strings.Join(missing, ", "))
}
return ""
}
// SanitizedURI returns a copy of Alertmanager.URI with password replaced by
@@ -489,18 +501,17 @@ func (am *Alertmanager) ClusterName() string {
var clusterName string
if am.Cluster != "" {
configPeers := clusterMembersFromConfig(am)
apiPeers := clusterMembersFromAPI(am)
missing, extra := slices.StringSliceDiff(configPeers, apiPeers)
if len(missing) == 0 && len(extra) == 0 {
clusterName = am.Cluster
} else {
clusterName = strings.Join(am.ClusterMemberNames(), " | ")
}
clusterName = am.Cluster
} else {
clusterName = strings.Join(am.ClusterMemberNames(), " | ")
}
am.clusterName = clusterName
return clusterName
}
// IsHealthy reports whether this Alertmanager has no recorded last error.
//
// It consults only lastError, read under the instance's read lock, so it
// does not take the cluster peer comparison performed by Error into account.
func (am *Alertmanager) IsHealthy() bool {
	am.lock.RLock()
	healthy := am.lastError == ""
	am.lock.RUnlock()
	return healthy
}