feat(api): generate alert grid per label value

This allows generating multiple independent alert group lists, one per unique value of the label specified by the user.
This way we can have a separate grid per severity or cluster label value.
This commit is contained in:
Łukasz Mierzwa
2020-03-29 16:43:45 +01:00
parent 636af261ca
commit cff62dda2f
5 changed files with 228 additions and 52 deletions

View File

@@ -168,9 +168,7 @@ func sortByStartsAt(i, j int, groups []models.APIAlertGroup, sortReverse bool) b
return groups[i].LatestStartsAt.Before(groups[j].LatestStartsAt)
}
func sortAlertGroups(c *gin.Context, groupsMap map[string]models.APIAlertGroup) []models.APIAlertGroup {
groups := make([]models.APIAlertGroup, 0, len(groupsMap))
func getSortOptions(c *gin.Context) (string, string, string) {
sortOrder, found := c.GetQuery("sortOrder")
if !found || sortOrder == "" {
sortOrder = config.Config.Grid.Sorting.Order
@@ -190,9 +188,11 @@ func sortAlertGroups(c *gin.Context, groupsMap map[string]models.APIAlertGroup)
sortLabel = config.Config.Grid.Sorting.Label
}
for _, g := range groupsMap {
groups = append(groups, g)
}
return sortOrder, sortReverse, sortLabel
}
func sortAlertGroups(c *gin.Context, groups []models.APIAlertGroup) []models.APIAlertGroup {
sortOrder, sortReverse, sortLabel := getSortOptions(c)
switch sortOrder {
case "startsAt":
@@ -239,3 +239,33 @@ func sortAlertGroups(c *gin.Context, groupsMap map[string]models.APIAlertGroup)
return groups
}
// sortGrids converts gridsMap into a slice of grids ordered by label value.
//
// The alert groups inside every grid are sorted with sortAlertGroups, using
// the request context c. Grids are then ordered by their label value, after
// passing it through resolveLabelValue for gridLabel, using natural ordering.
// A grid whose resolved value is empty is placed last; when gridSortReverse
// is true the whole ordering is inverted, so that grid is placed first.
func sortGrids(c *gin.Context, gridLabel string, gridsMap map[string]models.APIGrid, gridSortReverse bool) []models.APIGrid {
	grids := make([]models.APIGrid, 0, len(gridsMap))
	for _, g := range gridsMap {
		g.AlertGroups = sortAlertGroups(c, g.AlertGroups)
		grids = append(grids, g)
	}

	sort.Slice(grids, func(i, j int) bool {
		vi := resolveLabelValue(gridLabel, grids[i].LabelValue)
		vj := resolveLabelValue(gridLabel, grids[j].LabelValue)
		switch {
		case vi == vj:
			// Equal keys must never compare as "less" in either direction,
			// otherwise the comparator is not a valid ordering for sort.Slice.
			// NOTE(review): map keys are unique, but distinct keys may resolve
			// to the same value via resolveLabelValue - confirm against its
			// definition.
			return false
		case vi == "":
			// first label is missing
			return gridSortReverse
		case vj == "":
			// second label is missing
			return !gridSortReverse
		case gridSortReverse:
			// finally return grids sorted by label, in descending order
			return sortorder.NaturalLess(vj, vi)
		default:
			// finally return grids sorted by label, in ascending order
			return sortorder.NaturalLess(vi, vj)
		}
	})

	return grids
}

View File

@@ -1005,13 +1005,13 @@ func TestVerifyAllGroups(t *testing.T) {
t.Errorf("Failed to unmarshal response: %s", err)
}
if len(ur.AlertGroups) != len(groupTests) {
if len(ur.Grids[0].AlertGroups) != len(groupTests) {
t.Errorf("[%s] Got %d alert(s) in response, expected %d",
version, len(ur.AlertGroups), len(groupTests))
version, len(ur.Grids[0].AlertGroups), len(groupTests))
}
for _, testCase := range groupTests {
groupFound := false
for _, group := range ur.AlertGroups {
for _, group := range ur.Grids[0].AlertGroups {
if compareAlertGroups(testCase, group) {
groupFound = true
testAlertGroup(version, t, testCase, group)
@@ -1221,7 +1221,7 @@ func TestSortOrder(t *testing.T) {
}
values := []string{}
for _, ag := range ur.AlertGroups {
for _, ag := range ur.Grids[0].AlertGroups {
v := ag.Labels[testCase.expectedLabel]
if v == "" {
v = ag.Shared.Labels[testCase.expectedLabel]

View File

@@ -203,11 +203,11 @@ func alerts(c *gin.Context) {
return
}
// get filters
gridLabel, _ := c.GetQuery("gridLabel")
matchFilters, validFilters := getFiltersFromQuery(c.QueryArray("q"))
// set pointers for data store objects, need a lock until end of view is reached
alerts := map[string]models.APIAlertGroup{}
grids := map[string]models.APIGrid{}
colors := models.LabelsColorMap{}
counters := map[string]map[string]int{}
@@ -227,18 +227,7 @@ func alerts(c *gin.Context) {
var matches int
for _, ag := range dedupedAlerts {
agCopy := models.AlertGroup{
ID: ag.ID,
Receiver: ag.Receiver,
Labels: ag.Labels,
LatestStartsAt: ag.LatestStartsAt,
Alerts: []models.Alert{},
AlertmanagerCount: map[string]int{},
StateCount: map[string]int{},
}
for _, s := range models.AlertStateList {
agCopy.StateCount[s] = 0
}
perGridAlertGroup := map[string]*models.AlertGroup{}
for _, alert := range ag.Alerts {
alert := alert // scopelint pin
@@ -258,6 +247,25 @@ func alerts(c *gin.Context) {
// we update it here rather than in dedup since here we can apply it
// only for alerts left after filtering
alert.UpdateFingerprints()
alertGridLabelValue := alert.Labels[gridLabel]
agCopy, found := perGridAlertGroup[alertGridLabelValue]
if !found {
agCopy = &models.AlertGroup{
ID: ag.ID,
Receiver: ag.Receiver,
Labels: ag.Labels,
LatestStartsAt: ag.LatestStartsAt,
Alerts: []models.Alert{},
AlertmanagerCount: map[string]int{},
StateCount: map[string]int{},
}
for _, s := range models.AlertStateList {
agCopy.StateCount[s] = 0
}
perGridAlertGroup[alertGridLabelValue] = agCopy
}
agCopy.Alerts = append(agCopy.Alerts, alert)
countLabel(counters, "@state", alert.State)
@@ -307,32 +315,44 @@ func alerts(c *gin.Context) {
}
}
if len(agCopy.Alerts) > 0 {
for i, alert := range agCopy.Alerts {
if alert.IsSilenced() {
for j, am := range alert.Alertmanager {
key := amNameToCluster[am.Name]
// cluster might be wrong when collecting (races between fetches)
// update is with current cluster discovery state
agCopy.Alerts[i].Alertmanager[j].Cluster = key
for _, silence := range am.Silences {
_, found := silences[key][silence.ID]
if !found {
silences[key][silence.ID] = *silence
for gridLabelValue, ag := range perGridAlertGroup {
if len(ag.Alerts) > 0 {
for i, alert := range ag.Alerts {
if alert.IsSilenced() {
for j, am := range alert.Alertmanager {
key := amNameToCluster[am.Name]
// cluster might be wrong when collecting (races between fetches)
// update it with the current cluster discovery state
ag.Alerts[i].Alertmanager[j].Cluster = key
for _, silence := range am.Silences {
_, found := silences[key][silence.ID]
if !found {
silences[key][silence.ID] = *silence
}
}
}
}
}
}
sort.Sort(agCopy.Alerts)
agCopy.LatestStartsAt = agCopy.FindLatestStartsAt()
agCopy.Hash = agCopy.ContentFingerprint()
apiAG := models.APIAlertGroup{AlertGroup: agCopy}
apiAG.DedupSharedMaps()
alerts[agCopy.ID] = apiAG
resp.TotalAlerts += len(agCopy.Alerts)
}
sort.Sort(ag.Alerts)
ag.LatestStartsAt = ag.FindLatestStartsAt()
ag.Hash = ag.ContentFingerprint()
apiAG := models.APIAlertGroup{AlertGroup: *ag}
apiAG.DedupSharedMaps()
resp.TotalAlerts += len(ag.Alerts)
grid, found := grids[gridLabelValue]
if !found {
grid = models.APIGrid{
LabelName: gridLabel,
LabelValue: gridLabelValue,
AlertGroups: []models.APIAlertGroup{},
}
grids[gridLabelValue] = grid
}
grid.AlertGroups = append(grid.AlertGroups, apiAG)
grids[gridLabelValue] = grid
}
}
}
for _, filter := range matchFilters {
@@ -341,7 +361,11 @@ func alerts(c *gin.Context) {
}
}
resp.AlertGroups = sortAlertGroups(c, alerts)
//resp.AlertGroups = sortAlertGroups(c, alerts)
v, _ := c.GetQuery("gridSortReverse")
gridSortReverse := v == "1"
resp.Grids = sortGrids(c, gridLabel, grids, gridSortReverse)
resp.Silences = silences
resp.Colors = colors
resp.Counters = countersToLabelStats(counters)

View File

@@ -145,8 +145,8 @@ func TestAlerts(t *testing.T) {
if len(ur.Colors) != 1 {
t.Errorf("[%s] Got %d color(s) in response, expected %d", version, len(ur.Colors), 1)
}
if len(ur.AlertGroups) != 1 {
t.Errorf("[%s] Got %d alert(s) in response, expected %d", version, len(ur.AlertGroups), 1)
if len(ur.Grids[0].AlertGroups) != 1 {
t.Errorf("[%s] Got %d alert group(s) in response, expected %d", version, len(ur.Grids[0].AlertGroups), 1)
}
if ur.Version == "" {
t.Errorf("[%s] Empty version in response", version)
@@ -172,7 +172,7 @@ func TestAlerts(t *testing.T) {
if len(ur.Counters) != 6 {
t.Errorf("[%s] Invalid number of counters in response (%d): %v", version, len(ur.Counters), ur.Counters)
}
for _, ag := range ur.AlertGroups {
for _, ag := range ur.Grids[0].AlertGroups {
for _, a := range ag.Alerts {
linkCount := 0
for _, annotation := range a.Annotations {
@@ -192,6 +192,122 @@ func TestAlerts(t *testing.T) {
}
}
// TestGrids requests /alerts.json with different gridLabel values and query
// options, then verifies that the response contains the expected grids, in
// the expected order, each with the expected number of alert groups.
func TestGrids(t *testing.T) {
	// wantGrid describes a single grid expected in the response.
	type wantGrid struct {
		labelValue      string
		alertGroupCount int
	}
	// gridCase is one request together with the grids it should produce.
	type gridCase struct {
		gridLabel    string
		requestQuery string
		grids        []wantGrid
	}

	cases := []gridCase{
		{
			gridLabel:    "cluster",
			requestQuery: "",
			grids: []wantGrid{
				{labelValue: "dev", alertGroupCount: 4},
				{labelValue: "prod", alertGroupCount: 4},
				{labelValue: "staging", alertGroupCount: 4},
			},
		},
		{
			gridLabel:    "cluster",
			requestQuery: "&gridSortReverse=1",
			grids: []wantGrid{
				{labelValue: "staging", alertGroupCount: 4},
				{labelValue: "prod", alertGroupCount: 4},
				{labelValue: "dev", alertGroupCount: 4},
			},
		},
		{
			gridLabel:    "foo",
			requestQuery: "",
			grids: []wantGrid{
				{labelValue: "", alertGroupCount: 10},
			},
		},
		{
			gridLabel:    "",
			requestQuery: "",
			grids: []wantGrid{
				{labelValue: "", alertGroupCount: 10},
			},
		},
		{
			gridLabel:    "",
			requestQuery: "&q=foo=bar",
			grids:        []wantGrid{},
		},
		{
			gridLabel:    "disk",
			requestQuery: "",
			grids: []wantGrid{
				{labelValue: "sda", alertGroupCount: 2},
				{labelValue: "", alertGroupCount: 8},
			},
		},
		{
			gridLabel:    "disk",
			requestQuery: "&gridSortReverse=1",
			grids: []wantGrid{
				{labelValue: "", alertGroupCount: 8},
				{labelValue: "sda", alertGroupCount: 2},
			},
		},
		{
			gridLabel:    "disk",
			requestQuery: "&q=alertname=Free_Disk_Space_Too_Low",
			grids: []wantGrid{
				{labelValue: "sda", alertGroupCount: 2},
			},
		},
	}

	mockConfig()
	for _, version := range mock.ListAllMocks() {
		for _, tc := range cases {
			name := fmt.Sprintf("version=%q gridLabel=%q query=%q", version, tc.gridLabel, tc.requestQuery)
			t.Run(name, func(t *testing.T) {
				mockAlerts(version)
				engine := ginTestEngine()
				target := "/alerts.json?gridLabel=" + tc.gridLabel + tc.requestQuery

				// re-run a few times to test the cache
				for attempt := 0; attempt < 3; attempt++ {
					request := httptest.NewRequest("GET", target, nil)
					recorder := httptest.NewRecorder()
					engine.ServeHTTP(recorder, request)
					if recorder.Code != http.StatusOK {
						t.Errorf("GET /alerts.json returned status %d", recorder.Code)
					}

					var payload models.AlertsResponse
					if err := json.Unmarshal(recorder.Body.Bytes(), &payload); err != nil {
						t.Errorf("Failed to unmarshal response: %s", err)
					}

					if len(payload.Grids) != len(tc.grids) {
						t.Errorf("Expected %d grids, got %d", len(tc.grids), len(payload.Grids))
						continue
					}
					for pos, want := range tc.grids {
						got := payload.Grids[pos]
						if got.LabelName != tc.gridLabel {
							t.Errorf("Got wrong labelName for grid %d: %q, expected %q", pos, got.LabelName, tc.gridLabel)
						}
						if got.LabelValue != want.labelValue {
							t.Errorf("Got wrong labelValue for grid %d: %q, expected %q", pos, got.LabelValue, want.labelValue)
						}
						if len(got.AlertGroups) != want.alertGroupCount {
							t.Errorf("Got wrong alert group count for grid %d: %d, expected %d", pos, len(got.AlertGroups), want.alertGroupCount)
						}
					}
				}
			})
		}
	}
}
func TestValidateAllAlerts(t *testing.T) {
mockConfig()
for _, version := range mock.ListAllMocks() {
@@ -212,7 +328,7 @@ func TestValidateAllAlerts(t *testing.T) {
if err != nil {
t.Errorf("Failed to unmarshal response: %s", err)
}
for _, ag := range ur.AlertGroups {
for _, ag := range ur.Grids[0].AlertGroups {
for _, a := range ag.Alerts {
if !slices.StringInSlice(models.AlertStateList, a.State) {
t.Errorf("Invalid alert status '%s', not in %v", a.State, models.AlertStateList)

View File

@@ -300,6 +300,12 @@ type AuthenticationInfo struct {
Username string `json:"username"`
}
// APIGrid is a single alert grid: the list of alert groups collected for one
// value of the label that alerts are being split by.
type APIGrid struct {
// LabelName is the name of the label used to split alerts into grids.
LabelName string `json:"labelName"`
// LabelValue is the value of that label for the alerts in this grid.
LabelValue string `json:"labelValue"`
// AlertGroups holds the alert groups assigned to this grid.
AlertGroups []APIAlertGroup `json:"alertGroups"`
}
// AlertsResponse is the structure of JSON response UI will use to get alert data
type AlertsResponse struct {
Status string `json:"status"`
@@ -307,7 +313,7 @@ type AlertsResponse struct {
Version string `json:"version"`
Upstreams AlertmanagerAPISummary `json:"upstreams"`
Silences map[string]map[string]Silence `json:"silences"`
AlertGroups []APIAlertGroup `json:"groups"`
Grids []APIGrid `json:"grids"`
TotalAlerts int `json:"totalAlerts"`
Colors LabelsColorMap `json:"colors"`
Filters []Filter `json:"filters"`