mirror of
https://github.com/prymitive/karma
synced 2026-02-13 20:59:53 +00:00
feat(api): generate alert grid per label value
This allows generating multiple independent alert group lists, one per unique value of the label specified by the user. This way we can have a separate grid per severity or cluster label value.
This commit is contained in:
@@ -168,9 +168,7 @@ func sortByStartsAt(i, j int, groups []models.APIAlertGroup, sortReverse bool) b
|
||||
return groups[i].LatestStartsAt.Before(groups[j].LatestStartsAt)
|
||||
}
|
||||
|
||||
func sortAlertGroups(c *gin.Context, groupsMap map[string]models.APIAlertGroup) []models.APIAlertGroup {
|
||||
groups := make([]models.APIAlertGroup, 0, len(groupsMap))
|
||||
|
||||
func getSortOptions(c *gin.Context) (string, string, string) {
|
||||
sortOrder, found := c.GetQuery("sortOrder")
|
||||
if !found || sortOrder == "" {
|
||||
sortOrder = config.Config.Grid.Sorting.Order
|
||||
@@ -190,9 +188,11 @@ func sortAlertGroups(c *gin.Context, groupsMap map[string]models.APIAlertGroup)
|
||||
sortLabel = config.Config.Grid.Sorting.Label
|
||||
}
|
||||
|
||||
for _, g := range groupsMap {
|
||||
groups = append(groups, g)
|
||||
}
|
||||
return sortOrder, sortReverse, sortLabel
|
||||
}
|
||||
|
||||
func sortAlertGroups(c *gin.Context, groups []models.APIAlertGroup) []models.APIAlertGroup {
|
||||
sortOrder, sortReverse, sortLabel := getSortOptions(c)
|
||||
|
||||
switch sortOrder {
|
||||
case "startsAt":
|
||||
@@ -239,3 +239,33 @@ func sortAlertGroups(c *gin.Context, groupsMap map[string]models.APIAlertGroup)
|
||||
|
||||
return groups
|
||||
}
|
||||
|
||||
func sortGrids(c *gin.Context, gridLabel string, gridsMap map[string]models.APIGrid, gridSortReverse bool) []models.APIGrid {
|
||||
grids := make([]models.APIGrid, 0, len(gridsMap))
|
||||
|
||||
for _, g := range gridsMap {
|
||||
g.AlertGroups = sortAlertGroups(c, g.AlertGroups)
|
||||
grids = append(grids, g)
|
||||
}
|
||||
|
||||
sort.Slice(grids, func(i, j int) bool {
|
||||
vi := resolveLabelValue(gridLabel, grids[i].LabelValue)
|
||||
vj := resolveLabelValue(gridLabel, grids[j].LabelValue)
|
||||
|
||||
if vi == "" {
|
||||
// first label is missing
|
||||
return gridSortReverse
|
||||
}
|
||||
if vj == "" {
|
||||
// second label is missing
|
||||
return !gridSortReverse
|
||||
}
|
||||
// finnally return groups sorted by label
|
||||
if gridSortReverse {
|
||||
return !sortorder.NaturalLess(vi, vj)
|
||||
}
|
||||
return sortorder.NaturalLess(vi, vj)
|
||||
})
|
||||
|
||||
return grids
|
||||
}
|
||||
|
||||
@@ -1005,13 +1005,13 @@ func TestVerifyAllGroups(t *testing.T) {
|
||||
t.Errorf("Failed to unmarshal response: %s", err)
|
||||
}
|
||||
|
||||
if len(ur.AlertGroups) != len(groupTests) {
|
||||
if len(ur.Grids[0].AlertGroups) != len(groupTests) {
|
||||
t.Errorf("[%s] Got %d alert(s) in response, expected %d",
|
||||
version, len(ur.AlertGroups), len(groupTests))
|
||||
version, len(ur.Grids[0].AlertGroups), len(groupTests))
|
||||
}
|
||||
for _, testCase := range groupTests {
|
||||
groupFound := false
|
||||
for _, group := range ur.AlertGroups {
|
||||
for _, group := range ur.Grids[0].AlertGroups {
|
||||
if compareAlertGroups(testCase, group) {
|
||||
groupFound = true
|
||||
testAlertGroup(version, t, testCase, group)
|
||||
@@ -1221,7 +1221,7 @@ func TestSortOrder(t *testing.T) {
|
||||
}
|
||||
|
||||
values := []string{}
|
||||
for _, ag := range ur.AlertGroups {
|
||||
for _, ag := range ur.Grids[0].AlertGroups {
|
||||
v := ag.Labels[testCase.expectedLabel]
|
||||
if v == "" {
|
||||
v = ag.Shared.Labels[testCase.expectedLabel]
|
||||
|
||||
@@ -203,11 +203,11 @@ func alerts(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// get filters
|
||||
gridLabel, _ := c.GetQuery("gridLabel")
|
||||
|
||||
matchFilters, validFilters := getFiltersFromQuery(c.QueryArray("q"))
|
||||
|
||||
// set pointers for data store objects, need a lock until end of view is reached
|
||||
alerts := map[string]models.APIAlertGroup{}
|
||||
grids := map[string]models.APIGrid{}
|
||||
colors := models.LabelsColorMap{}
|
||||
counters := map[string]map[string]int{}
|
||||
|
||||
@@ -227,18 +227,7 @@ func alerts(c *gin.Context) {
|
||||
|
||||
var matches int
|
||||
for _, ag := range dedupedAlerts {
|
||||
agCopy := models.AlertGroup{
|
||||
ID: ag.ID,
|
||||
Receiver: ag.Receiver,
|
||||
Labels: ag.Labels,
|
||||
LatestStartsAt: ag.LatestStartsAt,
|
||||
Alerts: []models.Alert{},
|
||||
AlertmanagerCount: map[string]int{},
|
||||
StateCount: map[string]int{},
|
||||
}
|
||||
for _, s := range models.AlertStateList {
|
||||
agCopy.StateCount[s] = 0
|
||||
}
|
||||
perGridAlertGroup := map[string]*models.AlertGroup{}
|
||||
|
||||
for _, alert := range ag.Alerts {
|
||||
alert := alert // scopelint pin
|
||||
@@ -258,6 +247,25 @@ func alerts(c *gin.Context) {
|
||||
// we update it here rather than in dedup since here we can apply it
|
||||
// only for alerts left after filtering
|
||||
alert.UpdateFingerprints()
|
||||
|
||||
alertGridLabelValue := alert.Labels[gridLabel]
|
||||
agCopy, found := perGridAlertGroup[alertGridLabelValue]
|
||||
if !found {
|
||||
agCopy = &models.AlertGroup{
|
||||
ID: ag.ID,
|
||||
Receiver: ag.Receiver,
|
||||
Labels: ag.Labels,
|
||||
LatestStartsAt: ag.LatestStartsAt,
|
||||
Alerts: []models.Alert{},
|
||||
AlertmanagerCount: map[string]int{},
|
||||
StateCount: map[string]int{},
|
||||
}
|
||||
for _, s := range models.AlertStateList {
|
||||
agCopy.StateCount[s] = 0
|
||||
}
|
||||
perGridAlertGroup[alertGridLabelValue] = agCopy
|
||||
}
|
||||
|
||||
agCopy.Alerts = append(agCopy.Alerts, alert)
|
||||
|
||||
countLabel(counters, "@state", alert.State)
|
||||
@@ -307,32 +315,44 @@ func alerts(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
if len(agCopy.Alerts) > 0 {
|
||||
for i, alert := range agCopy.Alerts {
|
||||
if alert.IsSilenced() {
|
||||
for j, am := range alert.Alertmanager {
|
||||
key := amNameToCluster[am.Name]
|
||||
// cluster might be wrong when collecting (races between fetches)
|
||||
// update it with current cluster discovery state
|
||||
agCopy.Alerts[i].Alertmanager[j].Cluster = key
|
||||
for _, silence := range am.Silences {
|
||||
_, found := silences[key][silence.ID]
|
||||
if !found {
|
||||
silences[key][silence.ID] = *silence
|
||||
for gridLabelValue, ag := range perGridAlertGroup {
|
||||
if len(ag.Alerts) > 0 {
|
||||
for i, alert := range ag.Alerts {
|
||||
if alert.IsSilenced() {
|
||||
for j, am := range alert.Alertmanager {
|
||||
key := amNameToCluster[am.Name]
|
||||
// cluster might be wrong when collecting (races between fetches)
|
||||
// update it with current cluster discovery state
|
||||
ag.Alerts[i].Alertmanager[j].Cluster = key
|
||||
for _, silence := range am.Silences {
|
||||
_, found := silences[key][silence.ID]
|
||||
if !found {
|
||||
silences[key][silence.ID] = *silence
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sort.Sort(agCopy.Alerts)
|
||||
agCopy.LatestStartsAt = agCopy.FindLatestStartsAt()
|
||||
agCopy.Hash = agCopy.ContentFingerprint()
|
||||
apiAG := models.APIAlertGroup{AlertGroup: agCopy}
|
||||
apiAG.DedupSharedMaps()
|
||||
alerts[agCopy.ID] = apiAG
|
||||
resp.TotalAlerts += len(agCopy.Alerts)
|
||||
}
|
||||
sort.Sort(ag.Alerts)
|
||||
ag.LatestStartsAt = ag.FindLatestStartsAt()
|
||||
ag.Hash = ag.ContentFingerprint()
|
||||
apiAG := models.APIAlertGroup{AlertGroup: *ag}
|
||||
apiAG.DedupSharedMaps()
|
||||
resp.TotalAlerts += len(ag.Alerts)
|
||||
|
||||
grid, found := grids[gridLabelValue]
|
||||
if !found {
|
||||
grid = models.APIGrid{
|
||||
LabelName: gridLabel,
|
||||
LabelValue: gridLabelValue,
|
||||
AlertGroups: []models.APIAlertGroup{},
|
||||
}
|
||||
grids[gridLabelValue] = grid
|
||||
}
|
||||
grid.AlertGroups = append(grid.AlertGroups, apiAG)
|
||||
grids[gridLabelValue] = grid
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, filter := range matchFilters {
|
||||
@@ -341,7 +361,11 @@ func alerts(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
resp.AlertGroups = sortAlertGroups(c, alerts)
|
||||
//resp.AlertGroups = sortAlertGroups(c, alerts)
|
||||
v, _ := c.GetQuery("gridSortReverse")
|
||||
gridSortReverse := v == "1"
|
||||
|
||||
resp.Grids = sortGrids(c, gridLabel, grids, gridSortReverse)
|
||||
resp.Silences = silences
|
||||
resp.Colors = colors
|
||||
resp.Counters = countersToLabelStats(counters)
|
||||
|
||||
@@ -145,8 +145,8 @@ func TestAlerts(t *testing.T) {
|
||||
if len(ur.Colors) != 1 {
|
||||
t.Errorf("[%s] Got %d color(s) in response, expected %d", version, len(ur.Colors), 1)
|
||||
}
|
||||
if len(ur.AlertGroups) != 1 {
|
||||
t.Errorf("[%s] Got %d alert(s) in response, expected %d", version, len(ur.AlertGroups), 1)
|
||||
if len(ur.Grids[0].AlertGroups) != 1 {
|
||||
t.Errorf("[%s] Got %d alert group(s) in response, expected %d", version, len(ur.Grids[0].AlertGroups), 1)
|
||||
}
|
||||
if ur.Version == "" {
|
||||
t.Errorf("[%s] Empty version in response", version)
|
||||
@@ -172,7 +172,7 @@ func TestAlerts(t *testing.T) {
|
||||
if len(ur.Counters) != 6 {
|
||||
t.Errorf("[%s] Invalid number of counters in response (%d): %v", version, len(ur.Counters), ur.Counters)
|
||||
}
|
||||
for _, ag := range ur.AlertGroups {
|
||||
for _, ag := range ur.Grids[0].AlertGroups {
|
||||
for _, a := range ag.Alerts {
|
||||
linkCount := 0
|
||||
for _, annotation := range a.Annotations {
|
||||
@@ -192,6 +192,122 @@ func TestAlerts(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGrids(t *testing.T) {
|
||||
type testCaseGridT struct {
|
||||
labelValue string
|
||||
alertGroupCount int
|
||||
}
|
||||
type testCaseT struct {
|
||||
gridLabel string
|
||||
requestQuery string
|
||||
grids []testCaseGridT
|
||||
}
|
||||
testCases := []testCaseT{
|
||||
{
|
||||
gridLabel: "cluster",
|
||||
requestQuery: "",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "dev", alertGroupCount: 4},
|
||||
{labelValue: "prod", alertGroupCount: 4},
|
||||
{labelValue: "staging", alertGroupCount: 4},
|
||||
},
|
||||
},
|
||||
{
|
||||
gridLabel: "cluster",
|
||||
requestQuery: "&gridSortReverse=1",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "staging", alertGroupCount: 4},
|
||||
{labelValue: "prod", alertGroupCount: 4},
|
||||
{labelValue: "dev", alertGroupCount: 4},
|
||||
},
|
||||
},
|
||||
{
|
||||
gridLabel: "foo",
|
||||
requestQuery: "",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "", alertGroupCount: 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
gridLabel: "",
|
||||
requestQuery: "",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "", alertGroupCount: 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
gridLabel: "",
|
||||
requestQuery: "&q=foo=bar",
|
||||
grids: []testCaseGridT{},
|
||||
},
|
||||
{
|
||||
gridLabel: "disk",
|
||||
requestQuery: "",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "sda", alertGroupCount: 2},
|
||||
{labelValue: "", alertGroupCount: 8},
|
||||
},
|
||||
},
|
||||
{
|
||||
gridLabel: "disk",
|
||||
requestQuery: "&gridSortReverse=1",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "", alertGroupCount: 8},
|
||||
{labelValue: "sda", alertGroupCount: 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
gridLabel: "disk",
|
||||
requestQuery: "&q=alertname=Free_Disk_Space_Too_Low",
|
||||
grids: []testCaseGridT{
|
||||
{labelValue: "sda", alertGroupCount: 2},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
mockConfig()
|
||||
for _, version := range mock.ListAllMocks() {
|
||||
for _, testCase := range testCases {
|
||||
t.Run(fmt.Sprintf("version=%q gridLabel=%q query=%q", version, testCase.gridLabel, testCase.requestQuery), func(t *testing.T) {
|
||||
mockAlerts(version)
|
||||
r := ginTestEngine()
|
||||
// re-run a few times to test the cache
|
||||
for i := 1; i <= 3; i++ {
|
||||
req := httptest.NewRequest("GET", "/alerts.json?gridLabel="+testCase.gridLabel+testCase.requestQuery, nil)
|
||||
resp := httptest.NewRecorder()
|
||||
r.ServeHTTP(resp, req)
|
||||
if resp.Code != http.StatusOK {
|
||||
t.Errorf("GET /alerts.json returned status %d", resp.Code)
|
||||
}
|
||||
|
||||
ur := models.AlertsResponse{}
|
||||
err := json.Unmarshal(resp.Body.Bytes(), &ur)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to unmarshal response: %s", err)
|
||||
}
|
||||
|
||||
if len(ur.Grids) != len(testCase.grids) {
|
||||
t.Errorf("Expected %d grids, got %d", len(testCase.grids), len(ur.Grids))
|
||||
} else {
|
||||
for index, expectedGrid := range testCase.grids {
|
||||
grid := ur.Grids[index]
|
||||
if grid.LabelName != testCase.gridLabel {
|
||||
t.Errorf("Got wrong labelName for grid %d: %q, expected %q", index, grid.LabelName, testCase.gridLabel)
|
||||
}
|
||||
if grid.LabelValue != expectedGrid.labelValue {
|
||||
t.Errorf("Got wrong labelValue for grid %d: %q, expected %q", index, grid.LabelValue, expectedGrid.labelValue)
|
||||
}
|
||||
if len(grid.AlertGroups) != expectedGrid.alertGroupCount {
|
||||
t.Errorf("Got wrong alert group count for grid %d: %d, expected %d", index, len(grid.AlertGroups), expectedGrid.alertGroupCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateAllAlerts(t *testing.T) {
|
||||
mockConfig()
|
||||
for _, version := range mock.ListAllMocks() {
|
||||
@@ -212,7 +328,7 @@ func TestValidateAllAlerts(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Errorf("Failed to unmarshal response: %s", err)
|
||||
}
|
||||
for _, ag := range ur.AlertGroups {
|
||||
for _, ag := range ur.Grids[0].AlertGroups {
|
||||
for _, a := range ag.Alerts {
|
||||
if !slices.StringInSlice(models.AlertStateList, a.State) {
|
||||
t.Errorf("Invalid alert status '%s', not in %v", a.State, models.AlertStateList)
|
||||
|
||||
@@ -300,6 +300,12 @@ type AuthenticationInfo struct {
|
||||
Username string `json:"username"`
|
||||
}
|
||||
|
||||
// APIGrid is a list of alert groups that share a single value of the label
// selected for splitting alerts into grids. LabelName is the name of that
// label and LabelValue is the value common to this grid; AlertGroups holds
// the alert groups assigned to it.
type APIGrid struct {
|
||||
LabelName string `json:"labelName"`
|
||||
LabelValue string `json:"labelValue"`
|
||||
AlertGroups []APIAlertGroup `json:"alertGroups"`
|
||||
}
|
||||
|
||||
// AlertsResponse is the structure of JSON response UI will use to get alert data
|
||||
type AlertsResponse struct {
|
||||
Status string `json:"status"`
|
||||
@@ -307,7 +313,7 @@ type AlertsResponse struct {
|
||||
Version string `json:"version"`
|
||||
Upstreams AlertmanagerAPISummary `json:"upstreams"`
|
||||
Silences map[string]map[string]Silence `json:"silences"`
|
||||
AlertGroups []APIAlertGroup `json:"groups"`
|
||||
Grids []APIGrid `json:"grids"`
|
||||
TotalAlerts int `json:"totalAlerts"`
|
||||
Colors LabelsColorMap `json:"colors"`
|
||||
Filters []Filter `json:"filters"`
|
||||
|
||||
Reference in New Issue
Block a user