From 81ce5d3098423cc26a94784dcb9036e94b3a9d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Mierzwa?= Date: Wed, 5 Jul 2017 23:30:06 -0700 Subject: [PATCH] Speed up alert fingerprint generation Dynamic fingerprints made the code much slower, pprof shows they are responsible for ~70% of all cpu usage for any API call. To make it worse they are applied to all alerts, since dedup layer doesn't know which alerts will be filtered later, it operates on all of them. This PR will: 1. add benchmarks to so it's easier to track performance 2. Keep current methods for accessing fingerprints, but use precomputed static fields in those 3. Refactor Alert methods to use pointers, so we're not working on a copy Benchmark timing went down from ~4000ns to 0.4ns for fingerprint calls and API response times from 1.3s (for my test sample) to 0.2s, which puts it back to the same level as before moving fingerprints to be dynamic. I still don't like this code much, it's all over the place, but I don't have a good idea how to better structure this, let's hope I'll be wiser in the future. --- Makefile | 2 +- alertmanager/models.go | 2 ++ models/alert.go | 25 +++++++++++++++------ models/alert_test.go | 47 +++++++++++++++++++++++++++++++++++++++ models/alertgroup_test.go | 9 ++++++++ views.go | 5 +++++ 6 files changed, 82 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 0567c3788..b7288d00a 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ endif .PHONY: test test: lint bindata_assetfs.go - go test -cover `go list ./... | grep -v /vendor/` + go test -bench=. -cover `go list ./... | grep -v /vendor/` .build/vendor.ok: go get -u github.com/kardianos/govendor diff --git a/alertmanager/models.go b/alertmanager/models.go index 19b43d6bb..322a927fa 100644 --- a/alertmanager/models.go +++ b/alertmanager/models.go @@ -168,6 +168,8 @@ func (am *Alertmanager) pullAlerts(version string) error { for k, v := range alert.Labels { transform.ColorLabel(colors, k, v) } + + alert.UpdateFingerprints() alerts = append(alerts, alert) // update internal metrics diff --git a/models/alert.go b/models/alert.go index 30216979b..0d3573196 100644 --- a/models/alert.go +++ b/models/alert.go @@ -45,30 +45,41 @@ type Alert struct { Alertmanager []AlertmanagerInstance `json:"alertmanager"` Receiver string `json:"receiver"` Links map[string]string `json:"links"` + // fingerprints are precomputed for speed + labelsFP string `hash:"-"` + contentFP string `hash:"-"` +} + +// UpdateFingerprints will generate a new set of fingerprints for this alert +// it should be called after modifying any field that isn't tagged with hash:"-" +func (a *Alert) UpdateFingerprints() { + a.labelsFP = fmt.Sprintf("%x", structhash.Sha1(a.Labels, 1)) + a.contentFP = fmt.Sprintf("%x", structhash.Sha1(a, 1)) } // LabelsFingerprint is a checksum computed only from labels which should be // unique for every alert -func (a Alert) LabelsFingerprint() string { - return fmt.Sprintf("%x", structhash.Sha1(a.Labels, 1)) +func (a *Alert) LabelsFingerprint() string { + return a.labelsFP } // ContentFingerprint is a checksum computed from entire alert object -func (a Alert) ContentFingerprint() string { - return fmt.Sprintf("%x", structhash.Sha1(a, 1)) +// except some blacklisted fields tagged with hash:"-" +func (a *Alert) ContentFingerprint() string { + return a.contentFP } // IsSilenced will return true if alert should be considered silenced -func (a Alert) IsSilenced() bool { +func (a *Alert) IsSilenced() bool { return (a.State == AlertStateSuppressed && len(a.SilencedBy) > 0) } // IsInhibited will return true if alert should be considered silenced -func (a Alert) IsInhibited() bool { +func (a *Alert) IsInhibited() bool { return (a.State == AlertStateSuppressed && len(a.InhibitedBy) > 0) } // IsActive will return true if alert is not suppressed in any way -func (a Alert) IsActive() bool { +func (a *Alert) IsActive() bool { return (a.State == AlertStateActive) } diff --git a/models/alert_test.go b/models/alert_test.go index ce05bcb44..e9c8ac0da 100644 --- a/models/alert_test.go +++ b/models/alert_test.go @@ -2,6 +2,7 @@ package models_test import ( "testing" + "time" "github.com/cloudflare/unsee/models" ) @@ -52,3 +53,49 @@ func TestAlertState(t *testing.T) { } } } + +func BenchmarkLabelsFingerprint(b *testing.B) { + alert := models.Alert{ + Labels: map[string]string{ + "foo1": "bar1", + "foo1bar1": "545jjjssd", + "foo1xxxx": "bdjjs88ff", + "agdfdfd": "bar1", + "fossdsf3o1": "bar11111", + "fdfdgfdgoo1": "bar1", + }, + } + for n := 0; n < b.N; n++ { + alert.LabelsFingerprint() + } +} + +func BenchmarkLabelsContent(b *testing.B) { + alert := models.Alert{ + Annotations: map[string]string{ + "foo": "bar", + "abc": "Neque porro quisquam est qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit...", + }, + Labels: map[string]string{ + "foo1": "bar1", + "foo1bar1": "545jjjssd", + "foo1xxxx": "bdjjs88ff", + "agdfdfd": "bar1", + "fossdsf3o1": "bar11111", + "fdfdgfdgoo1": "bar1", + }, + State: models.AlertStateActive, + StartsAt: time.Date(2015, time.March, 10, 0, 0, 0, 0, time.UTC), + Alertmanager: []models.AlertmanagerInstance{ + models.AlertmanagerInstance{ + Name: "default", + URI: "http://localhost", + State: models.AlertStateActive, + }, + }, + } + alert.UpdateFingerprints() + for n := 0; n < b.N; n++ { + alert.LabelsFingerprint() + } +} diff --git a/models/alertgroup_test.go b/models/alertgroup_test.go index 1de912f9b..8a2edd868 100644 --- a/models/alertgroup_test.go +++ b/models/alertgroup_test.go @@ -47,6 +47,7 @@ var alertListSortTests = []alertListSortTest{ func TestUnseeAlertListSort(t *testing.T) { al := models.AlertList{} for _, testCase := range alertListSortTests { + testCase.alert.UpdateFingerprints() al = append(al, testCase.alert) } @@ -56,6 +57,7 @@ func TestUnseeAlertListSort(t *testing.T) { for i := 1; i <= iterations; i++ { sort.Sort(al) for _, testCase := range alertListSortTests { + testCase.alert.UpdateFingerprints() if al[testCase.position].ContentFingerprint() != testCase.alert.ContentFingerprint() { failures++ } @@ -120,6 +122,13 @@ var agFPTests = []agFPTest{ func TestAlertGroupContentFingerprint(t *testing.T) { for _, testCase := range agFPTests { + alerts := models.AlertList{} + for _, alert := range testCase.ag.Alerts { + alert.UpdateFingerprints() + alerts = append(alerts, alert) + } + sort.Sort(alerts) + testCase.ag.Alerts = alerts if testCase.ag.ContentFingerprint() != testCase.fingerprint { t.Errorf("Invalid AlertGroup ContentFingerprint(), expected '%s', got '%s', AlertGroup: %v", testCase.fingerprint, testCase.ag.ContentFingerprint(), testCase.ag) diff --git a/views.go b/views.go index 8bf2f0775..cd8178863 100644 --- a/views.go +++ b/views.go @@ -133,6 +133,11 @@ func alerts(c *gin.Context) { } if !validFilters || (slices.BoolInSlice(results, true) && !slices.BoolInSlice(results, false)) { matches++ + // we need to update fingerprints since we've modified some fields in dedup + // and agCopy.ContentFingerprint() depends on per alert fingerprint + // we update it here rather than in dedup since here we can apply it + // only for alerts left after filtering + alert.UpdateFingerprints() agCopy.Alerts = append(agCopy.Alerts, alert) countLabel(counters, "@state", alert.State)