From 2b8ed3996416869cf89fcdec3c6a3fcc936b0293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Mierzwa?= Date: Fri, 12 Mar 2021 15:48:13 +0000 Subject: [PATCH] fix(backend): enforce 5s sleep between upstream pulls Might fix #2888 --- cmd/karma/main_test.go | 34 ++++++++++++++++++++++++++++++++++ cmd/karma/timer.go | 16 ++++++++++++++++ cmd/karma/views_test.go | 3 +++ 3 files changed, 53 insertions(+) diff --git a/cmd/karma/main_test.go b/cmd/karma/main_test.go index ac0d0de6b..3eb149eb9 100644 --- a/cmd/karma/main_test.go +++ b/cmd/karma/main_test.go @@ -6,6 +6,7 @@ import ( "net/http/httptest" "strings" "testing" + "time" "github.com/prymitive/karma/internal/config" @@ -121,3 +122,36 @@ func TestGetViewURL(t *testing.T) { }) } } + +func TestPullFromAlertmanager(t *testing.T) { + zerolog.SetGlobalLevel(zerolog.FatalLevel) + mockConfig() + mockCache() + lastPull = time.Time{} + + start := time.Now() + pullFromAlertmanager() + dur := time.Since(start) + if dur > time.Second { + t.Errorf("First pullFromAlertmanager took %s, expected <= 1s", dur) + return + } + + start = time.Now() + pullFromAlertmanager() + dur = time.Since(start) + if dur < time.Second*5 { + t.Errorf("Second pullFromAlertmanager took %s, expected >= 5s", dur) + return + } + + time.Sleep(time.Second * 6) + + start = time.Now() + pullFromAlertmanager() + dur = time.Since(start) + if dur > time.Second { + t.Errorf("Third pullFromAlertmanager took %s, expected <= 1s", dur) + return + } +} diff --git a/cmd/karma/timer.go b/cmd/karma/timer.go index d7d432f15..ba95c6a5d 100644 --- a/cmd/karma/timer.go +++ b/cmd/karma/timer.go @@ -3,16 +3,30 @@ package main import ( "runtime" "sync" + "time" "github.com/prymitive/karma/internal/alertmanager" "github.com/rs/zerolog/log" ) +var ( + lastPull time.Time +) + func pullFromAlertmanager() { // always flush cache once we're done defer apiCache.Flush() + // Ensure that we're not putting write locks in a tight loop + // We need at least 5s since last pull + nextPull := lastPull.Add(time.Second * 5) + waitNeeded := time.Until(nextPull) + if waitNeeded > 0 { + log.Warn().Dur("wait", waitNeeded).Msg("Less than 5s since the last pull, will wait before next cycle to process client requests, try increasing alertmanager.interval option if you see this warning too often") + time.Sleep(waitNeeded) + } + log.Info().Msg("Pulling latest alerts and silences from Alertmanager") upstreams := alertmanager.GetAlertmanagers() @@ -34,6 +48,8 @@ func pullFromAlertmanager() { log.Info().Msg("Collection completed") runtime.GC() + + lastPull = time.Now() } // Tick is the background timer used to call PullFromAlertmanager diff --git a/cmd/karma/views_test.go b/cmd/karma/views_test.go index 14bc2810f..6f5a512e8 100644 --- a/cmd/karma/views_test.go +++ b/cmd/karma/views_test.go @@ -147,6 +147,7 @@ func mockAlerts(version string) { mock.RegisterURL("http://localhost/api/v2/silences", version, "api/v2/silences") mock.RegisterURL("http://localhost/api/v2/alerts/groups", version, "api/v2/alerts/groups") + lastPull = time.Time{} pullFromAlertmanager() } @@ -2308,6 +2309,7 @@ func TestUpstreamStatus(t *testing.T) { for _, m := range testCase.mocks { httpmock.RegisterResponder("GET", m.uri, httpmock.NewStringResponder(m.code, m.body)) } + lastPull = time.Time{} pullFromAlertmanager() req := httptest.NewRequest("GET", "/alerts.json?q=@receiver=by-cluster-service&q=alertname=HTTP_Probe_Failed&q=instance=web1", nil) @@ -2550,6 +2552,7 @@ func TestAlertFilters(t *testing.T) { t.Fatal(err) } + lastPull = time.Time{} pullFromAlertmanager() r := testRouter()