fix(backend): enforce 5s sleep between upstream pulls

Might fix #2888
This commit is contained in:
Łukasz Mierzwa
2021-03-12 15:48:13 +00:00
committed by Łukasz Mierzwa
parent 6ff62e954d
commit 2b8ed39964
3 changed files with 53 additions and 0 deletions

View File

@@ -6,6 +6,7 @@ import (
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/prymitive/karma/internal/config"
@@ -121,3 +122,36 @@ func TestGetViewURL(t *testing.T) {
})
}
}
// TestPullFromAlertmanager checks the pacing of consecutive pulls:
//   - with lastPull reset to the zero time, a pull must finish within 1s;
//   - a pull issued right after the previous one must take at least 5s;
//   - after idling for 6s, a pull must again finish within 1s.
func TestPullFromAlertmanager(t *testing.T) {
	zerolog.SetGlobalLevel(zerolog.FatalLevel)
	mockConfig()
	mockCache()

	// timedPull runs a single pull and reports how long the call took.
	timedPull := func() time.Duration {
		began := time.Now()
		pullFromAlertmanager()
		return time.Since(began)
	}

	// Reset to the zero time so the first pull has nothing to wait for.
	lastPull = time.Time{}
	if elapsed := timedPull(); elapsed > time.Second {
		t.Errorf("First pullFromAlertmanager took %s, expected <= 1s", elapsed)
		return
	}

	// Back-to-back pull: expected to be held back by the minimum gap.
	if elapsed := timedPull(); elapsed < 5*time.Second {
		t.Errorf("Second pullFromAlertmanager took %s, expected >= 5s", elapsed)
		return
	}

	// Idle for longer than the minimum gap before the final pull.
	time.Sleep(6 * time.Second)
	if elapsed := timedPull(); elapsed > time.Second {
		t.Errorf("Third pullFromAlertmanager took %s, expected <= 1s", elapsed)
		return
	}
}

View File

@@ -3,16 +3,30 @@ package main
import (
"runtime"
"sync"
"time"
"github.com/prymitive/karma/internal/alertmanager"
"github.com/rs/zerolog/log"
)
// lastPull is the timestamp written at the end of pullFromAlertmanager.
// The next pull adds 5s to it and sleeps until that moment if it is still
// in the future; the zero value therefore never causes a wait.
var lastPull time.Time
func pullFromAlertmanager() {
// always flush cache once we're done
defer apiCache.Flush()
// Ensure that we're not putting write locks in a tight loop
// We need at least 5s since last pull
nextPull := lastPull.Add(time.Second * 5)
waitNeeded := time.Until(nextPull)
if waitNeeded > 0 {
log.Warn().Dur("wait", waitNeeded).Msg("Less than 5s since the last pull, will wait before next cycle to process client requests, try increasing alertmanager.interval option if you see this warning too often")
time.Sleep(waitNeeded)
}
log.Info().Msg("Pulling latest alerts and silences from Alertmanager")
upstreams := alertmanager.GetAlertmanagers()
@@ -34,6 +48,8 @@ func pullFromAlertmanager() {
log.Info().Msg("Collection completed")
runtime.GC()
lastPull = time.Now()
}
// Tick is the background timer used to call PullFromAlertmanager

View File

@@ -147,6 +147,7 @@ func mockAlerts(version string) {
mock.RegisterURL("http://localhost/api/v2/silences", version, "api/v2/silences")
mock.RegisterURL("http://localhost/api/v2/alerts/groups", version, "api/v2/alerts/groups")
lastPull = time.Time{}
pullFromAlertmanager()
}
@@ -2308,6 +2309,7 @@ func TestUpstreamStatus(t *testing.T) {
for _, m := range testCase.mocks {
httpmock.RegisterResponder("GET", m.uri, httpmock.NewStringResponder(m.code, m.body))
}
lastPull = time.Time{}
pullFromAlertmanager()
req := httptest.NewRequest("GET", "/alerts.json?q=@receiver=by-cluster-service&q=alertname=HTTP_Probe_Failed&q=instance=web1", nil)
@@ -2550,6 +2552,7 @@ func TestAlertFilters(t *testing.T) {
t.Fatal(err)
}
lastPull = time.Time{}
pullFromAlertmanager()
r := testRouter()