fix(backend): fix race in alert history

This commit is contained in:
Łukasz Mierzwa
2023-02-27 11:52:21 +00:00
committed by Łukasz Mierzwa
parent a26efecd48
commit 4852791852
2 changed files with 21 additions and 15 deletions

View File

@@ -11,6 +11,7 @@ import (
"sort"
"strings"
"sync"
"sync/atomic"
"time"
lru "github.com/hashicorp/golang-lru/v2"
@@ -126,6 +127,7 @@ type historyPoller struct {
queryTimeout time.Duration
knownBad *lru.Cache[string, *knownBadUpstream]
cache *lru.Cache[string, *cachedOffsets]
isRunning atomic.Bool
}
func newHistoryPoller(queueSize int, queryTimeout time.Duration) *historyPoller {
@@ -141,6 +143,7 @@ func newHistoryPoller(queueSize int, queryTimeout time.Duration) *historyPoller
}
func (hp *historyPoller) run(workers int) {
hp.isRunning.Store(true)
wg := sync.WaitGroup{}
for w := 1; w <= workers; w++ {
w := w
@@ -154,12 +157,15 @@ func (hp *historyPoller) run(workers int) {
}
// stop marks the poller as no longer running and closes the job queue.
//
// The isRunning flag is cleared before the channel is closed; submit checks
// that flag before sending, so new submissions start being rejected before
// the queue becomes unusable.
//
// NOTE(review): close panics on an already-closed channel, so calling stop
// more than once would panic — confirm callers invoke it exactly once.
func (hp *historyPoller) stop() {
hp.isRunning.Store(false)
log.Debug().Msg("Stopping history poller")
// Presumably the workers started by run exit once the queue is closed;
// the worker loop is not visible here — verify against run.
close(hp.queue)
}
// submit enqueues a history query job for the given uri and labels. The
// result channel is carried inside the job so whoever processes the job can
// reply on it.
//
// The job is only enqueued while the poller is marked as running. When it is
// not running the job is dropped and nothing is sent on result from here.
//
// NOTE(review): callers waiting on result presumably need their own timeout
// for the dropped-job case — confirm against the call sites.
//
// NOTE(review): the running check and the send are two separate steps. If
// stop() closes the queue between them, the send will still panic. Closing
// that window needs shared state beyond this method (e.g. a mutex held by
// both submit and stop).
func (hp *historyPoller) submit(uri string, labels map[string]string, result chan<- historyQueryResult) {
	// Guard first: sending on the queue after stop() has closed it would panic.
	if !hp.isRunning.Load() {
		return
	}
	// Exactly one send per call; an unguarded send here would both duplicate
	// the job and bypass the running check above.
	hp.queue <- historyJob{uri: uri, labels: labels, result: result}
}
func (hp *historyPoller) cacheSave(key string, values []OffsetSample) {

View File

@@ -1,36 +1,36 @@
http response prometheus /api/v1/labels 200 {"status":"success","data":["alertname"]}
http response prometheus /api/v1/query_range 200 {"status":"success","data":{"resultType":"matrix","result":[{"metric":{},"values":[]}]}}
http start prometheus 127.0.0.1:9112
http start prometheus 127.0.0.1:9110
exec bash -x ./test.sh &
karma.bin-should-work --pid-file=karma.pid --alertmanager.uri=http://127.0.0.1:7112 --listen.address=127.0.0.1 --listen.port=8112 --history.enabled=true --history.timeout=10s
karma.bin-should-work --pid-file=karma.pid --alertmanager.uri=http://127.0.0.1:7110 --listen.address=127.0.0.1 --listen.port=8110 --history.enabled=true --history.timeout=10s
! stdout .
cmp stderr stderr.txt
-- stderr.txt --
level=info msg="Version: dev"
level=info msg="Configured Alertmanager source" name=default proxy=false readonly=false uri=http://127.0.0.1:7112
level=info msg="Configured Alertmanager source" name=default proxy=false readonly=false uri=http://127.0.0.1:7110
level=info msg="Writing PID file" path=karma.pid
level=info msg="Initial Alertmanager collection"
level=info msg="Pulling latest alerts and silences from Alertmanager"
level=info msg="Collecting alerts and silences" alertmanager=default
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:7112/metrics
level=error msg="Request failed" error="Get \"http://127.0.0.1:7112/metrics\": dial tcp 127.0.0.1:7112: connect: connection refused" alertmanager=default uri=http://127.0.0.1:7112
level=error msg="Collection failed" error="Get \"http://127.0.0.1:7112/api/v2/status\": dial tcp 127.0.0.1:7112: connect: connection refused" alertmanager=default try=1/2
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:7112/metrics
level=error msg="Request failed" error="Get \"http://127.0.0.1:7112/metrics\": dial tcp 127.0.0.1:7112: connect: connection refused" alertmanager=default uri=http://127.0.0.1:7112
level=error msg="Collection failed" error="Get \"http://127.0.0.1:7112/api/v2/status\": dial tcp 127.0.0.1:7112: connect: connection refused" alertmanager=default try=2/2
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:7110/metrics
level=error msg="Request failed" error="Get \"http://127.0.0.1:7110/metrics\": dial tcp 127.0.0.1:7110: connect: connection refused" alertmanager=default uri=http://127.0.0.1:7110
level=error msg="Collection failed" error="Get \"http://127.0.0.1:7110/api/v2/status\": dial tcp 127.0.0.1:7110: connect: connection refused" alertmanager=default try=1/2
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:7110/metrics
level=error msg="Request failed" error="Get \"http://127.0.0.1:7110/metrics\": dial tcp 127.0.0.1:7110: connect: connection refused" alertmanager=default uri=http://127.0.0.1:7110
level=error msg="Collection failed" error="Get \"http://127.0.0.1:7110/api/v2/status\": dial tcp 127.0.0.1:7110: connect: connection refused" alertmanager=default try=2/2
level=info msg="Collection completed"
level=info msg="Done, starting HTTP server"
level=info msg="Starting HTTP server" address=127.0.0.1:8112
level=info msg="Starting HTTP server" address=127.0.0.1:8110
level=info msg="Shutting down HTTP server" signal=terminated
level=info msg="HTTP server shut down"
level=info msg="Removing PID file" path=karma.pid
-- query.json --
{
"sources": [
"http://127.0.0.1:9112",
"http://127.0.0.1:9112"
"http://127.0.0.1:9110",
"http://127.0.0.1:9110"
],
"labels": {
"alertname": "Fake Alert"
@@ -38,6 +38,6 @@ level=info msg="Removing PID file" path=karma.pid
}
-- test.sh --
while [ ! -f karma.pid ]; do sleep 1 ; done
sleep 5
curl -s -f -o /dev/null -XPOST -d @query.json http://127.0.0.1:8112/history.json
sleep 1
curl -s -f -o /dev/null -XPOST -d @query.json http://127.0.0.1:8110/history.json
cat karma.pid | xargs kill