mirror of
https://github.com/prymitive/karma
synced 2026-05-05 03:16:51 +00:00
fix(backend): retry failed alertmanager requests
This commit is contained in:
committed by
Łukasz Mierzwa
parent
d172c58a1a
commit
c71c9e6107
@@ -19,6 +19,7 @@
|
||||
|
||||
- Refactored internal APIs.
|
||||
- Overview modal won't show label name for every value to save screen space.
|
||||
- Retry failed requests when collecting alerts and silences from Alertmanager.
|
||||
|
||||
## v0.92
|
||||
|
||||
|
||||
@@ -143,7 +143,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=default uri=http://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=default uri=http://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8068
|
||||
|
||||
@@ -14,7 +14,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8069
|
||||
|
||||
@@ -14,7 +14,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8073
|
||||
|
||||
@@ -13,7 +13,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://foo:xxx@127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://foo:***@127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://foo:xxx@127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://foo:xxx@127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://foo:***@127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://foo:xxx@127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=error msg="Execution failed" error="listen tcp: address 9999999: invalid port"
|
||||
|
||||
@@ -124,7 +124,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8083
|
||||
|
||||
@@ -14,7 +14,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8085
|
||||
|
||||
@@ -17,7 +17,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=local
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=1/2
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTPS server" address=127.0.0.1:8088
|
||||
|
||||
@@ -17,7 +17,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=local
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=1/2
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTPS server" address=127.0.0.1:8089
|
||||
|
||||
@@ -17,7 +17,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=local
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=1/2
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTPS server" address=127.0.0.1:8090
|
||||
|
||||
@@ -17,7 +17,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=local
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=1/2
|
||||
level=info msg="GET request" timeout=10 uri=https://127.0.0.1:9093/metrics
|
||||
level=error msg="Request failed" error="Get \"https://127.0.0.1:9093/metrics\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local uri=https://127.0.0.1:9093
|
||||
level=error msg="Collection failed" error="Get \"https://127.0.0.1:9093/api/v2/status\": dial tcp 127.0.0.1:9093: connect: connection refused" alertmanager=local try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTPS server" address=127.0.0.1:8091
|
||||
|
||||
@@ -16,7 +16,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=proxied
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:9094/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1:9094/metrics\": dial tcp 127.0.0.1:9094: connect: connection refused" alertmanager=proxied uri=http://127.0.0.1:9094
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1:9094/api/v2/status\": dial tcp 127.0.0.1:9094: connect: connection refused" alertmanager=proxied
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1:9094/api/v2/status\": dial tcp 127.0.0.1:9094: connect: connection refused" alertmanager=proxied try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1:9094/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1:9094/metrics\": dial tcp 127.0.0.1:9094: connect: connection refused" alertmanager=proxied uri=http://127.0.0.1:9094
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1:9094/api/v2/status\": dial tcp 127.0.0.1:9094: connect: connection refused" alertmanager=proxied try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8094
|
||||
|
||||
@@ -14,7 +14,10 @@ level=info msg="Pulling latest alerts and silences from Alertmanager"
|
||||
level=info msg="Collecting alerts and silences" alertmanager=default
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=1/2
|
||||
level=info msg="GET request" timeout=40 uri=http://127.0.0.1/metrics
|
||||
level=error msg="Request failed" error="Get \"http://127.0.0.1/metrics\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default uri=http://127.0.0.1
|
||||
level=error msg="Collection failed" error="Get \"http://127.0.0.1/api/v2/status\": dial tcp 127.0.0.1:80: connect: connection refused" alertmanager=default try=2/2
|
||||
level=info msg="Collection completed"
|
||||
level=info msg="Done, starting HTTP server"
|
||||
level=info msg="Starting HTTP server" address=127.0.0.1:8099
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
@@ -9,6 +10,10 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
const (
|
||||
maxTries = 2
|
||||
)
|
||||
|
||||
func pullFromAlertmanager() {
|
||||
// always flush cache once we're done
|
||||
defer apiCache.Purge()
|
||||
@@ -22,9 +27,17 @@ func pullFromAlertmanager() {
|
||||
for _, upstream := range upstreams {
|
||||
go func(am *alertmanager.Alertmanager) {
|
||||
log.Info().Str("alertmanager", am.Name).Msg("Collecting alerts and silences")
|
||||
err := am.Pull()
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("alertmanager", am.Name).Msg("Collection failed")
|
||||
for i := 1; i <= maxTries; i++ {
|
||||
err := am.Pull()
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Err(err).
|
||||
Str("alertmanager", am.Name).
|
||||
Str("try", fmt.Sprintf("%d/%d", i, maxTries)).
|
||||
Msg("Collection failed")
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
wg.Done()
|
||||
}(upstream)
|
||||
|
||||
Reference in New Issue
Block a user