mirror of
https://github.com/weaveworks/scope.git
synced 2026-03-03 02:00:43 +00:00
Merge pull request #1665 from weaveworks/use-histograms
Use histograms over summaries
This commit is contained in:
@@ -31,10 +31,11 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
dynamoRequestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
dynamoRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "scope",
|
||||
Name: "dynamo_request_duration_seconds",
|
||||
Help: "Time in seconds spent doing DynamoDB requests.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"method", "status_code"})
|
||||
dynamoConsumedCapacity = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "scope",
|
||||
@@ -217,7 +218,7 @@ func (c *awsCollector) CreateTables() error {
|
||||
func (c *awsCollector) getReportKeys(userid string, row int64, start, end time.Time) ([]string, error) {
|
||||
rowKey := fmt.Sprintf("%s-%s", userid, strconv.FormatInt(row, 10))
|
||||
var resp *dynamodb.QueryOutput
|
||||
err := instrument.TimeRequest("Query", dynamoRequestDuration, func() error {
|
||||
err := instrument.TimeRequestHistogram("Query", dynamoRequestDuration, func() error {
|
||||
var err error
|
||||
resp, err = c.db.Query(&dynamodb.QueryInput{
|
||||
TableName: aws.String(c.tableName),
|
||||
@@ -377,7 +378,7 @@ func (c *awsCollector) Add(ctx context.Context, rep report.Report) error {
|
||||
Add(float64(len(s3Key)))
|
||||
|
||||
var resp *dynamodb.PutItemOutput
|
||||
err = instrument.TimeRequest("PutItem", dynamoRequestDuration, func() error {
|
||||
err = instrument.TimeRequestHistogram("PutItem", dynamoRequestDuration, func() error {
|
||||
var err error
|
||||
resp, err = c.db.PutItem(&dynamodb.PutItemInput{
|
||||
TableName: aws.String(c.tableName),
|
||||
|
||||
@@ -29,10 +29,11 @@ var (
|
||||
Help: "Total count of reports found in memcache that were not found in our in-memory cache.",
|
||||
})
|
||||
|
||||
memcacheRequestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
memcacheRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "scope",
|
||||
Name: "memcache_request_duration_seconds",
|
||||
Help: "Total time spent in seconds doing memcache requests.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"method", "status_code"})
|
||||
)
|
||||
|
||||
@@ -148,7 +149,7 @@ func memcacheStatusCode(err error) string {
|
||||
func (c *MemcacheClient) FetchReports(keys []string) (map[string]report.Report, []string, error) {
|
||||
memcacheRequests.Add(float64(len(keys)))
|
||||
var found map[string]*memcache.Item
|
||||
err := instrument.TimeRequestStatus("Get", memcacheRequestDuration, memcacheStatusCode, func() error {
|
||||
err := instrument.TimeRequestHistogramStatus("Get", memcacheRequestDuration, memcacheStatusCode, func() error {
|
||||
var err error
|
||||
found, err = c.client.GetMulti(keys)
|
||||
return err
|
||||
@@ -202,7 +203,7 @@ func (c *MemcacheClient) FetchReports(keys []string) (map[string]report.Report,
|
||||
|
||||
// StoreBytes stores a report, expecting the report to be serialized already.
|
||||
func (c *MemcacheClient) StoreBytes(key string, content []byte) error {
|
||||
return instrument.TimeRequestStatus("Put", memcacheRequestDuration, memcacheStatusCode, func() error {
|
||||
return instrument.TimeRequestHistogramStatus("Put", memcacheRequestDuration, memcacheStatusCode, func() error {
|
||||
item := memcache.Item{Key: key, Value: content, Expiration: c.expiration}
|
||||
return c.client.Set(&item)
|
||||
})
|
||||
|
||||
@@ -13,10 +13,11 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
s3RequestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
s3RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "scope",
|
||||
Name: "s3_request_duration_seconds",
|
||||
Help: "Time in seconds spent doing S3 requests.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"method", "status_code"})
|
||||
)
|
||||
|
||||
@@ -69,7 +70,7 @@ func (store *S3Store) FetchReports(keys []string) (map[string]report.Report, []s
|
||||
|
||||
func (store *S3Store) fetchReport(key string) (*report.Report, error) {
|
||||
var resp *s3.GetObjectOutput
|
||||
err := instrument.TimeRequest("Get", s3RequestDuration, func() error {
|
||||
err := instrument.TimeRequestHistogram("Get", s3RequestDuration, func() error {
|
||||
var err error
|
||||
resp, err = store.s3.GetObject(&s3.GetObjectInput{
|
||||
Bucket: aws.String(store.bucketName),
|
||||
@@ -86,7 +87,7 @@ func (store *S3Store) fetchReport(key string) (*report.Report, error) {
|
||||
// StoreBytes stores a report in S3, expecting the report to be serialized
|
||||
// already.
|
||||
func (store *S3Store) StoreBytes(key string, content []byte) error {
|
||||
return instrument.TimeRequest("Put", s3RequestDuration, func() error {
|
||||
return instrument.TimeRequestHistogram("Put", s3RequestDuration, func() error {
|
||||
_, err := store.s3.PutObject(&s3.PutObjectInput{
|
||||
Body: bytes.NewReader(content),
|
||||
Bucket: aws.String(store.bucketName),
|
||||
|
||||
@@ -23,10 +23,11 @@ import (
|
||||
var (
|
||||
longPollTime = aws.Int64(10)
|
||||
rpcTimeout = time.Minute
|
||||
sqsRequestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
sqsRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "scope",
|
||||
Name: "sqs_request_duration_seconds",
|
||||
Help: "Time in seconds spent doing SQS requests.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"method", "status_code"})
|
||||
)
|
||||
|
||||
@@ -95,7 +96,7 @@ func (cr *sqsControlRouter) getOrCreateQueue(name string) (*string, error) {
|
||||
// CreateQueue creates a queue or if it already exists, returns url of said queue
|
||||
var createQueueRes *sqs.CreateQueueOutput
|
||||
var err error
|
||||
err = instrument.TimeRequestStatus("CreateQueue", sqsRequestDuration, nil, func() error {
|
||||
err = instrument.TimeRequestHistogram("CreateQueue", sqsRequestDuration, func() error {
|
||||
createQueueRes, err = cr.service.CreateQueue(&sqs.CreateQueueInput{
|
||||
QueueName: aws.String(name),
|
||||
})
|
||||
@@ -128,7 +129,7 @@ func (cr *sqsControlRouter) loop() {
|
||||
for {
|
||||
var res *sqs.ReceiveMessageOutput
|
||||
var err error
|
||||
err = instrument.TimeRequestStatus("ReceiveMessage", sqsRequestDuration, nil, func() error {
|
||||
err = instrument.TimeRequestHistogram("ReceiveMessage", sqsRequestDuration, func() error {
|
||||
res, err = cr.service.ReceiveMessage(&sqs.ReceiveMessageInput{
|
||||
QueueUrl: responseQueueURL,
|
||||
WaitTimeSeconds: longPollTime,
|
||||
@@ -158,7 +159,7 @@ func (cr *sqsControlRouter) deleteMessages(queueURL *string, messages []*sqs.Mes
|
||||
Id: message.MessageId,
|
||||
})
|
||||
}
|
||||
return instrument.TimeRequestStatus("DeleteMessageBatch", sqsRequestDuration, nil, func() error {
|
||||
return instrument.TimeRequestHistogram("DeleteMessageBatch", sqsRequestDuration, func() error {
|
||||
_, err := cr.service.DeleteMessageBatch(&sqs.DeleteMessageBatchInput{
|
||||
QueueUrl: queueURL,
|
||||
Entries: entries,
|
||||
@@ -194,7 +195,7 @@ func (cr *sqsControlRouter) sendMessage(queueURL *string, message interface{}) e
|
||||
}
|
||||
log.Infof("sendMessage to %s: %s", *queueURL, buf.String())
|
||||
|
||||
return instrument.TimeRequestStatus("SendMessage", sqsRequestDuration, nil, func() error {
|
||||
return instrument.TimeRequestHistogram("SendMessage", sqsRequestDuration, func() error {
|
||||
_, err := cr.service.SendMessage(&sqs.SendMessageInput{
|
||||
QueueUrl: queueURL,
|
||||
MessageBody: aws.String(buf.String()),
|
||||
@@ -217,7 +218,7 @@ func (cr *sqsControlRouter) Handle(ctx context.Context, probeID string, req xfer
|
||||
}
|
||||
|
||||
var probeQueueURL *sqs.GetQueueUrlOutput
|
||||
err = instrument.TimeRequestStatus("GetQueueUrl", sqsRequestDuration, nil, func() error {
|
||||
err = instrument.TimeRequestHistogram("GetQueueUrl", sqsRequestDuration, func() error {
|
||||
probeQueueName := fmt.Sprintf("%sprobe-%s-%s", cr.prefix, userID, probeID)
|
||||
probeQueueURL, err = cr.service.GetQueueUrl(&sqs.GetQueueUrlInput{
|
||||
QueueName: aws.String(probeQueueName),
|
||||
@@ -241,7 +242,7 @@ func (cr *sqsControlRouter) Handle(ctx context.Context, probeID string, req xfer
|
||||
}()
|
||||
|
||||
// Next, send the request to that queue
|
||||
if err := instrument.TimeRequestStatus("SendMessage", sqsRequestDuration, nil, func() error {
|
||||
if err := instrument.TimeRequestHistogram("SendMessage", sqsRequestDuration, func() error {
|
||||
return cr.sendMessage(probeQueueURL.QueueUrl, sqsRequestMessage{
|
||||
ID: id,
|
||||
Request: req,
|
||||
@@ -324,7 +325,7 @@ func (pw *probeWorker) loop() {
|
||||
|
||||
var res *sqs.ReceiveMessageOutput
|
||||
var err error
|
||||
err = instrument.TimeRequestStatus("ReceiveMessage", sqsRequestDuration, nil, func() error {
|
||||
err = instrument.TimeRequestHistogram("ReceiveMessage", sqsRequestDuration, func() error {
|
||||
res, err = pw.router.service.ReceiveMessage(&sqs.ReceiveMessageInput{
|
||||
QueueUrl: pw.requestQueueURL,
|
||||
WaitTimeSeconds: longPollTime,
|
||||
|
||||
@@ -39,3 +39,29 @@ func TimeRequestStatus(method string, metric *prometheus.SummaryVec, toStatusCod
|
||||
metric.WithLabelValues(method, toStatusCode(err)).Observe(duration.Seconds())
|
||||
return err
|
||||
}
|
||||
|
||||
// TimeRequestHistogram runs 'f' and records how long it took in the given Prometheus
|
||||
// histogram metric. If 'f' returns successfully, record a "200". Otherwise, record
|
||||
// "500".
|
||||
//
|
||||
// If you want more complicated logic for translating errors into statuses,
|
||||
// use 'TimeRequestHistogramStatus'.
|
||||
func TimeRequestHistogram(method string, metric *prometheus.HistogramVec, f func() error) error {
|
||||
return TimeRequestHistogramStatus(method, metric, ErrorCode, f)
|
||||
}
|
||||
|
||||
// TimeRequestHistogramStatus runs 'f' and records how long it took in the given
|
||||
// Prometheus histogram metric.
|
||||
//
|
||||
// toStatusCode is a function that translates errors returned by 'f' into
|
||||
// HTTP-like status codes.
|
||||
func TimeRequestHistogramStatus(method string, metric *prometheus.HistogramVec, toStatusCode func(error) string, f func() error) error {
|
||||
if toStatusCode == nil {
|
||||
toStatusCode = ErrorCode
|
||||
}
|
||||
startTime := time.Now()
|
||||
err := f()
|
||||
duration := time.Now().Sub(startTime)
|
||||
metric.WithLabelValues(method, toStatusCode(err)).Observe(duration.Seconds())
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ type Instrument struct {
|
||||
RouteMatcher interface {
|
||||
Match(*http.Request, *mux.RouteMatch) bool
|
||||
}
|
||||
Duration *prometheus.SummaryVec
|
||||
Duration *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
func isWSHandshakeRequest(req *http.Request) bool {
|
||||
|
||||
@@ -32,10 +32,11 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
requestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "scope",
|
||||
Name: "request_duration_seconds",
|
||||
Help: "Time in seconds spent serving HTTP requests.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"method", "route", "status_code", "ws"})
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user