feat: Record summary of execution times of support bundle operations (collect/redact/analyse) (#935)

When running a support bundle, we want to know how long each operation
(collect, redact, analyze) takes. This commit adds a new trace exporter
that records the start and end times of each operation, and then prints
a summary of the execution. The summary is also stored in the support
bundle.

Related to #923
This commit is contained in:
Evans Mungai
2023-02-07 09:50:21 +00:00
committed by GitHub
parent e156b8c215
commit 100f9a13b6
20 changed files with 795 additions and 60 deletions

View File

@@ -35,6 +35,7 @@ define LDFLAGS
endef
BUILDFLAGS = -tags "netgo containers_image_ostree_stub exclude_graphdriver_devicemapper exclude_graphdriver_btrfs containers_image_openpgp" -installsuffix netgo
BUILDPATHS = ./pkg/... ./cmd/... ./internal/...
all: test support-bundle preflight collect analyze
@@ -45,16 +46,16 @@ ffi: fmt vet
.PHONY: test
test: generate fmt vet
if [ -n $(RUN) ]; then \
go test ${BUILDFLAGS} ./pkg/... ./cmd/... -coverprofile cover.out -run $(RUN); \
go test ${BUILDFLAGS} ${BUILDPATHS} -coverprofile cover.out -run $(RUN); \
else \
go test ${BUILDFLAGS} ./pkg/... ./cmd/... -coverprofile cover.out; \
go test ${BUILDFLAGS} ${BUILDPATHS} -coverprofile cover.out; \
fi
# Go tests that require a K8s instance
# TODOLATER: merge with test, so we get unified coverage reports? it'll add 21~sec to the test job though...
.PHONY: test-integration
test-integration:
go test -v --tags "integration exclude_graphdriver_devicemapper exclude_graphdriver_btrfs" ./pkg/... ./cmd/...
go test -v --tags "integration exclude_graphdriver_devicemapper exclude_graphdriver_btrfs" ${BUILDPATHS}
.PHONY: preflight-e2e-test
preflight-e2e-test:
@@ -82,11 +83,11 @@ collect:
.PHONY: fmt
fmt:
go fmt ./pkg/... ./cmd/...
go fmt ${BUILDPATHS}
.PHONY: vet
vet:
go vet ${BUILDFLAGS} ./pkg/... ./cmd/...
go vet ${BUILDFLAGS} ${BUILDPATHS}
.PHONY: generate
generate: controller-gen client-gen
@@ -213,8 +214,8 @@ scan:
.PHONY: lint
lint:
golangci-lint run --new -c .golangci.yaml pkg/... cmd/...
golangci-lint run --new -c .golangci.yaml ${BUILDPATHS}
.PHONY: lint-and-fix
lint-and-fix:
golangci-lint run --new --fix -c .golangci.yaml pkg/... cmd/...
golangci-lint run --new --fix -c .golangci.yaml ${BUILDPATHS}

View File

@@ -1,11 +1,13 @@
package cli
import (
"fmt"
"os"
"strings"
"github.com/go-logr/logr"
"github.com/replicatedhq/troubleshoot/cmd/util"
"github.com/replicatedhq/troubleshoot/internal/traces"
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/replicatedhq/troubleshoot/pkg/preflight"
@@ -37,7 +39,19 @@ that a cluster meets the requirements to run an application.`,
},
RunE: func(cmd *cobra.Command, args []string) error {
v := viper.GetViper()
return preflight.RunPreflights(v.GetBool("interactive"), v.GetString("output"), v.GetString("format"), args)
closer, err := traces.ConfigureTracing("preflight")
if err != nil {
// Do not fail running preflights if tracing fails
logger.Printf("Failed to initialize open tracing provider: %v", err)
} else {
defer closer()
}
err = preflight.RunPreflights(v.GetBool("interactive"), v.GetString("output"), v.GetString("format"), args)
if v.GetBool("debug") {
fmt.Printf("\n%s", traces.GetExporterInstance().GetSummary())
}
return err
},
PostRun: func(cmd *cobra.Command, args []string) {
if err := util.StopProfiling(); err != nil {

View File

@@ -1,11 +1,13 @@
package cli
import (
"fmt"
"os"
"strings"
"github.com/go-logr/logr"
"github.com/replicatedhq/troubleshoot/cmd/util"
"github.com/replicatedhq/troubleshoot/internal/traces"
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/spf13/cobra"
@@ -40,7 +42,20 @@ from a server that can be used to assist when troubleshooting a Kubernetes clust
RunE: func(cmd *cobra.Command, args []string) error {
v := viper.GetViper()
return runTroubleshoot(v, args)
closer, err := traces.ConfigureTracing("support-bundle")
if err != nil {
// Do not fail running support-bundle if tracing fails
logger.Printf("Failed to initialize open tracing provider: %v", err)
} else {
defer closer()
}
err = runTroubleshoot(v, args)
if v.GetBool("debug") {
fmt.Printf("\n%s", traces.GetExporterInstance().GetSummary())
}
return err
},
PersistentPostRun: func(cmd *cobra.Command, args []string) {
if err := util.StopProfiling(); err != nil {

View File

@@ -40,7 +40,7 @@ func writeVersionFile(path string) error {
return err
}
filename := filepath.Join(path, constants.VersionFilename)
filename := filepath.Join(path, constants.VERSION_FILENAME)
err = ioutil.WriteFile(filename, b, 0644)
if err != nil {
return err

View File

@@ -3,6 +3,8 @@ kind: SupportBundle
metadata:
name: example
spec:
hostCollectors:
- hostOS: {}
collectors:
- logs:
selector:

6
go.mod
View File

@@ -34,6 +34,8 @@ require (
github.com/spf13/viper v1.15.0
github.com/stretchr/testify v1.8.1
github.com/tj/go-spin v1.1.0
go.opentelemetry.io/otel v1.12.0
go.opentelemetry.io/otel/sdk v1.11.2
golang.org/x/sync v0.1.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.26.1
@@ -53,6 +55,7 @@ require (
github.com/docker/distribution v2.8.1+incompatible // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.2.1 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b // indirect
@@ -61,6 +64,7 @@ require (
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sylabs/sif/v2 v2.9.0 // indirect
go.opentelemetry.io/otel/trace v1.12.0 // indirect
golang.org/x/mod v0.7.0 // indirect
golang.org/x/tools v0.4.0 // indirect
)
@@ -186,7 +190,7 @@ require (
golang.org/x/oauth2 v0.4.0 // indirect
golang.org/x/sys v0.4.0 // indirect
golang.org/x/term v0.4.0 // indirect
golang.org/x/text v0.6.0 // indirect
golang.org/x/text v0.6.0
golang.org/x/time v0.3.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/api v0.107.0 // indirect

9
go.sum
View File

@@ -399,8 +399,11 @@ github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KE
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-logr/zapr v1.2.3 h1:a9vnzlIBPQBBkeaR9IuMUfmVOrQlkoC4YfPoFkX3T7A=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
@@ -1031,6 +1034,12 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/otel v1.12.0 h1:IgfC7kqQrRccIKuB7Cl+SRUmsKbEwSGPr0Eu+/ht1SQ=
go.opentelemetry.io/otel v1.12.0/go.mod h1:geaoz0L0r1BEOR81k7/n9W4TCXYCJ7bPO7K374jQHG0=
go.opentelemetry.io/otel/sdk v1.11.2 h1:GF4JoaEx7iihdMFu30sOyRx52HDHOkl9xQ8SMqNXUiU=
go.opentelemetry.io/otel/sdk v1.11.2/go.mod h1:wZ1WxImwpq+lVRo4vsmSOxdd+xwoUJ6rqyLc3SyX9aU=
go.opentelemetry.io/otel/trace v1.12.0 h1:p28in++7Kd0r2d8gSt931O57fdjUyWxkVbESuILAeUc=
go.opentelemetry.io/otel/trace v1.12.0/go.mod h1:pHlgBynn6s25qJ2szD+Bv+iwKJttjHSI3lUAyf0GNuQ=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 h1:+FNtrFTmVw0YZGpBGX56XDee331t6JAXeK2bcyhLOOc=
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5/go.mod h1:nmDLcffg48OtT/PSW0Hg7FvpRQsQh5OSqIylirxKC7o=

273
internal/traces/exporter.go Normal file
View File

@@ -0,0 +1,273 @@
package traces
import (
"context"
"sort"
"strings"
"sync"
"time"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/sdk/trace/tracetest"
"golang.org/x/text/language"
"golang.org/x/text/message"
)
var (
_ trace.SpanExporter = (*Exporter)(nil)
once sync.Once
exporter *Exporter
printer = message.NewPrinter(language.English)
)
const legend = "Suceeded (S), eXcluded (X), Failed (F)\n"
// FUTURE WORK: This exporter should only be used by troubleshoot CLIs
// until the following issue is addressed:
// 1. The cache of spans grows infinitely at the moment. This is OK for short lived
// invocations such as CLI applications running one-shot commands. For long running
// applications, this will be a problem.
// * We can drop non-troubleshoot spans from the cache. There is an OTEP to add
// "inheritable attributes" to spans which is unsupported as of today. Once/if
// it gets accepted and implemented, it will allow us to add a "troubleshoot"
// attribute to the "root" span which gets propagated to downstream spans which
// we can use to filter out non-troubleshoot spans.
// https://github.com/dynatrace-oss-contrib/oteps/blob/feature/context-scoped-attributes/text/0207-context-scoped-attributes.md
// https://github.com/open-telemetry/opentelemetry-specification/issues/1337
// * We can "search" for the "root" span by traversing up the span tree and drop
// all spans that are not descendants of the "root" span. This can be slow, but
// we can do it in a background goroutine (adds a bit of complexity).
// 2. At the moment, the summary of an execution is the only case this exporter
// is being used for.
// GetExporterInstance creates a singleton exporter instance
func GetExporterInstance() *Exporter {
once.Do(func() {
exporter = &Exporter{
allSpans: make([]trace.ReadOnlySpan, 1024),
}
})
return exporter
}
// Exporter is an implementation of trace.SpanExporter that writes to a destination.
type Exporter struct {
spansMu sync.Mutex
allSpans []trace.ReadOnlySpan
stoppedMu sync.RWMutex
stopped bool
}
// ExportSpans writes spans to an in-memory cache
// This function can/will be called on every span.End() at worst.
func (e *Exporter) ExportSpans(ctx context.Context, spans []trace.ReadOnlySpan) error {
// This is a no-op if the context is canceled.
if ctx.Err() != nil {
return ctx.Err()
}
e.stoppedMu.RLock()
stopped := e.stopped
e.stoppedMu.RUnlock()
if stopped {
return nil
}
if len(spans) == 0 {
return nil
}
e.spansMu.Lock()
defer e.spansMu.Unlock()
// Cache received span updates allSpans
e.allSpans = append(e.allSpans, spans...)
return nil
}
func isType(stub *tracetest.SpanStub, t string) bool {
if stub == nil {
return false
}
for _, attr := range stub.Attributes {
if string(attr.Key) == "type" && strings.Contains(attr.Value.AsString(), t) {
return true
}
}
return false
}
func formatStubName(stub *tracetest.SpanStub) string {
if stub == nil {
return ""
}
for _, attr := range stub.Attributes {
if stub.Status.Code == codes.Error {
return stub.Name + " (F)"
}
if string(attr.Key) == constants.EXCLUDED && attr.Value.AsBool() {
return stub.Name + " (X)"
}
}
return stub.Name + " (S)" // Assume success
}
// maxInt returns the larger of x or y.
func maxInt(x, y int) int {
if x < y {
return y
}
return x
}
// GetSummary returns the runtime summary of the execution
// so far. Call this function after your "root" span has ended
// and the program operations needing tracing have completed.
func (e *Exporter) GetSummary() string {
e.spansMu.Lock()
stubs := tracetest.SpanStubsFromReadOnlySpans(e.allSpans)
e.spansMu.Unlock()
// No spans to log
if len(stubs) == 0 {
return ""
}
// TODO: We may want to collect more information about the
// execution of the program. For example, we can collect
// the number of times a collector was executed, whether
// it was successful or not, if it was skipped, etc.
collectors := make(map[string]time.Duration)
redactors := make(map[string]time.Duration)
analysers := make(map[string]time.Duration)
totalDuration := time.Duration(0)
for i := range stubs {
stub := &stubs[i]
// Summary of span stubs
duration := stub.EndTime.Sub(stub.StartTime)
stubName := formatStubName(stub)
switch {
case stub.Name == constants.TROUBLESHOOT_ROOT_SPAN_NAME:
totalDuration = duration
case isType(stub, "Collect"):
collectors[stubName] = duration
case isType(stub, "Redactors"):
redactors[stub.Name] = duration
case isType(stub, "Analyze"):
analysers[stubName] = duration
default:
continue
}
}
sb := strings.Builder{}
collectorsSummary(collectors, &sb)
redactorsSummary(redactors, &sb)
analysersSummary(analysers, &sb)
sb.WriteString(printer.Sprintf("\nDuration: %dms\n", totalDuration/time.Millisecond))
return sb.String()
}
// summary of collector runtimes
func collectorsSummary(summary map[string]time.Duration, sb *strings.Builder) {
padding, keys := sortedKeysAndPadding(summary)
sb.WriteString("============ Collectors summary =============\n")
sb.WriteString(legend)
sb.WriteString("=============================================\n")
if len(summary) == 0 {
sb.WriteString("No collectors executed\n")
return
}
for _, name := range keys {
sb.WriteString(printer.Sprintf("%-*s : %dms\n", padding, name, summary[name]/time.Millisecond))
}
}
// summary of redactor runtime
func redactorsSummary(summary map[string]time.Duration, sb *strings.Builder) {
padding, keys := sortedKeysAndPadding(summary)
sb.WriteString("\n============ Redactors summary =============\n")
if len(summary) == 0 {
sb.WriteString("No redactors executed\n")
return
}
for _, name := range keys {
sb.WriteString(printer.Sprintf("%-*s : %dms\n", padding, name, summary[name]/time.Millisecond))
}
}
// summary of analyser runtime
func analysersSummary(summary map[string]time.Duration, sb *strings.Builder) {
padding, keys := sortedKeysAndPadding(summary)
sb.WriteString("\n============= Analyzers summary =============\n")
sb.WriteString(legend)
sb.WriteString("=============================================\n")
if len(summary) == 0 {
sb.WriteString("No analyzers executed\n")
return
}
for _, name := range keys {
sb.WriteString(printer.Sprintf("%-*s : %dms\n", padding, name, summary[name]/time.Millisecond))
}
}
func sortedKeysAndPadding(summary map[string]time.Duration) (int, []string) {
keys := make([]string, 0, len(summary))
padding := 0
for k := range summary {
padding = maxInt(padding, len(k))
keys = append(keys, k)
}
sort.SliceStable(keys, func(l, r int) bool {
return summary[keys[l]] > summary[keys[r]]
})
return padding, keys
}
// Shutdown is called to stop the exporter, it preforms no action.
func (e *Exporter) Shutdown(ctx context.Context) error {
e.stoppedMu.Lock()
e.stopped = true
e.stoppedMu.Unlock()
e.Reset()
select {
case <-ctx.Done():
return ctx.Err()
default:
}
return nil
}
func (e *Exporter) Reset() {
e.spansMu.Lock()
e.allSpans = e.allSpans[:0] // clear the slice
e.spansMu.Unlock()
}
// MarshalLog is the marshaling function used by the logging system to represent this exporter.
func (e *Exporter) MarshalLog() interface{} {
return struct {
Type string
}{
Type: "troubleshoot",
}
}

View File

@@ -0,0 +1,187 @@
package traces
import (
"context"
"fmt"
"strings"
"testing"
"time"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/sdk/trace/tracetest"
)
func TestExporter_GetSummary(t *testing.T) {
logger.SetQuiet(true)
tests := []struct {
name string
spans tracetest.SpanStubs
want string
}{
{
name: "with no spans",
spans: tracetest.SpanStubs{},
want: "",
},
{
name: "with root span only",
spans: tracetest.SpanStubs{
tracetest.SpanStub{
Name: constants.TROUBLESHOOT_ROOT_SPAN_NAME,
StartTime: time.Now(),
EndTime: time.Now().Add(time.Second),
},
},
want: "Duration: 1,000ms",
},
{
name: "with collectors",
spans: tracetest.SpanStubs{
tracetest.SpanStub{
Name: "all-logs", StartTime: time.Now(), EndTime: time.Now().Add(time.Minute),
Attributes: []attribute.KeyValue{
attribute.String("type", "*collect.CollectLogs"),
},
},
tracetest.SpanStub{
Name: "host-os", StartTime: time.Now(), EndTime: time.Now().Add(time.Second),
Attributes: []attribute.KeyValue{
attribute.String("type", "*collect.CollectHostOS"),
},
},
tracetest.SpanStub{
Name: "excluded-collector", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond * 2),
Attributes: []attribute.KeyValue{
attribute.String("type", "*collect.CollectHostOS"),
attribute.Bool("excluded", true),
},
},
tracetest.SpanStub{
Name: "failed-collector", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond),
Attributes: []attribute.KeyValue{
attribute.String("type", "*collect.CollectHostOS"),
},
Status: trace.Status{
Code: codes.Error,
Description: "some error",
},
},
},
want: `
============ Collectors summary =============
Suceeded (S), eXcluded (X), Failed (F)
=============================================
all-logs (S) : 60,000ms
host-os (S) : 1,000ms
excluded-collector (X) : 2ms
failed-collector (F) : 1ms`,
},
{
name: "with analyzers",
spans: tracetest.SpanStubs{
tracetest.SpanStub{
Name: "cluster-version", StartTime: time.Now(), EndTime: time.Now().Add(time.Second),
Attributes: []attribute.KeyValue{
attribute.String("type", "*analyzer.AnalyzeClusterVersion"),
},
},
tracetest.SpanStub{
Name: "host-cpu", StartTime: time.Now(), EndTime: time.Now().Add(time.Minute),
Attributes: []attribute.KeyValue{
attribute.String("type", "*analyzer.AnalyzeHostCPU"),
},
},
tracetest.SpanStub{
Name: "excluded-analyser", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond * 2),
Attributes: []attribute.KeyValue{
attribute.String("type", "*collect.AnalyzeHostCPU"),
attribute.Bool("excluded", true),
},
},
tracetest.SpanStub{
Name: "failed-analyser", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond),
Attributes: []attribute.KeyValue{
attribute.String("type", "*collect.AnalyzeHostCPU"),
},
Status: trace.Status{
Code: codes.Error,
Description: "some error",
},
},
},
want: `
============= Analyzers summary =============
Suceeded (S), eXcluded (X), Failed (F)
=============================================
host-cpu (S) : 60,000ms
cluster-version (S) : 1,000ms
excluded-analyser (X) : 2ms
failed-analyser (F) : 1ms`,
},
{
name: "with redactors",
spans: tracetest.SpanStubs{
tracetest.SpanStub{
Name: "cluster redactor", StartTime: time.Now(), EndTime: time.Now().Add(time.Second),
Attributes: []attribute.KeyValue{
attribute.String("type", "Redactors"),
},
},
},
want: `
============ Redactors summary =============
cluster redactor : 1,000ms`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
e := &Exporter{}
ctx := context.Background()
err := e.ExportSpans(ctx, tt.spans.Snapshots())
require.NoError(t, err)
assert.Contains(t, e.GetSummary(), strings.TrimSpace(tt.want))
})
}
}
func TestExporter_ExportSpansWithDoneContext(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
e := &Exporter{}
spans := tracetest.SpanStubs{}
assert.EqualError(t, e.ExportSpans(ctx, spans.Snapshots()), context.Canceled.Error())
}
func TestExporter_Shutdown(t *testing.T) {
e := &Exporter{}
ctx := context.Background()
spans := tracetest.SpanStubs{}
for i := 0; i < 5; i++ {
spans = append(spans, tracetest.SpanStub{Name: fmt.Sprintf("span-%d", i)})
}
err := e.ExportSpans(ctx, spans.Snapshots())
require.NoError(t, err)
assert.Len(t, e.allSpans, 5)
require.NoError(t, e.Shutdown(ctx))
assert.Len(t, e.allSpans, 0)
err = e.ExportSpans(ctx, spans.Snapshots())
require.NoError(t, err)
assert.Len(t, e.allSpans, 0)
}

60
internal/traces/otel.go Normal file
View File

@@ -0,0 +1,60 @@
package traces
import (
"context"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/replicatedhq/troubleshoot/pkg/version"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.12.0"
)
// ConfigureTracing configures the OpenTelemetry trace provider for CLI
// commands. Projects using troubleshoot as a library would need to register
// troubleshoot's exporter like so.
//
// var tp *trace.TracerProvider // client application's trace provider
// tp.RegisterSpanProcessor(
// trace.NewSimpleSpanProcessor(
// traces.GetExporterInstance(), // Troubleshoot's exporter
// ),
// )
//
// The client application is responsible for constructing the trace provider
// and registering the exporter. Multiple exporters can be registered.
func ConfigureTracing(processName string) (func(), error) {
r, err := resource.Merge(
resource.Default(),
resource.NewWithAttributes(
resource.Default().SchemaURL(),
semconv.ProcessCommandKey.String(processName),
semconv.ProcessRuntimeVersionKey.String(version.Version()),
attribute.String("environment", "cli"),
),
)
if err != nil {
return nil, err
}
// Trace provider for support bundle cli. Each application is required
// to have its own trace provider.
tp := trace.NewTracerProvider(
trace.WithSampler(trace.AlwaysSample()),
trace.WithSyncer(
GetExporterInstance(),
),
trace.WithResource(r),
)
otel.SetTracerProvider(tp)
return func() {
if err := tp.Shutdown(context.Background()); err != nil {
logger.Printf("Failed to shutdown trace provider: %v", err)
}
}, nil
}

View File

@@ -1,13 +1,19 @@
package analyzer
import (
"context"
"fmt"
"reflect"
"strconv"
"github.com/pkg/errors"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/replicatedhq/troubleshoot/pkg/multitype"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
corev1 "k8s.io/api/core/v1"
)
@@ -50,14 +56,25 @@ func isExcluded(excludeVal *multitype.BoolOrString) (bool, error) {
return parsed, nil
}
func HostAnalyze(hostAnalyzer *troubleshootv1beta2.HostAnalyze, getFile getCollectedFileContents, findFiles getChildCollectedFileContents) []*AnalyzeResult {
func HostAnalyze(
ctx context.Context,
hostAnalyzer *troubleshootv1beta2.HostAnalyze,
getFile getCollectedFileContents,
findFiles getChildCollectedFileContents,
) []*AnalyzeResult {
analyzer, ok := GetHostAnalyzer(hostAnalyzer)
if !ok {
return NewAnalyzeResultError(analyzer, errors.New("invalid host analyzer"))
}
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, analyzer.Title())
span.SetAttributes(attribute.String("type", reflect.TypeOf(analyzer).String()))
defer span.End()
isExcluded, _ := analyzer.IsExcluded()
if isExcluded {
logger.Printf("Excluding %q analyzer", analyzer.Title())
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
return nil
}
@@ -83,7 +100,12 @@ func NewAnalyzeResultError(analyzer HostAnalyzer, err error) []*AnalyzeResult {
}}
}
func Analyze(analyzer *troubleshootv1beta2.Analyze, getFile getCollectedFileContents, findFiles getChildCollectedFileContents) ([]*AnalyzeResult, error) {
func Analyze(
ctx context.Context,
analyzer *troubleshootv1beta2.Analyze,
getFile getCollectedFileContents,
findFiles getChildCollectedFileContents,
) ([]*AnalyzeResult, error) {
if analyzer == nil {
return nil, errors.New("nil analyzer")
}
@@ -97,16 +119,24 @@ func Analyze(analyzer *troubleshootv1beta2.Analyze, getFile getCollectedFileCont
}}, nil
}
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, analyzerInst.Title())
span.SetAttributes(attribute.String("type", reflect.TypeOf(analyzerInst).String()))
defer span.End()
isExcluded, err := analyzerInst.IsExcluded()
if err != nil {
span.SetStatus(codes.Error, err.Error())
return nil, err
}
if isExcluded {
logger.Printf("Excluding %q analyzer", analyzerInst.Title())
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
return nil, nil
}
results, err := analyzerInst.Analyze(getFile, findFiles)
if err != nil {
span.SetStatus(codes.Error, err.Error())
return nil, err
}

View File

@@ -1,6 +1,7 @@
package analyzer
import (
"context"
"testing"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
@@ -67,7 +68,7 @@ func Test_GetExcludeFlag(t *testing.T) {
}
func TestAnalyzeWithNilAnalyzer(t *testing.T) {
got, err := Analyze(nil, nil, nil)
got, err := Analyze(context.Background(), nil, nil, nil)
assert.Error(t, err)
assert.Nil(t, got)
}

View File

@@ -3,6 +3,7 @@ package analyzer
import (
"archive/tar"
"compress/gzip"
"context"
"io"
"io/fs"
"os"
@@ -24,7 +25,12 @@ type fileContentProvider struct {
}
// Analyze local will analyze a locally available (already downloaded) bundle
func AnalyzeLocal(localBundlePath string, analyzers []*troubleshootv1beta2.Analyze, hostAnalyzers []*troubleshootv1beta2.HostAnalyze) ([]*AnalyzeResult, error) {
func AnalyzeLocal(
ctx context.Context,
localBundlePath string,
analyzers []*troubleshootv1beta2.Analyze,
hostAnalyzers []*troubleshootv1beta2.HostAnalyze,
) ([]*AnalyzeResult, error) {
rootDir, err := FindBundleRootDir(localBundlePath)
if err != nil {
return nil, errors.Wrap(err, "failed to find root dir")
@@ -34,7 +40,7 @@ func AnalyzeLocal(localBundlePath string, analyzers []*troubleshootv1beta2.Analy
analyzeResults := []*AnalyzeResult{}
for _, analyzer := range analyzers {
analyzeResult, err := Analyze(analyzer, fcp.getFileContents, fcp.getChildFileContents)
analyzeResult, err := Analyze(ctx, analyzer, fcp.getFileContents, fcp.getChildFileContents)
if err != nil {
logger.Printf("An analyzer failed to run: %v", err)
continue
@@ -49,7 +55,7 @@ func AnalyzeLocal(localBundlePath string, analyzers []*troubleshootv1beta2.Analy
}
for _, hostAnalyzer := range hostAnalyzers {
analyzeResult := HostAnalyze(hostAnalyzer, fcp.getFileContents, fcp.getChildFileContents)
analyzeResult := HostAnalyze(ctx, hostAnalyzer, fcp.getFileContents, fcp.getChildFileContents)
analyzeResults = append(analyzeResults, analyzeResult...)
}
@@ -81,7 +87,7 @@ func DownloadAndAnalyze(bundleURL string, analyzersSpec string) ([]*AnalyzeResul
hostAnalyzers = parsedHostAnalyzers
}
return AnalyzeLocal(rootDir, analyzers, hostAnalyzers)
return AnalyzeLocal(context.Background(), rootDir, analyzers, hostAnalyzers)
}
func DownloadAndExtractSupportBundle(bundleURL string) (string, string, error) {
@@ -101,10 +107,10 @@ func DownloadAndExtractSupportBundle(bundleURL string) (string, string, error) {
return "", "", errors.Wrap(err, "failed to find root dir")
}
_, err = os.Stat(filepath.Join(bundleDir, constants.VersionFilename))
_, err = os.Stat(filepath.Join(bundleDir, constants.VERSION_FILENAME))
if err != nil {
os.RemoveAll(tmpDir)
return "", "", errors.Wrap(err, "failed to read "+constants.VersionFilename)
return "", "", errors.Wrap(err, "failed to read "+constants.VERSION_FILENAME)
}
return tmpDir, bundleDir, nil
@@ -283,7 +289,7 @@ func FindBundleRootDir(localBundlePath string) (string, error) {
isInSubDir := true
for _, name := range names {
if name == constants.VersionFilename {
if name == constants.VERSION_FILENAME {
isInSubDir = false
break
}

View File

@@ -29,13 +29,9 @@ func TestCollectHostUDPPortStatus_Collect(t *testing.T) {
return port, conn, err
}
type fields struct {
hostCollector *troubleshootv1beta2.UDPPortStatus
}
tests := []struct {
name string
getPort func(t *testing.T) (port int, closeFn func() error)
fields fields
want map[string][]byte
}{
{

View File

@@ -9,11 +9,17 @@ const (
DEFAULT_CLIENT_BURST = 100
// DEFAULT_CLIENT_USER_AGENT is an field that specifies the caller of troubleshoot request.
DEFAULT_CLIENT_USER_AGENT = "ReplicatedTroubleshoot"
// VersionFilename is the name of the file that contains the support bundle version.
VersionFilename = "version.yaml"
// VERSION_FILENAME is the name of the file that contains the support bundle version.
VERSION_FILENAME = "version.yaml"
// DEFAULT_LOGS_COLLECTOR_TIMEOUT is the default timeout for logs collector.
DEFAULT_LOGS_COLLECTOR_TIMEOUT = 60 * time.Second
// Tracing constants
LIB_TRACER_NAME = "github.com/replicatedhq/troubleshoot"
TROUBLESHOOT_ROOT_SPAN_NAME = "ReplicatedTroubleshootRootSpan"
EXCLUDED = "excluded"
// Cluster Resources Collector Directories
CLUSTER_RESOURCES_DIR = "cluster-resources"
CLUSTER_RESOURCES_NAMESPACES = "namespace"

View File

@@ -1,6 +1,7 @@
package preflight
import (
"context"
"encoding/json"
"fmt"
"path/filepath"
@@ -13,12 +14,12 @@ import (
// Analyze runs the analyze phase of preflight checks
func (c ClusterCollectResult) Analyze() []*analyze.AnalyzeResult {
return doAnalyze(c.AllCollectedData, c.Spec.Spec.Analyzers, nil, "")
return doAnalyze(c.Context, c.AllCollectedData, c.Spec.Spec.Analyzers, nil, "")
}
// Analyze runs the analyze phase of host preflight checks
func (c HostCollectResult) Analyze() []*analyze.AnalyzeResult {
return doAnalyze(c.AllCollectedData, nil, c.Spec.Spec.Analyzers, "")
return doAnalyze(c.Context, c.AllCollectedData, nil, c.Spec.Spec.Analyzers, "")
}
// Analyze runs the analyze phase of host preflight checks.
@@ -43,12 +44,18 @@ func (c RemoteCollectResult) Analyze() []*analyze.AnalyzeResult {
byteResult[k] = []byte(v)
}
results = append(results, doAnalyze(byteResult, nil, c.Spec.Spec.Analyzers, nodeName)...)
results = append(results, doAnalyze(c.Context, byteResult, nil, c.Spec.Spec.Analyzers, nodeName)...)
}
return results
}
func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1beta2.Analyze, hostAnalyzers []*troubleshootv1beta2.HostAnalyze, nodeName string) []*analyze.AnalyzeResult {
func doAnalyze(
ctx context.Context,
allCollectedData map[string][]byte,
analyzers []*troubleshootv1beta2.Analyze,
hostAnalyzers []*troubleshootv1beta2.HostAnalyze,
nodeName string,
) []*analyze.AnalyzeResult {
getCollectedFileContents := func(fileName string) ([]byte, error) {
contents, ok := allCollectedData[fileName]
if !ok {
@@ -89,7 +96,7 @@ func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1be
analyzeResults := []*analyze.AnalyzeResult{}
for _, analyzer := range analyzers {
analyzeResult, err := analyze.Analyze(analyzer, getCollectedFileContents, getChildCollectedFileContents)
analyzeResult, err := analyze.Analyze(ctx, analyzer, getCollectedFileContents, getChildCollectedFileContents)
if err != nil {
strict, strictErr := HasStrictAnalyzer(analyzer)
if strictErr != nil {
@@ -112,7 +119,7 @@ func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1be
}
for _, hostAnalyzer := range hostAnalyzers {
analyzeResult := analyze.HostAnalyze(hostAnalyzer, getCollectedFileContents, getChildCollectedFileContents)
analyzeResult := analyze.HostAnalyze(ctx, hostAnalyzer, getCollectedFileContents, getChildCollectedFileContents)
analyzeResults = append(analyzeResults, analyzeResult...)
}

View File

@@ -12,7 +12,11 @@ import (
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/replicatedhq/troubleshoot/pkg/collect"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/replicatedhq/troubleshoot/pkg/version"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)
@@ -55,6 +59,7 @@ type ClusterCollectResult struct {
RemoteCollectors collect.RemoteCollectors
isRBACAllowed bool
Spec *troubleshootv1beta2.Preflight
Context context.Context
}
func (cr ClusterCollectResult) IsRBACAllowed() bool {
@@ -65,6 +70,7 @@ type HostCollectResult struct {
AllCollectedData map[string][]byte
Collectors []collect.HostCollector
Spec *troubleshootv1beta2.HostPreflight
Context context.Context
}
func (cr HostCollectResult) IsRBACAllowed() bool {
@@ -75,6 +81,7 @@ type RemoteCollectResult struct {
AllCollectedData map[string][]byte
Collectors collect.RemoteCollectors
Spec *troubleshootv1beta2.HostPreflight
Context context.Context
}
func (cr RemoteCollectResult) IsRBACAllowed() bool {
@@ -83,6 +90,12 @@ func (cr RemoteCollectResult) IsRBACAllowed() bool {
// CollectHost runs the collection phase of host preflight checks
func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (CollectResult, error) {
return CollectHostWithContext(context.Background(), opts, p)
}
func CollectHostWithContext(
ctx context.Context, opts CollectOpts, p *troubleshootv1beta2.HostPreflight,
) (CollectResult, error) {
collectSpecs := make([]*troubleshootv1beta2.HostCollect, 0, 0)
if p != nil && p.Spec.Collectors != nil {
collectSpecs = append(collectSpecs, p.Spec.Collectors...)
@@ -101,11 +114,18 @@ func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Collec
collectResult := HostCollectResult{
Collectors: collectors,
Spec: p,
Context: ctx,
}
for _, collector := range collectors {
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
isExcluded, _ := collector.IsExcluded()
if isExcluded {
logger.Printf("Excluding %q collector", collector.Title())
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
span.End()
continue
}
@@ -117,6 +137,7 @@ func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Collec
for k, v := range result {
allCollectedData[k] = v
}
span.End()
}
collectResult.AllCollectedData = allCollectedData
@@ -126,6 +147,10 @@ func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Collec
// Collect runs the collection phase of preflight checks
func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult, error) {
return CollectWithContext(context.Background(), opts, p)
}
func CollectWithContext(ctx context.Context, opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult, error) {
var allCollectors []collect.Collector
var foundForbidden bool
@@ -133,8 +158,12 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
if p != nil && p.Spec.Collectors != nil {
collectSpecs = append(collectSpecs, p.Spec.Collectors...)
}
collectSpecs = collect.EnsureCollectorInList(collectSpecs, troubleshootv1beta2.Collect{ClusterInfo: &troubleshootv1beta2.ClusterInfo{}})
collectSpecs = collect.EnsureCollectorInList(collectSpecs, troubleshootv1beta2.Collect{ClusterResources: &troubleshootv1beta2.ClusterResources{}})
collectSpecs = collect.EnsureCollectorInList(
collectSpecs, troubleshootv1beta2.Collect{ClusterInfo: &troubleshootv1beta2.ClusterInfo{}},
)
collectSpecs = collect.EnsureCollectorInList(
collectSpecs, troubleshootv1beta2.Collect{ClusterResources: &troubleshootv1beta2.ClusterResources{}},
)
collectSpecs = collect.DedupCollectors(collectSpecs)
collectSpecs = collect.EnsureClusterResourcesFirst(collectSpecs)
@@ -153,7 +182,7 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
for _, desiredCollector := range collectSpecs {
if collectorInterface, ok := collect.GetCollector(desiredCollector, "", opts.Namespace, opts.KubernetesRestConfig, k8sClient, nil); ok {
if collector, ok := collectorInterface.(collect.Collector); ok {
err := collector.CheckRBAC(context.Background(), collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
err := collector.CheckRBAC(ctx, collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
if err != nil {
return nil, errors.Wrap(err, "failed to check RBAC for collectors")
}
@@ -193,6 +222,7 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
collectResult := ClusterCollectResult{
Collectors: allCollectors,
Spec: p,
Context: ctx,
}
if foundForbidden && !opts.IgnorePermissionErrors {
@@ -201,8 +231,14 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
}
for i, collector := range allCollectors {
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
isExcluded, _ := collector.IsExcluded()
if isExcluded {
logger.Printf("Excluding %q collector", collector.Title())
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
span.End()
continue
}
@@ -217,6 +253,8 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
TotalCount: len(allCollectors),
Collectors: collectorList,
}
span.SetStatus(codes.Error, "skipping collector, insufficient RBAC permissions")
span.End()
continue
}
}
@@ -245,6 +283,8 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
TotalCount: len(allCollectors),
Collectors: collectorList,
}
span.SetStatus(codes.Error, err.Error())
span.End()
continue
}
@@ -262,6 +302,7 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
for k, v := range result {
allCollectedData[k] = v
}
span.End()
}
collectResult.AllCollectedData = allCollectedData
@@ -271,6 +312,10 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
// Collect runs the collection phase of preflight checks
func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (CollectResult, error) {
return CollectRemoteWithContext(context.Background(), opts, p)
}
func CollectRemoteWithContext(ctx context.Context, opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (CollectResult, error) {
collectSpecs := make([]*troubleshootv1beta2.RemoteCollect, 0, 0)
if p != nil && p.Spec.RemoteCollectors != nil {
collectSpecs = append(collectSpecs, p.Spec.RemoteCollectors...)
@@ -296,6 +341,7 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
collectResult := RemoteCollectResult{
Collectors: collectors,
Spec: p,
Context: ctx,
}
// generate a map of all collectors for atomic status messages
@@ -308,6 +354,9 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
// Run preflights collectors synchronously
for i, collector := range collectors {
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.GetDisplayName())
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
collectorList[collector.GetDisplayName()] = CollectorStatus{
Status: "running",
}
@@ -334,6 +383,8 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
TotalCount: len(collectors),
Collectors: collectorList,
}
span.SetStatus(codes.Error, err.Error())
span.End()
continue
}
@@ -375,6 +426,8 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
}
}
span.End()
}
collectResult.AllCollectedData = allCollectedData

View File

@@ -3,7 +3,7 @@ package preflight
import (
"context"
"fmt"
"io/ioutil"
"io"
"net/http"
"net/url"
"os"
@@ -18,12 +18,14 @@ import (
analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
troubleshootclientsetscheme "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"github.com/replicatedhq/troubleshoot/pkg/docrewrite"
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
"github.com/replicatedhq/troubleshoot/pkg/oci"
"github.com/replicatedhq/troubleshoot/pkg/specs"
"github.com/spf13/viper"
spin "github.com/tj/go-spin"
"go.opentelemetry.io/otel"
"golang.org/x/sync/errgroup"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
@@ -31,6 +33,10 @@ import (
)
func RunPreflights(interactive bool, output string, format string, args []string) error {
ctx, root := otel.Tracer(
constants.LIB_TRACER_NAME).Start(context.Background(), constants.TROUBLESHOOT_ROOT_SPAN_NAME)
defer root.End()
if interactive {
fmt.Print(cursor.Hide())
defer fmt.Print(cursor.Show())
@@ -64,7 +70,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
preflightContent = spec
} else if _, err = os.Stat(v); err == nil {
b, err := ioutil.ReadFile(v)
b, err := os.ReadFile(v)
if err != nil {
return err
}
@@ -103,7 +109,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
body, err := io.ReadAll(resp.Body)
if err != nil {
return err
}
@@ -142,7 +148,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
progressCh := make(chan interface{})
defer close(progressCh)
ctx, stopProgressCollection := context.WithCancel(context.Background())
ctx, stopProgressCollection := context.WithCancel(ctx)
// make sure we shut down progress collection goroutines if an error occurs
defer stopProgressCollection()
progressCollection, ctx := errgroup.WithContext(ctx)
@@ -156,7 +162,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
uploadResultsMap := make(map[string][]CollectResult)
if preflightSpec != nil {
r, err := collectInCluster(preflightSpec, progressCh)
r, err := collectInCluster(ctx, preflightSpec, progressCh)
if err != nil {
return errors.Wrap(err, "failed to collect in cluster")
}
@@ -165,7 +171,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
}
if uploadResultSpecs != nil {
for _, spec := range uploadResultSpecs {
r, err := collectInCluster(spec, progressCh)
r, err := collectInCluster(ctx, spec, progressCh)
if err != nil {
return errors.Wrap(err, "failed to collect in cluster")
}
@@ -176,14 +182,14 @@ func RunPreflights(interactive bool, output string, format string, args []string
}
if hostPreflightSpec != nil {
if len(hostPreflightSpec.Spec.Collectors) > 0 {
r, err := collectHost(hostPreflightSpec, progressCh)
r, err := collectHost(ctx, hostPreflightSpec, progressCh)
if err != nil {
return errors.Wrap(err, "failed to collect from host")
}
collectResults = append(collectResults, *r)
}
if len(hostPreflightSpec.Spec.RemoteCollectors) > 0 {
r, err := collectRemote(hostPreflightSpec, progressCh)
r, err := collectRemote(ctx, hostPreflightSpec, progressCh)
if err != nil {
return errors.Wrap(err, "failed to collect remotely")
}
@@ -284,7 +290,7 @@ func collectNonInteractiveProgess(ctx context.Context, progressCh <-chan interfa
}
}
func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh chan interface{}) (*CollectResult, error) {
func collectInCluster(ctx context.Context, preflightSpec *troubleshootv1beta2.Preflight, progressCh chan interface{}) (*CollectResult, error) {
v := viper.GetViper()
restConfig, err := k8sutil.GetRESTConfig()
@@ -306,7 +312,7 @@ func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh c
}
}
collectResults, err := Collect(collectOpts, preflightSpec)
collectResults, err := CollectWithContext(ctx, collectOpts, preflightSpec)
if err != nil {
if collectResults != nil && !collectResults.IsRBACAllowed() {
if preflightSpec.Spec.UploadResultsTo != "" {
@@ -323,7 +329,7 @@ func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh c
return &collectResults, nil
}
func collectRemote(preflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
func collectRemote(ctx context.Context, preflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
v := viper.GetViper()
restConfig, err := k8sutil.GetRESTConfig()
@@ -365,7 +371,7 @@ func collectRemote(preflightSpec *troubleshootv1beta2.HostPreflight, progressCh
return &collectResults, nil
}
func collectHost(hostPreflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
func collectHost(ctx context.Context, hostPreflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
collectOpts := CollectOpts{
ProgressChan: progressCh,
}

View File

@@ -15,13 +15,17 @@ import (
"github.com/replicatedhq/troubleshoot/pkg/collect"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"github.com/replicatedhq/troubleshoot/pkg/convert"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"github.com/replicatedhq/troubleshoot/pkg/version"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"gopkg.in/yaml.v2"
"k8s.io/client-go/kubernetes"
)
func runHostCollectors(hostCollectors []*troubleshootv1beta2.HostCollect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
collectSpecs := make([]*troubleshootv1beta2.HostCollect, 0, 0)
func runHostCollectors(ctx context.Context, hostCollectors []*troubleshootv1beta2.HostCollect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
collectSpecs := make([]*troubleshootv1beta2.HostCollect, 0)
collectSpecs = append(collectSpecs, hostCollectors...)
allCollectedData := make(map[string][]byte)
@@ -35,16 +39,25 @@ func runHostCollectors(hostCollectors []*troubleshootv1beta2.HostCollect, additi
}
for _, collector := range collectors {
// TODO: Add context to host collectors
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
isExcluded, _ := collector.IsExcluded()
if isExcluded {
logger.Printf("Excluding %q collector", collector.Title())
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
span.End()
continue
}
opts.ProgressChan <- fmt.Sprintf("[%s] Running host collector...", collector.Title())
result, err := collector.Collect(opts.ProgressChan)
if err != nil {
span.SetStatus(codes.Error, err.Error())
opts.ProgressChan <- errors.Errorf("failed to run host collector: %s: %v", collector.Title(), err)
}
span.End()
for k, v := range result {
allCollectedData[k] = v
}
@@ -58,17 +71,21 @@ func runHostCollectors(hostCollectors []*troubleshootv1beta2.HostCollect, additi
}
if opts.Redact {
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, "Host collectors")
span.SetAttributes(attribute.String("type", "Redactors"))
err := collect.RedactResult(bundlePath, collectResult, globalRedactors)
if err != nil {
err = errors.Wrap(err, "failed to redact host collector results")
span.SetStatus(codes.Error, err.Error())
return collectResult, err
}
span.End()
}
return collectResult, nil
}
func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
func runCollectors(ctx context.Context, collectors []*troubleshootv1beta2.Collect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
var allCollectors []collect.Collector
var foundForbidden bool
@@ -94,7 +111,7 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
for _, desiredCollector := range collectSpecs {
if collectorInterface, ok := collect.GetCollector(desiredCollector, bundlePath, opts.Namespace, opts.KubernetesRestConfig, k8sClient, opts.SinceTime); ok {
if collector, ok := collectorInterface.(collect.Collector); ok {
err := collector.CheckRBAC(context.Background(), collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
err := collector.CheckRBAC(ctx, collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
if err != nil {
return nil, errors.Wrap(err, "failed to check RBAC for collectors")
}
@@ -130,8 +147,14 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
}
for _, collector := range allCollectors {
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
isExcluded, _ := collector.IsExcluded()
if isExcluded {
logger.Printf("Excluding %q collector", collector.Title())
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
span.End()
continue
}
@@ -140,17 +163,22 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
if _, ok := collector.(*collect.CollectClusterResources); !ok {
msg := fmt.Sprintf("skipping collector %s with insufficient RBAC permissions", collector.Title())
opts.CollectorProgressCallback(opts.ProgressChan, msg)
span.SetStatus(codes.Error, "skipping collector, insufficient RBAC permissions")
span.End()
continue
}
}
opts.CollectorProgressCallback(opts.ProgressChan, collector.Title())
result, err := collector.Collect(opts.ProgressChan)
if err != nil {
span.SetStatus(codes.Error, err.Error())
opts.ProgressChan <- errors.Errorf("failed to run collector: %s: %v", collector.Title(), err)
}
for k, v := range result {
allCollectedData[k] = v
}
span.End()
}
collectResult := allCollectedData
@@ -161,10 +189,17 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
}
if opts.Redact {
// TODO: Should we record how long each redactor takes?
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, "In-cluster collectors")
span.SetAttributes(attribute.String("type", "Redactors"))
err := collect.RedactResult(bundlePath, collectResult, globalRedactors)
if err != nil {
return collectResult, errors.Wrap(err, "failed to redact in cluster collector results")
err := errors.Wrap(err, "failed to redact in cluster collector results")
span.SetStatus(codes.Error, err.Error())
span.End()
return collectResult, err
}
span.End()
}
return collectResult, nil

View File

@@ -1,6 +1,8 @@
package supportbundle
import (
"bytes"
"context"
"fmt"
"net/http"
"os"
@@ -11,11 +13,14 @@ import (
cursor "github.com/ahmetalpbalkan/go-cursor"
"github.com/fatih/color"
"github.com/pkg/errors"
"github.com/replicatedhq/troubleshoot/internal/traces"
analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/replicatedhq/troubleshoot/pkg/collect"
"github.com/replicatedhq/troubleshoot/pkg/constants"
"github.com/replicatedhq/troubleshoot/pkg/convert"
"github.com/replicatedhq/troubleshoot/pkg/logger"
"go.opentelemetry.io/otel"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)
@@ -43,7 +48,10 @@ type SupportBundleResponse struct {
// collectors, analyzers and after collection steps. Input arguments are specifications.
// if FromCLI option is set to true, the output is the name of the archive on disk in the cwd.
// if FromCLI option is set to false, the support bundle is archived in the OS temp folder (os.TempDir()).
func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, additionalRedactors *troubleshootv1beta2.Redactor, opts SupportBundleCreateOpts) (*SupportBundleResponse, error) {
func CollectSupportBundleFromSpec(
spec *troubleshootv1beta2.SupportBundleSpec, additionalRedactors *troubleshootv1beta2.Redactor, opts SupportBundleCreateOpts,
) (*SupportBundleResponse, error) {
resultsResponse := SupportBundleResponse{}
if opts.KubernetesRestConfig == nil {
@@ -90,13 +98,23 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
var result, files, hostFiles collect.CollectorResult
ctx, root := otel.Tracer(constants.LIB_TRACER_NAME).Start(
context.Background(), constants.TROUBLESHOOT_ROOT_SPAN_NAME,
)
defer func() {
// If this function returns an error, root.End() may not be called.
// We want to ensure this happens, so we defer it. It is safe to call
// root.End() multiple times.
root.End()
}()
// Cache error returned by collectors and return it at the end of the function
// so as to have a chance to run analyzers and archive the support bundle after.
// If both host and in cluster collectors fail, the errors will be wrapped
collectorsErrs := []string{}
if spec.HostCollectors != nil {
// Run host collectors
hostFiles, err = runHostCollectors(spec.HostCollectors, additionalRedactors, bundlePath, opts)
hostFiles, err = runHostCollectors(ctx, spec.HostCollectors, additionalRedactors, bundlePath, opts)
if err != nil {
collectorsErrs = append(collectorsErrs, fmt.Sprintf("failed to run host collectors: %s", err))
}
@@ -104,7 +122,7 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
if spec.Collectors != nil {
// Run collectors
files, err = runCollectors(spec.Collectors, additionalRedactors, bundlePath, opts)
files, err = runCollectors(ctx, spec.Collectors, additionalRedactors, bundlePath, opts)
if err != nil {
collectorsErrs = append(collectorsErrs, fmt.Sprintf("failed to run collectors: %s", err))
}
@@ -128,13 +146,13 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
return nil, errors.Wrap(err, "failed to get version file")
}
err = result.SaveResult(bundlePath, constants.VersionFilename, version)
err = result.SaveResult(bundlePath, constants.VERSION_FILENAME, version)
if err != nil {
return nil, errors.Wrap(err, "failed to write version")
}
// Run Analyzers
analyzeResults, err := AnalyzeSupportBundle(spec, bundlePath)
analyzeResults, err := AnalyzeSupportBundle(ctx, spec, bundlePath)
if err != nil {
if opts.FromCLI {
c := color.New(color.FgHiRed)
@@ -156,6 +174,17 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
return nil, errors.Wrap(err, "failed to write analysis")
}
// Complete tracing by ending the root span and collecting
// the summary of the traces. Store them in the support bundle.
root.End()
summary := traces.GetExporterInstance().GetSummary()
err = result.SaveResult(bundlePath, "execution-data/summary.txt", bytes.NewReader([]byte(summary)))
if err != nil {
// Don't fail the support bundle if we can't save the execution summary
logger.Printf("failed to save execution summary file in the support bundle: %v", err)
}
// Archive Support Bundle
if err := result.ArchiveSupportBundle(bundlePath, filename); err != nil {
return nil, errors.Wrap(err, "create bundle file")
}
@@ -174,6 +203,7 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
if len(collectorsErrs) > 0 {
// TODO: Consider a collectors error type
// TODO: use errors.Join in go 1.20 (https://pkg.go.dev/errors#Join)
return &resultsResponse, fmt.Errorf(strings.Join(collectorsErrs, "\n"))
}
@@ -222,11 +252,11 @@ func ProcessSupportBundleAfterCollection(spec *troubleshootv1beta2.SupportBundle
// AnalyzeSupportBundle performs analysis on a support bundle using the support bundle spec and an already unpacked support
// bundle on disk
func AnalyzeSupportBundle(spec *troubleshootv1beta2.SupportBundleSpec, tmpDir string) ([]*analyzer.AnalyzeResult, error) {
func AnalyzeSupportBundle(ctx context.Context, spec *troubleshootv1beta2.SupportBundleSpec, tmpDir string) ([]*analyzer.AnalyzeResult, error) {
if len(spec.Analyzers) == 0 && len(spec.HostAnalyzers) == 0 {
return nil, nil
}
analyzeResults, err := analyzer.AnalyzeLocal(tmpDir, spec.Analyzers, spec.HostAnalyzers)
analyzeResults, err := analyzer.AnalyzeLocal(ctx, tmpDir, spec.Analyzers, spec.HostAnalyzers)
if err != nil {
return nil, errors.Wrap(err, "failed to analyze support bundle")
}