mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-02-14 18:29:53 +00:00
feat: Record summary of execution times of support bundle operations (collect/redact/analyse) (#935)
When running a support bundle, we want to know how long each operation (collect, redact, analyze) takes. This commit adds a new trace exporter that records the start and end times of each operation, and then prints a summary of the execution. The summary is also stored in the support bundle. Related to #923
This commit is contained in:
15
Makefile
15
Makefile
@@ -35,6 +35,7 @@ define LDFLAGS
|
||||
endef
|
||||
|
||||
BUILDFLAGS = -tags "netgo containers_image_ostree_stub exclude_graphdriver_devicemapper exclude_graphdriver_btrfs containers_image_openpgp" -installsuffix netgo
|
||||
BUILDPATHS = ./pkg/... ./cmd/... ./internal/...
|
||||
|
||||
all: test support-bundle preflight collect analyze
|
||||
|
||||
@@ -45,16 +46,16 @@ ffi: fmt vet
|
||||
.PHONY: test
|
||||
test: generate fmt vet
|
||||
if [ -n $(RUN) ]; then \
|
||||
go test ${BUILDFLAGS} ./pkg/... ./cmd/... -coverprofile cover.out -run $(RUN); \
|
||||
go test ${BUILDFLAGS} ${BUILDPATHS} -coverprofile cover.out -run $(RUN); \
|
||||
else \
|
||||
go test ${BUILDFLAGS} ./pkg/... ./cmd/... -coverprofile cover.out; \
|
||||
go test ${BUILDFLAGS} ${BUILDPATHS} -coverprofile cover.out; \
|
||||
fi
|
||||
|
||||
# Go tests that require a K8s instance
|
||||
# TODOLATER: merge with test, so we get unified coverage reports? it'll add 21~sec to the test job though...
|
||||
.PHONY: test-integration
|
||||
test-integration:
|
||||
go test -v --tags "integration exclude_graphdriver_devicemapper exclude_graphdriver_btrfs" ./pkg/... ./cmd/...
|
||||
go test -v --tags "integration exclude_graphdriver_devicemapper exclude_graphdriver_btrfs" ${BUILDPATHS}
|
||||
|
||||
.PHONY: preflight-e2e-test
|
||||
preflight-e2e-test:
|
||||
@@ -82,11 +83,11 @@ collect:
|
||||
|
||||
.PHONY: fmt
|
||||
fmt:
|
||||
go fmt ./pkg/... ./cmd/...
|
||||
go fmt ${BUILDPATHS}
|
||||
|
||||
.PHONY: vet
|
||||
vet:
|
||||
go vet ${BUILDFLAGS} ./pkg/... ./cmd/...
|
||||
go vet ${BUILDFLAGS} ${BUILDPATHS}
|
||||
|
||||
.PHONY: generate
|
||||
generate: controller-gen client-gen
|
||||
@@ -213,8 +214,8 @@ scan:
|
||||
|
||||
.PHONY: lint
|
||||
lint:
|
||||
golangci-lint run --new -c .golangci.yaml pkg/... cmd/...
|
||||
golangci-lint run --new -c .golangci.yaml ${BUILDPATHS}
|
||||
|
||||
.PHONY: lint-and-fix
|
||||
lint-and-fix:
|
||||
golangci-lint run --new --fix -c .golangci.yaml pkg/... cmd/...
|
||||
golangci-lint run --new --fix -c .golangci.yaml ${BUILDPATHS}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
"github.com/replicatedhq/troubleshoot/cmd/util"
|
||||
"github.com/replicatedhq/troubleshoot/internal/traces"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/preflight"
|
||||
@@ -37,7 +39,19 @@ that a cluster meets the requirements to run an application.`,
|
||||
},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
v := viper.GetViper()
|
||||
return preflight.RunPreflights(v.GetBool("interactive"), v.GetString("output"), v.GetString("format"), args)
|
||||
closer, err := traces.ConfigureTracing("preflight")
|
||||
if err != nil {
|
||||
// Do not fail running preflights if tracing fails
|
||||
logger.Printf("Failed to initialize open tracing provider: %v", err)
|
||||
} else {
|
||||
defer closer()
|
||||
}
|
||||
|
||||
err = preflight.RunPreflights(v.GetBool("interactive"), v.GetString("output"), v.GetString("format"), args)
|
||||
if v.GetBool("debug") {
|
||||
fmt.Printf("\n%s", traces.GetExporterInstance().GetSummary())
|
||||
}
|
||||
return err
|
||||
},
|
||||
PostRun: func(cmd *cobra.Command, args []string) {
|
||||
if err := util.StopProfiling(); err != nil {
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
"github.com/replicatedhq/troubleshoot/cmd/util"
|
||||
"github.com/replicatedhq/troubleshoot/internal/traces"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/spf13/cobra"
|
||||
@@ -40,7 +42,20 @@ from a server that can be used to assist when troubleshooting a Kubernetes clust
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
v := viper.GetViper()
|
||||
|
||||
return runTroubleshoot(v, args)
|
||||
closer, err := traces.ConfigureTracing("support-bundle")
|
||||
if err != nil {
|
||||
// Do not fail running support-bundle if tracing fails
|
||||
logger.Printf("Failed to initialize open tracing provider: %v", err)
|
||||
} else {
|
||||
defer closer()
|
||||
}
|
||||
|
||||
err = runTroubleshoot(v, args)
|
||||
if v.GetBool("debug") {
|
||||
fmt.Printf("\n%s", traces.GetExporterInstance().GetSummary())
|
||||
}
|
||||
|
||||
return err
|
||||
},
|
||||
PersistentPostRun: func(cmd *cobra.Command, args []string) {
|
||||
if err := util.StopProfiling(); err != nil {
|
||||
|
||||
@@ -40,7 +40,7 @@ func writeVersionFile(path string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
filename := filepath.Join(path, constants.VersionFilename)
|
||||
filename := filepath.Join(path, constants.VERSION_FILENAME)
|
||||
err = ioutil.WriteFile(filename, b, 0644)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -3,6 +3,8 @@ kind: SupportBundle
|
||||
metadata:
|
||||
name: example
|
||||
spec:
|
||||
hostCollectors:
|
||||
- hostOS: {}
|
||||
collectors:
|
||||
- logs:
|
||||
selector:
|
||||
|
||||
6
go.mod
6
go.mod
@@ -34,6 +34,8 @@ require (
|
||||
github.com/spf13/viper v1.15.0
|
||||
github.com/stretchr/testify v1.8.1
|
||||
github.com/tj/go-spin v1.1.0
|
||||
go.opentelemetry.io/otel v1.12.0
|
||||
go.opentelemetry.io/otel/sdk v1.11.2
|
||||
golang.org/x/sync v0.1.0
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
k8s.io/api v0.26.1
|
||||
@@ -53,6 +55,7 @@ require (
|
||||
github.com/docker/distribution v2.8.1+incompatible // indirect
|
||||
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
|
||||
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.2.1 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b // indirect
|
||||
@@ -61,6 +64,7 @@ require (
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/sylabs/sif/v2 v2.9.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.12.0 // indirect
|
||||
golang.org/x/mod v0.7.0 // indirect
|
||||
golang.org/x/tools v0.4.0 // indirect
|
||||
)
|
||||
@@ -186,7 +190,7 @@ require (
|
||||
golang.org/x/oauth2 v0.4.0 // indirect
|
||||
golang.org/x/sys v0.4.0 // indirect
|
||||
golang.org/x/term v0.4.0 // indirect
|
||||
golang.org/x/text v0.6.0 // indirect
|
||||
golang.org/x/text v0.6.0
|
||||
golang.org/x/time v0.3.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
|
||||
google.golang.org/api v0.107.0 // indirect
|
||||
|
||||
9
go.sum
9
go.sum
@@ -399,8 +399,11 @@ github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KE
|
||||
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
|
||||
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
|
||||
github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-logr/zapr v1.2.3 h1:a9vnzlIBPQBBkeaR9IuMUfmVOrQlkoC4YfPoFkX3T7A=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
@@ -1031,6 +1034,12 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
||||
go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=
|
||||
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
||||
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
||||
go.opentelemetry.io/otel v1.12.0 h1:IgfC7kqQrRccIKuB7Cl+SRUmsKbEwSGPr0Eu+/ht1SQ=
|
||||
go.opentelemetry.io/otel v1.12.0/go.mod h1:geaoz0L0r1BEOR81k7/n9W4TCXYCJ7bPO7K374jQHG0=
|
||||
go.opentelemetry.io/otel/sdk v1.11.2 h1:GF4JoaEx7iihdMFu30sOyRx52HDHOkl9xQ8SMqNXUiU=
|
||||
go.opentelemetry.io/otel/sdk v1.11.2/go.mod h1:wZ1WxImwpq+lVRo4vsmSOxdd+xwoUJ6rqyLc3SyX9aU=
|
||||
go.opentelemetry.io/otel/trace v1.12.0 h1:p28in++7Kd0r2d8gSt931O57fdjUyWxkVbESuILAeUc=
|
||||
go.opentelemetry.io/otel/trace v1.12.0/go.mod h1:pHlgBynn6s25qJ2szD+Bv+iwKJttjHSI3lUAyf0GNuQ=
|
||||
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
|
||||
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 h1:+FNtrFTmVw0YZGpBGX56XDee331t6JAXeK2bcyhLOOc=
|
||||
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5/go.mod h1:nmDLcffg48OtT/PSW0Hg7FvpRQsQh5OSqIylirxKC7o=
|
||||
|
||||
273
internal/traces/exporter.go
Normal file
273
internal/traces/exporter.go
Normal file
@@ -0,0 +1,273 @@
|
||||
package traces
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/sdk/trace"
|
||||
"go.opentelemetry.io/otel/sdk/trace/tracetest"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/message"
|
||||
)
|
||||
|
||||
var (
	// Compile-time assertion that *Exporter satisfies the SDK's SpanExporter interface.
	_ trace.SpanExporter = (*Exporter)(nil)

	// once guards one-time construction of the package-level singleton exporter.
	once     sync.Once
	exporter *Exporter

	// printer renders numbers with English thousands separators,
	// e.g. "60,000ms" (see the duration lines in the summaries).
	printer = message.NewPrinter(language.English)
)

// legend explains the status suffixes appended to span names in summaries.
// NOTE(review): "Suceeded" is a typo for "Succeeded", but the unit tests
// assert this exact text — fix the constant and the tests together.
const legend = "Suceeded (S), eXcluded (X), Failed (F)\n"
|
||||
|
||||
// FUTURE WORK: This exporter should only be used by troubleshoot CLIs
|
||||
// until the following issue is addressed:
|
||||
// 1. The cache of spans grows infinitely at the moment. This is OK for short lived
|
||||
// invocations such as CLI applications running one-shot commands. For long running
|
||||
// applications, this will be a problem.
|
||||
// * We can drop non-troubleshoot spans from the cache. There is an OTEP to add
|
||||
// "inheritable attributes" to spans which is unsupported as of today. Once/if
|
||||
// it gets accepted and implemented, it will allow us to add a "troubleshoot"
|
||||
// attribute to the "root" span which gets propagated to downstream spans which
|
||||
// we can use to filter out non-troubleshoot spans.
|
||||
// https://github.com/dynatrace-oss-contrib/oteps/blob/feature/context-scoped-attributes/text/0207-context-scoped-attributes.md
|
||||
// https://github.com/open-telemetry/opentelemetry-specification/issues/1337
|
||||
// * We can "search" for the "root" span by traversing up the span tree and drop
|
||||
// all spans that are not descendants of the "root" span. This can be slow, but
|
||||
// we can do it in a background goroutine (adds a bit of complexity).
|
||||
// 2. At the moment, the summary of an execution is the only case this exporter
|
||||
// is being used for.
|
||||
|
||||
// GetExporterInstance creates a singleton exporter instance
|
||||
func GetExporterInstance() *Exporter {
|
||||
once.Do(func() {
|
||||
exporter = &Exporter{
|
||||
allSpans: make([]trace.ReadOnlySpan, 1024),
|
||||
}
|
||||
})
|
||||
return exporter
|
||||
}
|
||||
|
||||
// Exporter is an implementation of trace.SpanExporter that writes to a destination.
type Exporter struct {
	// spansMu guards allSpans; ExportSpans may be called concurrently
	// by the SDK (with a syncer processor, on every span.End()).
	spansMu  sync.Mutex
	allSpans []trace.ReadOnlySpan

	// stoppedMu guards stopped, which Shutdown sets so that later
	// ExportSpans calls become no-ops.
	stoppedMu sync.RWMutex
	stopped   bool
}
|
||||
|
||||
// ExportSpans writes spans to an in-memory cache
|
||||
// This function can/will be called on every span.End() at worst.
|
||||
func (e *Exporter) ExportSpans(ctx context.Context, spans []trace.ReadOnlySpan) error {
|
||||
// This is a no-op if the context is canceled.
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
e.stoppedMu.RLock()
|
||||
stopped := e.stopped
|
||||
e.stoppedMu.RUnlock()
|
||||
if stopped {
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(spans) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
e.spansMu.Lock()
|
||||
defer e.spansMu.Unlock()
|
||||
|
||||
// Cache received span updates allSpans
|
||||
e.allSpans = append(e.allSpans, spans...)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isType(stub *tracetest.SpanStub, t string) bool {
|
||||
if stub == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, attr := range stub.Attributes {
|
||||
if string(attr.Key) == "type" && strings.Contains(attr.Value.AsString(), t) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func formatStubName(stub *tracetest.SpanStub) string {
|
||||
if stub == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
for _, attr := range stub.Attributes {
|
||||
if stub.Status.Code == codes.Error {
|
||||
return stub.Name + " (F)"
|
||||
}
|
||||
if string(attr.Key) == constants.EXCLUDED && attr.Value.AsBool() {
|
||||
return stub.Name + " (X)"
|
||||
}
|
||||
}
|
||||
return stub.Name + " (S)" // Assume success
|
||||
}
|
||||
|
||||
// maxInt returns the larger of x or y.
func maxInt(x, y int) int {
	if x >= y {
		return x
	}
	return y
}
|
||||
|
||||
// GetSummary returns the runtime summary of the execution
// so far. Call this function after your "root" span has ended
// and the program operations needing tracing have completed.
func (e *Exporter) GetSummary() string {
	// Snapshot the cached spans under the lock; formatting happens unlocked.
	e.spansMu.Lock()
	stubs := tracetest.SpanStubsFromReadOnlySpans(e.allSpans)
	e.spansMu.Unlock()

	// No spans to log
	if len(stubs) == 0 {
		return ""
	}

	// TODO: We may want to collect more information about the
	// execution of the program. For example, we can collect
	// the number of times a collector was executed, whether
	// it was successful or not, if it was skipped, etc.
	// NOTE(review): maps are keyed by name, so two spans with the same
	// name overwrite each other — only the last duration survives.
	collectors := make(map[string]time.Duration)
	redactors := make(map[string]time.Duration)
	analysers := make(map[string]time.Duration)

	totalDuration := time.Duration(0)

	for i := range stubs {
		stub := &stubs[i]

		// Classify each stub by its "type" attribute and record its wall time.
		duration := stub.EndTime.Sub(stub.StartTime)
		stubName := formatStubName(stub)
		switch {
		case stub.Name == constants.TROUBLESHOOT_ROOT_SPAN_NAME:
			totalDuration = duration
		case isType(stub, "Collect"):
			collectors[stubName] = duration
		case isType(stub, "Redactors"):
			// Redactors are keyed by the plain span name (no S/X/F suffix);
			// presumably intentional, since the redactors table prints no
			// legend — confirm before changing.
			redactors[stub.Name] = duration
		case isType(stub, "Analyze"):
			analysers[stubName] = duration
		default:
			// Spans that are not collect/redact/analyze (or the root) are ignored.
			continue
		}
	}

	sb := strings.Builder{}

	collectorsSummary(collectors, &sb)
	redactorsSummary(redactors, &sb)
	analysersSummary(analysers, &sb)
	// Total duration is taken from the root span, 0 if none was seen.
	sb.WriteString(printer.Sprintf("\nDuration: %dms\n", totalDuration/time.Millisecond))

	return sb.String()
}
|
||||
|
||||
// summary of collector runtimes
|
||||
func collectorsSummary(summary map[string]time.Duration, sb *strings.Builder) {
|
||||
padding, keys := sortedKeysAndPadding(summary)
|
||||
|
||||
sb.WriteString("============ Collectors summary =============\n")
|
||||
sb.WriteString(legend)
|
||||
sb.WriteString("=============================================\n")
|
||||
if len(summary) == 0 {
|
||||
sb.WriteString("No collectors executed\n")
|
||||
return
|
||||
}
|
||||
|
||||
for _, name := range keys {
|
||||
sb.WriteString(printer.Sprintf("%-*s : %dms\n", padding, name, summary[name]/time.Millisecond))
|
||||
}
|
||||
}
|
||||
|
||||
// summary of redactor runtime
|
||||
func redactorsSummary(summary map[string]time.Duration, sb *strings.Builder) {
|
||||
padding, keys := sortedKeysAndPadding(summary)
|
||||
|
||||
sb.WriteString("\n============ Redactors summary =============\n")
|
||||
if len(summary) == 0 {
|
||||
sb.WriteString("No redactors executed\n")
|
||||
return
|
||||
}
|
||||
|
||||
for _, name := range keys {
|
||||
sb.WriteString(printer.Sprintf("%-*s : %dms\n", padding, name, summary[name]/time.Millisecond))
|
||||
}
|
||||
}
|
||||
|
||||
// summary of analyser runtime
|
||||
func analysersSummary(summary map[string]time.Duration, sb *strings.Builder) {
|
||||
padding, keys := sortedKeysAndPadding(summary)
|
||||
|
||||
sb.WriteString("\n============= Analyzers summary =============\n")
|
||||
sb.WriteString(legend)
|
||||
sb.WriteString("=============================================\n")
|
||||
if len(summary) == 0 {
|
||||
sb.WriteString("No analyzers executed\n")
|
||||
return
|
||||
}
|
||||
|
||||
for _, name := range keys {
|
||||
sb.WriteString(printer.Sprintf("%-*s : %dms\n", padding, name, summary[name]/time.Millisecond))
|
||||
}
|
||||
}
|
||||
|
||||
func sortedKeysAndPadding(summary map[string]time.Duration) (int, []string) {
|
||||
keys := make([]string, 0, len(summary))
|
||||
padding := 0
|
||||
for k := range summary {
|
||||
padding = maxInt(padding, len(k))
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.SliceStable(keys, func(l, r int) bool {
|
||||
return summary[keys[l]] > summary[keys[r]]
|
||||
})
|
||||
return padding, keys
|
||||
}
|
||||
|
||||
// Shutdown stops the exporter: subsequent ExportSpans calls become
// no-ops and the cached spans are dropped. (The previous comment said
// it "preforms no action", which was inaccurate.) Honors an
// already-done context per the trace.SpanExporter contract.
func (e *Exporter) Shutdown(ctx context.Context) error {
	// Flip the stopped flag first so concurrent ExportSpans calls bail out.
	e.stoppedMu.Lock()
	e.stopped = true
	e.stoppedMu.Unlock()

	// Drop the cached spans so their memory can be reclaimed.
	e.Reset()

	// Report the context error if the caller's context has already expired.
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}
	return nil
}
|
||||
|
||||
func (e *Exporter) Reset() {
|
||||
e.spansMu.Lock()
|
||||
e.allSpans = e.allSpans[:0] // clear the slice
|
||||
e.spansMu.Unlock()
|
||||
}
|
||||
|
||||
// MarshalLog is the marshaling function used by the logging system to represent this exporter.
|
||||
func (e *Exporter) MarshalLog() interface{} {
|
||||
return struct {
|
||||
Type string
|
||||
}{
|
||||
Type: "troubleshoot",
|
||||
}
|
||||
}
|
||||
187
internal/traces/exporter_test.go
Normal file
187
internal/traces/exporter_test.go
Normal file
@@ -0,0 +1,187 @@
|
||||
package traces
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/sdk/trace"
|
||||
"go.opentelemetry.io/otel/sdk/trace/tracetest"
|
||||
)
|
||||
|
||||
// TestExporter_GetSummary feeds synthetic span stubs through ExportSpans
// and asserts the rendered summary contains the expected table for each
// category (collectors, analyzers, redactors) and the root duration.
func TestExporter_GetSummary(t *testing.T) {
	logger.SetQuiet(true)

	tests := []struct {
		name  string
		spans tracetest.SpanStubs
		want  string
	}{
		{
			name:  "with no spans",
			spans: tracetest.SpanStubs{},
			want:  "",
		},
		{
			// Only the root span contributes to the total "Duration" line.
			name: "with root span only",
			spans: tracetest.SpanStubs{
				tracetest.SpanStub{
					Name:      constants.TROUBLESHOOT_ROOT_SPAN_NAME,
					StartTime: time.Now(),
					EndTime:   time.Now().Add(time.Second),
				},
			},
			want: "Duration: 1,000ms",
		},
		{
			// Covers succeeded (S), excluded (X) and failed (F) collectors.
			name: "with collectors",
			spans: tracetest.SpanStubs{
				tracetest.SpanStub{
					Name: "all-logs", StartTime: time.Now(), EndTime: time.Now().Add(time.Minute),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*collect.CollectLogs"),
					},
				},
				tracetest.SpanStub{
					Name: "host-os", StartTime: time.Now(), EndTime: time.Now().Add(time.Second),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*collect.CollectHostOS"),
					},
				},
				tracetest.SpanStub{
					Name: "excluded-collector", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond * 2),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*collect.CollectHostOS"),
						attribute.Bool("excluded", true),
					},
				},
				tracetest.SpanStub{
					Name: "failed-collector", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*collect.CollectHostOS"),
					},
					Status: trace.Status{
						Code:        codes.Error,
						Description: "some error",
					},
				},
			},
			want: `
============ Collectors summary =============
Suceeded (S), eXcluded (X), Failed (F)
=============================================
all-logs (S) : 60,000ms
host-os (S) : 1,000ms
excluded-collector (X) : 2ms
failed-collector (F) : 1ms`,
		},
		{
			name: "with analyzers",
			spans: tracetest.SpanStubs{
				tracetest.SpanStub{
					Name: "cluster-version", StartTime: time.Now(), EndTime: time.Now().Add(time.Second),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*analyzer.AnalyzeClusterVersion"),
					},
				},
				tracetest.SpanStub{
					Name: "host-cpu", StartTime: time.Now(), EndTime: time.Now().Add(time.Minute),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*analyzer.AnalyzeHostCPU"),
					},
				},
				// NOTE(review): the next two stubs use "*collect.AnalyzeHostCPU"
				// rather than "*analyzer...."; isType matches on the substring
				// "Analyze", so they are still classified as analyzers — confirm
				// whether the prefix is intentional.
				tracetest.SpanStub{
					Name: "excluded-analyser", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond * 2),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*collect.AnalyzeHostCPU"),
						attribute.Bool("excluded", true),
					},
				},
				tracetest.SpanStub{
					Name: "failed-analyser", StartTime: time.Now(), EndTime: time.Now().Add(time.Millisecond),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "*collect.AnalyzeHostCPU"),
					},
					Status: trace.Status{
						Code:        codes.Error,
						Description: "some error",
					},
				},
			},
			want: `
============= Analyzers summary =============
Suceeded (S), eXcluded (X), Failed (F)
=============================================
host-cpu (S) : 60,000ms
cluster-version (S) : 1,000ms
excluded-analyser (X) : 2ms
failed-analyser (F) : 1ms`,
		},
		{
			// Redactor names are printed without an (S)/(X)/(F) suffix.
			name: "with redactors",
			spans: tracetest.SpanStubs{
				tracetest.SpanStub{
					Name: "cluster redactor", StartTime: time.Now(), EndTime: time.Now().Add(time.Second),
					Attributes: []attribute.KeyValue{
						attribute.String("type", "Redactors"),
					},
				},
			},
			want: `
============ Redactors summary =============
cluster redactor : 1,000ms`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			e := &Exporter{}

			ctx := context.Background()
			err := e.ExportSpans(ctx, tt.spans.Snapshots())
			require.NoError(t, err)

			assert.Contains(t, e.GetSummary(), strings.TrimSpace(tt.want))
		})
	}
}
|
||||
|
||||
// TestExporter_ExportSpansWithDoneContext verifies that ExportSpans
// returns the context's error when called with an already-canceled context.
func TestExporter_ExportSpansWithDoneContext(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	e := &Exporter{}
	spans := tracetest.SpanStubs{}

	assert.EqualError(t, e.ExportSpans(ctx, spans.Snapshots()), context.Canceled.Error())
}
|
||||
|
||||
// TestExporter_Shutdown verifies that Shutdown clears the span cache and
// that ExportSpans becomes a no-op (but still returns nil) afterwards.
func TestExporter_Shutdown(t *testing.T) {
	e := &Exporter{}

	ctx := context.Background()
	spans := tracetest.SpanStubs{}
	for i := 0; i < 5; i++ {
		spans = append(spans, tracetest.SpanStub{Name: fmt.Sprintf("span-%d", i)})
	}

	err := e.ExportSpans(ctx, spans.Snapshots())
	require.NoError(t, err)

	assert.Len(t, e.allSpans, 5)

	// Shutdown drops the cached spans.
	require.NoError(t, e.Shutdown(ctx))
	assert.Len(t, e.allSpans, 0)

	// Exporting after shutdown succeeds but records nothing.
	err = e.ExportSpans(ctx, spans.Snapshots())
	require.NoError(t, err)

	assert.Len(t, e.allSpans, 0)
}
|
||||
60
internal/traces/otel.go
Normal file
60
internal/traces/otel.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package traces
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/version"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
"go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.12.0"
|
||||
)
|
||||
|
||||
// ConfigureTracing configures the OpenTelemetry trace provider for CLI
|
||||
// commands. Projects using troubleshoot as a library would need to register
|
||||
// troubleshoot's exporter like so.
|
||||
//
|
||||
// var tp *trace.TracerProvider // client application's trace provider
|
||||
// tp.RegisterSpanProcessor(
|
||||
// trace.NewSimpleSpanProcessor(
|
||||
// traces.GetExporterInstance(), // Troubleshoot's exporter
|
||||
// ),
|
||||
// )
|
||||
//
|
||||
// The client application is responsible for constructing the trace provider
|
||||
// and registering the exporter. Multiple exporters can be registered.
|
||||
func ConfigureTracing(processName string) (func(), error) {
|
||||
r, err := resource.Merge(
|
||||
resource.Default(),
|
||||
resource.NewWithAttributes(
|
||||
resource.Default().SchemaURL(),
|
||||
semconv.ProcessCommandKey.String(processName),
|
||||
semconv.ProcessRuntimeVersionKey.String(version.Version()),
|
||||
attribute.String("environment", "cli"),
|
||||
),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Trace provider for support bundle cli. Each application is required
|
||||
// to have its own trace provider.
|
||||
tp := trace.NewTracerProvider(
|
||||
trace.WithSampler(trace.AlwaysSample()),
|
||||
trace.WithSyncer(
|
||||
GetExporterInstance(),
|
||||
),
|
||||
trace.WithResource(r),
|
||||
)
|
||||
|
||||
otel.SetTracerProvider(tp)
|
||||
|
||||
return func() {
|
||||
if err := tp.Shutdown(context.Background()); err != nil {
|
||||
logger.Printf("Failed to shutdown trace provider: %v", err)
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
@@ -1,13 +1,19 @@
|
||||
package analyzer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strconv"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/multitype"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
@@ -50,14 +56,25 @@ func isExcluded(excludeVal *multitype.BoolOrString) (bool, error) {
|
||||
return parsed, nil
|
||||
}
|
||||
|
||||
func HostAnalyze(hostAnalyzer *troubleshootv1beta2.HostAnalyze, getFile getCollectedFileContents, findFiles getChildCollectedFileContents) []*AnalyzeResult {
|
||||
func HostAnalyze(
|
||||
ctx context.Context,
|
||||
hostAnalyzer *troubleshootv1beta2.HostAnalyze,
|
||||
getFile getCollectedFileContents,
|
||||
findFiles getChildCollectedFileContents,
|
||||
) []*AnalyzeResult {
|
||||
analyzer, ok := GetHostAnalyzer(hostAnalyzer)
|
||||
if !ok {
|
||||
return NewAnalyzeResultError(analyzer, errors.New("invalid host analyzer"))
|
||||
}
|
||||
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, analyzer.Title())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(analyzer).String()))
|
||||
defer span.End()
|
||||
|
||||
isExcluded, _ := analyzer.IsExcluded()
|
||||
if isExcluded {
|
||||
logger.Printf("Excluding %q analyzer", analyzer.Title())
|
||||
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -83,7 +100,12 @@ func NewAnalyzeResultError(analyzer HostAnalyzer, err error) []*AnalyzeResult {
|
||||
}}
|
||||
}
|
||||
|
||||
func Analyze(analyzer *troubleshootv1beta2.Analyze, getFile getCollectedFileContents, findFiles getChildCollectedFileContents) ([]*AnalyzeResult, error) {
|
||||
func Analyze(
|
||||
ctx context.Context,
|
||||
analyzer *troubleshootv1beta2.Analyze,
|
||||
getFile getCollectedFileContents,
|
||||
findFiles getChildCollectedFileContents,
|
||||
) ([]*AnalyzeResult, error) {
|
||||
if analyzer == nil {
|
||||
return nil, errors.New("nil analyzer")
|
||||
}
|
||||
@@ -97,16 +119,24 @@ func Analyze(analyzer *troubleshootv1beta2.Analyze, getFile getCollectedFileCont
|
||||
}}, nil
|
||||
}
|
||||
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, analyzerInst.Title())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(analyzerInst).String()))
|
||||
defer span.End()
|
||||
|
||||
isExcluded, err := analyzerInst.IsExcluded()
|
||||
if err != nil {
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return nil, err
|
||||
}
|
||||
if isExcluded {
|
||||
logger.Printf("Excluding %q analyzer", analyzerInst.Title())
|
||||
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
results, err := analyzerInst.Analyze(getFile, findFiles)
|
||||
if err != nil {
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package analyzer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
@@ -67,7 +68,7 @@ func Test_GetExcludeFlag(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAnalyzeWithNilAnalyzer(t *testing.T) {
|
||||
got, err := Analyze(nil, nil, nil)
|
||||
got, err := Analyze(context.Background(), nil, nil, nil)
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, got)
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package analyzer
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
@@ -24,7 +25,12 @@ type fileContentProvider struct {
|
||||
}
|
||||
|
||||
// Analyze local will analyze a locally available (already downloaded) bundle
|
||||
func AnalyzeLocal(localBundlePath string, analyzers []*troubleshootv1beta2.Analyze, hostAnalyzers []*troubleshootv1beta2.HostAnalyze) ([]*AnalyzeResult, error) {
|
||||
func AnalyzeLocal(
|
||||
ctx context.Context,
|
||||
localBundlePath string,
|
||||
analyzers []*troubleshootv1beta2.Analyze,
|
||||
hostAnalyzers []*troubleshootv1beta2.HostAnalyze,
|
||||
) ([]*AnalyzeResult, error) {
|
||||
rootDir, err := FindBundleRootDir(localBundlePath)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to find root dir")
|
||||
@@ -34,7 +40,7 @@ func AnalyzeLocal(localBundlePath string, analyzers []*troubleshootv1beta2.Analy
|
||||
|
||||
analyzeResults := []*AnalyzeResult{}
|
||||
for _, analyzer := range analyzers {
|
||||
analyzeResult, err := Analyze(analyzer, fcp.getFileContents, fcp.getChildFileContents)
|
||||
analyzeResult, err := Analyze(ctx, analyzer, fcp.getFileContents, fcp.getChildFileContents)
|
||||
if err != nil {
|
||||
logger.Printf("An analyzer failed to run: %v", err)
|
||||
continue
|
||||
@@ -49,7 +55,7 @@ func AnalyzeLocal(localBundlePath string, analyzers []*troubleshootv1beta2.Analy
|
||||
}
|
||||
|
||||
for _, hostAnalyzer := range hostAnalyzers {
|
||||
analyzeResult := HostAnalyze(hostAnalyzer, fcp.getFileContents, fcp.getChildFileContents)
|
||||
analyzeResult := HostAnalyze(ctx, hostAnalyzer, fcp.getFileContents, fcp.getChildFileContents)
|
||||
analyzeResults = append(analyzeResults, analyzeResult...)
|
||||
}
|
||||
|
||||
@@ -81,7 +87,7 @@ func DownloadAndAnalyze(bundleURL string, analyzersSpec string) ([]*AnalyzeResul
|
||||
hostAnalyzers = parsedHostAnalyzers
|
||||
}
|
||||
|
||||
return AnalyzeLocal(rootDir, analyzers, hostAnalyzers)
|
||||
return AnalyzeLocal(context.Background(), rootDir, analyzers, hostAnalyzers)
|
||||
}
|
||||
|
||||
func DownloadAndExtractSupportBundle(bundleURL string) (string, string, error) {
|
||||
@@ -101,10 +107,10 @@ func DownloadAndExtractSupportBundle(bundleURL string) (string, string, error) {
|
||||
return "", "", errors.Wrap(err, "failed to find root dir")
|
||||
}
|
||||
|
||||
_, err = os.Stat(filepath.Join(bundleDir, constants.VersionFilename))
|
||||
_, err = os.Stat(filepath.Join(bundleDir, constants.VERSION_FILENAME))
|
||||
if err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
return "", "", errors.Wrap(err, "failed to read "+constants.VersionFilename)
|
||||
return "", "", errors.Wrap(err, "failed to read "+constants.VERSION_FILENAME)
|
||||
}
|
||||
|
||||
return tmpDir, bundleDir, nil
|
||||
@@ -283,7 +289,7 @@ func FindBundleRootDir(localBundlePath string) (string, error) {
|
||||
|
||||
isInSubDir := true
|
||||
for _, name := range names {
|
||||
if name == constants.VersionFilename {
|
||||
if name == constants.VERSION_FILENAME {
|
||||
isInSubDir = false
|
||||
break
|
||||
}
|
||||
|
||||
@@ -29,13 +29,9 @@ func TestCollectHostUDPPortStatus_Collect(t *testing.T) {
|
||||
return port, conn, err
|
||||
}
|
||||
|
||||
type fields struct {
|
||||
hostCollector *troubleshootv1beta2.UDPPortStatus
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
getPort func(t *testing.T) (port int, closeFn func() error)
|
||||
fields fields
|
||||
want map[string][]byte
|
||||
}{
|
||||
{
|
||||
|
||||
@@ -9,11 +9,17 @@ const (
|
||||
DEFAULT_CLIENT_BURST = 100
|
||||
// DEFAULT_CLIENT_USER_AGENT is an field that specifies the caller of troubleshoot request.
|
||||
DEFAULT_CLIENT_USER_AGENT = "ReplicatedTroubleshoot"
|
||||
// VersionFilename is the name of the file that contains the support bundle version.
|
||||
VersionFilename = "version.yaml"
|
||||
// VERSION_FILENAME is the name of the file that contains the support bundle version.
|
||||
VERSION_FILENAME = "version.yaml"
|
||||
// DEFAULT_LOGS_COLLECTOR_TIMEOUT is the default timeout for logs collector.
|
||||
DEFAULT_LOGS_COLLECTOR_TIMEOUT = 60 * time.Second
|
||||
|
||||
// Tracing constants
|
||||
|
||||
LIB_TRACER_NAME = "github.com/replicatedhq/troubleshoot"
|
||||
TROUBLESHOOT_ROOT_SPAN_NAME = "ReplicatedTroubleshootRootSpan"
|
||||
EXCLUDED = "excluded"
|
||||
|
||||
// Cluster Resources Collector Directories
|
||||
CLUSTER_RESOURCES_DIR = "cluster-resources"
|
||||
CLUSTER_RESOURCES_NAMESPACES = "namespace"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package preflight
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
@@ -13,12 +14,12 @@ import (
|
||||
|
||||
// Analyze runs the analyze phase of preflight checks
|
||||
func (c ClusterCollectResult) Analyze() []*analyze.AnalyzeResult {
|
||||
return doAnalyze(c.AllCollectedData, c.Spec.Spec.Analyzers, nil, "")
|
||||
return doAnalyze(c.Context, c.AllCollectedData, c.Spec.Spec.Analyzers, nil, "")
|
||||
}
|
||||
|
||||
// Analyze runs the analyze phase of host preflight checks
|
||||
func (c HostCollectResult) Analyze() []*analyze.AnalyzeResult {
|
||||
return doAnalyze(c.AllCollectedData, nil, c.Spec.Spec.Analyzers, "")
|
||||
return doAnalyze(c.Context, c.AllCollectedData, nil, c.Spec.Spec.Analyzers, "")
|
||||
}
|
||||
|
||||
// Analyze runs the analyze phase of host preflight checks.
|
||||
@@ -43,12 +44,18 @@ func (c RemoteCollectResult) Analyze() []*analyze.AnalyzeResult {
|
||||
byteResult[k] = []byte(v)
|
||||
|
||||
}
|
||||
results = append(results, doAnalyze(byteResult, nil, c.Spec.Spec.Analyzers, nodeName)...)
|
||||
results = append(results, doAnalyze(c.Context, byteResult, nil, c.Spec.Spec.Analyzers, nodeName)...)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1beta2.Analyze, hostAnalyzers []*troubleshootv1beta2.HostAnalyze, nodeName string) []*analyze.AnalyzeResult {
|
||||
func doAnalyze(
|
||||
ctx context.Context,
|
||||
allCollectedData map[string][]byte,
|
||||
analyzers []*troubleshootv1beta2.Analyze,
|
||||
hostAnalyzers []*troubleshootv1beta2.HostAnalyze,
|
||||
nodeName string,
|
||||
) []*analyze.AnalyzeResult {
|
||||
getCollectedFileContents := func(fileName string) ([]byte, error) {
|
||||
contents, ok := allCollectedData[fileName]
|
||||
if !ok {
|
||||
@@ -89,7 +96,7 @@ func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1be
|
||||
|
||||
analyzeResults := []*analyze.AnalyzeResult{}
|
||||
for _, analyzer := range analyzers {
|
||||
analyzeResult, err := analyze.Analyze(analyzer, getCollectedFileContents, getChildCollectedFileContents)
|
||||
analyzeResult, err := analyze.Analyze(ctx, analyzer, getCollectedFileContents, getChildCollectedFileContents)
|
||||
if err != nil {
|
||||
strict, strictErr := HasStrictAnalyzer(analyzer)
|
||||
if strictErr != nil {
|
||||
@@ -112,7 +119,7 @@ func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1be
|
||||
}
|
||||
|
||||
for _, hostAnalyzer := range hostAnalyzers {
|
||||
analyzeResult := analyze.HostAnalyze(hostAnalyzer, getCollectedFileContents, getChildCollectedFileContents)
|
||||
analyzeResult := analyze.HostAnalyze(ctx, hostAnalyzer, getCollectedFileContents, getChildCollectedFileContents)
|
||||
analyzeResults = append(analyzeResults, analyzeResult...)
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,11 @@ import (
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/collect"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/version"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/rest"
|
||||
)
|
||||
@@ -55,6 +59,7 @@ type ClusterCollectResult struct {
|
||||
RemoteCollectors collect.RemoteCollectors
|
||||
isRBACAllowed bool
|
||||
Spec *troubleshootv1beta2.Preflight
|
||||
Context context.Context
|
||||
}
|
||||
|
||||
func (cr ClusterCollectResult) IsRBACAllowed() bool {
|
||||
@@ -65,6 +70,7 @@ type HostCollectResult struct {
|
||||
AllCollectedData map[string][]byte
|
||||
Collectors []collect.HostCollector
|
||||
Spec *troubleshootv1beta2.HostPreflight
|
||||
Context context.Context
|
||||
}
|
||||
|
||||
func (cr HostCollectResult) IsRBACAllowed() bool {
|
||||
@@ -75,6 +81,7 @@ type RemoteCollectResult struct {
|
||||
AllCollectedData map[string][]byte
|
||||
Collectors collect.RemoteCollectors
|
||||
Spec *troubleshootv1beta2.HostPreflight
|
||||
Context context.Context
|
||||
}
|
||||
|
||||
func (cr RemoteCollectResult) IsRBACAllowed() bool {
|
||||
@@ -83,6 +90,12 @@ func (cr RemoteCollectResult) IsRBACAllowed() bool {
|
||||
|
||||
// CollectHost runs the collection phase of host preflight checks
|
||||
func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (CollectResult, error) {
|
||||
return CollectHostWithContext(context.Background(), opts, p)
|
||||
}
|
||||
|
||||
func CollectHostWithContext(
|
||||
ctx context.Context, opts CollectOpts, p *troubleshootv1beta2.HostPreflight,
|
||||
) (CollectResult, error) {
|
||||
collectSpecs := make([]*troubleshootv1beta2.HostCollect, 0, 0)
|
||||
if p != nil && p.Spec.Collectors != nil {
|
||||
collectSpecs = append(collectSpecs, p.Spec.Collectors...)
|
||||
@@ -101,11 +114,18 @@ func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Collec
|
||||
collectResult := HostCollectResult{
|
||||
Collectors: collectors,
|
||||
Spec: p,
|
||||
Context: ctx,
|
||||
}
|
||||
|
||||
for _, collector := range collectors {
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
|
||||
|
||||
isExcluded, _ := collector.IsExcluded()
|
||||
if isExcluded {
|
||||
logger.Printf("Excluding %q collector", collector.Title())
|
||||
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -117,6 +137,7 @@ func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Collec
|
||||
for k, v := range result {
|
||||
allCollectedData[k] = v
|
||||
}
|
||||
span.End()
|
||||
}
|
||||
|
||||
collectResult.AllCollectedData = allCollectedData
|
||||
@@ -126,6 +147,10 @@ func CollectHost(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Collec
|
||||
|
||||
// Collect runs the collection phase of preflight checks
|
||||
func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult, error) {
|
||||
return CollectWithContext(context.Background(), opts, p)
|
||||
}
|
||||
|
||||
func CollectWithContext(ctx context.Context, opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult, error) {
|
||||
var allCollectors []collect.Collector
|
||||
var foundForbidden bool
|
||||
|
||||
@@ -133,8 +158,12 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
if p != nil && p.Spec.Collectors != nil {
|
||||
collectSpecs = append(collectSpecs, p.Spec.Collectors...)
|
||||
}
|
||||
collectSpecs = collect.EnsureCollectorInList(collectSpecs, troubleshootv1beta2.Collect{ClusterInfo: &troubleshootv1beta2.ClusterInfo{}})
|
||||
collectSpecs = collect.EnsureCollectorInList(collectSpecs, troubleshootv1beta2.Collect{ClusterResources: &troubleshootv1beta2.ClusterResources{}})
|
||||
collectSpecs = collect.EnsureCollectorInList(
|
||||
collectSpecs, troubleshootv1beta2.Collect{ClusterInfo: &troubleshootv1beta2.ClusterInfo{}},
|
||||
)
|
||||
collectSpecs = collect.EnsureCollectorInList(
|
||||
collectSpecs, troubleshootv1beta2.Collect{ClusterResources: &troubleshootv1beta2.ClusterResources{}},
|
||||
)
|
||||
collectSpecs = collect.DedupCollectors(collectSpecs)
|
||||
collectSpecs = collect.EnsureClusterResourcesFirst(collectSpecs)
|
||||
|
||||
@@ -153,7 +182,7 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
for _, desiredCollector := range collectSpecs {
|
||||
if collectorInterface, ok := collect.GetCollector(desiredCollector, "", opts.Namespace, opts.KubernetesRestConfig, k8sClient, nil); ok {
|
||||
if collector, ok := collectorInterface.(collect.Collector); ok {
|
||||
err := collector.CheckRBAC(context.Background(), collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
|
||||
err := collector.CheckRBAC(ctx, collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to check RBAC for collectors")
|
||||
}
|
||||
@@ -193,6 +222,7 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
collectResult := ClusterCollectResult{
|
||||
Collectors: allCollectors,
|
||||
Spec: p,
|
||||
Context: ctx,
|
||||
}
|
||||
|
||||
if foundForbidden && !opts.IgnorePermissionErrors {
|
||||
@@ -201,8 +231,14 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
}
|
||||
|
||||
for i, collector := range allCollectors {
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
|
||||
|
||||
isExcluded, _ := collector.IsExcluded()
|
||||
if isExcluded {
|
||||
logger.Printf("Excluding %q collector", collector.Title())
|
||||
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -217,6 +253,8 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
TotalCount: len(allCollectors),
|
||||
Collectors: collectorList,
|
||||
}
|
||||
span.SetStatus(codes.Error, "skipping collector, insufficient RBAC permissions")
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
}
|
||||
@@ -245,6 +283,8 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
TotalCount: len(allCollectors),
|
||||
Collectors: collectorList,
|
||||
}
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -262,6 +302,7 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
for k, v := range result {
|
||||
allCollectedData[k] = v
|
||||
}
|
||||
span.End()
|
||||
}
|
||||
|
||||
collectResult.AllCollectedData = allCollectedData
|
||||
@@ -271,6 +312,10 @@ func Collect(opts CollectOpts, p *troubleshootv1beta2.Preflight) (CollectResult,
|
||||
|
||||
// Collect runs the collection phase of preflight checks
|
||||
func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (CollectResult, error) {
|
||||
return CollectRemoteWithContext(context.Background(), opts, p)
|
||||
}
|
||||
|
||||
func CollectRemoteWithContext(ctx context.Context, opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (CollectResult, error) {
|
||||
collectSpecs := make([]*troubleshootv1beta2.RemoteCollect, 0, 0)
|
||||
if p != nil && p.Spec.RemoteCollectors != nil {
|
||||
collectSpecs = append(collectSpecs, p.Spec.RemoteCollectors...)
|
||||
@@ -296,6 +341,7 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
|
||||
collectResult := RemoteCollectResult{
|
||||
Collectors: collectors,
|
||||
Spec: p,
|
||||
Context: ctx,
|
||||
}
|
||||
|
||||
// generate a map of all collectors for atomic status messages
|
||||
@@ -308,6 +354,9 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
|
||||
|
||||
// Run preflights collectors synchronously
|
||||
for i, collector := range collectors {
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.GetDisplayName())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
|
||||
|
||||
collectorList[collector.GetDisplayName()] = CollectorStatus{
|
||||
Status: "running",
|
||||
}
|
||||
@@ -334,6 +383,8 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
|
||||
TotalCount: len(collectors),
|
||||
Collectors: collectorList,
|
||||
}
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -375,6 +426,8 @@ func CollectRemote(opts CollectOpts, p *troubleshootv1beta2.HostPreflight) (Coll
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
span.End()
|
||||
}
|
||||
|
||||
collectResult.AllCollectedData = allCollectedData
|
||||
|
||||
@@ -3,7 +3,7 @@ package preflight
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -18,12 +18,14 @@ import (
|
||||
analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
troubleshootclientsetscheme "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/docrewrite"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/oci"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/specs"
|
||||
"github.com/spf13/viper"
|
||||
spin "github.com/tj/go-spin"
|
||||
"go.opentelemetry.io/otel"
|
||||
"golang.org/x/sync/errgroup"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
@@ -31,6 +33,10 @@ import (
|
||||
)
|
||||
|
||||
func RunPreflights(interactive bool, output string, format string, args []string) error {
|
||||
ctx, root := otel.Tracer(
|
||||
constants.LIB_TRACER_NAME).Start(context.Background(), constants.TROUBLESHOOT_ROOT_SPAN_NAME)
|
||||
defer root.End()
|
||||
|
||||
if interactive {
|
||||
fmt.Print(cursor.Hide())
|
||||
defer fmt.Print(cursor.Show())
|
||||
@@ -64,7 +70,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
|
||||
|
||||
preflightContent = spec
|
||||
} else if _, err = os.Stat(v); err == nil {
|
||||
b, err := ioutil.ReadFile(v)
|
||||
b, err := os.ReadFile(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -103,7 +109,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -142,7 +148,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
|
||||
progressCh := make(chan interface{})
|
||||
defer close(progressCh)
|
||||
|
||||
ctx, stopProgressCollection := context.WithCancel(context.Background())
|
||||
ctx, stopProgressCollection := context.WithCancel(ctx)
|
||||
// make sure we shut down progress collection goroutines if an error occurs
|
||||
defer stopProgressCollection()
|
||||
progressCollection, ctx := errgroup.WithContext(ctx)
|
||||
@@ -156,7 +162,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
|
||||
uploadResultsMap := make(map[string][]CollectResult)
|
||||
|
||||
if preflightSpec != nil {
|
||||
r, err := collectInCluster(preflightSpec, progressCh)
|
||||
r, err := collectInCluster(ctx, preflightSpec, progressCh)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to collect in cluster")
|
||||
}
|
||||
@@ -165,7 +171,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
|
||||
}
|
||||
if uploadResultSpecs != nil {
|
||||
for _, spec := range uploadResultSpecs {
|
||||
r, err := collectInCluster(spec, progressCh)
|
||||
r, err := collectInCluster(ctx, spec, progressCh)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to collect in cluster")
|
||||
}
|
||||
@@ -176,14 +182,14 @@ func RunPreflights(interactive bool, output string, format string, args []string
|
||||
}
|
||||
if hostPreflightSpec != nil {
|
||||
if len(hostPreflightSpec.Spec.Collectors) > 0 {
|
||||
r, err := collectHost(hostPreflightSpec, progressCh)
|
||||
r, err := collectHost(ctx, hostPreflightSpec, progressCh)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to collect from host")
|
||||
}
|
||||
collectResults = append(collectResults, *r)
|
||||
}
|
||||
if len(hostPreflightSpec.Spec.RemoteCollectors) > 0 {
|
||||
r, err := collectRemote(hostPreflightSpec, progressCh)
|
||||
r, err := collectRemote(ctx, hostPreflightSpec, progressCh)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to collect remotely")
|
||||
}
|
||||
@@ -284,7 +290,7 @@ func collectNonInteractiveProgess(ctx context.Context, progressCh <-chan interfa
|
||||
}
|
||||
}
|
||||
|
||||
func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh chan interface{}) (*CollectResult, error) {
|
||||
func collectInCluster(ctx context.Context, preflightSpec *troubleshootv1beta2.Preflight, progressCh chan interface{}) (*CollectResult, error) {
|
||||
v := viper.GetViper()
|
||||
|
||||
restConfig, err := k8sutil.GetRESTConfig()
|
||||
@@ -306,7 +312,7 @@ func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh c
|
||||
}
|
||||
}
|
||||
|
||||
collectResults, err := Collect(collectOpts, preflightSpec)
|
||||
collectResults, err := CollectWithContext(ctx, collectOpts, preflightSpec)
|
||||
if err != nil {
|
||||
if collectResults != nil && !collectResults.IsRBACAllowed() {
|
||||
if preflightSpec.Spec.UploadResultsTo != "" {
|
||||
@@ -323,7 +329,7 @@ func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh c
|
||||
return &collectResults, nil
|
||||
}
|
||||
|
||||
func collectRemote(preflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
|
||||
func collectRemote(ctx context.Context, preflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
|
||||
v := viper.GetViper()
|
||||
|
||||
restConfig, err := k8sutil.GetRESTConfig()
|
||||
@@ -365,7 +371,7 @@ func collectRemote(preflightSpec *troubleshootv1beta2.HostPreflight, progressCh
|
||||
return &collectResults, nil
|
||||
}
|
||||
|
||||
func collectHost(hostPreflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
|
||||
func collectHost(ctx context.Context, hostPreflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*CollectResult, error) {
|
||||
collectOpts := CollectOpts{
|
||||
ProgressChan: progressCh,
|
||||
}
|
||||
|
||||
@@ -15,13 +15,17 @@ import (
|
||||
"github.com/replicatedhq/troubleshoot/pkg/collect"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/convert"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/version"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"gopkg.in/yaml.v2"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
)
|
||||
|
||||
func runHostCollectors(hostCollectors []*troubleshootv1beta2.HostCollect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
|
||||
collectSpecs := make([]*troubleshootv1beta2.HostCollect, 0, 0)
|
||||
func runHostCollectors(ctx context.Context, hostCollectors []*troubleshootv1beta2.HostCollect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
|
||||
collectSpecs := make([]*troubleshootv1beta2.HostCollect, 0)
|
||||
collectSpecs = append(collectSpecs, hostCollectors...)
|
||||
|
||||
allCollectedData := make(map[string][]byte)
|
||||
@@ -35,16 +39,25 @@ func runHostCollectors(hostCollectors []*troubleshootv1beta2.HostCollect, additi
|
||||
}
|
||||
|
||||
for _, collector := range collectors {
|
||||
// TODO: Add context to host collectors
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
|
||||
|
||||
isExcluded, _ := collector.IsExcluded()
|
||||
if isExcluded {
|
||||
logger.Printf("Excluding %q collector", collector.Title())
|
||||
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
|
||||
opts.ProgressChan <- fmt.Sprintf("[%s] Running host collector...", collector.Title())
|
||||
result, err := collector.Collect(opts.ProgressChan)
|
||||
if err != nil {
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
opts.ProgressChan <- errors.Errorf("failed to run host collector: %s: %v", collector.Title(), err)
|
||||
}
|
||||
span.End()
|
||||
for k, v := range result {
|
||||
allCollectedData[k] = v
|
||||
}
|
||||
@@ -58,17 +71,21 @@ func runHostCollectors(hostCollectors []*troubleshootv1beta2.HostCollect, additi
|
||||
}
|
||||
|
||||
if opts.Redact {
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, "Host collectors")
|
||||
span.SetAttributes(attribute.String("type", "Redactors"))
|
||||
err := collect.RedactResult(bundlePath, collectResult, globalRedactors)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "failed to redact host collector results")
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return collectResult, err
|
||||
}
|
||||
span.End()
|
||||
}
|
||||
|
||||
return collectResult, nil
|
||||
}
|
||||
|
||||
func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
|
||||
func runCollectors(ctx context.Context, collectors []*troubleshootv1beta2.Collect, additionalRedactors *troubleshootv1beta2.Redactor, bundlePath string, opts SupportBundleCreateOpts) (collect.CollectorResult, error) {
|
||||
var allCollectors []collect.Collector
|
||||
var foundForbidden bool
|
||||
|
||||
@@ -94,7 +111,7 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
|
||||
for _, desiredCollector := range collectSpecs {
|
||||
if collectorInterface, ok := collect.GetCollector(desiredCollector, bundlePath, opts.Namespace, opts.KubernetesRestConfig, k8sClient, opts.SinceTime); ok {
|
||||
if collector, ok := collectorInterface.(collect.Collector); ok {
|
||||
err := collector.CheckRBAC(context.Background(), collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
|
||||
err := collector.CheckRBAC(ctx, collector, desiredCollector, opts.KubernetesRestConfig, opts.Namespace)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to check RBAC for collectors")
|
||||
}
|
||||
@@ -130,8 +147,14 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
|
||||
}
|
||||
|
||||
for _, collector := range allCollectors {
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, collector.Title())
|
||||
span.SetAttributes(attribute.String("type", reflect.TypeOf(collector).String()))
|
||||
|
||||
isExcluded, _ := collector.IsExcluded()
|
||||
if isExcluded {
|
||||
logger.Printf("Excluding %q collector", collector.Title())
|
||||
span.SetAttributes(attribute.Bool(constants.EXCLUDED, true))
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -140,17 +163,22 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
|
||||
if _, ok := collector.(*collect.CollectClusterResources); !ok {
|
||||
msg := fmt.Sprintf("skipping collector %s with insufficient RBAC permissions", collector.Title())
|
||||
opts.CollectorProgressCallback(opts.ProgressChan, msg)
|
||||
span.SetStatus(codes.Error, "skipping collector, insufficient RBAC permissions")
|
||||
span.End()
|
||||
continue
|
||||
}
|
||||
}
|
||||
opts.CollectorProgressCallback(opts.ProgressChan, collector.Title())
|
||||
result, err := collector.Collect(opts.ProgressChan)
|
||||
if err != nil {
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
opts.ProgressChan <- errors.Errorf("failed to run collector: %s: %v", collector.Title(), err)
|
||||
}
|
||||
|
||||
for k, v := range result {
|
||||
allCollectedData[k] = v
|
||||
}
|
||||
span.End()
|
||||
}
|
||||
|
||||
collectResult := allCollectedData
|
||||
@@ -161,10 +189,17 @@ func runCollectors(collectors []*troubleshootv1beta2.Collect, additionalRedactor
|
||||
}
|
||||
|
||||
if opts.Redact {
|
||||
// TODO: Should we record how long each redactor takes?
|
||||
_, span := otel.Tracer(constants.LIB_TRACER_NAME).Start(ctx, "In-cluster collectors")
|
||||
span.SetAttributes(attribute.String("type", "Redactors"))
|
||||
err := collect.RedactResult(bundlePath, collectResult, globalRedactors)
|
||||
if err != nil {
|
||||
return collectResult, errors.Wrap(err, "failed to redact in cluster collector results")
|
||||
err := errors.Wrap(err, "failed to redact in cluster collector results")
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
span.End()
|
||||
return collectResult, err
|
||||
}
|
||||
span.End()
|
||||
}
|
||||
|
||||
return collectResult, nil
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package supportbundle
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -11,11 +13,14 @@ import (
|
||||
cursor "github.com/ahmetalpbalkan/go-cursor"
|
||||
"github.com/fatih/color"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/replicatedhq/troubleshoot/internal/traces"
|
||||
analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/collect"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/constants"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/convert"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/logger"
|
||||
"go.opentelemetry.io/otel"
|
||||
"k8s.io/client-go/rest"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
@@ -43,7 +48,10 @@ type SupportBundleResponse struct {
|
||||
// collectors, analyzers and after collection steps. Input arguments are specifications.
|
||||
// if FromCLI option is set to true, the output is the name of the archive on disk in the cwd.
|
||||
// if FromCLI option is set to false, the support bundle is archived in the OS temp folder (os.TempDir()).
|
||||
func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, additionalRedactors *troubleshootv1beta2.Redactor, opts SupportBundleCreateOpts) (*SupportBundleResponse, error) {
|
||||
func CollectSupportBundleFromSpec(
|
||||
spec *troubleshootv1beta2.SupportBundleSpec, additionalRedactors *troubleshootv1beta2.Redactor, opts SupportBundleCreateOpts,
|
||||
) (*SupportBundleResponse, error) {
|
||||
|
||||
resultsResponse := SupportBundleResponse{}
|
||||
|
||||
if opts.KubernetesRestConfig == nil {
|
||||
@@ -90,13 +98,23 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
|
||||
|
||||
var result, files, hostFiles collect.CollectorResult
|
||||
|
||||
ctx, root := otel.Tracer(constants.LIB_TRACER_NAME).Start(
|
||||
context.Background(), constants.TROUBLESHOOT_ROOT_SPAN_NAME,
|
||||
)
|
||||
defer func() {
|
||||
// If this function returns an error, root.End() may not be called.
|
||||
// We want to ensure this happens, so we defer it. It is safe to call
|
||||
// root.End() multiple times.
|
||||
root.End()
|
||||
}()
|
||||
|
||||
// Cache error returned by collectors and return it at the end of the function
|
||||
// so as to have a chance to run analyzers and archive the support bundle after.
|
||||
// If both host and in cluster collectors fail, the errors will be wrapped
|
||||
collectorsErrs := []string{}
|
||||
if spec.HostCollectors != nil {
|
||||
// Run host collectors
|
||||
hostFiles, err = runHostCollectors(spec.HostCollectors, additionalRedactors, bundlePath, opts)
|
||||
hostFiles, err = runHostCollectors(ctx, spec.HostCollectors, additionalRedactors, bundlePath, opts)
|
||||
if err != nil {
|
||||
collectorsErrs = append(collectorsErrs, fmt.Sprintf("failed to run host collectors: %s", err))
|
||||
}
|
||||
@@ -104,7 +122,7 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
|
||||
|
||||
if spec.Collectors != nil {
|
||||
// Run collectors
|
||||
files, err = runCollectors(spec.Collectors, additionalRedactors, bundlePath, opts)
|
||||
files, err = runCollectors(ctx, spec.Collectors, additionalRedactors, bundlePath, opts)
|
||||
if err != nil {
|
||||
collectorsErrs = append(collectorsErrs, fmt.Sprintf("failed to run collectors: %s", err))
|
||||
}
|
||||
@@ -128,13 +146,13 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
|
||||
return nil, errors.Wrap(err, "failed to get version file")
|
||||
}
|
||||
|
||||
err = result.SaveResult(bundlePath, constants.VersionFilename, version)
|
||||
err = result.SaveResult(bundlePath, constants.VERSION_FILENAME, version)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to write version")
|
||||
}
|
||||
|
||||
// Run Analyzers
|
||||
analyzeResults, err := AnalyzeSupportBundle(spec, bundlePath)
|
||||
analyzeResults, err := AnalyzeSupportBundle(ctx, spec, bundlePath)
|
||||
if err != nil {
|
||||
if opts.FromCLI {
|
||||
c := color.New(color.FgHiRed)
|
||||
@@ -156,6 +174,17 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
|
||||
return nil, errors.Wrap(err, "failed to write analysis")
|
||||
}
|
||||
|
||||
// Complete tracing by ending the root span and collecting
|
||||
// the summary of the traces. Store them in the support bundle.
|
||||
root.End()
|
||||
summary := traces.GetExporterInstance().GetSummary()
|
||||
err = result.SaveResult(bundlePath, "execution-data/summary.txt", bytes.NewReader([]byte(summary)))
|
||||
if err != nil {
|
||||
// Don't fail the support bundle if we can't save the execution summary
|
||||
logger.Printf("failed to save execution summary file in the support bundle: %v", err)
|
||||
}
|
||||
|
||||
// Archive Support Bundle
|
||||
if err := result.ArchiveSupportBundle(bundlePath, filename); err != nil {
|
||||
return nil, errors.Wrap(err, "create bundle file")
|
||||
}
|
||||
@@ -174,6 +203,7 @@ func CollectSupportBundleFromSpec(spec *troubleshootv1beta2.SupportBundleSpec, a
|
||||
|
||||
if len(collectorsErrs) > 0 {
|
||||
// TODO: Consider a collectors error type
|
||||
// TODO: use errors.Join in go 1.20 (https://pkg.go.dev/errors#Join)
|
||||
return &resultsResponse, fmt.Errorf(strings.Join(collectorsErrs, "\n"))
|
||||
}
|
||||
|
||||
@@ -222,11 +252,11 @@ func ProcessSupportBundleAfterCollection(spec *troubleshootv1beta2.SupportBundle
|
||||
|
||||
// AnalyzeSupportBundle performs analysis on a support bundle using the support bundle spec and an already unpacked support
|
||||
// bundle on disk
|
||||
func AnalyzeSupportBundle(spec *troubleshootv1beta2.SupportBundleSpec, tmpDir string) ([]*analyzer.AnalyzeResult, error) {
|
||||
func AnalyzeSupportBundle(ctx context.Context, spec *troubleshootv1beta2.SupportBundleSpec, tmpDir string) ([]*analyzer.AnalyzeResult, error) {
|
||||
if len(spec.Analyzers) == 0 && len(spec.HostAnalyzers) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
analyzeResults, err := analyzer.AnalyzeLocal(tmpDir, spec.Analyzers, spec.HostAnalyzers)
|
||||
analyzeResults, err := analyzer.AnalyzeLocal(ctx, tmpDir, spec.Analyzers, spec.HostAnalyzers)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to analyze support bundle")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user