mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-04-15 07:16:34 +00:00
* Add collect command and remote host collectors
Adds the ability to run a host collector on a set of remote k8s nodes.
Target nodes can be filtered using the --selector flag, with the same
syntax as kubectl. Existing flags for --collector-image,
--collector-pullpolicy and --request-timeout are used. To run on a
specified node, --selector="kubernetes.io/hostname=kind-worker2" could
be used.
The collect command is used by the remote collector to output the
results using a "raw" format, which uses the filename as the key, and
the value being the output as an escaped JSON string. When run manually it
defaults to fully decoded json. The existing block devices,
ipv4interfaces and services host collectors don't decode properly - the
fix is to convert their slice output to a map (fix not included as
unsure what depends on the existing format).
The collect command is also useful for troubleshooting preflight issues.
Examples are included to show remote collector usage.
```
bin/collect --collector-image=croomes/troubleshoot:latest examples/collect/remote/memory.yaml --namespace test
{
"kind-control-plane": {
"system/memory.json": {
"total": 1304207360
}
},
"kind-worker": {
"system/memory.json": {
"total": 1695780864
}
},
"kind-worker2": {
"system/memory.json": {
"total": 1726353408
}
}
}
```
The preflight command has been updated to run remote collectors. To run
a host collector remotely it must be specified in the spec as a
`remoteCollector`:
```
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: memory
spec:
remoteCollectors:
- memory:
collectorName: memory
analyzers:
- memory:
outcomes:
- fail:
when: "< 8Gi"
message: At least 8Gi of memory is required
- warn:
when: "< 32Gi"
message: At least 32Gi of memory is recommended
- pass:
message: The system has sufficient memory
```
Results for each node are analyzed separately, with the node name
appended to the title:
```
bin/preflight --interactive=false --collector-image=croomes/troubleshoot:latest examples/preflight/remote/memory.yaml --format=json
{memory running 0 1}
{memory completed 1 1}
{
"fail": [
{
"title": "Amount of Memory (kind-worker2)",
"message": "At least 8Gi of memory is required"
},
{
"title": "Amount of Memory (kind-worker)",
"message": "At least 8Gi of memory is required"
},
{
"title": "Amount of Memory (kind-control-plane)",
"message": "At least 8Gi of memory is required"
}
]
}
```
Also added a host collector to allow preflight checks of required kernel
modules, which is the main driver for this change.
354 lines
9.7 KiB
Go
354 lines
9.7 KiB
Go
package cli
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"strings"
|
|
"time"
|
|
|
|
cursor "github.com/ahmetalpbalkan/go-cursor"
|
|
"github.com/fatih/color"
|
|
"github.com/pkg/errors"
|
|
"github.com/replicatedhq/troubleshoot/cmd/util"
|
|
analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
|
|
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
|
troubleshootclientsetscheme "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
|
|
"github.com/replicatedhq/troubleshoot/pkg/docrewrite"
|
|
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
|
|
"github.com/replicatedhq/troubleshoot/pkg/preflight"
|
|
"github.com/replicatedhq/troubleshoot/pkg/specs"
|
|
"github.com/spf13/viper"
|
|
spin "github.com/tj/go-spin"
|
|
"golang.org/x/sync/errgroup"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/client-go/kubernetes/scheme"
|
|
)
|
|
|
|
func runPreflights(v *viper.Viper, arg string) error {
|
|
if v.GetBool("interactive") {
|
|
fmt.Print(cursor.Hide())
|
|
defer fmt.Print(cursor.Show())
|
|
}
|
|
|
|
go func() {
|
|
signalChan := make(chan os.Signal, 1)
|
|
signal.Notify(signalChan, os.Interrupt)
|
|
<-signalChan
|
|
os.Exit(0)
|
|
}()
|
|
|
|
var preflightContent []byte
|
|
var err error
|
|
if strings.HasPrefix(arg, "secret/") {
|
|
// format secret/namespace-name/secret-name
|
|
pathParts := strings.Split(arg, "/")
|
|
if len(pathParts) != 3 {
|
|
return errors.Errorf("path %s must have 3 components", arg)
|
|
}
|
|
|
|
spec, err := specs.LoadFromSecret(pathParts[1], pathParts[2], "preflight-spec")
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to get spec from secret")
|
|
}
|
|
|
|
preflightContent = spec
|
|
} else if _, err = os.Stat(arg); err == nil {
|
|
b, err := ioutil.ReadFile(arg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
preflightContent = b
|
|
} else {
|
|
if !util.IsURL(arg) {
|
|
return fmt.Errorf("%s is not a URL and was not found (err %s)", arg, err)
|
|
}
|
|
|
|
req, err := http.NewRequest("GET", arg, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
req.Header.Set("User-Agent", "Replicated_Preflight/v1beta2")
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
preflightContent = body
|
|
}
|
|
|
|
preflightContent, err = docrewrite.ConvertToV1Beta2(preflightContent)
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to convert to v1beta2")
|
|
}
|
|
|
|
troubleshootclientsetscheme.AddToScheme(scheme.Scheme)
|
|
decode := scheme.Codecs.UniversalDeserializer().Decode
|
|
obj, _, err := decode([]byte(preflightContent), nil, nil)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "failed to parse %s", arg)
|
|
}
|
|
|
|
var collectResults []preflight.CollectResult
|
|
preflightSpecName := ""
|
|
|
|
progressCh := make(chan interface{})
|
|
defer close(progressCh)
|
|
|
|
ctx, stopProgressCollection := context.WithCancel(context.Background())
|
|
// make sure we shut down progress collection goroutines if an error occurs
|
|
defer stopProgressCollection()
|
|
progressCollection, ctx := errgroup.WithContext(ctx)
|
|
|
|
if v.GetBool("interactive") {
|
|
progressCollection.Go(collectInteractiveProgress(ctx, progressCh))
|
|
} else {
|
|
progressCollection.Go(collectNonInteractiveProgess(ctx, progressCh))
|
|
}
|
|
|
|
if preflightSpec, ok := obj.(*troubleshootv1beta2.Preflight); ok {
|
|
r, err := collectInCluster(preflightSpec, progressCh)
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to collect in cluster")
|
|
}
|
|
collectResults = append(collectResults, *r)
|
|
preflightSpecName = preflightSpec.Name
|
|
} else if hostPreflightSpec, ok := obj.(*troubleshootv1beta2.HostPreflight); ok {
|
|
if len(hostPreflightSpec.Spec.Collectors) > 0 {
|
|
r, err := collectHost(hostPreflightSpec, progressCh)
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to collect from host")
|
|
}
|
|
collectResults = append(collectResults, *r)
|
|
}
|
|
if len(hostPreflightSpec.Spec.RemoteCollectors) > 0 {
|
|
r, err := collectRemote(hostPreflightSpec, progressCh)
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to collect remotely")
|
|
}
|
|
collectResults = append(collectResults, *r)
|
|
}
|
|
preflightSpecName = hostPreflightSpec.Name
|
|
}
|
|
|
|
if collectResults == nil {
|
|
return errors.New("no results")
|
|
}
|
|
|
|
analyzeResults := []*analyzer.AnalyzeResult{}
|
|
for _, res := range collectResults {
|
|
analyzeResults = append(analyzeResults, res.Analyze()...)
|
|
}
|
|
|
|
if preflightSpec, ok := obj.(*troubleshootv1beta2.Preflight); ok {
|
|
if preflightSpec.Spec.UploadResultsTo != "" {
|
|
err := uploadResults(preflightSpec.Spec.UploadResultsTo, analyzeResults)
|
|
if err != nil {
|
|
progressCh <- err
|
|
}
|
|
}
|
|
}
|
|
|
|
stopProgressCollection()
|
|
progressCollection.Wait()
|
|
|
|
if v.GetBool("interactive") {
|
|
if len(analyzeResults) == 0 {
|
|
return errors.New("no data has been collected")
|
|
}
|
|
return showInteractiveResults(preflightSpecName, analyzeResults)
|
|
}
|
|
|
|
return showStdoutResults(v.GetString("format"), preflightSpecName, analyzeResults)
|
|
}
|
|
|
|
func collectInteractiveProgress(ctx context.Context, progressCh <-chan interface{}) func() error {
|
|
return func() error {
|
|
spinner := spin.New()
|
|
lastMsg := ""
|
|
|
|
errorTxt := color.New(color.FgHiRed)
|
|
infoTxt := color.New(color.FgCyan)
|
|
|
|
for {
|
|
select {
|
|
case msg := <-progressCh:
|
|
switch msg := msg.(type) {
|
|
case error:
|
|
errorTxt.Printf("%s\r * %v\n", cursor.ClearEntireLine(), msg)
|
|
case string:
|
|
if lastMsg == msg {
|
|
break
|
|
}
|
|
lastMsg = msg
|
|
infoTxt.Printf("%s\r * %s\n", cursor.ClearEntireLine(), msg)
|
|
|
|
}
|
|
case <-time.After(time.Millisecond * 100):
|
|
fmt.Printf("\r %s %s ", color.CyanString("Running Preflight Checks"), spinner.Next())
|
|
case <-ctx.Done():
|
|
fmt.Printf("\r%s\r", cursor.ClearEntireLine())
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func collectNonInteractiveProgess(ctx context.Context, progressCh <-chan interface{}) func() error {
|
|
return func() error {
|
|
for {
|
|
select {
|
|
case msg := <-progressCh:
|
|
switch msg := msg.(type) {
|
|
case error:
|
|
fmt.Fprintf(os.Stderr, "error - %v\n", msg)
|
|
case string:
|
|
fmt.Fprintf(os.Stderr, "%s\n", msg)
|
|
case preflight.CollectProgress:
|
|
fmt.Fprintf(os.Stderr, "%s\n", msg.String())
|
|
|
|
}
|
|
case <-ctx.Done():
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, progressCh chan interface{}) (*preflight.CollectResult, error) {
|
|
v := viper.GetViper()
|
|
|
|
restConfig, err := k8sutil.GetRESTConfig()
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to convert kube flags to rest config")
|
|
}
|
|
|
|
collectOpts := preflight.CollectOpts{
|
|
Namespace: v.GetString("namespace"),
|
|
IgnorePermissionErrors: v.GetBool("collect-without-permissions"),
|
|
ProgressChan: progressCh,
|
|
KubernetesRestConfig: restConfig,
|
|
}
|
|
|
|
if v.GetString("since") != "" || v.GetString("since-time") != "" {
|
|
err := parseTimeFlags(v, preflightSpec.Spec.Collectors)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
collectResults, err := preflight.Collect(collectOpts, preflightSpec)
|
|
if err != nil {
|
|
if !collectResults.IsRBACAllowed() {
|
|
if preflightSpec.Spec.UploadResultsTo != "" {
|
|
clusterCollectResults := collectResults.(preflight.ClusterCollectResult)
|
|
err := uploadErrors(preflightSpec.Spec.UploadResultsTo, clusterCollectResults.Collectors)
|
|
if err != nil {
|
|
progressCh <- err
|
|
}
|
|
}
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
return &collectResults, nil
|
|
}
|
|
|
|
func collectRemote(preflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*preflight.CollectResult, error) {
|
|
v := viper.GetViper()
|
|
|
|
restConfig, err := k8sutil.GetRESTConfig()
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to convert kube flags to rest config")
|
|
}
|
|
|
|
labelSelector, err := labels.Parse(v.GetString("selector"))
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "unable to parse selector")
|
|
}
|
|
|
|
namespace := v.GetString("namespace")
|
|
if namespace == "" {
|
|
namespace = "default"
|
|
}
|
|
|
|
timeout := v.GetDuration("request-timeout")
|
|
if timeout == 0 {
|
|
timeout = 30 * time.Second
|
|
}
|
|
|
|
collectOpts := preflight.CollectOpts{
|
|
Namespace: namespace,
|
|
IgnorePermissionErrors: v.GetBool("collect-without-permissions"),
|
|
ProgressChan: progressCh,
|
|
KubernetesRestConfig: restConfig,
|
|
Image: v.GetString("collector-image"),
|
|
PullPolicy: v.GetString("collector-pullpolicy"),
|
|
LabelSelector: labelSelector.String(),
|
|
Timeout: timeout,
|
|
}
|
|
|
|
collectResults, err := preflight.CollectRemote(collectOpts, preflightSpec)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to collect from remote")
|
|
}
|
|
|
|
return &collectResults, nil
|
|
}
|
|
|
|
func collectHost(hostPreflightSpec *troubleshootv1beta2.HostPreflight, progressCh chan interface{}) (*preflight.CollectResult, error) {
|
|
collectOpts := preflight.CollectOpts{
|
|
ProgressChan: progressCh,
|
|
}
|
|
|
|
collectResults, err := preflight.CollectHost(collectOpts, hostPreflightSpec)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to collect from host")
|
|
}
|
|
|
|
return &collectResults, nil
|
|
}
|
|
|
|
func parseTimeFlags(v *viper.Viper, collectors []*troubleshootv1beta2.Collect) error {
|
|
var (
|
|
sinceTime time.Time
|
|
err error
|
|
)
|
|
if v.GetString("since-time") != "" {
|
|
if v.GetString("since") != "" {
|
|
return errors.Errorf("at most one of `sinceTime` or `since` may be specified")
|
|
}
|
|
sinceTime, err = time.Parse(time.RFC3339, v.GetString("since-time"))
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to parse --since-time flag")
|
|
}
|
|
} else {
|
|
parsedDuration, err := time.ParseDuration(v.GetString("since"))
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to parse --since flag")
|
|
}
|
|
now := time.Now()
|
|
sinceTime = now.Add(0 - parsedDuration)
|
|
}
|
|
for _, collector := range collectors {
|
|
if collector.Logs != nil {
|
|
if collector.Logs.Limits == nil {
|
|
collector.Logs.Limits = new(troubleshootv1beta2.LogLimits)
|
|
}
|
|
collector.Logs.Limits.SinceTime = metav1.NewTime(sinceTime)
|
|
}
|
|
}
|
|
return nil
|
|
}
|