support multiple exit codes based on what went wrong/right (#1135)

0 = all passed, 3 = at least one failure, 4 = no failures but at least 1 warn 1 as a catch all (generic errors), 2 for invalid input/specs etc ref https://github.com/replicatedhq/troubleshoot/issues/1131 docs https://github.com/replicatedhq/troubleshoot.sh/pull/489
2026-02-14 10:19:54 +00:00 · 2023-05-10 09:33:13 +10:00
parent 766469b867
commit 3548b46cfc
6 changed files with 235 additions and 30 deletions
--- a/.github/workflows/build-test-deploy.yaml
+++ b/.github/workflows/build-test-deploy.yaml
@@ -112,10 +112,17 @@ jobs:
          path: bin/
      - run: chmod +x bin/preflight
      - run: |
+          set +e
          ./bin/preflight --interactive=false --format=json https://preflight.replicated.com > result.json
+          EXIT_CODE=$?
          cat result.json

          EXIT_STATUS=0
+          if [ $EXIT_CODE -ne 3 ]; then
+            echo "Expected exit code of 3 (some checks failed), got $EXIT_CODE"
+            EXIT_STATUS=1
+          fi
+
          if grep -q "was not collected" result.json; then
            echo "Some files were not collected"
            EXIT_STATUS=1
--- a/cmd/preflight/cli/root.go
+++ b/cmd/preflight/cli/root.go
@@ -1,15 +1,18 @@
 package cli

 import (
+	"errors"
 	"fmt"
 	"os"
 	"strings"

 	"github.com/replicatedhq/troubleshoot/cmd/util"
 	"github.com/replicatedhq/troubleshoot/internal/traces"
+	"github.com/replicatedhq/troubleshoot/pkg/constants"
 	"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
 	"github.com/replicatedhq/troubleshoot/pkg/logger"
 	"github.com/replicatedhq/troubleshoot/pkg/preflight"
+	"github.com/replicatedhq/troubleshoot/pkg/types"
 	"github.com/spf13/cobra"
 	"github.com/spf13/viper"
 	"k8s.io/klog/v2"
@@ -22,7 +25,8 @@ func RootCmd() *cobra.Command {
 		Short: "Run and retrieve preflight checks in a cluster",
 		Long: `A preflight check is a set of validations that can and should be run to ensure
 that a cluster meets the requirements to run an application.`,
-		SilenceUsage: true,
+		SilenceUsage:  true,
+		SilenceErrors: true,
 		PreRun: func(cmd *cobra.Command, args []string) {
 			v := viper.GetViper()
 			v.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
@@ -48,6 +52,7 @@ that a cluster meets the requirements to run an application.`,
 			if v.GetBool("debug") || v.IsSet("v") {
 				fmt.Printf("\n%s", traces.GetExporterInstance().GetSummary())
 			}
+
 			return err
 		},
 		PostRun: func(cmd *cobra.Command, args []string) {
@@ -75,7 +80,24 @@ that a cluster meets the requirements to run an application.`,
 }

 func InitAndExecute() {
-	if err := RootCmd().Execute(); err != nil {
+	cmd := RootCmd()
+	err := cmd.Execute()
+
+	if err != nil {
+		var exitErr types.ExitError
+		if errors.As(err, &exitErr) {
+			// We need to do this, there's situations where we need the non-zero exit code (which comes as part of the custom error struct)
+			// but there's no actual error, just an exit code.
+			// If there's also an error to output (eg. invalid format etc) then print it as well
+			if exitErr.ExitStatus() != constants.EXIT_CODE_FAIL && exitErr.ExitStatus() != constants.EXIT_CODE_WARN {
+				cmd.PrintErrln("Error:", err.Error())
+			}
+
+			os.Exit(exitErr.ExitStatus())
+		}
+
+		// Fallback, should almost never be used (the above Exit() should handle almost all situations
+		cmd.PrintErrln("Error:", err.Error())
 		os.Exit(1)
 	}
 }
--- a/pkg/constants/constants.go
+++ b/pkg/constants/constants.go
@@ -51,4 +51,10 @@ const (
 	CLUSTER_RESOURCES_ROLE_BINDINGS               = "rolebindings"
 	CLUSTER_RESOURCES_CLUSTER_ROLES               = "clusterroles"
 	CLUSTER_RESOURCES_CLUSTER_ROLE_BINDINGS       = "clusterRoleBindings"
+
+	// Custom exit codes
+	EXIT_CODE_CATCH_ALL   = 1
+	EXIT_CODE_SPEC_ISSUES = 2
+	EXIT_CODE_FAIL        = 3
+	EXIT_CODE_WARN        = 4
 )
--- a/pkg/preflight/run.go
+++ b/pkg/preflight/run.go
@@ -16,6 +16,7 @@ import (
 	"github.com/pkg/errors"
 	"github.com/replicatedhq/troubleshoot/cmd/util"
 	analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
+	analyzerunner "github.com/replicatedhq/troubleshoot/pkg/analyze"
 	troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
 	troubleshootclientsetscheme "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
 	"github.com/replicatedhq/troubleshoot/pkg/constants"
@@ -23,6 +24,7 @@ import (
 	"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
 	"github.com/replicatedhq/troubleshoot/pkg/oci"
 	"github.com/replicatedhq/troubleshoot/pkg/specs"
+	"github.com/replicatedhq/troubleshoot/pkg/types"
 	"github.com/spf13/viper"
 	spin "github.com/tj/go-spin"
 	"go.opentelemetry.io/otel"
@@ -47,7 +49,8 @@ func RunPreflights(interactive bool, output string, format string, args []string
 		signalChan := make(chan os.Signal, 1)
 		signal.Notify(signalChan, os.Interrupt)
 		<-signalChan
-		os.Exit(0)
+		// exiting due to a signal shouldn't be considered successful
+		os.Exit(1)
 	}()

 	var preflightContent []byte
@@ -61,64 +64,66 @@ func RunPreflights(interactive bool, output string, format string, args []string
 			// format secret/namespace-name/secret-name
 			pathParts := strings.Split(v, "/")
 			if len(pathParts) != 3 {
-				return errors.Errorf("path %s must have 3 components", v)
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, errors.Errorf("path %s must have 3 components", v))
 			}

 			spec, err := specs.LoadFromSecret(pathParts[1], pathParts[2], "preflight-spec")
 			if err != nil {
-				return errors.Wrap(err, "failed to get spec from secret")
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, errors.Wrap(err, "failed to get spec from secret"))
 			}

 			preflightContent = spec
 		} else if _, err = os.Stat(v); err == nil {
 			b, err := os.ReadFile(v)
 			if err != nil {
-				return err
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, err)
 			}

 			preflightContent = b
 		} else if v == "-" {
 			b, err := io.ReadAll(os.Stdin)
 			if err != nil {
-				return err
+				return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, err)
 			}
 			preflightContent = b
 		} else {
 			u, err := url.Parse(v)
 			if err != nil {
-				return err
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, err)
 			}

 			if u.Scheme == "oci" {
 				content, err := oci.PullPreflightFromOCI(v)
 				if err != nil {
 					if err == oci.ErrNoRelease {
-						return errors.Errorf("no release found for %s.\nCheck the oci:// uri for errors or contact the application vendor for support.", v)
+						return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, errors.Errorf("no release found for %s.\nCheck the oci:// uri for errors or contact the application vendor for support.", v))
 					}

-					return err
+					return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, err)
 				}

 				preflightContent = content
 			} else {
 				if !util.IsURL(v) {
-					return fmt.Errorf("%s is not a URL and was not found (err %s)", v, err)
+					return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, fmt.Errorf("%s is not a URL and was not found (err %s)", v, err))
 				}

 				req, err := http.NewRequest("GET", v, nil)
 				if err != nil {
-					return err
+					// exit code: should this be catch all or spec issues...?
+					return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, err)
 				}
 				req.Header.Set("User-Agent", "Replicated_Preflight/v1beta2")
 				resp, err := http.DefaultClient.Do(req)
 				if err != nil {
-					return err
+					// exit code: should this be catch all or spec issues...?
+					return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, err)
 				}
 				defer resp.Body.Close()

 				body, err := io.ReadAll(resp.Body)
 				if err != nil {
-					return err
+					return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, err)
 				}

 				preflightContent = body
@@ -137,7 +142,7 @@ func RunPreflights(interactive bool, output string, format string, args []string

 			err := yaml.Unmarshal([]byte(doc), &parsedDocHead)
 			if err != nil {
-				return errors.Wrap(err, "failed to parse yaml")
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, errors.Wrap(err, "failed to parse yaml"))
 			}

 			if parsedDocHead.Kind != "Preflight" {
@@ -146,14 +151,14 @@ func RunPreflights(interactive bool, output string, format string, args []string

 			preflightContent, err = docrewrite.ConvertToV1Beta2([]byte(doc))
 			if err != nil {
-				return errors.Wrap(err, "failed to convert to v1beta2")
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, errors.Wrap(err, "failed to convert to v1beta2"))
 			}

 			troubleshootclientsetscheme.AddToScheme(scheme.Scheme)
 			decode := scheme.Codecs.UniversalDeserializer().Decode
 			obj, _, err := decode([]byte(preflightContent), nil, nil)
 			if err != nil {
-				return errors.Wrapf(err, "failed to parse %s", v)
+				return types.NewExitCodeError(constants.EXIT_CODE_SPEC_ISSUES, errors.Wrapf(err, "failed to parse %s", v))
 			}

 			if spec, ok := obj.(*troubleshootv1beta2.Preflight); ok {
@@ -192,7 +197,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
 	if preflightSpec != nil {
 		r, err := collectInCluster(ctx, preflightSpec, progressCh)
 		if err != nil {
-			return errors.Wrap(err, "failed to collect in cluster")
+			return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, errors.Wrap(err, "failed to collect in cluster"))
 		}
 		collectResults = append(collectResults, *r)
 		preflightSpecName = preflightSpec.Name
@@ -201,7 +206,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
 		for _, spec := range uploadResultSpecs {
 			r, err := collectInCluster(ctx, spec, progressCh)
 			if err != nil {
-				return errors.Wrap(err, "failed to collect in cluster")
+				return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, errors.Wrap(err, "failed to collect in cluster"))
 			}
 			uploadResultsMap[spec.Spec.UploadResultsTo] = append(uploadResultsMap[spec.Spec.UploadResultsTo], *r)
 			uploadCollectResults = append(collectResults, *r)
@@ -212,14 +217,14 @@ func RunPreflights(interactive bool, output string, format string, args []string
 		if len(hostPreflightSpec.Spec.Collectors) > 0 {
 			r, err := collectHost(ctx, hostPreflightSpec, progressCh)
 			if err != nil {
-				return errors.Wrap(err, "failed to collect from host")
+				return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, errors.Wrap(err, "failed to collect from host"))
 			}
 			collectResults = append(collectResults, *r)
 		}
 		if len(hostPreflightSpec.Spec.RemoteCollectors) > 0 {
 			r, err := collectRemote(ctx, hostPreflightSpec, progressCh)
 			if err != nil {
-				return errors.Wrap(err, "failed to collect remotely")
+				return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, errors.Wrap(err, "failed to collect remotely"))
 			}
 			collectResults = append(collectResults, *r)
 		}
@@ -227,7 +232,7 @@ func RunPreflights(interactive bool, output string, format string, args []string
 	}

 	if collectResults == nil && uploadCollectResults == nil {
-		return errors.New("no results")
+		return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, errors.New("no results"))
 	}

 	analyzeResults := []*analyzer.AnalyzeResult{}
@@ -255,14 +260,48 @@ func RunPreflights(interactive bool, output string, format string, args []string
 	stopProgressCollection()
 	progressCollection.Wait()

-	if interactive {
-		if len(analyzeResults) == 0 {
-			return errors.New("no data has been collected")
-		}
-		return showInteractiveResults(preflightSpecName, output, analyzeResults)
+	if len(analyzeResults) == 0 {
+		return types.NewExitCodeError(constants.EXIT_CODE_CATCH_ALL, errors.New("no data has been collected"))
 	}

-	return showTextResults(format, preflightSpecName, output, analyzeResults)
+	if interactive {
+		err = showInteractiveResults(preflightSpecName, output, analyzeResults)
+	} else {
+		err = showTextResults(format, preflightSpecName, output, analyzeResults)
+	}
+
+	if err != nil {
+		return err
+	}
+
+	exitCode := checkOutcomesToExitCode(analyzeResults)
+
+	if exitCode == 0 {
+		return nil
+	}
+
+	return types.NewExitCodeError(exitCode, errors.New("preflights failed with warnings or errors"))
+}
+
+// Determine if any preflight checks passed vs failed vs warned
+// If all checks passed: 0
+// If 1 or more checks failed: 3
+// If no checks failed, but 1 or more warn: 4
+func checkOutcomesToExitCode(analyzeResults []*analyzerunner.AnalyzeResult) int {
+	// Assume pass until they don't
+	exitCode := 0
+
+	for _, analyzeResult := range analyzeResults {
+		if analyzeResult.IsWarn {
+			exitCode = constants.EXIT_CODE_WARN
+		} else if analyzeResult.IsFail {
+			exitCode = constants.EXIT_CODE_FAIL
+			// No need to check further, a fail is a fail
+			return exitCode
+		}
+	}
+
+	return exitCode
 }

 func collectInteractiveProgress(ctx context.Context, progressCh <-chan interface{}) func() error {
--- a/pkg/preflight/run_test.go
+++ b/pkg/preflight/run_test.go
@@ -7,11 +7,14 @@
 package preflight

 import (
+	"errors"
 	"os"
 	"path/filepath"
 	"testing"

 	"github.com/replicatedhq/troubleshoot/internal/testutils"
+	analyzerunner "github.com/replicatedhq/troubleshoot/pkg/analyze"
+	"github.com/replicatedhq/troubleshoot/pkg/types"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	/*
@@ -58,7 +61,8 @@ PASS
 		format      string
 		args        []string
 		//
-		wantErr bool
+		wantExitCode int
+		wantErr      bool
 		// May be in stdout or file, depending on above value
 		wantOutputContent string
 	}{
@@ -69,6 +73,7 @@ PASS
 			output:            "",
 			format:            "human",
 			args:              []string{preflightFile},
+			wantExitCode:      0,
 			wantErr:           false,
 			wantOutputContent: wantOutputContentHuman,
 		},
@@ -78,6 +83,7 @@ PASS
 			output:            "",
 			format:            "json",
 			args:              []string{preflightFile},
+			wantExitCode:      0,
 			wantErr:           false,
 			wantOutputContent: wantOutputContentJson,
 		},
@@ -87,6 +93,7 @@ PASS
 			output:            "",
 			format:            "yaml",
 			args:              []string{preflightFile},
+			wantExitCode:      0,
 			wantErr:           false,
 			wantOutputContent: wantOutputContentYaml,
 		},
@@ -96,6 +103,7 @@ PASS
 			output:            testutils.TempFilename("preflight_out_test_"),
 			format:            "human",
 			args:              []string{preflightFile},
+			wantExitCode:      0,
 			wantErr:           false,
 			wantOutputContent: wantOutputContentHuman,
 		},
@@ -105,6 +113,7 @@ PASS
 			output:            testutils.TempFilename("preflight_out_test_"),
 			format:            "json",
 			args:              []string{preflightFile},
+			wantExitCode:      0,
 			wantErr:           false,
 			wantOutputContent: wantOutputContentJson,
 		},
@@ -114,6 +123,7 @@ PASS
 			output:            testutils.TempFilename("preflight_out_test_"),
 			format:            "yaml",
 			args:              []string{preflightFile},
+			wantExitCode:      0,
 			wantErr:           false,
 			wantOutputContent: wantOutputContentYaml,
 		},
@@ -182,9 +192,17 @@ PASS
 			os.Stderr = origStderr

 			if tt.wantErr {
+				// It's always an error of some form
 				assert.Error(t, tErr)
+
+				var exitErr types.ExitError
+				// Make sure we can always turn it into an ExitError
+				assert.True(t, errors.As(tErr, &exitErr))
+
+				assert.Equal(t, tt.wantExitCode, exitErr.ExitStatus())
+				assert.NotEmpty(t, exitErr.Error())
 			} else {
-				require.NoError(t, tErr)
+				assert.Nil(t, tErr)
 			}

 			useBufOut := string(bufOut[:nOut])
@@ -214,3 +232,90 @@ PASS
 		})
 	}
 }
+
+func TestCheckOutcomesToExitCode(t *testing.T) {
+	tests := []struct {
+		name         string
+		results      []*analyzerunner.AnalyzeResult
+		wantExitCode int
+	}{
+		{
+			name: "all-pass",
+			results: []*analyzerunner.AnalyzeResult{
+				&analyzerunner.AnalyzeResult{
+					IsPass: true,
+					IsFail: false,
+					IsWarn: false,
+				},
+				&analyzerunner.AnalyzeResult{
+					IsPass: true,
+					IsFail: false,
+					IsWarn: false,
+				},
+			},
+			wantExitCode: 0,
+		},
+		{
+			name: "one-warn",
+			results: []*analyzerunner.AnalyzeResult{
+				&analyzerunner.AnalyzeResult{
+					IsPass: true,
+					IsFail: false,
+					IsWarn: false,
+				},
+				&analyzerunner.AnalyzeResult{
+					IsPass: false,
+					IsFail: false,
+					IsWarn: true,
+				},
+			},
+			wantExitCode: 4,
+		},
+		{
+			name: "one-fail",
+			results: []*analyzerunner.AnalyzeResult{
+				&analyzerunner.AnalyzeResult{
+					IsPass: true,
+					IsFail: false,
+					IsWarn: false,
+				},
+				&analyzerunner.AnalyzeResult{
+					IsPass: false,
+					IsFail: true,
+					IsWarn: false,
+				},
+			},
+			wantExitCode: 3,
+		},
+		{
+			name: "one-warn-one-fail",
+			results: []*analyzerunner.AnalyzeResult{
+				&analyzerunner.AnalyzeResult{
+					IsPass: true,
+					IsFail: false,
+					IsWarn: false,
+				},
+				&analyzerunner.AnalyzeResult{
+					IsPass: false,
+					IsFail: true,
+					IsWarn: false,
+				},
+				&analyzerunner.AnalyzeResult{
+					IsPass: false,
+					IsFail: false,
+					IsWarn: true,
+				},
+			},
+			// A fail is a fail...
+			wantExitCode: 3,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotExitCode := checkOutcomesToExitCode(tt.results)
+
+			assert.Equal(t, tt.wantExitCode, gotExitCode)
+		})
+	}
+}
--- a/pkg/types/types.go
+++ b/pkg/types/types.go
@@ -7,3 +7,29 @@ type NotFoundError struct {
 func (e *NotFoundError) Error() string {
 	return e.Name + ": not found"
 }
+
+type ExitError interface {
+	Error() string
+	ExitStatus() int
+}
+
+type ExitCodeError struct {
+	Msg  string
+	Code int
+}
+
+func (e *ExitCodeError) Error() string {
+	return e.Msg
+}
+
+func (e *ExitCodeError) ExitStatus() int {
+	return e.Code
+}
+
+func NewExitCodeError(exitCode int, theErr error) *ExitCodeError {
+	useErr := ""
+	if theErr != nil {
+		useErr = theErr.Error()
+	}
+	return &ExitCodeError{Msg: useErr, Code: exitCode}
+}