diff --git a/pkg/collect/registry.go b/pkg/collect/registry.go index 7527dc91..4ae5e4f4 100644 --- a/pkg/collect/registry.go +++ b/pkg/collect/registry.go @@ -62,7 +62,7 @@ func (c *CollectRegistry) Collect(progressChan chan<- interface{}) (CollectorRes } for _, image := range c.Collector.Images { - exists, err := imageExists(c.Namespace, c.ClientConfig, c.Collector, image) + exists, err := imageExists(c.Namespace, c.ClientConfig, c.Collector, image, 10*time.Second) if err != nil { registryInfo.Images[image] = RegistryImage{ Error: err.Error(), @@ -90,7 +90,7 @@ func (c *CollectRegistry) Collect(progressChan chan<- interface{}) (CollectorRes return output, nil } -func imageExists(namespace string, clientConfig *rest.Config, registryCollector *troubleshootv1beta2.RegistryImages, image string) (bool, error) { +func imageExists(namespace string, clientConfig *rest.Config, registryCollector *troubleshootv1beta2.RegistryImages, image string, deadline time.Duration) (bool, error) { imageRef, err := alltransports.ParseImageName(fmt.Sprintf("docker://%s", image)) if err != nil { return false, errors.Wrapf(err, "failed to parse image name %s", image) @@ -102,28 +102,45 @@ func imageExists(namespace string, clientConfig *rest.Config, registryCollector return false, errors.Wrap(err, "failed to get auth config") } + sysCtx := types.SystemContext{ + DockerDisableV1Ping: true, + DockerInsecureSkipTLSVerify: types.OptionalBoolTrue, + } + if authConfig != nil { + sysCtx.DockerAuthConfig = &types.DockerAuthConfig{ + Username: authConfig.username, + Password: authConfig.password, + } + } + + if deadline == 0 { + deadline = 10 * time.Second + } + var lastErr error for i := 0; i < 3; i++ { - sysCtx := types.SystemContext{ - DockerDisableV1Ping: true, - DockerInsecureSkipTLSVerify: types.OptionalBoolTrue, - } - if authConfig != nil { - sysCtx.DockerAuthConfig = &types.DockerAuthConfig{ - Username: authConfig.username, - Password: authConfig.password, + err := func() error { + ctx, cancel := context.WithTimeout(context.Background(), deadline) + defer cancel() + remoteImage, err := imageRef.NewImage(ctx, &sysCtx) + if err != nil { + return err } - } - - remoteImage, err := imageRef.NewImage(context.Background(), &sysCtx) + remoteImage.Close() + return nil + }() if err == nil { klog.V(2).Infof("image %s exists", image) - remoteImage.Close() return true, nil } klog.Errorf("failed to get image %s: %v", image, err) + // if this is a context timeout, stop here so we dont run this check for too long + if errors.Is(err, context.DeadlineExceeded) { + return false, errors.Wrap(err, "failed to get image manifest") + } + if strings.Contains(err.Error(), "no image found in manifest list for architecture") { // manifest was downloaded, but no matching architecture found in manifest // should this count as image does not exist? diff --git a/pkg/collect/registry_test.go b/pkg/collect/registry_test.go index 94fb9f16..fc9793dd 100644 --- a/pkg/collect/registry_test.go +++ b/pkg/collect/registry_test.go @@ -1,13 +1,17 @@ package collect import ( + "context" "encoding/base64" "fmt" + "net" "testing" + "time" "github.com/containers/image/v5/transports/alltransports" "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" "github.com/stretchr/testify/assert" + "k8s.io/client-go/rest" ) func TestGetImageAuthConfigFromData(t *testing.T) { @@ -76,3 +80,42 @@ func TestGetImageAuthConfigFromData(t *testing.T) { }) } } + +func TestImageExists_ContextDeadlineExceeded(t *testing.T) { + // Start a server that accepts connections but doesn't respond + // This will cause the context deadline exceeded error + listener, err := net.Listen("tcp", "127.0.0.1:0") + assert.NoError(t, err) + defer listener.Close() + + port := listener.Addr().(*net.TCPAddr).Port + + // Start a goroutine that accepts connections but doesn't respond + go func() { + for { + conn, err := listener.Accept() + if err != nil { + return + } + // Keep the connection open but don't respond + // This will cause the client to timeout + time.Sleep(2 * time.Second) + conn.Close() + } + }() + + // Create a test registry collector pointing to our test server + registryCollector := &v1beta2.RegistryImages{ + Images: []string{fmt.Sprintf("127.0.0.1:%d/test-image:latest", port)}, + } + + // Create a minimal client config + clientConfig := &rest.Config{} + + // Test the imageExists function - this should trigger context deadline exceeded + exists, err := imageExists("default", clientConfig, registryCollector, fmt.Sprintf("127.0.0.1:%d/test-image:latest", port), 100*time.Millisecond) + + // Verify the result - should return false without error due to context deadline exceeded handling + assert.ErrorIs(t, err, context.DeadlineExceeded) + assert.False(t, exists) +}