fix(collect): add context timeout to registry collector (#1846)

* fix(collect): add context timeout to registry collector

* f

* f
This commit is contained in:
Ethan Mosbaugh
2025-09-10 11:47:58 -07:00
committed by GitHub
parent 50ada4abdf
commit f352396e2e
2 changed files with 74 additions and 14 deletions

View File

@@ -62,7 +62,7 @@ func (c *CollectRegistry) Collect(progressChan chan<- interface{}) (CollectorRes
}
for _, image := range c.Collector.Images {
exists, err := imageExists(c.Namespace, c.ClientConfig, c.Collector, image)
exists, err := imageExists(c.Namespace, c.ClientConfig, c.Collector, image, 10*time.Second)
if err != nil {
registryInfo.Images[image] = RegistryImage{
Error: err.Error(),
@@ -90,7 +90,7 @@ func (c *CollectRegistry) Collect(progressChan chan<- interface{}) (CollectorRes
return output, nil
}
func imageExists(namespace string, clientConfig *rest.Config, registryCollector *troubleshootv1beta2.RegistryImages, image string) (bool, error) {
func imageExists(namespace string, clientConfig *rest.Config, registryCollector *troubleshootv1beta2.RegistryImages, image string, deadline time.Duration) (bool, error) {
imageRef, err := alltransports.ParseImageName(fmt.Sprintf("docker://%s", image))
if err != nil {
return false, errors.Wrapf(err, "failed to parse image name %s", image)
@@ -102,28 +102,45 @@ func imageExists(namespace string, clientConfig *rest.Config, registryCollector
return false, errors.Wrap(err, "failed to get auth config")
}
sysCtx := types.SystemContext{
DockerDisableV1Ping: true,
DockerInsecureSkipTLSVerify: types.OptionalBoolTrue,
}
if authConfig != nil {
sysCtx.DockerAuthConfig = &types.DockerAuthConfig{
Username: authConfig.username,
Password: authConfig.password,
}
}
if deadline == 0 {
deadline = 10 * time.Second
}
var lastErr error
for i := 0; i < 3; i++ {
sysCtx := types.SystemContext{
DockerDisableV1Ping: true,
DockerInsecureSkipTLSVerify: types.OptionalBoolTrue,
}
if authConfig != nil {
sysCtx.DockerAuthConfig = &types.DockerAuthConfig{
Username: authConfig.username,
Password: authConfig.password,
err := func() error {
ctx, cancel := context.WithTimeout(context.Background(), deadline)
defer cancel()
remoteImage, err := imageRef.NewImage(ctx, &sysCtx)
if err != nil {
return err
}
}
remoteImage, err := imageRef.NewImage(context.Background(), &sysCtx)
remoteImage.Close()
return nil
}()
if err == nil {
klog.V(2).Infof("image %s exists", image)
remoteImage.Close()
return true, nil
}
klog.Errorf("failed to get image %s: %v", image, err)
// if this is a context timeout, stop here so we dont run this check for too long
if errors.Is(err, context.DeadlineExceeded) {
return false, errors.Wrap(err, "failed to get image manifest")
}
if strings.Contains(err.Error(), "no image found in manifest list for architecture") {
// manifest was downloaded, but no matching architecture found in manifest
// should this count as image does not exist?

View File

@@ -1,13 +1,17 @@
package collect
import (
"context"
"encoding/base64"
"fmt"
"net"
"testing"
"time"
"github.com/containers/image/v5/transports/alltransports"
"github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/stretchr/testify/assert"
"k8s.io/client-go/rest"
)
func TestGetImageAuthConfigFromData(t *testing.T) {
@@ -76,3 +80,42 @@ func TestGetImageAuthConfigFromData(t *testing.T) {
})
}
}
func TestImageExists_ContextDeadlineExceeded(t *testing.T) {
// Start a server that accepts connections but doesn't respond
// This will cause the context deadline exceeded error
listener, err := net.Listen("tcp", "127.0.0.1:0")
assert.NoError(t, err)
defer listener.Close()
port := listener.Addr().(*net.TCPAddr).Port
// Start a goroutine that accepts connections but doesn't respond
go func() {
for {
conn, err := listener.Accept()
if err != nil {
return
}
// Keep the connection open but don't respond
// This will cause the client to timeout
time.Sleep(2 * time.Second)
conn.Close()
}
}()
// Create a test registry collector pointing to our test server
registryCollector := &v1beta2.RegistryImages{
Images: []string{fmt.Sprintf("127.0.0.1:%d/test-image:latest", port)},
}
// Create a minimal client config
clientConfig := &rest.Config{}
// Test the imageExists function - this should trigger context deadline exceeded
exists, err := imageExists("default", clientConfig, registryCollector, fmt.Sprintf("127.0.0.1:%d/test-image:latest", port), 100*time.Millisecond)
// Verify the result - should return false without error due to context deadline exceeded handling
assert.ErrorIs(t, err, context.DeadlineExceeded)
assert.False(t, exists)
}