mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-03-03 02:00:36 +00:00
add context to long running operations
This commit is contained in:
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package condition
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -49,7 +50,7 @@ const (
|
||||
// not. This addresses 3).
|
||||
type ConditionManager interface {
|
||||
// Start starts the condition manager.
|
||||
Start()
|
||||
Start(ctx context.Context)
|
||||
// UpdateCondition updates a specific condition.
|
||||
UpdateCondition(types.Condition)
|
||||
// GetConditions returns all current conditions.
|
||||
@@ -88,8 +89,8 @@ func NewConditionManager(client problemclient.Client, clock clock.Clock, heartbe
|
||||
}
|
||||
}
|
||||
|
||||
func (c *conditionManager) Start() {
|
||||
go c.syncLoop()
|
||||
func (c *conditionManager) Start(ctx context.Context) {
|
||||
go c.syncLoop(ctx)
|
||||
}
|
||||
|
||||
func (c *conditionManager) UpdateCondition(condition types.Condition) {
|
||||
@@ -110,15 +111,17 @@ func (c *conditionManager) GetConditions() []types.Condition {
|
||||
return conditions
|
||||
}
|
||||
|
||||
func (c *conditionManager) syncLoop() {
|
||||
func (c *conditionManager) syncLoop(ctx context.Context) {
|
||||
ticker := c.clock.NewTicker(updatePeriod)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C():
|
||||
if c.needUpdates() || c.needResync() || c.needHeartbeat() {
|
||||
c.sync()
|
||||
c.sync(ctx)
|
||||
}
|
||||
case <-ctx.Done():
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -150,14 +153,14 @@ func (c *conditionManager) needHeartbeat() bool {
|
||||
}
|
||||
|
||||
// sync synchronizes node conditions with the apiserver.
|
||||
func (c *conditionManager) sync() {
|
||||
func (c *conditionManager) sync(ctx context.Context) {
|
||||
c.latestTry = c.clock.Now()
|
||||
c.resyncNeeded = false
|
||||
conditions := []v1.NodeCondition{}
|
||||
for i := range c.conditions {
|
||||
conditions = append(conditions, problemutil.ConvertToAPICondition(c.conditions[i]))
|
||||
}
|
||||
if err := c.client.SetConditions(conditions); err != nil {
|
||||
if err := c.client.SetConditions(ctx, conditions); err != nil {
|
||||
// The conditions will be updated again in future sync
|
||||
glog.Errorf("failed to update node conditions: %v", err)
|
||||
c.resyncNeeded = true
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package condition
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -109,7 +110,7 @@ func TestResync(t *testing.T) {
|
||||
m, fakeClient, fakeClock := newTestManager()
|
||||
condition := newTestCondition("TestCondition")
|
||||
m.conditions = map[string]types.Condition{condition.Type: condition}
|
||||
m.sync()
|
||||
m.sync(context.Background())
|
||||
expected := []v1.NodeCondition{problemutil.ConvertToAPICondition(condition)}
|
||||
assert.Nil(t, fakeClient.AssertConditions(expected), "Condition should be updated via client")
|
||||
|
||||
@@ -118,7 +119,7 @@ func TestResync(t *testing.T) {
|
||||
assert.False(t, m.needResync(), "Should not resync after resync period without resync needed")
|
||||
|
||||
fakeClient.InjectError("SetConditions", fmt.Errorf("injected error"))
|
||||
m.sync()
|
||||
m.sync(context.Background())
|
||||
|
||||
assert.False(t, m.needResync(), "Should not resync before resync period")
|
||||
fakeClock.Step(resyncPeriod)
|
||||
@@ -129,7 +130,7 @@ func TestHeartbeat(t *testing.T) {
|
||||
m, fakeClient, fakeClock := newTestManager()
|
||||
condition := newTestCondition("TestCondition")
|
||||
m.conditions = map[string]types.Condition{condition.Type: condition}
|
||||
m.sync()
|
||||
m.sync(context.Background())
|
||||
expected := []v1.NodeCondition{problemutil.ConvertToAPICondition(condition)}
|
||||
assert.Nil(t, fakeClient.AssertConditions(expected), "Condition should be updated via client")
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package k8sexporter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"net/http"
|
||||
_ "net/http/pprof"
|
||||
@@ -44,7 +45,7 @@ type k8sExporter struct {
|
||||
//
|
||||
// Note that this function may be blocked (until a timeout occurs) before
|
||||
// kube-apiserver becomes ready.
|
||||
func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
|
||||
func NewExporterOrDie(ctx context.Context, npdo *options.NodeProblemDetectorOptions) types.Exporter {
|
||||
if !npdo.EnableK8sExporter {
|
||||
return nil
|
||||
}
|
||||
@@ -52,7 +53,7 @@ func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
|
||||
c := problemclient.NewClientOrDie(npdo)
|
||||
|
||||
glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
|
||||
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
|
||||
if err := waitForAPIServerReadyWithTimeout(ctx, c, npdo); err != nil {
|
||||
glog.Warningf("kube-apiserver did not become ready: timed out on waiting for kube-apiserver to return the node object: %v", err)
|
||||
}
|
||||
|
||||
@@ -62,7 +63,7 @@ func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
|
||||
}
|
||||
|
||||
ke.startHTTPReporting(npdo)
|
||||
ke.conditionManager.Start()
|
||||
ke.conditionManager.Start(ctx)
|
||||
|
||||
return &ke
|
||||
}
|
||||
@@ -103,11 +104,11 @@ func (ke *k8sExporter) startHTTPReporting(npdo *options.NodeProblemDetectorOptio
|
||||
}()
|
||||
}
|
||||
|
||||
func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
|
||||
func waitForAPIServerReadyWithTimeout(ctx context.Context, c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
|
||||
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
|
||||
// If NPD can get the node object from kube-apiserver, the server is
|
||||
// ready and the RBAC permission is set correctly.
|
||||
if _, err := c.GetNode(); err != nil {
|
||||
if _, err := c.GetNode(ctx); err != nil {
|
||||
glog.Errorf("Can't get node object: %v", err)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package problemclient
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sync"
|
||||
@@ -60,7 +61,7 @@ func (f *FakeProblemClient) AssertConditions(expected []v1.NodeCondition) error
|
||||
}
|
||||
|
||||
// SetConditions is a fake mimic of SetConditions, it only update the internal condition cache.
|
||||
func (f *FakeProblemClient) SetConditions(conditions []v1.NodeCondition) error {
|
||||
func (f *FakeProblemClient) SetConditions(ctx context.Context, conditions []v1.NodeCondition) error {
|
||||
f.Lock()
|
||||
defer f.Unlock()
|
||||
if err, ok := f.errors["SetConditions"]; ok {
|
||||
@@ -73,7 +74,7 @@ func (f *FakeProblemClient) SetConditions(conditions []v1.NodeCondition) error {
|
||||
}
|
||||
|
||||
// GetConditions is a fake mimic of GetConditions, it returns the conditions cached internally.
|
||||
func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
|
||||
func (f *FakeProblemClient) GetConditions(ctx context.Context, types []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
|
||||
f.Lock()
|
||||
defer f.Unlock()
|
||||
if err, ok := f.errors["GetConditions"]; ok {
|
||||
@@ -93,6 +94,6 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
|
||||
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
|
||||
}
|
||||
|
||||
func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
|
||||
func (f *FakeProblemClient) GetNode(ctx context.Context) (*v1.Node, error) {
|
||||
return nil, fmt.Errorf("GetNode() not implemented")
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package problemclient
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/url"
|
||||
@@ -40,14 +41,14 @@ import (
|
||||
// Client is the interface of problem client
|
||||
type Client interface {
|
||||
// GetConditions get all specific conditions of current node.
|
||||
GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error)
|
||||
GetConditions(ctx context.Context, conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error)
|
||||
// SetConditions set or update conditions of current node.
|
||||
SetConditions(conditions []v1.NodeCondition) error
|
||||
SetConditions(ctx context.Context, conditionTypes []v1.NodeCondition) error
|
||||
// Eventf reports the event.
|
||||
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
|
||||
// GetNode returns the Node object of the node on which the
|
||||
// node-problem-detector runs.
|
||||
GetNode() (*v1.Node, error)
|
||||
GetNode(ctx context.Context) (*v1.Node, error)
|
||||
}
|
||||
|
||||
type nodeProblemClient struct {
|
||||
@@ -81,8 +82,8 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
|
||||
node, err := c.GetNode()
|
||||
func (c *nodeProblemClient) GetConditions(ctx context.Context, conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
|
||||
node, err := c.GetNode(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -97,7 +98,7 @@ func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType)
|
||||
return conditions, nil
|
||||
}
|
||||
|
||||
func (c *nodeProblemClient) SetConditions(newConditions []v1.NodeCondition) error {
|
||||
func (c *nodeProblemClient) SetConditions(ctx context.Context, newConditions []v1.NodeCondition) error {
|
||||
for i := range newConditions {
|
||||
// Each time we update the conditions, we update the heart beat time
|
||||
newConditions[i].LastHeartbeatTime = metav1.NewTime(c.clock.Now())
|
||||
@@ -119,7 +120,7 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
|
||||
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
|
||||
}
|
||||
|
||||
func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
|
||||
func (c *nodeProblemClient) GetNode(ctx context.Context) (*v1.Node, error) {
|
||||
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package problemdetector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/golang/glog"
|
||||
@@ -26,7 +27,7 @@ import (
|
||||
|
||||
// ProblemDetector collects statuses from all problem daemons and update the node condition and send node event.
|
||||
type ProblemDetector interface {
|
||||
Run(termCh <-chan error) error
|
||||
Run(context.Context) error
|
||||
}
|
||||
|
||||
type problemDetector struct {
|
||||
@@ -44,7 +45,7 @@ func NewProblemDetector(monitors []types.Monitor, exporters []types.Exporter) Pr
|
||||
}
|
||||
|
||||
// Run starts the problem detector.
|
||||
func (p *problemDetector) Run(termCh <-chan error) error {
|
||||
func (p *problemDetector) Run(ctx context.Context) error {
|
||||
// Start the log monitors one by one.
|
||||
var chans []<-chan *types.Status
|
||||
failureCount := 0
|
||||
@@ -77,7 +78,7 @@ func (p *problemDetector) Run(termCh <-chan error) error {
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-termCh:
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
case status := <-ch:
|
||||
for _, exporter := range p.exporters {
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package problemdetector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"k8s.io/node-problem-detector/pkg/types"
|
||||
@@ -24,7 +25,7 @@ import (
|
||||
|
||||
func TestEmpty(t *testing.T) {
|
||||
pd := NewProblemDetector([]types.Monitor{}, []types.Exporter{})
|
||||
if err := pd.Run(nil); err == nil {
|
||||
if err := pd.Run(context.Background()); err == nil {
|
||||
t.Error("expected error when running an empty problem detector")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user