Add context to long-running operations

This commit is contained in:
Fan Shang Xiang
2023-06-30 10:17:13 +08:00
parent 55586431bd
commit 471ab88240
12 changed files with 82 additions and 90 deletions

View File

@@ -17,6 +17,8 @@ limitations under the License.
package main
import (
"context"
"github.com/golang/glog"
_ "k8s.io/node-problem-detector/cmd/nodeproblemdetector/exporterplugins"
@@ -31,16 +33,7 @@ import (
"k8s.io/node-problem-detector/pkg/version"
)
// npdInteractive runs the problem detector in the foreground. It gives
// npdMain a buffered termination channel that this function itself never
// sends on, closes that channel on return, and reports any error from
// npdMain through glog.Fatalf.
func npdInteractive(npdo *options.NodeProblemDetectorOptions) {
	stopCh := make(chan error, 1)
	defer close(stopCh)

	err := npdMain(npdo, stopCh)
	if err == nil {
		return
	}
	glog.Fatalf("Problem detector failed with error: %v", err)
}
func npdMain(npdo *options.NodeProblemDetectorOptions, termCh <-chan error) error {
func npdMain(ctx context.Context, npdo *options.NodeProblemDetectorOptions) error {
if npdo.PrintVersion {
version.PrintVersion()
return nil
@@ -58,7 +51,7 @@ func npdMain(npdo *options.NodeProblemDetectorOptions, termCh <-chan error) erro
// Initialize exporters.
defaultExporters := []types.Exporter{}
if ke := k8sexporter.NewExporterOrDie(npdo); ke != nil {
if ke := k8sexporter.NewExporterOrDie(ctx, npdo); ke != nil {
defaultExporters = append(defaultExporters, ke)
glog.Info("K8s exporter started.")
}
@@ -79,5 +72,5 @@ func npdMain(npdo *options.NodeProblemDetectorOptions, termCh <-chan error) erro
// Initialize NPD core.
p := problemdetector.NewProblemDetector(problemDaemons, npdExporters)
return p.Run(termCh)
return p.Run(ctx)
}

View File

@@ -17,6 +17,9 @@ limitations under the License.
package main
import (
"context"
"github.com/golang/glog"
"github.com/spf13/pflag"
"k8s.io/node-problem-detector/cmd/options"
)
@@ -26,5 +29,7 @@ func main() {
npdo.AddFlags(pflag.CommandLine)
pflag.Parse()
npdInteractive(npdo)
if err := npdMain(context.Background(), npdo); err != nil {
glog.Fatalf("Problem detector failed with error: %v", err)
}
}

View File

@@ -20,7 +20,7 @@ limitations under the License.
package main
import (
"errors"
"context"
"fmt"
"os"
"strings"
@@ -81,11 +81,9 @@ func TestNPDMain(t *testing.T) {
npdo, cleanup := setupNPD(t)
defer cleanup()
termCh := make(chan error, 2)
termCh <- errors.New("close")
defer close(termCh)
if err := npdMain(npdo, termCh); err != nil {
ctx, cancelFunc := context.WithCancel(context.Background())
cancelFunc()
if err := npdMain(ctx, npdo); err != nil {
t.Errorf("termination signal should not return error got, %v", err)
}
}

View File

@@ -17,7 +17,7 @@ limitations under the License.
package main
import (
"errors"
"context"
"fmt"
"sync"
"time"
@@ -102,26 +102,20 @@ type npdService struct {
}
func (s *npdService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (bool, uint32) {
appTermCh := make(chan error, 1)
svcLoopTermCh := make(chan error, 1)
defer func() {
close(appTermCh)
close(svcLoopTermCh)
}()
changes <- svc.Status{State: svc.StartPending}
changes <- svc.Status{State: svc.Running, Accepts: svcCommandsAccepted}
var appWG sync.WaitGroup
var svcWG sync.WaitGroup
options := s.options
ctx, cancelFunc := context.WithCancel(context.Background())
// NPD application goroutine.
appWG.Add(1)
go func() {
defer appWG.Done()
if err := npdMain(options, appTermCh); err != nil {
if err := npdMain(ctx, options); err != nil {
elog.Warning(windowsEventLogID, err.Error())
}
@@ -132,16 +126,36 @@ func (s *npdService) Execute(args []string, r <-chan svc.ChangeRequest, changes
svcWG.Add(1)
go func() {
defer svcWG.Done()
serviceLoop(r, changes, appTermCh, svcLoopTermCh)
for {
select {
case <-ctx.Done():
return
case c := <-r:
switch c.Cmd {
case svc.Interrogate:
changes <- c.CurrentStatus
// Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
time.Sleep(100 * time.Millisecond)
changes <- c.CurrentStatus
case svc.Stop, svc.Shutdown:
elog.Info(windowsEventLogID, fmt.Sprintf("Stopping %s service, %v", svcName, c.Context))
cancelFunc()
case svc.Pause:
elog.Info(windowsEventLogID, "ignoring pause command from Windows service control, not supported")
changes <- svc.Status{State: svc.Paused, Accepts: svcCommandsAccepted}
case svc.Continue:
elog.Info(windowsEventLogID, "ignoring continue command from Windows service control, not supported")
changes <- svc.Status{State: svc.Running, Accepts: svcCommandsAccepted}
default:
elog.Error(windowsEventLogID, fmt.Sprintf("unexpected control request #%d", c))
}
}
}
}()
// Wait for the application goroutine to exit.
appWG.Wait()
// Ensure that the service control loop is killed.
svcLoopTermCh <- nil
// Wait for the service control loop to terminate.
// Otherwise it's possible that the channel closures cause the application to panic.
svcWG.Wait()
@@ -151,31 +165,3 @@ func (s *npdService) Execute(args []string, r <-chan svc.ChangeRequest, changes
return false, uint32(0)
}
// serviceLoop handles Windows service control requests arriving on r until
// svcLoopTermCh becomes readable.
//
// Handling per command:
//   - Interrogate: the request's current status is written to changes twice,
//     100ms apart.
//   - Stop, Shutdown: the request is logged and a termination error is offered
//     to the application on appTermCh.
//   - Pause, Continue: logged as unsupported and answered with a Paused or
//     Running status respectively.
//   - anything else: logged as an error.
//
// The send on appTermCh is non-blocking. A blocking send would stall this loop
// once the channel's buffer is full (for example on repeated stop requests),
// and a stalled loop can never observe svcLoopTermCh. Dropping the extra signal
// is safe when a termination request is already queued.
// NOTE(review): this relies on appTermCh being buffered, as it is in the
// caller visible in this file; confirm for any other caller.
func serviceLoop(r <-chan svc.ChangeRequest, changes chan<- svc.Status, appTermCh chan error, svcLoopTermCh chan error) {
	for {
		select {
		case <-svcLoopTermCh:
			return
		case c := <-r:
			switch c.Cmd {
			case svc.Interrogate:
				changes <- c.CurrentStatus
				// Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
				time.Sleep(100 * time.Millisecond)
				changes <- c.CurrentStatus
			case svc.Stop, svc.Shutdown:
				elog.Info(windowsEventLogID, fmt.Sprintf("Stopping %s service, %v", svcName, c.Context))
				// Offer the termination signal without blocking so the loop
				// stays responsive to svcLoopTermCh.
				select {
				case appTermCh <- errors.New("stopping service"):
				default:
					// A termination request is already pending; nothing more to do.
				}
			case svc.Pause:
				elog.Info(windowsEventLogID, "ignoring pause command from Windows service control, not supported")
				changes <- svc.Status{State: svc.Paused, Accepts: svcCommandsAccepted}
			case svc.Continue:
				elog.Info(windowsEventLogID, "ignoring continue command from Windows service control, not supported")
				changes <- svc.Status{State: svc.Running, Accepts: svcCommandsAccepted}
			default:
				elog.Error(windowsEventLogID, fmt.Sprintf("unexpected control request #%d", c))
			}
		}
	}
}

View File

@@ -20,6 +20,7 @@ limitations under the License.
package main
import (
"context"
"testing"
"golang.org/x/sys/windows/svc"