mirror of
https://github.com/woodpecker-ci/woodpecker.git
synced 2026-02-13 21:00:00 +00:00
Simplify and Fix server task queue (#6017)
This commit is contained in:
@@ -296,7 +296,7 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Debug().Msg("polling new steps")
|
||||
log.Debug().Msg("polling new workflow")
|
||||
if err := runner.Run(agentCtx, shutdownCtx); err != nil {
|
||||
log.Error().Err(err).Msg("runner error, retrying...")
|
||||
// Check if context is canceled
|
||||
|
||||
@@ -39,31 +39,28 @@ func Cancel(ctx context.Context, _forge forge.Forge, store store.Store, repo *mo
|
||||
return &ErrNotFound{Msg: err.Error()}
|
||||
}
|
||||
|
||||
// First cancel/evict steps in the queue in one go
|
||||
// First cancel/evict workflows in the queue in one go
|
||||
var (
|
||||
stepsToCancel []string
|
||||
stepsToEvict []string
|
||||
workflowsToCancel []string
|
||||
workflowsToEvict []string
|
||||
)
|
||||
for _, workflow := range workflows {
|
||||
if workflow.State == model.StatusRunning {
|
||||
stepsToCancel = append(stepsToCancel, fmt.Sprint(workflow.ID))
|
||||
workflowsToCancel = append(workflowsToCancel, fmt.Sprint(workflow.ID))
|
||||
}
|
||||
if workflow.State == model.StatusPending {
|
||||
stepsToEvict = append(stepsToEvict, fmt.Sprint(workflow.ID))
|
||||
workflowsToEvict = append(workflowsToEvict, fmt.Sprint(workflow.ID))
|
||||
}
|
||||
}
|
||||
|
||||
if len(stepsToEvict) != 0 {
|
||||
if err := server.Config.Services.Queue.EvictAtOnce(ctx, stepsToEvict); err != nil {
|
||||
log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToEvict)
|
||||
}
|
||||
if err := server.Config.Services.Queue.ErrorAtOnce(ctx, stepsToEvict, queue.ErrCancel); err != nil {
|
||||
log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToEvict)
|
||||
if len(workflowsToEvict) != 0 {
|
||||
if err := server.Config.Services.Queue.ErrorAtOnce(ctx, workflowsToEvict, queue.ErrCancel); err != nil {
|
||||
log.Error().Err(err).Msgf("queue: evict_at_once: %v", workflowsToEvict)
|
||||
}
|
||||
}
|
||||
if len(stepsToCancel) != 0 {
|
||||
if err := server.Config.Services.Queue.ErrorAtOnce(ctx, stepsToCancel, queue.ErrCancel); err != nil {
|
||||
log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToCancel)
|
||||
if len(workflowsToCancel) != 0 {
|
||||
if err := server.Config.Services.Queue.ErrorAtOnce(ctx, workflowsToCancel, queue.ErrCancel); err != nil {
|
||||
log.Error().Err(err).Msgf("queue: evict_at_once: %v", workflowsToCancel)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2017, Brad Rydzewski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -17,7 +17,7 @@ package queue
|
||||
import (
|
||||
"container/list"
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
"slices"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -58,8 +58,6 @@ type fifo struct {
|
||||
// as the agent pull in 10 milliseconds we should also give them work asap.
|
||||
const processTimeInterval = 100 * time.Millisecond
|
||||
|
||||
var ErrWorkerKicked = fmt.Errorf("worker was kicked")
|
||||
|
||||
// NewMemoryQueue returns a new fifo queue.
|
||||
func NewMemoryQueue(ctx context.Context) Queue {
|
||||
q := &fifo{
|
||||
@@ -90,23 +88,23 @@ func (q *fifo) Poll(c context.Context, agentID int64, filter FilterFn) (*model.T
|
||||
q.Lock()
|
||||
ctx, stop := context.WithCancelCause(c)
|
||||
|
||||
_worker := &worker{
|
||||
w := &worker{
|
||||
agentID: agentID,
|
||||
channel: make(chan *model.Task, 1),
|
||||
filter: filter,
|
||||
stop: stop,
|
||||
}
|
||||
q.workers[_worker] = struct{}{}
|
||||
q.workers[w] = struct{}{}
|
||||
q.Unlock()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
q.Lock()
|
||||
delete(q.workers, _worker)
|
||||
delete(q.workers, w)
|
||||
q.Unlock()
|
||||
return nil, ctx.Err()
|
||||
case t := <-_worker.channel:
|
||||
case t := <-w.channel:
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
@@ -122,47 +120,40 @@ func (q *fifo) Error(_ context.Context, id string, err error) error {
|
||||
return q.finished([]string{id}, model.StatusFailure, err)
|
||||
}
|
||||
|
||||
// ErrorAtOnce signals multiple done are complete with an error.
|
||||
// ErrorAtOnce signals multiple tasks are done and complete with an error.
|
||||
// If still pending they will just get removed from the queue.
|
||||
func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error {
|
||||
if errors.Is(err, ErrCancel) {
|
||||
return q.finished(ids, model.StatusKilled, err)
|
||||
}
|
||||
return q.finished(ids, model.StatusFailure, err)
|
||||
}
|
||||
|
||||
// locks the queue itself!
|
||||
func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) error {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
|
||||
var errs []error
|
||||
// we first process the tasks themselves
|
||||
for _, id := range ids {
|
||||
taskEntry, ok := q.running[id]
|
||||
if ok {
|
||||
if taskEntry, ok := q.running[id]; ok {
|
||||
taskEntry.error = err
|
||||
close(taskEntry.done)
|
||||
delete(q.running, id)
|
||||
} else {
|
||||
q.removeFromPending(id)
|
||||
errs = append(errs, q.removeFromPendingAndWaiting(id))
|
||||
}
|
||||
}
|
||||
|
||||
// next we aim for their dependencies
|
||||
// we do this because in our ids list there could be tasks and their dependencies
|
||||
// so as not to mess things up
|
||||
for _, id := range ids {
|
||||
q.updateDepStatusInQueue(id, exitStatus)
|
||||
}
|
||||
|
||||
q.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// EvictAtOnce removes multiple pending tasks from the queue.
|
||||
func (q *fifo) EvictAtOnce(_ context.Context, taskIDs []string) error {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
|
||||
for _, id := range taskIDs {
|
||||
var next *list.Element
|
||||
for element := q.pending.Front(); element != nil; element = next {
|
||||
next = element.Next()
|
||||
task, ok := element.Value.(*model.Task)
|
||||
if ok && task.ID == id {
|
||||
q.pending.Remove(element)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return ErrNotFound
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
// Wait waits until the item is done executing.
|
||||
@@ -286,19 +277,15 @@ func (q *fifo) process() {
|
||||
|
||||
func (q *fifo) filterWaiting() {
|
||||
// resubmits all waiting tasks to pending, deps may have cleared
|
||||
var nextWaiting *list.Element
|
||||
for e := q.waitingOnDeps.Front(); e != nil; e = nextWaiting {
|
||||
nextWaiting = e.Next()
|
||||
task, _ := e.Value.(*model.Task)
|
||||
for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
|
||||
task, _ := element.Value.(*model.Task)
|
||||
q.pending.PushBack(task)
|
||||
}
|
||||
|
||||
// rebuild waitingOnDeps
|
||||
q.waitingOnDeps = list.New()
|
||||
var filtered []*list.Element
|
||||
var nextPending *list.Element
|
||||
for element := q.pending.Front(); element != nil; element = nextPending {
|
||||
nextPending = element.Next()
|
||||
for element := q.pending.Front(); element != nil; element = element.Next() {
|
||||
task, _ := element.Value.(*model.Task)
|
||||
if q.depsInQueue(task) {
|
||||
log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID)
|
||||
@@ -314,12 +301,10 @@ func (q *fifo) filterWaiting() {
|
||||
}
|
||||
|
||||
func (q *fifo) assignToWorker() (*list.Element, *worker) {
|
||||
var next *list.Element
|
||||
var bestWorker *worker
|
||||
var bestScore int
|
||||
|
||||
for element := q.pending.Front(); element != nil; element = next {
|
||||
next = element.Next()
|
||||
for element := q.pending.Front(); element != nil; element = element.Next() {
|
||||
task, _ := element.Value.(*model.Task)
|
||||
log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies)
|
||||
|
||||
@@ -352,9 +337,7 @@ func (q *fifo) resubmitExpiredPipelines() {
|
||||
}
|
||||
|
||||
func (q *fifo) depsInQueue(task *model.Task) bool {
|
||||
var next *list.Element
|
||||
for element := q.pending.Front(); element != nil; element = next {
|
||||
next = element.Next()
|
||||
for element := q.pending.Front(); element != nil; element = element.Next() {
|
||||
possibleDep, ok := element.Value.(*model.Task)
|
||||
log.Debug().Msgf("queue: pending right now: %v", possibleDep.ID)
|
||||
for _, dep := range task.Dependencies {
|
||||
@@ -372,13 +355,12 @@ func (q *fifo) depsInQueue(task *model.Task) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// expects the q to be currently owned e.g. locked by caller!
|
||||
func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
|
||||
var next *list.Element
|
||||
for element := q.pending.Front(); element != nil; element = next {
|
||||
next = element.Next()
|
||||
pending, ok := element.Value.(*model.Task)
|
||||
for element := q.pending.Front(); element != nil; element = element.Next() {
|
||||
pending, _ := element.Value.(*model.Task)
|
||||
for _, dep := range pending.Dependencies {
|
||||
if ok && taskID == dep {
|
||||
if taskID == dep {
|
||||
pending.DepStatus[dep] = status
|
||||
}
|
||||
}
|
||||
@@ -392,27 +374,40 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
|
||||
}
|
||||
}
|
||||
|
||||
for element := q.waitingOnDeps.Front(); element != nil; element = next {
|
||||
next = element.Next()
|
||||
waiting, ok := element.Value.(*model.Task)
|
||||
for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
|
||||
waiting, _ := element.Value.(*model.Task)
|
||||
for _, dep := range waiting.Dependencies {
|
||||
if ok && taskID == dep {
|
||||
if taskID == dep {
|
||||
waiting.DepStatus[dep] = status
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (q *fifo) removeFromPending(taskID string) {
|
||||
// expects the q to be currently owned e.g. locked by caller!
|
||||
func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
|
||||
log.Debug().Msgf("queue: trying to remove %s", taskID)
|
||||
var next *list.Element
|
||||
for element := q.pending.Front(); element != nil; element = next {
|
||||
next = element.Next()
|
||||
|
||||
// we assume pending first
|
||||
for element := q.pending.Front(); element != nil; element = element.Next() {
|
||||
task, _ := element.Value.(*model.Task)
|
||||
if task.ID == taskID {
|
||||
log.Debug().Msgf("queue: %s is removed from pending", taskID)
|
||||
q.pending.Remove(element)
|
||||
return
|
||||
_ = q.pending.Remove(element)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// well looks like it's waiting
|
||||
for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
|
||||
task, _ := element.Value.(*model.Task)
|
||||
if task.ID == taskID {
|
||||
log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID)
|
||||
_ = q.waitingOnDeps.Remove(element)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// well it could not be found
|
||||
return ErrNotFound
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -228,63 +228,6 @@ func (_c *MockQueue_ErrorAtOnce_Call) RunAndReturn(run func(c context.Context, i
|
||||
return _c
|
||||
}
|
||||
|
||||
// EvictAtOnce provides a mock function for the type MockQueue
|
||||
func (_mock *MockQueue) EvictAtOnce(c context.Context, ids []string) error {
|
||||
ret := _mock.Called(c, ids)
|
||||
|
||||
if len(ret) == 0 {
|
||||
panic("no return value specified for EvictAtOnce")
|
||||
}
|
||||
|
||||
var r0 error
|
||||
if returnFunc, ok := ret.Get(0).(func(context.Context, []string) error); ok {
|
||||
r0 = returnFunc(c, ids)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
return r0
|
||||
}
|
||||
|
||||
// MockQueue_EvictAtOnce_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'EvictAtOnce'
|
||||
type MockQueue_EvictAtOnce_Call struct {
|
||||
*mock.Call
|
||||
}
|
||||
|
||||
// EvictAtOnce is a helper method to define mock.On call
|
||||
// - c context.Context
|
||||
// - ids []string
|
||||
func (_e *MockQueue_Expecter) EvictAtOnce(c interface{}, ids interface{}) *MockQueue_EvictAtOnce_Call {
|
||||
return &MockQueue_EvictAtOnce_Call{Call: _e.mock.On("EvictAtOnce", c, ids)}
|
||||
}
|
||||
|
||||
func (_c *MockQueue_EvictAtOnce_Call) Run(run func(c context.Context, ids []string)) *MockQueue_EvictAtOnce_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
var arg0 context.Context
|
||||
if args[0] != nil {
|
||||
arg0 = args[0].(context.Context)
|
||||
}
|
||||
var arg1 []string
|
||||
if args[1] != nil {
|
||||
arg1 = args[1].([]string)
|
||||
}
|
||||
run(
|
||||
arg0,
|
||||
arg1,
|
||||
)
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockQueue_EvictAtOnce_Call) Return(err error) *MockQueue_EvictAtOnce_Call {
|
||||
_c.Call.Return(err)
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockQueue_EvictAtOnce_Call) RunAndReturn(run func(c context.Context, ids []string) error) *MockQueue_EvictAtOnce_Call {
|
||||
_c.Call.Return(run)
|
||||
return _c
|
||||
}
|
||||
|
||||
// Extend provides a mock function for the type MockQueue
|
||||
func (_mock *MockQueue) Extend(c context.Context, agentID int64, workflowID string) error {
|
||||
ret := _mock.Called(c, agentID, workflowID)
|
||||
|
||||
@@ -72,19 +72,6 @@ func (q *persistentQueue) Poll(c context.Context, agentID int64, f FilterFn) (*m
|
||||
return task, err
|
||||
}
|
||||
|
||||
// EvictAtOnce removes multiple pending tasks from the queue.
|
||||
func (q *persistentQueue) EvictAtOnce(c context.Context, ids []string) error {
|
||||
if err := q.Queue.EvictAtOnce(c, ids); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, id := range ids {
|
||||
if err := q.store.TaskDelete(id); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Error signals the task is done with an error.
|
||||
func (q *persistentQueue) Error(c context.Context, id string, err error) error {
|
||||
if err := q.Queue.Error(c, id, err); err != nil {
|
||||
@@ -93,7 +80,8 @@ func (q *persistentQueue) Error(c context.Context, id string, err error) error {
|
||||
return q.store.TaskDelete(id)
|
||||
}
|
||||
|
||||
// ErrorAtOnce signals multiple tasks are done with an error.
|
||||
// ErrorAtOnce signals multiple tasks are done and complete with an error.
|
||||
// If still pending they will just get removed from the queue.
|
||||
func (q *persistentQueue) ErrorAtOnce(c context.Context, ids []string, err error) error {
|
||||
if err := q.Queue.ErrorAtOnce(c, ids, err); err != nil {
|
||||
return err
|
||||
|
||||
@@ -36,6 +36,9 @@ var (
|
||||
|
||||
// ErrTaskExpired indicates a running task exceeded its lease/deadline and was resubmitted.
|
||||
ErrTaskExpired = errors.New("queue: task expired")
|
||||
|
||||
// ErrWorkerKicked indicates a worker of an agent got kicked.
|
||||
ErrWorkerKicked = errors.New("worker was kicked")
|
||||
)
|
||||
|
||||
// InfoT provides runtime information.
|
||||
@@ -93,12 +96,10 @@ type Queue interface {
|
||||
// Error signals the task is done with an error.
|
||||
Error(c context.Context, id string, err error) error
|
||||
|
||||
// ErrorAtOnce signals multiple done are complete with an error.
|
||||
// ErrorAtOnce signals multiple tasks are done and complete with an error.
|
||||
// If still pending they will just get removed from the queue.
|
||||
ErrorAtOnce(c context.Context, ids []string, err error) error
|
||||
|
||||
// EvictAtOnce removes multiple pending tasks from the queue.
|
||||
EvictAtOnce(c context.Context, ids []string) error
|
||||
|
||||
// Wait waits until the task is complete.
|
||||
Wait(c context.Context, id string) error
|
||||
|
||||
|
||||
Reference in New Issue
Block a user