Files
open-cluster-management/pkg/work/spoke/controllers/statuscontroller/availablestatus_controller.go
Jian Qiu 33310619d9 🌱 use SDK basecontroller for better logging. (#1269)
* Use basecontroller in sdk-go instead for better logging

Signed-off-by: Jian Qiu <jqiu@redhat.com>

* Rename to fakeSyncContext

Signed-off-by: Jian Qiu <jqiu@redhat.com>

---------

Signed-off-by: Jian Qiu <jqiu@redhat.com>
2025-12-01 03:07:02 +00:00

271 lines
10 KiB
Go

package statuscontroller
import (
"context"
"fmt"
"time"
"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/dynamic"
"k8s.io/klog/v2"
workv1client "open-cluster-management.io/api/client/work/clientset/versioned/typed/work/v1"
workinformer "open-cluster-management.io/api/client/work/informers/externalversions/work/v1"
worklister "open-cluster-management.io/api/client/work/listers/work/v1"
workapiv1 "open-cluster-management.io/api/work/v1"
"open-cluster-management.io/sdk-go/pkg/basecontroller/factory"
"open-cluster-management.io/sdk-go/pkg/patcher"
commonhelper "open-cluster-management.io/ocm/pkg/common/helpers"
"open-cluster-management.io/ocm/pkg/common/queue"
"open-cluster-management.io/ocm/pkg/work/helper"
"open-cluster-management.io/ocm/pkg/work/spoke/conditions"
"open-cluster-management.io/ocm/pkg/work/spoke/statusfeedback"
)
const (
statusFeedbackConditionType = "StatusFeedbackSynced"
controllerName = "AvailableStatusController"
)
// AvailableStatusController is to update the available status conditions of both manifests and manifestworks.
// It is also used to get the status value based on status feedback configuration and get condition values
// based on condition rules in manifestwork. The functions are logically disinct, however, they are put in the same
// control loop to reduce live get call to spoke apiserver and status update call to hub apiserver.
type AvailableStatusController struct {
patcher patcher.Patcher[*workapiv1.ManifestWork, workapiv1.ManifestWorkSpec, workapiv1.ManifestWorkStatus]
manifestWorkLister worklister.ManifestWorkNamespaceLister
spokeDynamicClient dynamic.Interface
statusReader *statusfeedback.StatusReader
conditionReader *conditions.ConditionReader
syncInterval time.Duration
}
// NewAvailableStatusController returns a AvailableStatusController
func NewAvailableStatusController(
spokeDynamicClient dynamic.Interface,
manifestWorkClient workv1client.ManifestWorkInterface,
manifestWorkInformer workinformer.ManifestWorkInformer,
manifestWorkLister worklister.ManifestWorkNamespaceLister,
maxJSONRawLength int32,
syncInterval time.Duration,
) (factory.Controller, error) {
conditionReader, err := conditions.NewConditionReader()
if err != nil {
return nil, err
}
controller := &AvailableStatusController{
patcher: patcher.NewPatcher[
*workapiv1.ManifestWork, workapiv1.ManifestWorkSpec, workapiv1.ManifestWorkStatus](
manifestWorkClient),
manifestWorkLister: manifestWorkLister,
spokeDynamicClient: spokeDynamicClient,
syncInterval: syncInterval,
statusReader: statusfeedback.NewStatusReader().WithMaxJsonRawLength(maxJSONRawLength),
conditionReader: conditionReader,
}
return factory.New().
WithInformersQueueKeysFunc(queue.QueueKeyByMetaName, manifestWorkInformer.Informer()).
WithSync(controller.sync).ToController(controllerName), nil
}
func (c *AvailableStatusController) sync(ctx context.Context, controllerContext factory.SyncContext, manifestWorkName string) error {
logger := klog.FromContext(ctx).WithValues("manifestWorkName", manifestWorkName)
logger.V(4).Info("Reconciling ManifestWork")
ctx = klog.NewContext(ctx, logger)
// sync a particular manifestwork
manifestWork, err := c.manifestWorkLister.Get(manifestWorkName)
if errors.IsNotFound(err) {
// work not found, could have been deleted, do nothing.
return nil
}
if err != nil {
return fmt.Errorf("unable to fetch manifestwork %q: %w", manifestWorkName, err)
}
err = c.syncManifestWork(ctx, manifestWork)
if err != nil {
return fmt.Errorf("unable to sync manifestwork %q: %w", manifestWork.Name, err)
}
// requeue with a certain jitter
controllerContext.Queue().AddAfter(manifestWorkName, wait.Jitter(c.syncInterval, 0.9))
return nil
}
func (c *AvailableStatusController) syncManifestWork(ctx context.Context, originalManifestWork *workapiv1.ManifestWork) error {
manifestWork := originalManifestWork.DeepCopy()
// do nothing when finalizer is not added.
if !commonhelper.HasFinalizer(manifestWork.Finalizers, workapiv1.ManifestWorkFinalizer) {
return nil
}
// wait until work has the applied condition.
if cond := meta.FindStatusCondition(manifestWork.Status.Conditions, workapiv1.WorkApplied); cond == nil {
return nil
}
// handle status condition of manifests
// TODO revist this controller since this might bring races when user change the manifests in spec.
for index, manifest := range manifestWork.Status.ResourceStatus.Manifests {
obj, availableStatusCondition, err := buildAvailableStatusCondition(manifest.ResourceMeta, c.spokeDynamicClient)
manifestConditions := &manifestWork.Status.ResourceStatus.Manifests[index].Conditions
meta.SetStatusCondition(manifestConditions, availableStatusCondition)
if err != nil {
// skip getting status values if resource is not available.
continue
}
option := helper.FindManifestConfiguration(manifest.ResourceMeta, manifestWork.Spec.ManifestConfigs)
// Read status of the resource according to feedback rules.
values, statusFeedbackCondition := c.getFeedbackValues(obj, option)
meta.SetStatusCondition(manifestConditions, statusFeedbackCondition)
manifestWork.Status.ResourceStatus.Manifests[index].StatusFeedbacks.Values = values
// Update manifest conditions according to condition rules
c.evaluateConditionRules(ctx, manifestConditions, obj, option, manifestWork.Generation)
}
// aggregate condtions generated by rules and update work conditions
aggregatedConditions := conditions.AggregateManifestConditions(manifestWork.Generation, manifestWork.Status.ResourceStatus.Manifests)
for _, condition := range aggregatedConditions {
meta.SetStatusCondition(&manifestWork.Status.Conditions, condition)
}
conditions.PruneConditionsGeneratedByConditionRules(&manifestWork.Status.Conditions, manifestWork.Generation)
// no work if the status of manifestwork does not change
if equality.Semantic.DeepEqual(originalManifestWork.Status.ResourceStatus, manifestWork.Status.ResourceStatus) &&
equality.Semantic.DeepEqual(originalManifestWork.Status.Conditions, manifestWork.Status.Conditions) {
return nil
}
// update status of manifestwork. if this conflicts, try again later
_, err := c.patcher.PatchStatus(ctx, manifestWork, manifestWork.Status, originalManifestWork.Status)
return err
}
func (c *AvailableStatusController) getFeedbackValues(
obj *unstructured.Unstructured,
option *workapiv1.ManifestConfigOption) ([]workapiv1.FeedbackValue, metav1.Condition) {
var errs []error
var values []workapiv1.FeedbackValue
if option == nil || len(option.FeedbackRules) == 0 {
return values, metav1.Condition{
Type: statusFeedbackConditionType,
Reason: "NoStatusFeedbackSynced",
Status: metav1.ConditionTrue,
}
}
for _, rule := range option.FeedbackRules {
valuesByRule, err := c.statusReader.GetValuesByRule(obj, rule)
if err != nil {
errs = append(errs, err)
}
if len(valuesByRule) > 0 {
values = append(values, valuesByRule...)
}
}
err := utilerrors.NewAggregate(errs)
if err != nil {
return values, metav1.Condition{
Type: statusFeedbackConditionType,
Reason: "StatusFeedbackSyncFailed",
Status: metav1.ConditionFalse,
Message: fmt.Sprintf("Sync status feedback failed with error %v", err),
}
}
return values, metav1.Condition{
Type: statusFeedbackConditionType,
Reason: "StatusFeedbackSynced",
Status: metav1.ConditionTrue,
}
}
// evaluateConditionRules updates manifestConditions based on configured condition rules for the manifest
func (c *AvailableStatusController) evaluateConditionRules(ctx context.Context,
manifestConditions *[]metav1.Condition, obj *unstructured.Unstructured, option *workapiv1.ManifestConfigOption, generation int64,
) {
// Evaluate rules
var newConditions []metav1.Condition
if option != nil && len(option.ConditionRules) > 0 {
newConditions = c.conditionReader.EvaluateConditions(ctx, obj, option.ConditionRules)
}
// Update manifest conditions with latest condition rule results and observed generation
for _, condition := range newConditions {
condition.ObservedGeneration = generation
meta.SetStatusCondition(manifestConditions, condition)
}
// Remove conditions set by old rules that no longer exist
// Conditions are merged into the existing slice because they are managed in multiple controllers
// (e.g. manifestwork_reconciler adds the "Applied" condition), so we must explicitly
// delete conditions that were created by rules which no longer exist.
conditions.PruneConditionsGeneratedByConditionRules(manifestConditions, generation)
}
// buildAvailableStatusCondition returns a StatusCondition with type Available for a given manifest resource
func buildAvailableStatusCondition(resourceMeta workapiv1.ManifestResourceMeta,
dynamicClient dynamic.Interface) (*unstructured.Unstructured, metav1.Condition, error) {
conditionType := workapiv1.ManifestAvailable
if len(resourceMeta.Resource) == 0 || len(resourceMeta.Version) == 0 || len(resourceMeta.Name) == 0 {
return nil, metav1.Condition{
Type: conditionType,
Status: metav1.ConditionUnknown,
Reason: "IncompletedResourceMeta",
Message: "Resource meta is incompleted",
}, fmt.Errorf("incomplete resource meta")
}
gvr := schema.GroupVersionResource{
Group: resourceMeta.Group,
Version: resourceMeta.Version,
Resource: resourceMeta.Resource,
}
obj, err := dynamicClient.Resource(gvr).Namespace(resourceMeta.Namespace).Get(context.TODO(), resourceMeta.Name, metav1.GetOptions{})
switch {
case errors.IsNotFound(err):
return nil, metav1.Condition{
Type: conditionType,
Status: metav1.ConditionFalse,
Reason: "ResourceNotAvailable",
Message: "Resource is not available",
}, err
case err != nil:
return nil, metav1.Condition{
Type: conditionType,
Status: metav1.ConditionUnknown,
Reason: "FetchingResourceFailed",
Message: fmt.Sprintf("Failed to fetch resource: %v", err),
}, err
}
return obj, metav1.Condition{
Type: conditionType,
Status: metav1.ConditionTrue,
Reason: "ResourceAvailable",
Message: "Resource is available",
}, nil
}