Skip to content

Commit eb50da8

Browse files
committed
more logs for the controller
Signed-off-by: Swati Gupta <[email protected]>
1 parent 5321f9c commit eb50da8

File tree

4 files changed

+28
-3
lines changed

4 files changed

+28
-3
lines changed

cmd/compute-domain-controller/cleanup.go

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ type CleanupManager[T metav1.Object] struct {
2626
}
2727

2828
func NewCleanupManager[T metav1.Object](informer cache.SharedIndexInformer, getComputeDomain GetComputeDomainFunc, callback CleanupCallback[T]) *CleanupManager[T] {
29+
klog.Infof("Creating new Cleanup Manager for %T", *new(T))
2930
return &CleanupManager[T]{
3031
informer: informer,
3132
getComputeDomain: getComputeDomain,
@@ -61,19 +62,25 @@ func (m *CleanupManager[T]) periodicCleanup(ctx context.Context) {
6162
case <-ticker.C:
6263
klog.V(6).Infof("Running periodic sync to remove %T objects owned by stale ComputeDomain", *new(T))
6364
store := m.informer.GetStore()
64-
for _, item := range store.List() {
65+
items := store.List()
66+
klog.V(6).Infof("Found %d items to check for cleanup", len(items))
67+
68+
for _, item := range items {
6569
obj, ok := item.(T)
6670
if !ok {
71+
klog.V(6).Infof("Expected object %T but got %T, skipping..", *new(T), obj)
6772
continue
6873
}
6974

7075
labels := obj.GetLabels()
7176
if labels == nil {
77+
klog.V(6).Infof("Object %T has no labels, skipping..", *new(T))
7278
continue
7379
}
7480

7581
uid, exists := labels[computeDomainLabelKey]
7682
if !exists {
83+
klog.V(6).Infof("Object %T does not have ComputeDomain label, skipping..", *new(T))
7784
continue
7885
}
7986

@@ -84,6 +91,7 @@ func (m *CleanupManager[T]) periodicCleanup(ctx context.Context) {
8491
}
8592

8693
if computeDomain != nil {
94+
klog.V(6).Infof("ComputeDomain with UID %s still exists, skipping cleanup", uid)
8795
continue
8896
}
8997

cmd/compute-domain-controller/controller.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
2525
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/workqueue"
26+
"k8s.io/klog/v2"
2627
)
2728

2829
// ManagerConfig defines the common configuration options shared across all managers.
@@ -57,6 +58,8 @@ func NewController(config *Config) *Controller {
5758
// It initializes the work queue, starts the ComputeDomain manager, and handles
5859
// graceful shutdown when the context is cancelled.
5960
func (c *Controller) Run(ctx context.Context) error {
61+
klog.Info("Starting ComputeDomain Controller")
62+
6063
workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter())
6164

6265
managerConfig := &ManagerConfig{
@@ -78,5 +81,6 @@ func (c *Controller) Run(ctx context.Context) error {
7881
return fmt.Errorf("error stopping ComputeDomain manager: %w", err)
7982
}
8083

84+
klog.Info("ComputeDomain Controller is shutdown")
8185
return nil
8286
}

cmd/compute-domain-controller/daemonset.go

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,7 @@ func NewDaemonSetManager(config *ManagerConfig, getComputeDomain GetComputeDomai
8686

8787
informer := factory.Apps().V1().DaemonSets().Informer()
8888

89+
klog.Infof("Creating new DaemonSetManager for driver %s/%s", config.driverNamespace, config.driverName)
8990
m := &DaemonSetManager{
9091
config: config,
9192
getComputeDomain: getComputeDomain,
@@ -162,7 +163,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, namespace string, cd *nva
162163
return nil, fmt.Errorf("error retrieving DaemonSet: %w", err)
163164
}
164165
if len(ds) > 1 {
165-
return nil, fmt.Errorf("more than one DaemonSet found with same ComputeDomain UID")
166+
return nil, fmt.Errorf("more than one DaemonSet found with same ComputeDomain UID %s", cd.UID)
166167
}
167168
if len(ds) == 1 {
168169
return ds[0], nil
@@ -209,6 +210,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, namespace string, cd *nva
209210
return nil, fmt.Errorf("error creating DaemonSet: %w", err)
210211
}
211212

213+
klog.V(2).Infof("Successfully created DaemonSet %s/%s for ComputeDomain %s/%s", d.Namespace, d.Name, cd.Namespace, cd.Name)
212214
return d, nil
213215
}
214216

@@ -218,9 +220,10 @@ func (m *DaemonSetManager) Delete(ctx context.Context, cdUID string) error {
218220
return fmt.Errorf("error retrieving DaemonSet: %w", err)
219221
}
220222
if len(ds) > 1 {
221-
return fmt.Errorf("more than one DaemonSet found with same ComputeDomain UID")
223+
return fmt.Errorf("more than one DaemonSet found with same ComputeDomain UID %s", cdUID)
222224
}
223225
if len(ds) == 0 {
226+
klog.V(2).Infof("No DaemonSet found for ComputeDomain UID %s, nothing to delete", cdUID)
224227
return nil
225228
}
226229

@@ -231,6 +234,7 @@ func (m *DaemonSetManager) Delete(ctx context.Context, cdUID string) error {
231234
}
232235

233236
if d.GetDeletionTimestamp() != nil {
237+
klog.V(2).Infof("DaemonSet %s/%s is already marked for deletion", d.Namespace, d.Name)
234238
return nil
235239
}
236240

@@ -239,6 +243,7 @@ func (m *DaemonSetManager) Delete(ctx context.Context, cdUID string) error {
239243
return fmt.Errorf("erroring deleting DaemonSet: %w", err)
240244
}
241245

246+
klog.V(2).Infof("Successfully deleted DaemonSet %s/%s for ComputeDomain UID %s", d.Namespace, d.Name, cdUID)
242247
return nil
243248
}
244249

@@ -271,6 +276,7 @@ func (m *DaemonSetManager) removeFinalizer(ctx context.Context, cdUID string) er
271276
return fmt.Errorf("more than one DaemonSet found with same ComputeDomain UID")
272277
}
273278
if len(ds) == 0 {
279+
klog.V(2).Infof("No DaemonSet found for ComputeDomain UID %s, nothing to remove finalizer from", cdUID)
274280
return nil
275281
}
276282

@@ -288,6 +294,7 @@ func (m *DaemonSetManager) removeFinalizer(ctx context.Context, cdUID string) er
288294
}
289295
}
290296
if len(d.Finalizers) == len(newD.Finalizers) {
297+
klog.V(2).Infof("Finalizer %s not found on DaemonSet %s/%s", computeDomainFinalizer, d.Namespace, d.Name)
291298
return nil
292299
}
293300

@@ -322,10 +329,12 @@ func (m *DaemonSetManager) onAddOrUpdate(ctx context.Context, obj any) error {
322329
return fmt.Errorf("error getting ComputeDomain: %w", err)
323330
}
324331
if cd == nil {
332+
klog.V(2).Info("No ComputeDomain found, skipping processing")
325333
return nil
326334
}
327335

328336
if int(d.Status.NumberReady) != cd.Spec.NumNodes {
337+
klog.V(2).Infof("DaemonSet %s/%s has %d ready nodes, expecting %d, waiting for all nodes to be ready", d.Namespace, d.Name, d.Status.NumberReady, cd.Spec.NumNodes)
329338
return nil
330339
}
331340

cmd/compute-domain-controller/indexers.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import (
2222

2323
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2424
"k8s.io/client-go/tools/cache"
25+
"k8s.io/klog/v2"
2526
)
2627

2728
func uidIndexer[T metav1.ObjectMetaAccessor](obj any) ([]string, error) {
@@ -43,6 +44,7 @@ func addComputeDomainLabelIndexer[T metav1.ObjectMetaAccessor](informer cache.Sh
4344
if value, exists := labels[computeDomainLabelKey]; exists {
4445
return []string{value}, nil
4546
}
47+
klog.V(2).Info("No object found with ComputeDomain Label")
4648
return nil, nil
4749
},
4850
})
@@ -58,6 +60,7 @@ func getByComputeDomainUID[T1 *T2, T2 any](ctx context.Context, informer cache.S
5860
return nil, fmt.Errorf("error getting %T via ComputeDomain label: %w", *new(T1), err)
5961
}
6062
if len(objs) == 0 {
63+
klog.V(2).Infof("No object found with ComputeDomain Label with UID %s", cdUID)
6164
return nil, nil
6265
}
6366

@@ -70,5 +73,6 @@ func getByComputeDomainUID[T1 *T2, T2 any](ctx context.Context, informer cache.S
7073
ds = append(ds, d)
7174
}
7275

76+
klog.V(2).Infof("Found %d objects with ComputeDomain Label with UID %s", len(ds), cdUID)
7377
return ds, nil
7478
}

0 commit comments

Comments
 (0)