Skip to content

Commit fee9a79

Browse files
drivebyer and pjuarezd authored
fix: multi controller run concurrently after leadership lost (#2309)
Co-authored-by: Pedro Juarez <[email protected]>
1 parent b28a0b4 commit fee9a79

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

pkg/controller/main-controller.go

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -492,12 +492,12 @@ func leaderRun(ctx context.Context, c *Controller, threadiness int, stopCh <-cha
492492
for {
493493
select {
494494
case oerr := <-notificationChannel:
495-
if !errors.Is(oerr.Err, http.ErrServerClosed) {
495+
if oerr != nil && !errors.Is(oerr.Err, http.ErrServerClosed) {
496496
klog.Errorf("STS API Server stopped: %v, going to restart", oerr.Err)
497497
go c.startSTSAPIServer(ctx, notificationChannel)
498498
}
499499
case err := <-upgradeServerChannel:
500-
if err != http.ErrServerClosed {
500+
if err != nil && !errors.Is(err, http.ErrServerClosed) {
501501
klog.Errorf("Upgrade Server stopped: %v, going to restart", err)
502502
upgradeServerChannel = c.startUpgradeServer()
503503
}
@@ -584,8 +584,24 @@ func (c *Controller) Start(threadiness int, stopCh <-chan struct{}) error {
584584
leaderRun(ctx, c, threadiness, stopCh, notificationChannel)
585585
},
586586
OnStoppedLeading: func() {
587-
// we can do cleanup here
588-
klog.Infof("leader lost: %s", c.podName)
587+
klog.Infof("leader lost, removing any leader labels that I '%s' might have", c.podName)
588+
p := []patchAnnotation{{
589+
Op: "remove",
590+
Path: "/metadata/labels/operator",
591+
}}
592+
593+
payloadBytes, err := json.Marshal(p)
594+
if err != nil {
595+
klog.Errorf("failed to marshal patch: %#v", err)
596+
} else {
597+
c.kubeClientSet.CoreV1().Pods(leaseLockNamespace).Patch(ctx, c.podName, types.JSONPatchType, payloadBytes, metav1.PatchOptions{})
598+
}
599+
// Even if Stop() is called twice, stopping it here ensures the sync handler no longer is handling events,
600+
// in case SIGTERM fails or the controller takes longer to exit.
601+
c.Stop()
602+
if err := syscall.Kill(os.Getpid(), syscall.SIGTERM); err != nil {
603+
klog.Errorf("error sending SIGTERM: %v", err)
604+
}
589605
},
590606
OnNewLeader: func(identity string) {
591607
// we're notified when new leader elected

0 commit comments

Comments
 (0)