Skip to content

Commit 8a4dc7c

Browse files
authored
fix(hatchery): priority for models registration (#6414)
1 parent 5292853 commit 8a4dc7c

File tree

1 file changed

+40
-14
lines changed

1 file changed

+40
-14
lines changed

sdk/hatchery/register.go

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package hatchery
22

33
import (
44
"context"
5+
"sort"
56
"strings"
67
"sync/atomic"
78

@@ -32,14 +33,39 @@ func workerRegister(ctx context.Context, h InterfaceWithModels, startWorkerChan
3233
log.Error(ctx, "worker pool error: %v", err)
3334
}
3435

36+
ms := make([]sdk.Model, len(models))
37+
copy(ms, models)
38+
39+
// Sort to first register models with NeedRegistration
40+
sort.Slice(ms, func(i, j int) bool {
41+
modelI, modelJ := ms[i], ms[j]
42+
var modelIPriority, modelJPriority int
43+
if modelI.CheckRegistration {
44+
modelIPriority++
45+
}
46+
if modelI.NeedRegistration {
47+
modelIPriority += 2
48+
}
49+
if modelJ.CheckRegistration {
50+
modelJPriority++
51+
}
52+
if modelJ.NeedRegistration {
53+
modelJPriority += 2
54+
}
55+
if modelIPriority == modelJPriority {
56+
return modelI.Name < modelJ.Name
57+
}
58+
return modelIPriority > modelJPriority
59+
})
60+
3561
atomic.StoreInt64(&nbRegisteringWorkerModels, int64(len(currentRegistering)))
3662
loopModels:
37-
for k := range models {
38-
if models[k].Type != h.ModelType() {
63+
for k := range ms {
64+
if ms[k].Type != h.ModelType() {
3965
continue
4066
}
41-
if h.CanSpawn(ctx, &models[k], 0, nil) && (h.NeedRegistration(ctx, &models[k]) || models[k].CheckRegistration) {
42-
log.Debug(ctx, "model %q need to register", models[k].Path())
67+
if h.CanSpawn(ctx, &ms[k], 0, nil) && (h.NeedRegistration(ctx, &ms[k]) || ms[k].CheckRegistration) {
68+
log.Debug(ctx, "model %q need to register", ms[k].Path())
4369
} else {
4470
continue
4571
}
@@ -60,40 +86,40 @@ loopModels:
6086

6187
// Check if there is a pending registering worker
6288
for _, w := range currentRegistering {
63-
if strings.Contains(w.Name, models[k].Name) {
64-
log.Info(ctx, "model %q is already registering (%s)", models[k].Name, w.Name)
89+
if strings.Contains(w.Name, ms[k].Name) {
90+
log.Info(ctx, "model %q is already registering (%s)", ms[k].Name, w.Name)
6591
continue loopModels
6692
}
6793
}
6894

6995
// if current hatchery is in same group than worker model -> do not avoid spawn, even if worker model is in error
70-
if models[k].NbSpawnErr > 5 {
71-
log.Warn(ctx, "Too many errors on spawn with model %s, please check this worker model", models[k].Name)
96+
if ms[k].NbSpawnErr > 5 {
97+
log.Warn(ctx, "Too many errors on spawn with model %s, please check this worker model", ms[k].Name)
7298
continue
7399
}
74100

75-
if err := h.CDSClient().WorkerModelBook(models[k].Group.Name, models[k].Name); err != nil {
101+
if err := h.CDSClient().WorkerModelBook(ms[k].Group.Name, ms[k].Name); err != nil {
76102
ctx := log.ContextWithStackTrace(ctx, err)
77103
if sdk.ErrorIs(err, sdk.ErrWorkerModelAlreadyBooked) {
78-
log.Info(ctx, "worker model already booked. model %s with id %d: %v", models[k].Path(), models[k].ID, err)
104+
log.Info(ctx, "worker model already booked. model %s with id %d: %v", ms[k].Path(), ms[k].ID, err)
79105
} else {
80-
log.Error(ctx, "cannot book model %s with id %d: %v", models[k].Path(), models[k].ID, err)
106+
log.Error(ctx, "cannot book model %s with id %d: %v", ms[k].Path(), ms[k].ID, err)
81107
}
82108
continue
83109
}
84110

85111
log.Info(ctx, "model %q (%d) has been booked and will be spawned for registration", models[k].Name, models[k].ID)
86112

87113
// Interpolate model secrets
88-
if err := ModelInterpolateSecrets(h, &models[k]); err != nil {
114+
if err := ModelInterpolateSecrets(h, &ms[k]); err != nil {
89115
ctx := log.ContextWithStackTrace(ctx, err)
90-
log.Error(ctx, "cannot interpolate secrets for model %s: %v", models[k].Path(), err)
116+
log.Error(ctx, "cannot interpolate secrets for model %s: %v", ms[k].Path(), err)
91117
continue
92118
}
93119

94120
//Ask for the creation
95121
startWorkerChan <- workerStarterRequest{
96-
registerWorkerModel: &models[k],
122+
registerWorkerModel: &ms[k],
97123
ctx: ctx,
98124
cancel: end,
99125
}

0 commit comments

Comments
 (0)