Skip to content

Commit 0b9ee7d

Browse files
author
Sherif Akoush
authored
fix: Handle unload too quick after load (#5504)
* fix note * add ability to check loading state * change func name to be more reflective
1 parent 3ce7029 commit 0b9ee7d

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

scheduler/pkg/scheduler/scheduler.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ func (s *SimpleScheduler) scheduleToServer(modelName string) error {
119119
}
120120

121121
if model.Deleted {
122-
// we need to LoadedModels anyway:
122+
// we need to call UpdateLoadedModels anyway:
123123
// - in the case where we are deleting a model that doesn't have a server (FailedSchedule), server is ""
124124
// - otherwise proceed as normal
125125
server := ""
@@ -202,8 +202,12 @@ func (s *SimpleScheduler) scheduleToServer(modelName string) error {
202202
if !ok {
203203
msg := "Failed to schedule model as no matching server had enough suitable replicas"
204204
logger.Debug(msg)
205-
// we do not want to reset the server if it has live replicas
206-
s.store.FailedScheduling(latestModel, msg, !latestModel.HasLiveReplicas())
205+
// we do not want to reset the server if it has live replicas or loading replicas
206+
// in the case of loading replicas, we need to make sure that we can unload them later.
207+
// for example in the case that a model is just marked as loading on a particular server replica
208+
// then it gets a delete request (before it is marked as loaded or available) we need to make sure
209+
// that we can unload it from the server
210+
s.store.FailedScheduling(latestModel, msg, !latestModel.HasLiveReplicas() && !latestModel.IsLoadingOrLoadedOnServer())
207211
return errors.New(msg)
208212
}
209213

scheduler/pkg/store/mesh.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,17 @@ func (m *ModelVersion) IsLoadingOrLoaded(server string, replicaIdx int) bool {
488488
return false
489489
}
490490

491+
// IsLoadingOrLoadedOnServer reports whether at least one entry in m.replicas
// has a State for which AlreadyLoadingOrLoaded() returns true. It returns
// false when no replica matches (including when there are no replicas).
// The read lock on m.mu is held for the duration of the scan.
func (m *ModelVersion) IsLoadingOrLoadedOnServer() bool {
492+
m.mu.RLock()
493+
defer m.mu.RUnlock()
494+
for _, v := range m.replicas {
495+
if v.State.AlreadyLoadingOrLoaded() {
496+
return true
497+
}
498+
}
499+
return false
500+
}
501+
491502
func (m *ModelVersion) HasLiveReplicas() bool {
492503
m.mu.RLock()
493504
defer m.mu.RUnlock()

0 commit comments

Comments
 (0)