-
Notifications
You must be signed in to change notification settings - Fork 845
feat(scheduler): set a ttl on deleted pipelines and experiments #5948
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
717801c
6178ab4
8ed3ba8
8ae331c
6ef4250
f9837c8
98096e5
b31caad
869accf
099fcab
1906589
078e757
4de0b79
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,22 +14,25 @@ import ( | |
"os" | ||
"path/filepath" | ||
"sync" | ||
"time" | ||
|
||
"github.com/mitchellh/copystructure" | ||
"github.com/sirupsen/logrus" | ||
|
||
"github.com/seldonio/seldon-core/scheduler/v2/pkg/coordinator" | ||
"github.com/seldonio/seldon-core/scheduler/v2/pkg/store" | ||
"github.com/seldonio/seldon-core/scheduler/v2/pkg/store/pipeline" | ||
"github.com/seldonio/seldon-core/scheduler/v2/pkg/store/utils" | ||
) | ||
|
||
const ( | ||
pendingSyncsQueueSize int = 1000 | ||
experimentStartEventSource = "experiment.store.start" | ||
experimentStopEventSource = "experiment.store.stop" | ||
modelEventHandlerName = "experiment.store.models" | ||
pipelineEventHandlerName = "experiment.store.pipelines" | ||
experimentDbFolder = "experimentdb" | ||
deletedExperimentTTL time.Duration = time.Duration(time.Hour * 24) | ||
pendingSyncsQueueSize int = 1000 | ||
experimentStartEventSource = "experiment.store.start" | ||
experimentStopEventSource = "experiment.store.stop" | ||
modelEventHandlerName = "experiment.store.models" | ||
pipelineEventHandlerName = "experiment.store.pipelines" | ||
experimentDbFolder = "experimentdb" | ||
) | ||
|
||
type ExperimentServer interface { | ||
|
@@ -57,7 +60,6 @@ type ExperimentStore struct { | |
} | ||
|
||
func NewExperimentServer(logger logrus.FieldLogger, eventHub *coordinator.EventHub, store store.ModelStore, pipelineStore pipeline.PipelineHandler) *ExperimentStore { | ||
|
||
es := &ExperimentStore{ | ||
logger: logger.WithField("source", "experimentServer"), | ||
experiments: make(map[string]*Experiment), | ||
|
@@ -95,7 +97,7 @@ func getExperimentDbFolder(basePath string) string { | |
// we just add a reference to the experiment in the memory store | ||
// so that we can keep track of it in case we need to replay the event (to the controller) | ||
// we do not trigger an event though as envoy has a clean state when the scheduler restarts | ||
func (es *ExperimentStore) AddExperimentInMap(experiment *Experiment) error { | ||
func (es *ExperimentStore) addExperimentInMap(experiment *Experiment) error { | ||
es.mu.Lock() | ||
defer es.mu.Unlock() | ||
if _, ok := es.experiments[experiment.Name]; !ok { | ||
|
@@ -120,10 +122,18 @@ func (es *ExperimentStore) InitialiseOrRestoreDB(path string) error { | |
} | ||
es.db = db | ||
// If database already existed we can restore else this is a noop | ||
err = es.db.restore(es.StartExperiment, es.AddExperimentInMap) | ||
err = es.db.restore(es.StartExperiment, es.addExperimentInMap) | ||
if err != nil { | ||
return err | ||
} | ||
go func() { | ||
ticker := time.NewTicker(utils.DeletedResourceCleanupFrequency) | ||
defer ticker.Stop() | ||
Review comment: ditto |
||
for range ticker.C { | ||
es.cleanupDeletedExperiments() | ||
} | ||
}() | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -302,7 +312,7 @@ func (es *ExperimentStore) setStatusImpl(experimentName string, active bool, rea | |
if experiment, ok := es.experiments[experimentName]; !ok { | ||
return nil, &ExperimentNotFound{experimentName: experimentName} | ||
} else { | ||
if !experiment.Deleted || !active { //can't reactivate a deleted experiment | ||
if !experiment.Deleted || !active { // can't reactivate a deleted experiment | ||
currentActive := experiment.Active | ||
experiment.Active = active | ||
experiment.StatusDescription = reason | ||
|
@@ -321,7 +331,7 @@ func (es *ExperimentStore) StartExperiment(experiment *Experiment) error { | |
} | ||
if es.eventHub != nil { | ||
if modelEvt != nil { | ||
es.eventHub.PublishModelEvent(experimentStateEventSource, *modelEvt) | ||
es.eventHub.PublishModelEvent(experimentStartEventSource, *modelEvt) | ||
sakoush marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
if pipelineEvt != nil { | ||
es.eventHub.PublishPipelineEvent(experimentStartEventSource, *pipelineEvt) | ||
|
@@ -356,9 +366,8 @@ func (es *ExperimentStore) startExperimentImpl(experiment *Experiment) (*coordin | |
ModelName: *resourceName, | ||
} | ||
default: | ||
return nil, nil, nil, fmt.Errorf("Unknown resource type %v", experiment.ResourceType) | ||
return nil, nil, nil, fmt.Errorf("unknown resource type %v", experiment.ResourceType) | ||
} | ||
|
||
} | ||
es.updateExperimentState(experiment) | ||
if es.db != nil { | ||
|
@@ -381,7 +390,7 @@ func (es *ExperimentStore) StopExperiment(experimentName string) error { | |
es.eventHub.PublishModelEvent(experimentStopEventSource, *modelEvt) | ||
} | ||
if pipelineEvt != nil { | ||
es.eventHub.PublishPipelineEvent(experimentStartEventSource, *pipelineEvt) | ||
es.eventHub.PublishPipelineEvent(experimentStopEventSource, *pipelineEvt) | ||
} | ||
if expEvt != nil { | ||
es.eventHub.PublishExperimentEvent(experimentStopEventSource, *expEvt) | ||
|
@@ -399,6 +408,7 @@ func (es *ExperimentStore) stopExperimentImpl(experimentName string) (*coordinat | |
var modelEvt *coordinator.ModelEventMsg | ||
var pipelineEvt *coordinator.PipelineEventMsg | ||
experiment.Deleted = true | ||
experiment.DeletedAt = time.Now() | ||
experiment.Active = false | ||
es.cleanExperimentState(experiment) | ||
if experiment.Default != nil { | ||
|
@@ -413,7 +423,7 @@ func (es *ExperimentStore) stopExperimentImpl(experimentName string) (*coordinat | |
ModelName: *experiment.Default, | ||
} | ||
default: | ||
return nil, nil, nil, fmt.Errorf("Unknown resource type %v", experiment.ResourceType) | ||
return nil, nil, nil, fmt.Errorf("unknown resource type %v", experiment.ResourceType) | ||
} | ||
} | ||
if es.db != nil { | ||
|
@@ -461,3 +471,22 @@ func (es *ExperimentStore) GetExperiments() ([]*Experiment, error) { | |
} | ||
return foundExperiments, nil | ||
} | ||
|
||
func (es *ExperimentStore) cleanupDeletedExperiments() { | ||
es.logger.Info("cleaning up deleted experiments") | ||
for _, experiment := range es.experiments { | ||
if experiment.Deleted { | ||
es.mu.Lock() | ||
defer es.mu.Unlock() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the lock should be done once before the for loop? otherwise using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as mentioned above I think that this logic can also delete from badger db and not rely on ttl as well. just to make it simpler? |
||
if experiment.DeletedAt.IsZero() { | ||
experiment.DeletedAt = time.Now() | ||
err := es.db.save(experiment) | ||
if err != nil { | ||
es.logger.Warnf("could not update DB TTL for experiment: %s", experiment.Name) | ||
} | ||
} else if experiment.DeletedAt.Add(utils.DeletedResourceTTL).Before(time.Now()) { | ||
es.experiments[experiment.Name] = nil | ||
driev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.