Skip to content

Commit 850eeae

Browse files
authored
feat(env): parameters exposed as env variables (#5875)
* feat(env): parameters exposed as env variables
* fix(typo): typo updates
1 parent 0812c51 commit 850eeae

File tree

4 files changed

+288
-152
lines changed

4 files changed

+288
-152
lines changed

scheduler/cmd/agent/cli/cli.go

Lines changed: 192 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -20,106 +20,134 @@ import (
2020
)
2121

2222
const (
23-
envServerHttpPort = "SELDON_SERVER_HTTP_PORT"
24-
envServerGrpcPort = "SELDON_SERVER_GRPC_PORT"
25-
envReverseProxyHttpPort = "SELDON_REVERSE_PROXY_HTTP_PORT"
26-
envReverseProxyGrpcPort = "SELDON_REVERSE_PROXY_GRPC_PORT"
27-
envDebugGrpcPort = "SELDON_DEBUG_GRPC_PORT"
28-
envMetricsPort = "SELDON_METRICS_PORT"
29-
envPodName = "POD_NAME"
30-
envSchedulerHost = "SELDON_SCHEDULER_HOST"
31-
envSchedulerPort = "SELDON_SCHEDULER_PORT"
32-
envSchedulerTlsPort = "SELDON_SCHEDULER_TLS_PORT"
33-
envReplicaConfig = "SELDON_REPLICA_CONFIG"
34-
envLogLevel = "SELDON_LOG_LEVEL"
35-
envServerType = "SELDON_SERVER_TYPE"
36-
envMemoryRequest = "MEMORY_REQUEST"
37-
envCapabilities = "SELDON_SERVER_CAPABILITIES"
38-
envOverCommitPercentage = "SELDON_OVERCOMMIT_PERCENTAGE"
39-
envEnvoyHost = "SELDON_ENVOY_HOST"
40-
envEnvoyPort = "SELDON_ENVOY_PORT"
41-
envDrainerServicePort = "SELDON_DRAINER_PORT"
42-
envModelInferenceLagThreshold = "SELDON_MODEL_INFERENCE_LAG_THRESHOLD"
43-
envModelInactiveSecondsThreshold = "SELDON_MODEL_INACTIVE_SECONDS_THRESHOLD"
44-
envScalingStatsPeriodSeconds = "SELDON_SCALING_STATS_PERIOD_SECONDS"
45-
46-
flagSchedulerHost = "scheduler-host"
47-
flagSchedulerPlaintxtPort = "scheduler-port"
48-
flagSchedulerTlsPort = "scheduler-tls-port"
49-
flagServerName = "server-name"
50-
flagServerIdx = "server-idx"
51-
flagInferenceHttpPort = "inference-http-port"
52-
flagInferenceGrpcPort = "inference-grpc-port"
53-
flagReverseProxyHttpPort = "reverse-proxy-http-port"
54-
flagReverseProxyGrpcPort = "reverse-proxy-grpc-port"
55-
flagDebugGrpcPort = "debug-grpc-port"
56-
flagMetricsPort = "metrics-port"
57-
flagReplicaConfig = "replica-config"
58-
flagLogLevel = "log-level"
59-
flagServerType = "server-type"
60-
flagMemoryBytes = "memory-bytes"
61-
flagCapabilities = "capabilities"
62-
flagOverCommitPercentage = "over-commit-percentage"
63-
flagTracingConfigPath = "tracing-config-path"
64-
flagEnvoyHost = "envoy-host"
65-
flagEnvoyPort = "envoy-port"
66-
flagDrainerServicePort = "drainer-port"
67-
flagModelInferenceLagThreshold = "model-inference-lag-threshold"
68-
flagModelInactiveSecondsThreshold = "model-inactive-seconds-threshold"
69-
flagScalingStatsPeriodSeconds = "scaling-stats-period-seconds"
23+
envServerHttpPort = "SELDON_SERVER_HTTP_PORT"
24+
envServerGrpcPort = "SELDON_SERVER_GRPC_PORT"
25+
envReverseProxyHttpPort = "SELDON_REVERSE_PROXY_HTTP_PORT"
26+
envReverseProxyGrpcPort = "SELDON_REVERSE_PROXY_GRPC_PORT"
27+
envDebugGrpcPort = "SELDON_DEBUG_GRPC_PORT"
28+
envMetricsPort = "SELDON_METRICS_PORT"
29+
envPodName = "POD_NAME"
30+
envSchedulerHost = "SELDON_SCHEDULER_HOST"
31+
envSchedulerPort = "SELDON_SCHEDULER_PORT"
32+
envSchedulerTlsPort = "SELDON_SCHEDULER_TLS_PORT"
33+
envReplicaConfig = "SELDON_REPLICA_CONFIG"
34+
envLogLevel = "SELDON_LOG_LEVEL"
35+
envServerType = "SELDON_SERVER_TYPE"
36+
envMemoryRequest = "MEMORY_REQUEST"
37+
envCapabilities = "SELDON_SERVER_CAPABILITIES"
38+
envOverCommitPercentage = "SELDON_OVERCOMMIT_PERCENTAGE"
39+
envEnvoyHost = "SELDON_ENVOY_HOST"
40+
envEnvoyPort = "SELDON_ENVOY_PORT"
41+
envDrainerServicePort = "SELDON_DRAINER_PORT"
42+
envModelInferenceLagThreshold = "SELDON_MODEL_INFERENCE_LAG_THRESHOLD"
43+
envModelInactiveSecondsThreshold = "SELDON_MODEL_INACTIVE_SECONDS_THRESHOLD"
44+
envScalingStatsPeriodSeconds = "SELDON_SCALING_STATS_PERIOD_SECONDS"
45+
envMaxElapsedTimeReadySubServiceAfterStartSeconds = "SELDON_MAX_TIME_READY_SUB_SERVICE_AFTER_START_SECONDS"
46+
envMaxElapsedTimeReadySubServiceBeforeStartMinutes = "SELDON_MAX_ELAPSED_TIME_READY_SUB_SERVICE_BEFORE_START_MINUTES"
47+
envPeriodReadySubServiceSeconds = "SELDON_PERIOD_READY_SUB_SERVICE_SECONDS"
48+
envMaxLoadElapsedTimeMinutes = "SELDON_MAX_LOAD_ELAPSED_TIME_MINUTES"
49+
envMaxUnloadElapsedTimeMinutes = "SELDON_MAX_UNLOAD_ELAPSED_TIME_MINUTES"
50+
envMaxLoadRetryCount = "SELDON_MAX_LOAD_RETRY_COUNT"
51+
envMaxUnloadRetryCount = "SELDON_MAX_UNLOAD_RETRY_COUNT"
52+
53+
flagSchedulerHost = "scheduler-host"
54+
flagSchedulerPlaintxtPort = "scheduler-port"
55+
flagSchedulerTlsPort = "scheduler-tls-port"
56+
flagServerName = "server-name"
57+
flagServerIdx = "server-idx"
58+
flagInferenceHttpPort = "inference-http-port"
59+
flagInferenceGrpcPort = "inference-grpc-port"
60+
flagReverseProxyHttpPort = "reverse-proxy-http-port"
61+
flagReverseProxyGrpcPort = "reverse-proxy-grpc-port"
62+
flagDebugGrpcPort = "debug-grpc-port"
63+
flagMetricsPort = "metrics-port"
64+
flagReplicaConfig = "replica-config"
65+
flagLogLevel = "log-level"
66+
flagServerType = "server-type"
67+
flagMemoryBytes = "memory-bytes"
68+
flagCapabilities = "capabilities"
69+
flagOverCommitPercentage = "over-commit-percentage"
70+
flagTracingConfigPath = "tracing-config-path"
71+
flagEnvoyHost = "envoy-host"
72+
flagEnvoyPort = "envoy-port"
73+
flagDrainerServicePort = "drainer-port"
74+
flagModelInferenceLagThreshold = "model-inference-lag-threshold"
75+
flagModelInactiveSecondsThreshold = "model-inactive-seconds-threshold"
76+
flagScalingStatsPeriodSeconds = "scaling-stats-period-seconds"
77+
flagMaxElapsedTimeReadySubServiceAfterStartSeconds = "max-elapsed-time-ready-sub-service-after-start-seconds"
78+
flagMaxElapsedTimeReadySubServiceBeforeStartMinutes = "max-elapsed-time-ready-sub-service-before-start-minutes"
79+
flagPeriodReadySubServiceSeconds = "period-ready-sub-service-seconds"
80+
flagMaxLoadElapsedTimeMinutes = "max-load-elapsed-time-minutes"
81+
flagMaxUnloadElapsedTimeMinutes = "max-unload-elapsed-time-minutes"
82+
flagMaxLoadRetryCount = "max-load-retry-count"
83+
flagMaxUnloadRetryCount = "max-unload-retry-count"
7084
)
7185

7286
const (
73-
defaultInferenceHttpPort = 8080
74-
defaultInferenceGrpcPort = 9500
75-
defaultRclonePort = 5572
76-
defaultSchedulerPort = 9005
77-
defaultSchedulerTlsPort = 9055
78-
defaultMetricsPort = 9006
79-
defaultEnvoyHost = "0.0.0.0"
80-
defaultEnvoyPort = 9000
81-
defaultDrainerServicePort = 9007
82-
statsPeriodSecondsDefault = 5
83-
lagThresholdDefault = 30
84-
lastUsedThresholdSecondsDefault = 30
87+
defaultInferenceHttpPort = 8080
88+
defaultInferenceGrpcPort = 9500
89+
defaultRclonePort = 5572
90+
defaultSchedulerPort = 9005
91+
defaultSchedulerTlsPort = 9055
92+
defaultMetricsPort = 9006
93+
defaultEnvoyHost = "0.0.0.0"
94+
defaultEnvoyPort = 9000
95+
defaultDrainerServicePort = 9007
96+
statsPeriodSecondsDefault = 5
97+
lagThresholdDefault = 30
98+
lastUsedThresholdSecondsDefault = 30
99+
defaultMaxElapsedTimeReadySubServiceAfterStartSeconds = 30
100+
defaultMaxElapsedTimeReadySubServiceBeforeStartMinutes = 15
101+
defaultPeriodReadySubServiceSeconds = 60
102+
defaultMaxLoadElapsedTimeMinute = 120
103+
defaultMaxUnloadElapsedTimeMinute = 15
104+
defaultMaxLoadRetryCount = 5
105+
defaultMaxUnloadRetryCount = 1
85106
)
86107

87108
var (
88-
agentHost string
89-
ServerName string
90-
ReplicaIdx uint
91-
SchedulerHost string
92-
SchedulerPort int
93-
SchedulerTlsPort int
94-
RcloneHost string
95-
RclonePort int
96-
InferenceHost string
97-
InferenceHttpPort int
98-
InferenceGrpcPort int
99-
ReverseProxyHttpPort int
100-
ReverseProxyGrpcPort int
101-
DebugGrpcPort int
102-
MetricsPort int
103-
AgentFolder string
104-
Namespace string
105-
ReplicaConfigStr string
106-
InferenceSvcName string
107-
ConfigPath string
108-
LogLevel string
109-
ServerType string
110-
memoryBytes int
111-
MemoryBytes64 uint64
112-
capabilitiesList string
113-
Capabilities []string
114-
OverCommitPercentage int
115-
serverTypes = [...]string{"mlserver", "triton"}
116-
TracingConfigPath string
117-
EnvoyHost string
118-
EnvoyPort int
119-
DrainerServicePort int
120-
ModelInferenceLagThreshold int
121-
ModelInactiveSecondsThreshold int
122-
ScalingStatsPeriodSeconds int
109+
agentHost string
110+
ServerName string
111+
ReplicaIdx uint
112+
SchedulerHost string
113+
SchedulerPort int
114+
SchedulerTlsPort int
115+
RcloneHost string
116+
RclonePort int
117+
InferenceHost string
118+
InferenceHttpPort int
119+
InferenceGrpcPort int
120+
ReverseProxyHttpPort int
121+
ReverseProxyGrpcPort int
122+
DebugGrpcPort int
123+
MetricsPort int
124+
AgentFolder string
125+
Namespace string
126+
ReplicaConfigStr string
127+
InferenceSvcName string
128+
ConfigPath string
129+
LogLevel string
130+
ServerType string
131+
memoryBytes int
132+
MemoryBytes64 uint64
133+
capabilitiesList string
134+
Capabilities []string
135+
OverCommitPercentage int
136+
serverTypes = [...]string{"mlserver", "triton"}
137+
TracingConfigPath string
138+
EnvoyHost string
139+
EnvoyPort int
140+
DrainerServicePort int
141+
ModelInferenceLagThreshold int
142+
ModelInactiveSecondsThreshold int
143+
ScalingStatsPeriodSeconds int
144+
MaxElapsedTimeReadySubServiceAfterStartSeconds int
145+
MaxElapsedTimeReadySubServiceBeforeStartMinutes int
146+
PeriodReadySubServiceSeconds int
147+
MaxLoadElapsedTimeMinute int
148+
MaxUnloadElapsedTimeMinute int
149+
MaxLoadRetryCount int
150+
MaxUnloadRetryCount int
123151
)
124152

125153
func init() {
@@ -156,6 +184,13 @@ func updateFlagsFromEnv() {
156184
maybeUpdateModelInferenceLagThreshold()
157185
maybeUpdateModelInactiveSecondsThreshold()
158186
maybeUpdateScalingStatsPeriodSeconds()
187+
maybeMaxElapsedTimeReadySubServiceAfterStartSeconds()
188+
maybeMaxElapsedTimeReadySubServiceBeforeStartMinutes()
189+
maybePeriodReadySubServiceSeconds()
190+
maybeMaxLoadElapsedTimeMinute()
191+
maybeMaxUnloadElapsedTimeMinute()
192+
maybeMaxLoadRetryCount()
193+
maybeMaxUnloadRetryCount()
159194
}
160195

161196
func maybeUpdateModelInferenceLagThreshold() {
@@ -338,6 +373,69 @@ func maybeUpdateMetricsPort() {
338373
maybeUpdatePort(flagMetricsPort, envMetricsPort, &MetricsPort)
339374
}
340375

376+
// maybeMaxElapsedTimeReadySubServiceAfterStartSeconds hands the flag name, the
// environment variable name, a pointer to the package-level
// MaxElapsedTimeReadySubServiceAfterStartSeconds variable and a short label to
// maybeUpdateFromIntEnv.
//
// Note that the environment variable is
// SELDON_MAX_TIME_READY_SUB_SERVICE_AFTER_START_SECONDS: unlike the flag name
// and the sibling "before start" variable it does not contain "ELAPSED".
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybeMaxElapsedTimeReadySubServiceAfterStartSeconds() {
377+
maybeUpdateFromIntEnv(
378+
flagMaxElapsedTimeReadySubServiceAfterStartSeconds,
379+
envMaxElapsedTimeReadySubServiceAfterStartSeconds,
380+
&MaxElapsedTimeReadySubServiceAfterStartSeconds,
381+
"sub service after start seconds",
382+
)
383+
}
384+
385+
// maybeMaxElapsedTimeReadySubServiceBeforeStartMinutes hands the flag name, the
// environment variable name
// (SELDON_MAX_ELAPSED_TIME_READY_SUB_SERVICE_BEFORE_START_MINUTES), a pointer
// to the package-level MaxElapsedTimeReadySubServiceBeforeStartMinutes
// variable and a short label to maybeUpdateFromIntEnv.
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybeMaxElapsedTimeReadySubServiceBeforeStartMinutes() {
386+
maybeUpdateFromIntEnv(
387+
flagMaxElapsedTimeReadySubServiceBeforeStartMinutes,
388+
envMaxElapsedTimeReadySubServiceBeforeStartMinutes,
389+
&MaxElapsedTimeReadySubServiceBeforeStartMinutes,
390+
"sub service before start minutes",
391+
)
392+
}
393+
394+
// maybePeriodReadySubServiceSeconds hands the flag name, the environment
// variable name (SELDON_PERIOD_READY_SUB_SERVICE_SECONDS), a pointer to the
// package-level PeriodReadySubServiceSeconds variable and a short label to
// maybeUpdateFromIntEnv.
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybePeriodReadySubServiceSeconds() {
395+
maybeUpdateFromIntEnv(
396+
flagPeriodReadySubServiceSeconds,
397+
envPeriodReadySubServiceSeconds,
398+
&PeriodReadySubServiceSeconds,
399+
"period ready sub service seconds",
400+
)
401+
}
402+
403+
// maybeMaxLoadElapsedTimeMinute hands the flag name, the environment variable
// name (SELDON_MAX_LOAD_ELAPSED_TIME_MINUTES), a pointer to the package-level
// MaxLoadElapsedTimeMinute variable and a short label to
// maybeUpdateFromIntEnv.
//
// Naming note: this function and the variable use the singular "Minute",
// while the flag and env constants use the plural "Minutes".
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybeMaxLoadElapsedTimeMinute() {
404+
maybeUpdateFromIntEnv(
405+
flagMaxLoadElapsedTimeMinutes,
406+
envMaxLoadElapsedTimeMinutes,
407+
&MaxLoadElapsedTimeMinute,
408+
"max load elapsed time minutes",
409+
)
410+
}
411+
412+
// maybeMaxUnloadElapsedTimeMinute hands the flag name, the environment
// variable name (SELDON_MAX_UNLOAD_ELAPSED_TIME_MINUTES), a pointer to the
// package-level MaxUnloadElapsedTimeMinute variable and a short label to
// maybeUpdateFromIntEnv.
//
// Naming note: this function and the variable use the singular "Minute",
// while the flag and env constants use the plural "Minutes".
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybeMaxUnloadElapsedTimeMinute() {
413+
maybeUpdateFromIntEnv(
414+
flagMaxUnloadElapsedTimeMinutes,
415+
envMaxUnloadElapsedTimeMinutes,
416+
&MaxUnloadElapsedTimeMinute,
417+
"max unload elapsed time minutes",
418+
)
419+
}
420+
421+
// maybeMaxLoadRetryCount hands the flag name, the environment variable name
// (SELDON_MAX_LOAD_RETRY_COUNT), a pointer to the package-level
// MaxLoadRetryCount variable and a short label to maybeUpdateFromIntEnv.
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybeMaxLoadRetryCount() {
422+
maybeUpdateFromIntEnv(
423+
flagMaxLoadRetryCount,
424+
envMaxLoadRetryCount,
425+
&MaxLoadRetryCount,
426+
"max load retry count",
427+
)
428+
}
429+
430+
// maybeMaxUnloadRetryCount hands the flag name, the environment variable name
// (SELDON_MAX_UNLOAD_RETRY_COUNT), a pointer to the package-level
// MaxUnloadRetryCount variable and a short label to maybeUpdateFromIntEnv.
//
// NOTE(review): maybeUpdateFromIntEnv is defined outside this view; presumably
// it overwrites the variable from the environment only when the flag was not
// passed on the command line — confirm against the helper.
func maybeMaxUnloadRetryCount() {
431+
maybeUpdateFromIntEnv(
432+
flagMaxUnloadRetryCount,
433+
envMaxUnloadRetryCount,
434+
&MaxUnloadRetryCount,
435+
"max unload retry count",
436+
)
437+
}
438+
341439
func maybeUpdateSchedulerHost() {
342440
if isFlagPassed(flagSchedulerHost) {
343441
return

scheduler/cmd/agent/cli/flags.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ func makeArgs() {
5151
flag.IntVar(&ModelInferenceLagThreshold, flagModelInferenceLagThreshold, lagThresholdDefault, "Model inference lag threshold")
5252
flag.IntVar(&ModelInactiveSecondsThreshold, flagModelInactiveSecondsThreshold, lastUsedThresholdSecondsDefault, "Model inactive seconds threshold")
5353
flag.IntVar(&ScalingStatsPeriodSeconds, flagScalingStatsPeriodSeconds, statsPeriodSecondsDefault, "Scaling stats period seconds")
54+
flag.IntVar(&MaxElapsedTimeReadySubServiceAfterStartSeconds, flagMaxElapsedTimeReadySubServiceAfterStartSeconds, defaultMaxElapsedTimeReadySubServiceAfterStartSeconds, "Ready sub service after start seconds")
55+
flag.IntVar(&MaxElapsedTimeReadySubServiceBeforeStartMinutes, flagMaxElapsedTimeReadySubServiceBeforeStartMinutes, defaultMaxElapsedTimeReadySubServiceBeforeStartMinutes, "Max elapsed time sub service before start minutes")
56+
flag.IntVar(&PeriodReadySubServiceSeconds, flagPeriodReadySubServiceSeconds, defaultPeriodReadySubServiceSeconds, "Period in seconds for subservice ready \"cron\"")
57+
flag.IntVar(&MaxLoadElapsedTimeMinute, flagMaxLoadElapsedTimeMinutes, defaultMaxLoadElapsedTimeMinute, "Max time in minutes to wait for a model server to load a model, including retries")
58+
flag.IntVar(&MaxUnloadElapsedTimeMinute, flagMaxUnloadElapsedTimeMinutes, defaultMaxUnloadElapsedTimeMinute, "Max time in minutes to wait for a model server to unload a model, including retries")
59+
flag.IntVar(&MaxLoadRetryCount, flagMaxLoadRetryCount, defaultMaxLoadRetryCount, "Number of retries for loading a model onto a server")
60+
flag.IntVar(&MaxUnloadRetryCount, flagMaxUnloadRetryCount, defaultMaxUnloadRetryCount, "Number of retries for unloading a model onto a server")
5461
}
5562

5663
func parseFlags() {

0 commit comments

Comments
 (0)