@@ -20,106 +20,134 @@ import (
20
20
)
21
21
22
22
// Environment variable names (env* constants). Several of them are handed,
// together with the matching flag* name, to helpers such as maybeUpdatePort
// and maybeUpdateFromIntEnv elsewhere in this file.
const (
	envServerHttpPort                = "SELDON_SERVER_HTTP_PORT"
	envServerGrpcPort                = "SELDON_SERVER_GRPC_PORT"
	envReverseProxyHttpPort          = "SELDON_REVERSE_PROXY_HTTP_PORT"
	envReverseProxyGrpcPort          = "SELDON_REVERSE_PROXY_GRPC_PORT"
	envDebugGrpcPort                 = "SELDON_DEBUG_GRPC_PORT"
	envMetricsPort                   = "SELDON_METRICS_PORT"
	envPodName                       = "POD_NAME"
	envSchedulerHost                 = "SELDON_SCHEDULER_HOST"
	envSchedulerPort                 = "SELDON_SCHEDULER_PORT"
	envSchedulerTlsPort              = "SELDON_SCHEDULER_TLS_PORT"
	envReplicaConfig                 = "SELDON_REPLICA_CONFIG"
	envLogLevel                      = "SELDON_LOG_LEVEL"
	envServerType                    = "SELDON_SERVER_TYPE"
	envMemoryRequest                 = "MEMORY_REQUEST"
	envCapabilities                  = "SELDON_SERVER_CAPABILITIES"
	envOverCommitPercentage          = "SELDON_OVERCOMMIT_PERCENTAGE"
	envEnvoyHost                     = "SELDON_ENVOY_HOST"
	envEnvoyPort                     = "SELDON_ENVOY_PORT"
	envDrainerServicePort            = "SELDON_DRAINER_PORT"
	envModelInferenceLagThreshold    = "SELDON_MODEL_INFERENCE_LAG_THRESHOLD"
	envModelInactiveSecondsThreshold = "SELDON_MODEL_INACTIVE_SECONDS_THRESHOLD"
	envScalingStatsPeriodSeconds     = "SELDON_SCALING_STATS_PERIOD_SECONDS"

	// NOTE(review): the value below has no ELAPSED segment, unlike the
	// identifier and the sibling BEFORE_START constant. It is kept verbatim
	// because changing the string would change which variable is consulted.
	envMaxElapsedTimeReadySubServiceAfterStartSeconds  = "SELDON_MAX_TIME_READY_SUB_SERVICE_AFTER_START_SECONDS"
	envMaxElapsedTimeReadySubServiceBeforeStartMinutes = "SELDON_MAX_ELAPSED_TIME_READY_SUB_SERVICE_BEFORE_START_MINUTES"
	envPeriodReadySubServiceSeconds                    = "SELDON_PERIOD_READY_SUB_SERVICE_SECONDS"
	envMaxLoadElapsedTimeMinutes                       = "SELDON_MAX_LOAD_ELAPSED_TIME_MINUTES"
	envMaxUnloadElapsedTimeMinutes                     = "SELDON_MAX_UNLOAD_ELAPSED_TIME_MINUTES"
	envMaxLoadRetryCount                               = "SELDON_MAX_LOAD_RETRY_COUNT"
	envMaxUnloadRetryCount                             = "SELDON_MAX_UNLOAD_RETRY_COUNT"
)

// Command-line flag names (flag* constants), the counterparts of the env*
// names above.
const (
	flagSchedulerHost                 = "scheduler-host"
	flagSchedulerPlaintxtPort         = "scheduler-port"
	flagSchedulerTlsPort              = "scheduler-tls-port"
	flagServerName                    = "server-name"
	flagServerIdx                     = "server-idx"
	flagInferenceHttpPort             = "inference-http-port"
	flagInferenceGrpcPort             = "inference-grpc-port"
	flagReverseProxyHttpPort          = "reverse-proxy-http-port"
	flagReverseProxyGrpcPort          = "reverse-proxy-grpc-port"
	flagDebugGrpcPort                 = "debug-grpc-port"
	flagMetricsPort                   = "metrics-port"
	flagReplicaConfig                 = "replica-config"
	flagLogLevel                      = "log-level"
	flagServerType                    = "server-type"
	flagMemoryBytes                   = "memory-bytes"
	flagCapabilities                  = "capabilities"
	flagOverCommitPercentage          = "over-commit-percentage"
	flagTracingConfigPath             = "tracing-config-path"
	flagEnvoyHost                     = "envoy-host"
	flagEnvoyPort                     = "envoy-port"
	flagDrainerServicePort            = "drainer-port"
	flagModelInferenceLagThreshold    = "model-inference-lag-threshold"
	flagModelInactiveSecondsThreshold = "model-inactive-seconds-threshold"
	flagScalingStatsPeriodSeconds     = "scaling-stats-period-seconds"

	flagMaxElapsedTimeReadySubServiceAfterStartSeconds  = "max-elapsed-time-ready-sub-service-after-start-seconds"
	flagMaxElapsedTimeReadySubServiceBeforeStartMinutes = "max-elapsed-time-ready-sub-service-before-start-minutes"
	flagPeriodReadySubServiceSeconds                    = "period-ready-sub-service-seconds"
	flagMaxLoadElapsedTimeMinutes                       = "max-load-elapsed-time-minutes"
	flagMaxUnloadElapsedTimeMinutes                     = "max-unload-elapsed-time-minutes"
	flagMaxLoadRetryCount                               = "max-load-retry-count"
	flagMaxUnloadRetryCount                             = "max-unload-retry-count"
)
71
85
72
86
// Default port numbers and the default Envoy host.
const (
	defaultInferenceHttpPort  = 8080
	defaultInferenceGrpcPort  = 9500
	defaultRclonePort         = 5572
	defaultSchedulerPort      = 9005
	defaultSchedulerTlsPort   = 9055
	defaultMetricsPort        = 9006
	defaultEnvoyHost          = "0.0.0.0"
	defaultEnvoyPort          = 9000
	defaultDrainerServicePort = 9007
)

// Defaults for the stats period and the lag / last-used thresholds.
const (
	statsPeriodSecondsDefault       = 5
	lagThresholdDefault             = 30
	lastUsedThresholdSecondsDefault = 30
)

// Defaults for the sub-service readiness and model load/unload settings.
// NOTE(review): units are presumably those named in each identifier's suffix
// (Seconds / Minutes) — confirm where the defaults are consumed. The two
// "...ElapsedTimeMinute" names are singular while the matching flag and env
// constants use "Minutes"; the identifiers are kept because they are
// referenced outside this block.
const (
	defaultMaxElapsedTimeReadySubServiceAfterStartSeconds  = 30
	defaultMaxElapsedTimeReadySubServiceBeforeStartMinutes = 15
	defaultPeriodReadySubServiceSeconds                    = 60
	defaultMaxLoadElapsedTimeMinute                        = 120
	defaultMaxUnloadElapsedTimeMinute                      = 15
	defaultMaxLoadRetryCount                               = 5
	defaultMaxUnloadRetryCount                             = 1
)
86
107
87
108
// Package-level settings. Where most of them are populated is outside this
// block; NOTE(review): presumably they are bound to the flag* names during
// init and then adjusted by updateFlagsFromEnv — confirm against those
// functions.
var (
	agentHost  string
	ServerName string
	ReplicaIdx uint

	SchedulerHost    string
	SchedulerPort    int
	SchedulerTlsPort int

	RcloneHost           string
	RclonePort           int
	InferenceHost        string
	InferenceHttpPort    int
	InferenceGrpcPort    int
	ReverseProxyHttpPort int
	ReverseProxyGrpcPort int
	DebugGrpcPort        int
	MetricsPort          int

	AgentFolder      string
	Namespace        string
	ReplicaConfigStr string
	InferenceSvcName string
	ConfigPath       string
	LogLevel         string
	ServerType       string

	memoryBytes          int
	MemoryBytes64        uint64
	capabilitiesList     string
	Capabilities         []string
	OverCommitPercentage int

	// serverTypes is a fixed two-element array of server type names.
	serverTypes = [...]string{"mlserver", "triton"}

	TracingConfigPath string

	EnvoyHost          string
	EnvoyPort          int
	DrainerServicePort int

	ModelInferenceLagThreshold    int
	ModelInactiveSecondsThreshold int
	ScalingStatsPeriodSeconds     int

	// The seven values below are each passed by pointer to
	// maybeUpdateFromIntEnv by a dedicated maybe* helper in this file.
	MaxElapsedTimeReadySubServiceAfterStartSeconds  int
	MaxElapsedTimeReadySubServiceBeforeStartMinutes int
	PeriodReadySubServiceSeconds                    int
	MaxLoadElapsedTimeMinute                        int
	MaxUnloadElapsedTimeMinute                      int
	MaxLoadRetryCount                               int
	MaxUnloadRetryCount                             int
)
124
152
125
153
func init () {
@@ -156,6 +184,13 @@ func updateFlagsFromEnv() {
156
184
maybeUpdateModelInferenceLagThreshold ()
157
185
maybeUpdateModelInactiveSecondsThreshold ()
158
186
maybeUpdateScalingStatsPeriodSeconds ()
187
+ maybeMaxElapsedTimeReadySubServiceAfterStartSeconds ()
188
+ maybeMaxElapsedTimeReadySubServiceBeforeStartMinutes ()
189
+ maybePeriodReadySubServiceSeconds ()
190
+ maybeMaxLoadElapsedTimeMinute ()
191
+ maybeMaxUnloadElapsedTimeMinute ()
192
+ maybeMaxLoadRetryCount ()
193
+ maybeMaxUnloadRetryCount ()
159
194
}
160
195
161
196
func maybeUpdateModelInferenceLagThreshold () {
@@ -338,6 +373,69 @@ func maybeUpdateMetricsPort() {
338
373
maybeUpdatePort (flagMetricsPort , envMetricsPort , & MetricsPort )
339
374
}
340
375
376
+ func maybeMaxElapsedTimeReadySubServiceAfterStartSeconds () {
377
+ maybeUpdateFromIntEnv (
378
+ flagMaxElapsedTimeReadySubServiceAfterStartSeconds ,
379
+ envMaxElapsedTimeReadySubServiceAfterStartSeconds ,
380
+ & MaxElapsedTimeReadySubServiceAfterStartSeconds ,
381
+ "sub service after start seconds" ,
382
+ )
383
+ }
384
+
385
+ func maybeMaxElapsedTimeReadySubServiceBeforeStartMinutes () {
386
+ maybeUpdateFromIntEnv (
387
+ flagMaxElapsedTimeReadySubServiceBeforeStartMinutes ,
388
+ envMaxElapsedTimeReadySubServiceBeforeStartMinutes ,
389
+ & MaxElapsedTimeReadySubServiceBeforeStartMinutes ,
390
+ "sub service before start minutes" ,
391
+ )
392
+ }
393
+
394
+ func maybePeriodReadySubServiceSeconds () {
395
+ maybeUpdateFromIntEnv (
396
+ flagPeriodReadySubServiceSeconds ,
397
+ envPeriodReadySubServiceSeconds ,
398
+ & PeriodReadySubServiceSeconds ,
399
+ "period ready sub service seconds" ,
400
+ )
401
+ }
402
+
403
+ func maybeMaxLoadElapsedTimeMinute () {
404
+ maybeUpdateFromIntEnv (
405
+ flagMaxLoadElapsedTimeMinutes ,
406
+ envMaxLoadElapsedTimeMinutes ,
407
+ & MaxLoadElapsedTimeMinute ,
408
+ "max load elapsed time minutes" ,
409
+ )
410
+ }
411
+
412
+ func maybeMaxUnloadElapsedTimeMinute () {
413
+ maybeUpdateFromIntEnv (
414
+ flagMaxUnloadElapsedTimeMinutes ,
415
+ envMaxUnloadElapsedTimeMinutes ,
416
+ & MaxUnloadElapsedTimeMinute ,
417
+ "max unload elapsed time minutes" ,
418
+ )
419
+ }
420
+
421
+ func maybeMaxLoadRetryCount () {
422
+ maybeUpdateFromIntEnv (
423
+ flagMaxLoadRetryCount ,
424
+ envMaxLoadRetryCount ,
425
+ & MaxLoadRetryCount ,
426
+ "max load retry count" ,
427
+ )
428
+ }
429
+
430
+ func maybeMaxUnloadRetryCount () {
431
+ maybeUpdateFromIntEnv (
432
+ flagMaxUnloadRetryCount ,
433
+ envMaxUnloadRetryCount ,
434
+ & MaxUnloadRetryCount ,
435
+ "max unload retry count" ,
436
+ )
437
+ }
438
+
341
439
func maybeUpdateSchedulerHost () {
342
440
if isFlagPassed (flagSchedulerHost ) {
343
441
return
0 commit comments