@@ -24,6 +24,7 @@ import (
2424 "github.com/ovh/cds/sdk"
2525 "github.com/ovh/cds/sdk/cdsclient"
2626 "github.com/ovh/cds/sdk/hatchery"
27+ "github.com/ovh/cds/sdk/slug"
2728)
2829
2930// New instanciates a new hatchery local
@@ -99,8 +100,12 @@ func (h *HatcheryKubernetes) ApplyConfiguration(cfg interface{}) error {
99100// Status returns sdk.MonitoringStatus, implements interface service.Service
100101func (h * HatcheryKubernetes ) Status (ctx context.Context ) * sdk.MonitoringStatus {
101102 m := h .NewMonitoringStatus ()
102- m .AddLine (sdk.MonitoringStatusLine {Component : "Workers" , Value : fmt .Sprintf ("%d/%d" , len (h .WorkersStarted (ctx )), h .Config .Provision .MaxWorker ), Status : sdk .MonitoringStatusOK })
103-
103+ ws , err := h .WorkersStarted (ctx )
104+ if err != nil {
105+ ctx = log .ContextWithStackTrace (ctx , err )
106+ log .Warn (ctx , err .Error ())
107+ }
108+ m .AddLine (sdk.MonitoringStatusLine {Component : "Workers" , Value : fmt .Sprintf ("%d/%d" , len (ws ), h .Config .Provision .MaxWorker ), Status : sdk .MonitoringStatusOK })
104109 return m
105110}
106111
@@ -171,11 +176,6 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
171176 return sdk .WithStack (fmt .Errorf ("no job ID and no register" ))
172177 }
173178
174- label := "execution"
175- if spawnArgs .RegisterOnly {
176- label = "register"
177- }
178-
179179 var logJob string
180180 if spawnArgs .JobID > 0 {
181181 logJob = fmt .Sprintf ("for workflow job %d," , spawnArgs .JobID )
@@ -221,7 +221,6 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
221221 envsWm := workerConfig .InjectEnvVars
222222 envsWm ["CDS_MODEL_MEMORY" ] = fmt .Sprintf ("%d" , memory )
223223 envsWm ["CDS_FROM_WORKER_IMAGE" ] = "true"
224- envsWm ["CDS_CONFIG" ] = workerConfig .EncodeBase64 ()
225224
226225 for envName , envValue := range spawnArgs .Model .ModelDocker .Envs {
227226 envsWm [envName ] = envValue
@@ -239,16 +238,33 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
239238 pullPolicy = "Always"
240239 }
241240
241+ // Create secret for worker config
242+ configSecretName , err := h .createConfigSecret (ctx , workerConfig )
243+ if err != nil {
244+ return sdk .WrapError (err , "cannot create secret for config %s" , workerConfig .Name )
245+ }
246+ envs = append (envs , apiv1.EnvVar {
247+ Name : "CDS_CONFIG" ,
248+ ValueFrom : & apiv1.EnvVarSource {
249+ SecretKeyRef : & apiv1.SecretKeySelector {
250+ LocalObjectReference : apiv1.LocalObjectReference {
251+ Name : configSecretName ,
252+ },
253+ Key : "CDS_CONFIG" ,
254+ },
255+ },
256+ })
257+
242258 var gracePeriodSecs int64
243259 podSchema := apiv1.Pod {
244260 ObjectMeta : metav1.ObjectMeta {
245261 Name : spawnArgs .WorkerName ,
246262 Namespace : h .Config .Namespace ,
247263 DeletionGracePeriodSeconds : & gracePeriodSecs ,
248264 Labels : map [string ]string {
249- LABEL_WORKER : label ,
250- LABEL_WORKER_MODEL : strings . ToLower ( spawnArgs . Model . Name ) ,
251- LABEL_HATCHERY_NAME : h . Configuration (). Name ,
265+ LABEL_HATCHERY_NAME : h . Configuration (). Name ,
266+ LABEL_WORKER_NAME : workerConfig . Name ,
267+ LABEL_WORKER_MODEL_PATH : slug . Convert ( spawnArgs . Model . Path ()) ,
252268 },
253269 Annotations : map [string ]string {},
254270 },
@@ -273,6 +289,15 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
273289 },
274290 }
275291
292+ // Check here to add secret if needed
293+ if spawnArgs .Model .ModelDocker .Private {
294+ secretRegistryName , err := h .createRegistrySecret (ctx , * spawnArgs .Model )
295+ if err != nil {
296+ return sdk .WrapError (err , "cannot create secret for model %s" , spawnArgs .Model .Path ())
297+ }
298+ podSchema .Spec .ImagePullSecrets = []apiv1.LocalObjectReference {{Name : secretRegistryName }}
299+ }
300+
276301 var services []sdk.Requirement
277302 for _ , req := range spawnArgs .Requirements {
278303 if req .Type == sdk .ServiceRequirement {
@@ -286,16 +311,6 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
286311 podSchema .Spec .HostAliases [0 ].Hostnames [0 ] = "worker"
287312 }
288313
289- // Check here to add secret if needed
290- secretName := "cds-credreg-" + spawnArgs .Model .Name
291- if spawnArgs .Model .ModelDocker .Private {
292- if err := h .createSecret (ctx , secretName , * spawnArgs .Model ); err != nil {
293- return sdk .WrapError (err , "cannot create secret for model %s" , spawnArgs .Model .Path ())
294- }
295- podSchema .Spec .ImagePullSecrets = []apiv1.LocalObjectReference {{Name : secretName }}
296- podSchema .ObjectMeta .Labels [LABEL_SECRET ] = secretName
297- }
298-
299314 for i , serv := range services {
300315 //name= <alias> => the name of the host put in /etc/hosts of the worker
301316 //value= "postgres:latest env_1=blabla env_2=blabla"" => we can add env variables in requirement name
@@ -345,7 +360,7 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
345360 podSchema .Spec .HostAliases [0 ].Hostnames [i + 1 ] = strings .ToLower (serv .Name )
346361 }
347362
348- _ , err : = h .kubeClient .PodCreate (ctx , h .Config .Namespace , & podSchema , metav1.CreateOptions {})
363+ _ , err = h .kubeClient .PodCreate (ctx , h .Config .Namespace , & podSchema , metav1.CreateOptions {})
349364 log .Debug (ctx , "hatchery> kubernetes> SpawnWorker> %s > Pod created" , spawnArgs .WorkerName )
350365 return sdk .WithStack (err )
351366}
@@ -356,20 +371,18 @@ func (h *HatcheryKubernetes) GetLogger() *logrus.Logger {
356371
357372// WorkersStarted returns the number of instances started but
358373// not necessarily register on CDS yet
359- func (h * HatcheryKubernetes ) WorkersStarted (ctx context.Context ) []string {
360- list , err := h .kubeClient .PodList (ctx , h .Config .Namespace , metav1.ListOptions {LabelSelector : LABEL_HATCHERY_NAME })
374+ func (h * HatcheryKubernetes ) WorkersStarted (ctx context.Context ) ([]string , error ) {
375+ list , err := h .kubeClient .PodList (ctx , h .Config .Namespace , metav1.ListOptions {
376+ LabelSelector : fmt .Sprintf ("%s=%s,%s" , LABEL_HATCHERY_NAME , h .Config .Name , LABEL_WORKER_NAME ),
377+ })
361378 if err != nil {
362- log .Warn (ctx , "WorkersStarted> unable to list pods on namespace %s" , h .Config .Namespace )
363- return nil
379+ return nil , sdk .WrapError (err , "unable to list pods on namespace %s" , h .Config .Namespace )
364380 }
365381 workerNames := make ([]string , 0 , list .Size ())
366382 for _ , pod := range list .Items {
367- labels := pod .GetLabels ()
368- if labels [LABEL_HATCHERY_NAME ] == h .Configuration ().Name {
369- workerNames = append (workerNames , pod .GetName ())
370- }
383+ workerNames = append (workerNames , pod .GetName ())
371384 }
372- return workerNames
385+ return workerNames , nil
373386}
374387
375388// NeedRegistration return true if worker model need regsitration
@@ -381,31 +394,33 @@ func (h *HatcheryKubernetes) NeedRegistration(_ context.Context, m *sdk.Model) b
381394}
382395
383396func (h * HatcheryKubernetes ) routines (ctx context.Context ) {
384- ticker := time .NewTicker (10 * time .Minute )
397+ ticker := time .NewTicker (10 * time .Second )
385398 defer ticker .Stop ()
386399
387400 for {
388401 select {
389402 case <- ticker .C :
390403 h .GoRoutines .Exec (ctx , "getCDNConfiguration" , func (ctx context.Context ) {
391404 if err := h .Common .RefreshServiceLogger (ctx ); err != nil {
392- log .Error (ctx , "hatchery> kubernetes> cannot get cdn configuration : %v" , err )
405+ log .ErrorWithStackTrace (ctx , sdk . WrapError ( err , " cannot get cdn configuration" ) )
393406 }
394407 })
395408
396409 h .GoRoutines .Exec (ctx , "getServicesLogs" , func (ctx context.Context ) {
397410 if err := h .getServicesLogs (ctx ); err != nil {
398- log .Error (ctx , "Hatchery> Kubernetes> Cannot get service logs : %v" , err )
411+ log .ErrorWithStackTrace (ctx , sdk . WrapError ( err , "cannot get service logs" ) )
399412 }
400413 })
401414
402415 h .GoRoutines .Exec (ctx , "killAwolWorker" , func (ctx context.Context ) {
403- _ = h .killAwolWorkers (ctx )
416+ if err := h .killAwolWorkers (ctx ); err != nil {
417+ log .ErrorWithStackTrace (ctx , sdk .WrapError (err , "cannot delete awol worker" ))
418+ }
404419 })
405420
406421 h .GoRoutines .Exec (ctx , "deleteSecrets" , func (ctx context.Context ) {
407422 if err := h .deleteSecrets (ctx ); err != nil {
408- log .Error (ctx , "hatchery> kubernetes> cannot handle secrets : %v" , err )
423+ log .ErrorWithStackTrace (ctx , sdk . WrapError ( err , " cannot delete secrets" ) )
409424 }
410425 })
411426 case <- ctx .Done ():
0 commit comments