@@ -71,10 +71,9 @@ type RuntimeMetadata struct {
7171}
7272
7373type  Resources  struct  {
74- 	Slot      * network.Slot 
75- 	rootfs    rootfs.Provider 
76- 	memory    uffd.MemoryBackend 
77- 	uffdExit  chan  error 
74+ 	Slot    * network.Slot // set from ips.slot when the sandbox is created or resumed
75+ 	rootfs  rootfs.Provider // rootfsProvider on create, rootfsOverlay on resume
76+ 	memory  uffd.MemoryBackend // no-op memory on create, the uffd handler (fcUffd) on resume
7877}
7978
8079type  Metadata  struct  {
@@ -99,6 +98,8 @@ type Sandbox struct {
9998	Checks  * Checks 
10099
101100	APIStoredConfig  * orchestrator.SandboxConfig 
101+ 
102+ 	exit  * utils.SetOnce [struct {}]
102103}
103104
104105func  (s  * Sandbox ) LoggerMetadata () sbxlogger.SandboxMetadata  {
@@ -131,6 +132,8 @@ func CreateSandbox(
131132	childCtx , childSpan  :=  tracer .Start (ctx , "create-sandbox" )
132133	defer  childSpan .End ()
133134
135+ 	exit  :=  utils .NewSetOnce [struct {}]()
136+ 
134137	cleanup  :=  NewCleanup ()
135138	defer  func () {
136139		if  e  !=  nil  {
@@ -248,10 +251,9 @@ func CreateSandbox(
248251	telemetry .ReportEvent (childCtx , "created fc process" )
249252
250253	resources  :=  & Resources {
251- 		Slot :     ips .slot ,
252- 		rootfs :   rootfsProvider ,
253- 		memory :   uffd .NewNoopMemory (memfileSize , memfile .BlockSize ()),
254- 		uffdExit : make (chan  error , 1 ),
254+ 		Slot :   ips .slot ,
255+ 		rootfs : rootfsProvider ,
256+ 		memory : uffd .NewNoopMemory (memfileSize , memfile .BlockSize ()),
255257	}
256258
257259	metadata  :=  & Metadata {
@@ -273,6 +275,8 @@ func CreateSandbox(
273275		cleanup : cleanup ,
274276
275277		APIStoredConfig : apiConfigToStore ,
278+ 
279+ 		exit : exit ,
276280	}
277281
278282	checks , err  :=  NewChecks (ctx , tracer , sbx , false )
@@ -282,9 +286,18 @@ func CreateSandbox(
282286	sbx .Checks  =  checks 
283287
284288	cleanup .AddPriority (func (ctx  context.Context ) error  {
285- 		return  sbx .Close (ctx , tracer )
289+ 		// Stop the sandbox first if it is still running, otherwise do nothing 
290+ 		return  sbx .Stop (ctx , tracer )
286291	})
287292
293+ 	go func() {
294+ 		// Once the FC process exits, stop the sandbox and publish the combined result on exit.
295+ 		_, fcErr := fcHandle.Exit.Wait()
296+ 		err := sbx.Stop(context.WithoutCancel(ctx), tracer) // detached from ctx cancellation so Stop still runs after ctx is done
297+ 
298+ 		exit.SetResult(struct{}{}, errors.Join(err, fcErr))
299+ 	}()
300+ 
288301	return  sbx , nil 
289302}
290303
@@ -307,6 +320,8 @@ func ResumeSandbox(
307320	childCtx , childSpan  :=  tracer .Start (ctx , "resume-sandbox" )
308321	defer  childSpan .End ()
309322
323+ 	exit  :=  utils .NewSetOnce [struct {}]()
324+ 
310325	cleanup  :=  NewCleanup ()
311326	defer  func () {
312327		if  e  !=  nil  {
@@ -379,18 +394,16 @@ func ResumeSandbox(
379394		return  nil , fmt .Errorf ("failed to serve memory: %w" , err )
380395	}
381396
397+ 	// ==== END of resources initialization ==== 
382398	uffdStartCtx , cancelUffdStartCtx  :=  context .WithCancelCause (ctx )
383399	defer  cancelUffdStartCtx (fmt .Errorf ("uffd finished starting" ))
384400
385- 	uffdExit  :=  make (chan  error , 1 )
386401	go  func () {
387- 		uffdWaitErr  :=  <- fcUffd .Exit ()
388- 		uffdExit  <-  uffdWaitErr 
402+ 		_ , uffdWaitErr  :=  fcUffd .Exit ().Wait ()
389403
390404		cancelUffdStartCtx (fmt .Errorf ("uffd process exited: %w" , errors .Join (uffdWaitErr , context .Cause (uffdStartCtx ))))
391405	}()
392406
393- 	// / ==== END of resources initialization ==== 
394407	rootfsPath , err  :=  rootfsOverlay .Path ()
395408	if  err  !=  nil  {
396409		return  nil , fmt .Errorf ("failed to get rootfs path: %w" , err )
@@ -452,10 +465,9 @@ func ResumeSandbox(
452465	telemetry .ReportEvent (childCtx , "initialized FC" )
453466
454467	resources  :=  & Resources {
455- 		Slot :     ips .slot ,
456- 		rootfs :   rootfsOverlay ,
457- 		memory :   fcUffd ,
458- 		uffdExit : uffdExit ,
468+ 		Slot :   ips .slot ,
469+ 		rootfs : rootfsOverlay ,
470+ 		memory : fcUffd ,
459471	}
460472
461473	metadata  :=  & Metadata {
@@ -477,6 +489,8 @@ func ResumeSandbox(
477489		cleanup : cleanup ,
478490
479491		APIStoredConfig : apiConfigToStore ,
492+ 
493+ 		exit : exit ,
480494	}
481495
482496	// Part of the sandbox as we need to stop Checks before pausing the sandbox 
@@ -489,7 +503,8 @@ func ResumeSandbox(
489503	sbx .Checks  =  checks 
490504
491505	cleanup .AddPriority (func (ctx  context.Context ) error  {
492- 		return  sbx .Close (ctx , tracer )
506+ 		// Stop the sandbox first if it is still running, otherwise do nothing 
507+ 		return  sbx .Stop (ctx , tracer )
493508	})
494509
495510	err  =  sbx .WaitForEnvd (
@@ -503,40 +518,38 @@ func ResumeSandbox(
503518
504519	go  sbx .Checks .Start ()
505520
506- 	return  sbx , nil 
507- }
521+ 	go func() {
522+ 		// Wait for either the uffd or the FC process to exit, then stop the sandbox.
523+ 		select {
524+ 		case <-fcUffd.Exit().Done:
525+ 		case <-fcHandle.Exit.Done:
526+ 		}
508527
509- func  (s  * Sandbox ) Wait (ctx  context.Context ) error  {
510- 	select  {
511- 	case  <- ctx .Done ():
512- 		return  ctx .Err ()
513- 	case  <- s .process .Exit .Done :
514- 		_ , fcErr  :=  s .process .Exit .Result ()
515- 		stopErr  :=  s .Stop (ctx )
516- 		uffdErr  :=  <- s .uffdExit 
528+ 		err := sbx.Stop(context.WithoutCancel(ctx), tracer) // detached from ctx cancellation so Stop still runs after ctx is done
517529
518- 		return  errors .Join (fcErr , stopErr , uffdErr )
519- 	case  uffdErr  :=  <- s .uffdExit :
520- 		stopErr  :=  s .Stop (ctx )
530+ 		_, uffdWaitErr := fcUffd.Exit().Wait() // NOTE(review): both waits block until the processes exit; relies on Stop terminating them - confirm
531+ 		_, fcErr := fcHandle.Exit.Wait()
532+ 		exit.SetResult(struct{}{}, errors.Join(err, fcErr, uffdWaitErr))
533+ 	}()
521534
522- 		_ , fcErr  :=  s .process .Exit .WaitWithContext (ctx )
535+ 	return  sbx , nil 
536+ }
523537
524- 		return  errors .Join (uffdErr , stopErr , fcErr )
525- 	}
538+ func  (s  * Sandbox ) Wait (ctx  context.Context ) error  { // Wait returns the error reported by the sandbox's exit result; the result value itself is discarded.
539+ 	_ , err  :=  s .exit .WaitWithContext (ctx ) // NOTE(review): presumably returns early with ctx's error when ctx is done - confirm against utils.SetOnce
540+ 	return  err 
526541}
527542
528- // Stop starts the cleanup process for the sandbox. 
529- func  (s  * Sandbox ) Stop (ctx  context.Context ) error  {
543+ func  (s  * Sandbox ) Close (ctx  context.Context ) error  { // Close runs the sandbox's registered cleanup and wraps any error it returns.
530544	err  :=  s .cleanup .Run (ctx )
531545	if  err  !=  nil  {
532- 		sbxlogger .I (s ).Error ("failed to stop sandbox" , zap .Error (err ))
533- 		return  fmt .Errorf ("failed to stop sandbox: %w" , err )
546+ 		return  fmt .Errorf ("failed to cleanup sandbox: %w" , err )
534547	}
535- 
536548	return  nil 
537549}
538550
539- func  (s  * Sandbox ) Close (ctx  context.Context , tracer  trace.Tracer ) error  {
551+ // Stop kills the sandbox. 
552+ func  (s  * Sandbox ) Stop (ctx  context.Context , tracer  trace.Tracer ) error  {
540553	_ , span  :=  tracer .Start (ctx , "sandbox-close" )
541554	defer  span .End ()
542555
@@ -659,7 +672,7 @@ func (s *Sandbox) Pause(
659672		originalRootfs .Header (),
660673		& RootfsDiffCreator {
661674			rootfs :   s .rootfs ,
662- 			stopHook : s .Stop ,
675+ 			stopHook : s .Close ,
663676		},
664677	)
665678	if  err  !=  nil  {
@@ -894,8 +907,8 @@ func (s *Sandbox) WaitForExit(
894907		return  fmt .Errorf ("waiting for exit took too long" )
895908	case  <- ctx .Done ():
896909		return  nil 
897- 	case  <- s .process . Exit .Done :
898- 		_ , err  :=  s .process . Exit .Result ()
910+ 	case  <- s .exit .Done :
911+ 		_ , err  :=  s .exit .Result ()
899912		if  err  ==  nil  {
900913			return  nil 
901914		}
0 commit comments