@@ -9,23 +9,31 @@ import (
99 "net/http"
1010 "time"
1111
12+ "go.opentelemetry.io/otel/attribute"
13+ "go.opentelemetry.io/otel/metric"
14+ "go.opentelemetry.io/otel/trace"
15+ "go.uber.org/zap"
16+
1217 "github.com/e2b-dev/infra/packages/shared/pkg/consts"
18+ "github.com/e2b-dev/infra/packages/shared/pkg/logger"
19+ "github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
1320)
1421
1522const (
16- requestTimeout = 50 * time .Millisecond
17- loopDelay = 5 * time .Millisecond
23+ loopDelay = 5 * time .Millisecond
1824)
1925
2026// doRequestWithInfiniteRetries does a request with infinite retries until the context is done.
2127// The parent context should have a deadline or a timeout.
22- func doRequestWithInfiniteRetries (ctx context.Context , method , address string , requestBody []byte , accessToken * string ) (* http.Response , error ) {
28+ func doRequestWithInfiniteRetries (ctx context.Context , method , address string , requestBody []byte , accessToken * string , envdInitRequestTimeout time.Duration , sandboxID , envdVersion string ) (* http.Response , int64 , error ) {
29+ requestCount := int64 (0 )
2330 for {
24- reqCtx , cancel := context .WithTimeout (ctx , requestTimeout )
31+ requestCount ++
32+ reqCtx , cancel := context .WithTimeout (ctx , envdInitRequestTimeout )
2533 request , err := http .NewRequestWithContext (reqCtx , method , address , bytes .NewReader (requestBody ))
2634 if err != nil {
2735 cancel ()
28- return nil , err
36+ return nil , requestCount , err
2937 }
3038
3139 // make sure request to already authorized envd will not fail
@@ -38,12 +46,14 @@ func doRequestWithInfiniteRetries(ctx context.Context, method, address string, r
3846 cancel ()
3947
4048 if err == nil {
41- return response , nil
49+ return response , requestCount , nil
4250 }
4351
52+ zap .L ().Warn ("failed to do request to envd, retrying" , logger .WithSandboxID (sandboxID ), logger .WithEnvdVersion (envdVersion ), zap .Int64 ("timeout_ms" , envdInitRequestTimeout .Milliseconds ()), zap .Error (err ))
53+
4454 select {
4555 case <- ctx .Done ():
46- return nil , fmt .Errorf ("%w with cause: %w" , ctx .Err (), context .Cause (ctx ))
56+ return nil , requestCount , fmt .Errorf ("%w with cause: %w" , ctx .Err (), context .Cause (ctx ))
4757 case <- time .After (loopDelay ):
4858 }
4959 }
@@ -56,17 +66,21 @@ type PostInitJSONBody struct {
5666 Timestamp * time.Time `json:"timestamp,omitempty"`
5767}
5868
59- func (s * Sandbox ) initEnvd (ctx context.Context , envVars map [string ]string , accessToken * string ) error {
60- childCtx , childSpan := tracer .Start (ctx , "envd-init" )
61- defer childSpan .End ()
69+ func (s * Sandbox ) initEnvd (ctx context.Context ) error {
70+ ctx , span := tracer .Start (ctx , "envd-init" , trace .WithAttributes (telemetry .WithEnvdVersion (s .Config .Envd .Version )))
71+ defer span .End ()
72+
73+ attributes := []attribute.KeyValue {telemetry .WithEnvdVersion (s .Config .Envd .Version ), attribute .Int64 ("timeout_ms" , s .internalConfig .EnvdInitRequestTimeout .Milliseconds ())}
74+ attributesFail := append (attributes , attribute .Bool ("success" , false ))
75+ attributesSuccess := append (attributes , attribute .Bool ("success" , true ))
6276
6377 hyperloopIP := s .Slot .HyperloopIPString ()
6478 address := fmt .Sprintf ("http://%s:%d/init" , s .Slot .HostIPString (), consts .DefaultEnvdServerPort )
6579 now := time .Now ()
6680 jsonBody := & PostInitJSONBody {
67- EnvVars : & envVars ,
81+ EnvVars : & s . Config . Envd . Vars ,
6882 HyperloopIP : & hyperloopIP ,
69- AccessToken : accessToken ,
83+ AccessToken : s . Config . Envd . AccessToken ,
7084 Timestamp : & now ,
7185 }
7286
@@ -75,11 +89,20 @@ func (s *Sandbox) initEnvd(ctx context.Context, envVars map[string]string, acces
7589 return err
7690 }
7791
78- response , err := doRequestWithInfiniteRetries (childCtx , "POST" , address , body , accessToken )
92+ response , count , err := doRequestWithInfiniteRetries (ctx , "POST" , address , body , s . Config . Envd . AccessToken , s . internalConfig . EnvdInitRequestTimeout , s . Runtime . SandboxID , s . Config . Envd . Version )
7993 if err != nil {
94+ envdInitCalls .Add (ctx , count , metric .WithAttributes (attributesFail ... ))
8095 return fmt .Errorf ("failed to init envd: %w" , err )
8196 }
8297
98+ if count > 1 {
99+ // Track failed envd init calls
100+ envdInitCalls .Add (ctx , count - 1 , metric .WithAttributes (attributesFail ... ))
101+ }
102+
103+ // Track successful envd init
104+ envdInitCalls .Add (ctx , 1 , metric .WithAttributes (attributesSuccess ... ))
105+
83106 defer response .Body .Close ()
84107 if response .StatusCode != http .StatusNoContent {
85108 return fmt .Errorf ("unexpected status code: %d" , response .StatusCode )
0 commit comments