@@ -9,23 +9,29 @@ import (
99 "net/http"
1010 "time"
1111
12+ "go.opentelemetry.io/otel/attribute"
13+ "go.opentelemetry.io/otel/metric"
14+ "go.opentelemetry.io/otel/trace"
15+
1216 "github.com/e2b-dev/infra/packages/shared/pkg/consts"
17+ "github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
1318)
1419
1520const (
16- requestTimeout = 50 * time .Millisecond
17- loopDelay = 5 * time .Millisecond
// loopDelay is the pause between consecutive attempts in the retry loop.
21+ loopDelay = 5 * time .Millisecond
1822)
1923
2024// doRequestWithInfiniteRetries does a request with infinite retries until the context is done.
2125// The parent context should have a deadline or a timeout. Alongside the response and error, it returns the number of request attempts made.
22- func doRequestWithInfiniteRetries (ctx context.Context , method , address string , requestBody []byte , accessToken * string ) (* http.Response , error ) {
26+ func doRequestWithInfiniteRetries (ctx context.Context , method , address string , requestBody []byte , accessToken * string , envdInitRequestTimeout time.Duration , sandboxID string ) (* http.Response , int64 , error ) {
27+ requestCount := int64 (0 )
2328 for {
24- reqCtx , cancel := context .WithTimeout (ctx , requestTimeout )
29+ requestCount ++
30+ reqCtx , cancel := context .WithTimeout (ctx , envdInitRequestTimeout )
2531 request , err := http .NewRequestWithContext (reqCtx , method , address , bytes .NewReader (requestBody ))
2632 if err != nil {
2733 cancel ()
28- return nil , err
34+ return nil , requestCount , err
2935 }
3036
3137 // make sure request to already authorized envd will not fail
@@ -38,12 +44,12 @@ func doRequestWithInfiniteRetries(ctx context.Context, method, address string, r
3844 cancel ()
3945
4046 if err == nil {
41- return response , nil
47+ return response , requestCount , nil
4248 }
4349
4450 select {
4551 case <- ctx .Done ():
46- return nil , fmt .Errorf ("%w with cause: %w" , ctx .Err (), context .Cause (ctx ))
52+ return nil , requestCount , fmt .Errorf ("%w with cause: %w" , ctx .Err (), context .Cause (ctx ))
4753 case <- time .After (loopDelay ):
4854 }
4955 }
@@ -56,9 +62,11 @@ type PostInitJSONBody struct {
5662 Timestamp * time.Time `json:"timestamp,omitempty"`
5763}
5864
59- func (s * Sandbox ) initEnvd (ctx context.Context , envVars map [string ]string , accessToken * string ) error {
60- childCtx , childSpan := tracer .Start (ctx , "envd-init" )
61- defer childSpan .End ()
65+ func (s * Sandbox ) initEnvd (ctx context.Context , envVars map [string ]string , accessToken * string , envdInitRequestTimeout time.Duration ) error {
66+ ctx , span := tracer .Start (ctx , "envd-init" , trace .WithAttributes (telemetry .WithEnvdVersion (s .Config .Envd .Version )))
67+ defer span .End ()
68+
69+ attributes := metric .WithAttributes (telemetry .WithEnvdVersion (s .Config .Envd .Version ), attribute .Int64 ("timeout_ms" , envdInitRequestTimeout .Milliseconds ()))
6270
6371 hyperloopIP := s .Slot .HyperloopIPString ()
6472 address := fmt .Sprintf ("http://%s:%d/init" , s .Slot .HostIPString (), consts .DefaultEnvdServerPort )
@@ -75,7 +83,8 @@ func (s *Sandbox) initEnvd(ctx context.Context, envVars map[string]string, acces
7583 return err
7684 }
7785
78- response , err := doRequestWithInfiniteRetries (childCtx , "POST" , address , body , accessToken )
86+ response , count , err := doRequestWithInfiniteRetries (ctx , "POST" , address , body , accessToken , envdInitRequestTimeout , s .Runtime .SandboxID )
87+ envdInitAttempts .Add (ctx , count , attributes )
7988 if err != nil {
8089 return fmt .Errorf ("failed to init envd: %w" , err )
8190 }
@@ -90,5 +99,8 @@ func (s *Sandbox) initEnvd(ctx context.Context, envVars map[string]string, acces
9099 return err
91100 }
92101
102+ // Track successful envd init
103+ envdInitSuccess .Add (ctx , 1 , attributes )
104+
93105 return nil
94106}
0 commit comments