@@ -33,54 +33,40 @@ type RawBundle struct {
33
33
Bundle []byte
34
34
}
35
35
36
- func (n * Node ) DownloadBundles (
37
- ctx context.Context ,
36
+ // DetermineChunkLocations determines where to find the chunks we need to download for a given batch. For each chunk in a batch, there will
37
+ // be one or more relays that are responsible for serving that chunk. This function determines which relays to contact
38
+ // for each chunk, and groups the requests by relay key to support batching. This method also calculates
39
+ // the size of the chunk data that will be downloaded, in bytes.
40
+ func (n * Node ) DetermineChunkLocations (
38
41
batch * corev2.Batch ,
39
42
operatorState * core.OperatorState ,
40
43
probe * common.SequenceProbe ,
41
- ) ([]* corev2.BlobShard , []* RawBundle , error ) {
42
-
43
- probe .SetStage ("prepare_to_download" )
44
-
45
- relayClient , ok := n .RelayClient .Load ().(relay.RelayClient )
44
+ ) (downloadSizeInBytes uint64 , relayRequests map [corev2.RelayKey ]* relayRequest , err error ) {
46
45
47
- if ! ok || relayClient == nil {
48
- return nil , nil , fmt .Errorf ("relay client is not set" )
49
- }
46
+ probe .SetStage ("determine_chunk_locations" )
50
47
51
48
blobVersionParams := n .BlobVersionParams .Load ()
52
49
if blobVersionParams == nil {
53
- return nil , nil , fmt .Errorf ("blob version params is nil" )
50
+ return 0 , nil , fmt .Errorf ("blob version params is nil" )
54
51
}
55
52
56
- blobShards := make ([]* corev2.BlobShard , len (batch .BlobCertificates ))
57
- rawBundles := make ([]* RawBundle , len (batch .BlobCertificates ))
58
- requests := make (map [corev2.RelayKey ]* relayRequest )
59
-
60
- // Tally the number of bytes we are about to download.
61
- var downloadSizeInBytes uint64
53
+ relayRequests = make (map [corev2.RelayKey ]* relayRequest )
62
54
63
55
for i , cert := range batch .BlobCertificates {
64
56
blobKey , err := cert .BlobHeader .BlobKey ()
65
57
if err != nil {
66
- return nil , nil , fmt .Errorf ("failed to get blob key: %v " , err )
58
+ return 0 , nil , fmt .Errorf ("failed to get blob key: %w " , err )
67
59
}
68
60
69
61
if len (cert .RelayKeys ) == 0 {
70
- return nil , nil , fmt .Errorf ("no relay keys in the certificate" )
71
- }
72
- blobShards [i ] = & corev2.BlobShard {
73
- BlobCertificate : cert ,
74
- }
75
- rawBundles [i ] = & RawBundle {
76
- BlobCertificate : cert ,
62
+ return 0 , nil , fmt .Errorf ("no relay keys in the certificate" )
77
63
}
78
64
relayIndex := rand .Intn (len (cert .RelayKeys ))
79
65
relayKey := cert .RelayKeys [relayIndex ]
80
66
81
67
blobParams , ok := blobVersionParams .Get (cert .BlobHeader .BlobVersion )
82
68
if ! ok {
83
- return nil , nil , fmt .Errorf ("blob version %d not found" , cert .BlobHeader .BlobVersion )
69
+ return 0 , nil , fmt .Errorf ("blob version %d not found" , cert .BlobHeader .BlobVersion )
84
70
}
85
71
86
72
assgn , err := corev2 .GetAssignmentForBlob (operatorState , blobParams , cert .BlobHeader .QuorumNumbers , n .Config .ID )
@@ -91,17 +77,17 @@ func (n *Node) DownloadBundles(
91
77
92
78
chunkLength , err := blobParams .GetChunkLength (uint32 (cert .BlobHeader .BlobCommitments .Length ))
93
79
if err != nil {
94
- return nil , nil , fmt .Errorf ("failed to get chunk length: %w" , err )
80
+ return 0 , nil , fmt .Errorf ("failed to get chunk length: %w" , err )
95
81
}
96
82
downloadSizeInBytes += uint64 (assgn .NumChunks () * chunkLength )
97
83
98
- req , ok := requests [relayKey ]
84
+ req , ok := relayRequests [relayKey ]
99
85
if ! ok {
100
86
req = & relayRequest {
101
87
chunkRequests : make ([]* relay.ChunkRequestByIndex , 0 ),
102
88
metadata : make ([]* requestMetadata , 0 ),
103
89
}
104
- requests [relayKey ] = req
90
+ relayRequests [relayKey ] = req
105
91
}
106
92
// Chunks from one blob are requested to the same relay
107
93
req .chunkRequests = append (req .chunkRequests , & relay.ChunkRequestByIndex {
@@ -115,27 +101,40 @@ func (n *Node) DownloadBundles(
115
101
116
102
}
117
103
118
- // storeChunksSemaphore can be nil during unit tests, since there are a bunch of places where the Node struct
119
- // is instantiated directly without using the constructor.
120
- if n .storeChunksSemaphore != nil {
121
- // So far, we've only downloaded metadata for the blob. Before downloading the actual chunks, make sure there
122
- // is capacity in the store chunks buffer. This is an OOM safety measure.
104
+ return downloadSizeInBytes , relayRequests , nil
105
+ }
123
106
124
- probe .SetStage ("acquire_buffer_capacity" )
125
- semaphoreCtx , cancel := context .WithTimeout (ctx , n .Config .StoreChunksBufferTimeout )
126
- defer cancel ()
127
- err := n .storeChunksSemaphore .Acquire (semaphoreCtx , int64 (downloadSizeInBytes ))
128
- if err != nil {
129
- return nil , nil , fmt .Errorf ("failed to acquire buffer capacity: %w" , err )
107
+ // DownloadChunksFromRelays takes a "download plan" from DetermineChunkLocations() and downloads the chunks from the relays.
108
+ // It also deserializes the responses from the relays into BlobShards and RawBundles.
109
+ func (n * Node ) DownloadChunksFromRelays (
110
+ ctx context.Context ,
111
+ batch * corev2.Batch ,
112
+ relayRequests map [corev2.RelayKey ]* relayRequest ,
113
+ probe * common.SequenceProbe ,
114
+ ) (blobShards []* corev2.BlobShard , rawBundles []* RawBundle , err error ) {
115
+
116
+ blobShards = make ([]* corev2.BlobShard , len (batch .BlobCertificates ))
117
+ rawBundles = make ([]* RawBundle , len (batch .BlobCertificates ))
118
+ for i , cert := range batch .BlobCertificates {
119
+ blobShards [i ] = & corev2.BlobShard {
120
+ BlobCertificate : cert ,
130
121
}
122
+ rawBundles [i ] = & RawBundle {
123
+ BlobCertificate : cert ,
124
+ }
125
+ }
126
+
127
+ relayClient , ok := n .RelayClient .Load ().(relay.RelayClient )
128
+ if ! ok || relayClient == nil {
129
+ return nil , nil , fmt .Errorf ("relay client is not set" )
131
130
}
132
131
133
132
probe .SetStage ("download" )
134
133
135
- bundleChan := make (chan response , len (requests ))
136
- for relayKey := range requests {
134
+ bundleChan := make (chan response , len (relayRequests ))
135
+ for relayKey := range relayRequests {
137
136
relayKey := relayKey
138
- req := requests [relayKey ]
137
+ req := relayRequests [relayKey ]
139
138
n .DownloadPool .Submit (func () {
140
139
ctxTimeout , cancel := context .WithTimeout (ctx , n .Config .ChunkDownloadTimeout )
141
140
defer cancel ()
@@ -157,23 +156,24 @@ func (n *Node) DownloadBundles(
157
156
})
158
157
}
159
158
160
- responses := make ([]response , len (requests ))
161
- for i := 0 ; i < len (requests ); i ++ {
159
+ responses := make ([]response , len (relayRequests ))
160
+ for i := 0 ; i < len (relayRequests ); i ++ {
162
161
responses [i ] = <- bundleChan
163
162
}
164
163
165
164
probe .SetStage ("deserialize" )
166
165
167
- for i := 0 ; i < len (requests ); i ++ {
166
+ for i := 0 ; i < len (relayRequests ); i ++ {
168
167
resp := responses [i ]
169
168
if resp .err != nil {
170
169
// TODO (cody-littley) this is flaky, and will fail if any relay fails. We should retry failures
171
170
return nil , nil , fmt .Errorf ("failed to get chunks from relays: %v" , resp .err )
172
171
}
173
172
174
173
if len (resp .bundles ) != len (resp .metadata ) {
175
- return nil , nil , fmt .Errorf ("number of bundles and metadata do not match (%d != %d)" ,
176
- len (resp .bundles ), len (resp .metadata ))
174
+ return nil , nil ,
175
+ fmt .Errorf ("number of bundles and metadata do not match (%d != %d)" ,
176
+ len (resp .bundles ), len (resp .metadata ))
177
177
}
178
178
179
179
for j , bundle := range resp .bundles {
0 commit comments