@@ -173,21 +173,18 @@ async def get_task_ids_running_in_a_worker(self, worker_id: str) -> List[str]:
173
173
async def get_worker_details_for_running_task (
174
174
self , task_id : str , attempt_number : int
175
175
) -> Tuple [Optional [int ], Optional [str ]]:
176
- """
177
- Retrieves worker details for a specific task and attempt number.
176
+ """Retrieves worker details for a specific task and attempt number.
178
177
179
178
Args:
180
179
task_id: The ID of the task.
181
180
attempt_number: The attempt number of the task.
182
181
183
182
Returns:
184
- Tuple[Optional[int], Optional[str]]: A tuple
185
- containing the worker's PID (process ID),
186
- and worker's ID.
183
+ Tuple[Optional[int], Optional[str]]: A tuple containing the worker's PID
184
+ (process ID), and worker's ID.
187
185
188
186
Raises:
189
- ValueError: If the task attempt is not running or
190
- the state APi is not initialized.
187
+ ValueError: If the task attempt is not running or the state API is not initialized.
191
188
"""
192
189
if self ._state_api is None :
193
190
raise ValueError ("The state API is not initialized yet. Please retry." )
@@ -217,35 +214,27 @@ async def get_worker_details_for_running_task(
217
214
return pid , worker_id
218
215
219
216
@routes .get ("/task/traceback" )
220
- async def get_task_traceback (self , req ) -> aiohttp .web .Response :
221
- """
222
- Retrieves the traceback information for a specific task.
217
+ async def get_task_traceback (
218
+ self , req : aiohttp .web .Request
219
+ ) -> aiohttp .web .Response :
220
+ """Retrieves the traceback information for a specific task.
223
221
Note that one worker process works on one task at a time
224
222
or one worker works on multiple async tasks.
225
223
226
- Args:
227
- req (aiohttp.web.Request): The HTTP request object.
228
-
229
224
Params:
230
225
task_id: The ID of the task.
231
226
attempt_number: The attempt number of the task.
232
227
node_id: The ID of the node.
233
228
234
229
Returns:
235
- aiohttp.web.Response: The HTTP response containing
236
- the traceback information.
230
+ aiohttp.web.Response: The HTTP response containing the traceback information.
237
231
238
232
Raises:
239
- ValueError: If the "task_id" parameter
240
- is missing in the request query.
241
- ValueError: If the "attempt_number" parameter
242
- is missing in the request query.
243
- ValueError: If the worker begins working on
244
- another task during the traceback retrieval.
245
- aiohttp.web.HTTPInternalServerError: If there is
246
- an internal server error during the traceback retrieval.
233
+ ValueError: If the "task_id" parameter is missing in the request query.
234
+ ValueError: If the "attempt_number" parameter is missing in the request query.
235
+ ValueError: If the worker begins working on another task during the traceback retrieval.
236
+ aiohttp.web.HTTPInternalServerError: If there is an internal server error during the traceback retrieval.
247
237
"""
248
-
249
238
if "task_id" not in req .query :
250
239
raise ValueError ("task_id is required" )
251
240
if "attempt_number" not in req .query :
@@ -317,30 +306,23 @@ async def get_task_traceback(self, req) -> aiohttp.web.Response:
317
306
)
318
307
319
308
@routes .get ("/task/cpu_profile" )
320
- async def get_task_cpu_profile (self , req ) -> aiohttp .web .Response :
321
- """
322
- Retrieves the CPU profile for a specific task.
309
+ async def get_task_cpu_profile (
310
+ self , req : aiohttp .web .Request
311
+ ) -> aiohttp .web .Response :
312
+ """Retrieves the CPU profile for a specific task.
323
313
Note that one worker process works on one task at a time
324
314
or one worker works on multiple async tasks.
325
315
326
- Args:
327
- req (aiohttp.web.Request): The HTTP request object.
328
-
329
316
Returns:
330
317
aiohttp.web.Response: The HTTP response containing the CPU profile data.
331
318
332
319
Raises:
333
- ValueError: If the "task_id" parameter is
334
- missing in the request query.
335
- ValueError: If the "attempt_number" parameter is
336
- missing in the request query.
320
+ ValueError: If the "task_id" parameter is missing in the request query.
321
+ ValueError: If the "attempt_number" parameter is missing in the request query.
337
322
ValueError: If the maximum duration allowed is exceeded.
338
- ValueError: If the worker begins working on
339
- another task during the profile retrieval.
340
- aiohttp.web.HTTPInternalServerError: If there is
341
- an internal server error during the profile retrieval.
342
- aiohttp.web.HTTPInternalServerError: If the CPU Flame
343
- Graph information for the task is not found.
323
+ ValueError: If the worker begins working on another task during the profile retrieval.
324
+ aiohttp.web.HTTPInternalServerError: If there is an internal server error during the profile retrieval.
325
+ aiohttp.web.HTTPInternalServerError: If the CPU Flame Graph information for the task is not found.
344
326
"""
345
327
if "task_id" not in req .query :
346
328
raise ValueError ("task_id is required" )
@@ -420,8 +402,9 @@ async def get_task_cpu_profile(self, req) -> aiohttp.web.Response:
420
402
)
421
403
422
404
@routes .get ("/worker/traceback" )
423
- async def get_traceback (self , req ) -> aiohttp .web .Response :
424
- """
405
+ async def get_traceback (self , req : aiohttp .web .Request ) -> aiohttp .web .Response :
406
+ """Retrieves the traceback information for a specific worker.
407
+
425
408
Params:
426
409
pid: Required. The PID of the worker.
427
410
ip: Required. The IP address of the node.
@@ -457,11 +440,21 @@ async def get_traceback(self, req) -> aiohttp.web.Response:
457
440
return aiohttp .web .HTTPInternalServerError (text = reply .output )
458
441
459
442
@routes .get ("/worker/cpu_profile" )
460
- async def cpu_profile (self , req ) -> aiohttp .web .Response :
461
- """
443
+ async def cpu_profile (self , req : aiohttp .web .Request ) -> aiohttp .web .Response :
444
+ """Retrieves the CPU profile for a specific worker.
445
+
462
446
Params:
463
447
pid: Required. The PID of the worker.
464
448
ip: Required. The IP address of the node.
449
+ duration: Optional. Duration in seconds for profiling (default: 5, max: 60).
450
+ format: Optional. Output format (default: "flamegraph").
451
+ native: Optional. Whether to use native profiling (default: false).
452
+
453
+ Raises:
454
+ ValueError: If pid is not provided.
455
+ ValueError: If ip is not provided.
456
+ ValueError: If duration exceeds 60 seconds.
457
+ aiohttp.web.HTTPInternalServerError: If there is an internal server error during the profile retrieval.
465
458
"""
466
459
pid = req .query .get ("pid" )
467
460
ip = req .query .get ("ip" )
@@ -510,21 +503,18 @@ async def cpu_profile(self, req) -> aiohttp.web.Response:
510
503
return aiohttp .web .HTTPInternalServerError (text = reply .output )
511
504
512
505
@routes .get ("/memory_profile" )
513
- async def memory_profile (self , req ) -> aiohttp .web .Response :
514
- """
515
- Retrieves the memory profile for a specific worker or task.
506
+ async def memory_profile (self , req : aiohttp .web .Request ) -> aiohttp .web .Response :
507
+ """Retrieves the memory profile for a specific worker or task.
516
508
Note that for tasks, one worker process works on one task at a time
517
509
or one worker works on multiple async tasks.
518
510
519
- Args:
520
- req (aiohttp.web.Request): The HTTP request object.
521
-
522
511
Returns:
523
512
aiohttp.web.Response: The HTTP response containing the memory profile data.
524
513
525
514
Params (1):
526
515
pid: The PID of the worker.
527
516
ip: The IP address of the node.
517
+
528
518
Params (2):
529
519
task_id: The ID of the task.
530
520
attempt_number: The attempt number of the task.
@@ -538,7 +528,7 @@ async def memory_profile(self, req) -> aiohttp.web.Response:
538
528
or "node id" is missing in the request query.
539
529
aiohttp.web.HTTPInternalServerError: If the maximum
540
530
duration allowed is exceeded.
541
- aiohttp.web.HTTPInternalServerError If requesting task
531
+ aiohttp.web.HTTPInternalServerError: If requesting task
542
532
profiling for the worker begins working on another task
543
533
during the profile retrieval.
544
534
aiohttp.web.HTTPInternalServerError: If there is
0 commit comments