[ci][distributed] fix flaky tests (vllm-project#6806)

youkaichao · LeiWang1999 · commit 6409969c0415 · 2025-03-26T10:10:29.000Z
Signed-off-by: LeiWang1999 <leiwang1999@outlook.com>
diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
@@ -1,3 +1,10 @@
+"""
+WARNING: This test runs in both single-node (4 GPUs) and multi-node
+ (2 nodes with 2 GPUs each) modes. If the test only uses 2 GPUs, it is
+ important to set the distributed backend to "mp" to avoid Ray scheduling
+ all workers on a node other than the head node, which can cause the test
+ to fail.
+"""
 import os
 
 import pytest
@@ -78,7 +85,7 @@ def test_pp_cudagraph(PP_SIZE, MODEL_NAME, ATTN_BACKEND):
         "--pipeline-parallel-size",
         str(PP_SIZE),
         "--distributed-executor-backend",
-        "ray",
+        "mp",
     ]
     os.environ["VLLM_ATTENTION_BACKEND"] = ATTN_BACKEND