Commit 1e26077

LiuXiaoxuanPKU authored and garg-amit committed
[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (vllm-project#8975)
Signed-off-by: Amit Garg <[email protected]>
1 parent bad2e20 commit 1e26077

File tree

2 files changed: +4 −3 lines changed

.buildkite/test-pipeline.yaml

Lines changed: 0 additions & 2 deletions

@@ -207,8 +207,6 @@ steps:
   - vllm/spec_decode
   - tests/spec_decode
   commands:
-  # See https://github.com/vllm-project/vllm/issues/5152
-  - export VLLM_ATTENTION_BACKEND=XFORMERS
   - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
   - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py

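The deleted lines above dropped the CI override that pinned spec decode tests to the xFormers backend, so the tests now run on vLLM's default (flash attention). As a hedged illustration of the mechanism (a minimal sketch, not vLLM's actual backend-resolution code), an env-var override like VLLM_ATTENTION_BACKEND can be modeled as:

```python
import os

# Sketch only: the function name and default value are assumptions for
# illustration; only the env var name and backend strings come from the diff.
def resolve_attention_backend(default: str = "FLASH_ATTN") -> str:
    """Return the attention backend name, honoring an env-var override."""
    return os.environ.get("VLLM_ATTENTION_BACKEND", default)

# With no override set, the default backend is chosen.
os.environ.pop("VLLM_ATTENTION_BACKEND", None)
print(resolve_attention_backend())  # FLASH_ATTN

# The deleted CI line forced xFormers instead:
os.environ["VLLM_ATTENTION_BACKEND"] = "XFORMERS"
print(resolve_attention_backend())  # XFORMERS
```

Removing the export means the CI environment no longer masks backend-specific bugs in the default flash attention path.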
tests/spec_decode/test_multi_step_worker.py

Lines changed: 4 additions & 1 deletion

@@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step():
     worker.model_runner._gpu_advance_step.side_effect = ValueError(
         exception_secret)

-    seq_group_metadata_list, _, _ = create_batch(batch_size, k)
+    seq_group_metadata_list, _, _ = create_batch(batch_size,
+                                                 k,
+                                                 block_size=block_size,
+                                                 num_gpu_blocks=num_gpu_blocks)

     # Fallback (should not call) when num_steps=1.
     execute_model_req = ExecuteModelRequest(

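The test change passes block_size and num_gpu_blocks through to create_batch, since the flash attention backend relies on a consistent paged KV cache layout. As a hedged sketch (a hypothetical stand-in, not vLLM's real create_batch), the arithmetic these parameters govern looks like:

```python
# Illustrative only: function names, seq_len, and defaults here are
# assumptions; only batch_size, k, block_size, num_gpu_blocks appear in
# the diff above.
def blocks_needed(seq_len: int, block_size: int) -> int:
    """Number of KV-cache blocks a sequence of seq_len tokens occupies."""
    return -(-seq_len // block_size)  # ceiling division

def create_batch_sketch(batch_size, k, seq_len=27, block_size=16,
                        num_gpu_blocks=1024):
    """Stand-in helper: allocate a per-sequence block table for each
    sequence, covering the prompt plus k speculative tokens."""
    per_seq = blocks_needed(seq_len + k, block_size)
    assert batch_size * per_seq <= num_gpu_blocks, "not enough GPU blocks"
    next_block, block_tables = 0, []
    for _ in range(batch_size):
        block_tables.append(list(range(next_block, next_block + per_seq)))
        next_block += per_seq
    return block_tables

tables = create_batch_sketch(batch_size=4, k=5)
# 27 + 5 = 32 tokens at block_size=16 → 2 blocks per sequence
print([len(t) for t in tables])  # [2, 2, 2, 2]
```

If the batch is built with a block_size or block budget that disagrees with the worker's cache configuration, the backend's block tables no longer line up, which is why the test now threads both values through explicitly.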