1
1
from http import HTTPStatus
2
+ from typing import List
2
3
3
4
import openai
4
5
import pytest
12
13
# Small chat model used by these server tests; overridable per-test via the
# `server_args` fixture (e.g. passing a different `--model=...` flag).
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
13
14
14
15
16
@pytest.fixture(scope='module')
def server_args(request: pytest.FixtureRequest) -> List[str]:
    """Provide extra arguments to the server via indirect parametrization

    Usage:

    >>> @pytest.mark.parametrize(
    >>>     "server_args",
    >>>     [
    >>>         ["--disable-frontend-multiprocessing"],
    >>>         [
    >>>             "--model=NousResearch/Hermes-3-Llama-3.1-70B",
    >>>             "--enable-auto-tool-choice",
    >>>         ],
    >>>     ],
    >>>     indirect=True,
    >>> )
    >>> def test_foo(server, client):
    >>>     ...

    This will run `test_foo` twice with servers with:
    - `--disable-frontend-multiprocessing`
    - `--model=NousResearch/Hermes-3-Llama-3.1-70B --enable-auto-tool-choice`.

    """
    # Tests that don't parametrize this fixture indirectly get no extra args.
    if not hasattr(request, "param"):
        return []

    val = request.param

    # A bare string is accepted as shorthand for a single-argument list.
    if isinstance(val, str):
        return [val]

    # Fix: return the already-bound `val` instead of re-reading
    # `request.param` — same value, consistent with the branches above.
    return val
50
+
51
+
15
52
@pytest .fixture (scope = "module" )
16
- def server ():
53
+ def server (server_args ):
17
54
args = [
18
55
# use half precision for speed and memory savings in CI environment
19
56
"--dtype" ,
@@ -23,6 +60,7 @@ def server():
23
60
"--enforce-eager" ,
24
61
"--max-num-seqs" ,
25
62
"128" ,
63
+ * server_args ,
26
64
]
27
65
28
66
with RemoteOpenAIServer (MODEL_NAME , args ) as remote_server :
@@ -35,6 +73,15 @@ async def client(server):
35
73
yield async_client
36
74
37
75
76
+ @pytest .mark .parametrize (
77
+ "server_args" ,
78
+ [
79
+ pytest .param ([], id = "default-frontend-multiprocessing" ),
80
+ pytest .param (["--disable-frontend-multiprocessing" ],
81
+ id = "disable-frontend-multiprocessing" )
82
+ ],
83
+ indirect = True ,
84
+ )
38
85
@pytest .mark .asyncio
39
86
async def test_show_version (client : openai .AsyncOpenAI ):
40
87
base_url = str (client .base_url )[:- 3 ].strip ("/" )
@@ -45,6 +92,15 @@ async def test_show_version(client: openai.AsyncOpenAI):
45
92
assert response .json () == {"version" : VLLM_VERSION }
46
93
47
94
95
+ @pytest .mark .parametrize (
96
+ "server_args" ,
97
+ [
98
+ pytest .param ([], id = "default-frontend-multiprocessing" ),
99
+ pytest .param (["--disable-frontend-multiprocessing" ],
100
+ id = "disable-frontend-multiprocessing" )
101
+ ],
102
+ indirect = True ,
103
+ )
48
104
@pytest .mark .asyncio
49
105
async def test_check_health (client : openai .AsyncOpenAI ):
50
106
base_url = str (client .base_url )[:- 3 ].strip ("/" )
0 commit comments