18
18
from vllm .sampling_params import SamplingParams
19
19
from vllm .transformers_utils .tokenizer_group import init_tokenizer_from_configs
20
20
21
+ # Time to wait before checking if the server process is alive.
22
+ SERVER_START_TIMEOUT_MS = 1000
23
+
21
24
22
25
class AsyncEngineRPCClient :
23
26
@@ -61,7 +64,16 @@ def socket(self):
61
64
socket .connect (self .rpc_path )
62
65
yield socket
63
66
finally :
64
- socket .close ()
67
+ # linger == 0 means discard unsent messages
68
+ # when the socket is closed. This is necessary
69
+ # because otherwise self.context.destroy() will
70
+ # wait for 30 seconds until unsent messages are
71
+ # received, which is impossible if the server
72
+ # crashed. In the absence of a server crash we
73
+ # always expect a response before closing the
74
+ # socket anyway.
75
+ # Reference: http://api.zeromq.org/4-2:zmq-setsockopt#toc24
76
+ socket .close (linger = 0 )
65
77
66
78
async def _send_get_data_rpc_request (self , request : RPCUtilityRequest ,
67
79
expected_type : Any ,
@@ -85,14 +97,19 @@ async def _send_get_data_rpc_request(self, request: RPCUtilityRequest,
85
97
86
98
return data
87
99
88
- async def _send_one_way_rpc_request (self , request : RPC_REQUEST_TYPE ,
89
- error_message : str ):
100
+ async def _send_one_way_rpc_request (self ,
101
+ request : RPC_REQUEST_TYPE ,
102
+ error_message : str ,
103
+ timeout : Optional [int ] = None ):
90
104
"""Send one-way RPC request to trigger an action."""
91
105
with self .socket () as socket :
92
106
# Ping RPC Server with request.
93
107
await socket .send (cloudpickle .dumps (request ))
94
108
95
109
# Await acknowledgement from RPCServer.
110
+ if timeout is not None and await socket .poll (timeout = timeout ) == 0 :
111
+ raise TimeoutError (f"server didn't reply within { timeout } ms" )
112
+
96
113
response = cloudpickle .loads (await socket .recv ())
97
114
98
115
if not isinstance (response , str ) or response != VLLM_RPC_SUCCESS_STR :
@@ -117,7 +134,8 @@ async def wait_for_server(self):
117
134
118
135
await self ._send_one_way_rpc_request (
119
136
request = RPCUtilityRequest .IS_SERVER_READY ,
120
- error_message = "Unable to start RPC Server." )
137
+ error_message = "Unable to start RPC Server." ,
138
+ timeout = SERVER_START_TIMEOUT_MS )
121
139
122
140
async def _get_model_config_rpc (self ) -> ModelConfig :
123
141
"""Get the ModelConfig object from the RPC Server"""
0 commit comments