-
-
Notifications
You must be signed in to change notification settings - Fork 9.4k
[Frontend] Kill the server on engine death #6594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
0df9465
0f8ffc9
6c20c20
fc386c4
d94a88b
1537c9d
ecd88b3
cff5a19
bb459bc
214585f
b846a86
bd491bb
8e34011
f98b441
34a46ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import json | ||
import os | ||
|
||
import openai | ||
import pytest | ||
|
||
from ...utils import RemoteOpenAIServer | ||
|
||
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" | ||
|
||
|
||
@pytest.mark.asyncio | ||
async def test_shutdown_on_engine_failure(tmp_path): | ||
# Use a bad adapter to crash the engine | ||
# (This test will fail when that bug is fixed) | ||
os.mkdir(tmp_path / "bad_adapter") | ||
with open(tmp_path / "bad_adapter" / "adapter_model_config.json", | ||
"w") as f: | ||
json.dump({"not": "real"}, f) | ||
with open(tmp_path / "bad_adapter" / "adapter_model.safetensors", | ||
"wb") as f: | ||
f.write(b"this is fake") | ||
|
||
args = [ | ||
"--dtype", | ||
"bfloat16", | ||
"--enforce-eager", | ||
"--enable-lora", | ||
"--lora-modules", | ||
f"bad-adapter={tmp_path / 'bad_adapter'}", | ||
] | ||
|
||
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: | ||
client = remote_server.get_async_client() | ||
|
||
with pytest.raises(openai.APIConnectionError): | ||
# This crashes the engine | ||
await client.completions.create(model="bad-adapter", | ||
prompt="Hello, my name is") | ||
|
||
# Now the server should shut down | ||
rc = remote_server.proc.wait(timeout=1) | ||
assert rc is not None |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,6 +134,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: | |
help="When --max-logprobs is specified, represents single tokens as" | ||
"strings of the form 'token_id:{token_id}' so that tokens that" | ||
"are not JSON-encodable can be identified.") | ||
parser.add_argument("--keep-alive-on-engine-death", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only minor but I wonder if this would be better as an env var since I think it would only be used in debugging scenarios... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, yeah I can change that. I only recently learned that there's an explicit split between the cli args and |
||
action="store_true", | ||
help="The default behavior is to stop the server " | ||
"process when the LLM engine dies. Set this flag to " | ||
"keep the server up instead.") | ||
|
||
parser = AsyncEngineArgs.add_cli_args(parser) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(This has just triggered me every time I see the log)