
Commit f02eef0

pooyadavoodi authored and LeiWang1999 committed
[Frontend] Publish Prometheus metrics in run_batch API (vllm-project#7641)
Signed-off-by: LeiWang1999 <[email protected]>
1 parent 7a1c5b0 commit f02eef0

2 files changed: 76 additions, 0 deletions

tests/entrypoints/openai/test_metrics.py

Lines changed: 49 additions & 0 deletions
@@ -1,3 +1,7 @@
+import subprocess
+import sys
+import tempfile
+import time
 from http import HTTPStatus
 
 import openai
@@ -177,3 +181,48 @@ async def test_metrics_exist(client: openai.AsyncOpenAI):
 
     for metric in EXPECTED_METRICS:
         assert metric in response.text
+
+
+def test_metrics_exist_run_batch():
+    input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}"""  # noqa: E501
+
+    base_url = "0.0.0.0"
+    port = "8001"
+    server_url = f"http://{base_url}:{port}"
+
+    with tempfile.NamedTemporaryFile(
+            "w") as input_file, tempfile.NamedTemporaryFile(
+                "r") as output_file:
+        input_file.write(input_batch)
+        input_file.flush()
+        proc = subprocess.Popen([
+            sys.executable,
+            "-m",
+            "vllm.entrypoints.openai.run_batch",
+            "-i",
+            input_file.name,
+            "-o",
+            output_file.name,
+            "--model",
+            "intfloat/e5-mistral-7b-instruct",
+            "--enable-metrics",
+            "--url",
+            base_url,
+            "--port",
+            port,
+        ], )
+
+        def is_server_up(url):
+            try:
+                response = requests.get(url)
+                return response.status_code == 200
+            except requests.ConnectionError:
+                return False
+
+        while not is_server_up(server_url):
+            time.sleep(1)
+
+        response = requests.get(server_url + "/metrics")
+        assert response.status_code == HTTPStatus.OK
+
+        proc.wait()
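
The test only asserts that the /metrics endpoint answers with HTTP 200. As a side note, here is a minimal sketch (not part of this commit) of inspecting the scraped payload with prometheus_client's text parser, assuming the batch runner is already serving metrics on 0.0.0.0:8001 as in the test above:

    # Sketch only: parse the Prometheus text exposition format from /metrics.
    # Assumes run_batch was started with --enable-metrics on port 8001.
    import requests
    from prometheus_client.parser import text_string_to_metric_families

    resp = requests.get("http://0.0.0.0:8001/metrics")
    for family in text_string_to_metric_families(resp.text):
        for sample in family.samples:
            print(sample.name, sample.labels, sample.value)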

vllm/entrypoints/openai/run_batch.py

Lines changed: 27 additions & 0 deletions
@@ -3,6 +3,7 @@
 from typing import Awaitable, Callable, List
 
 import aiohttp
+from prometheus_client import start_http_server
 
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -56,6 +57,24 @@ def parse_args():
         'ID numbers being printed in log.'
         '\n\nDefault: Unlimited')
 
+    parser.add_argument("--enable-metrics",
+                        action="store_true",
+                        help="Enable Prometheus metrics")
+    parser.add_argument(
+        "--url",
+        type=str,
+        default="0.0.0.0",
+        help="URL to the Prometheus metrics server "
+        "(only needed if enable-metrics is set).",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8000,
+        help="Port number for the Prometheus metrics server "
+        "(only needed if enable-metrics is set).",
+    )
+
     return parser.parse_args()
 
 
@@ -184,4 +203,12 @@ async def main(args):
     logger.info("vLLM batch processing API version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
+    # Start the Prometheus metrics server. LLMEngine uses the Prometheus client
+    # to publish metrics at the /metrics endpoint.
+    if args.enable_metrics:
+        logger.info("Prometheus metrics enabled")
+        start_http_server(port=args.port, addr=args.url)
+    else:
+        logger.info("Prometheus metrics disabled")
+
     asyncio.run(main(args))
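
With this change, metrics publishing for batch runs is opt-in from the command line. A usage sketch (the input/output paths are placeholders; the flags and model name come from the diff and test above):

    python -m vllm.entrypoints.openai.run_batch \
        -i input.jsonl \
        -o output.jsonl \
        --model intfloat/e5-mistral-7b-instruct \
        --enable-metrics \
        --url 0.0.0.0 \
        --port 8000

    curl http://0.0.0.0:8000/metrics

prometheus_client's start_http_server serves the default registry from a daemon thread, so the /metrics endpoint stays scrapable while the batch job runs and shuts down with the process.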
