Skip to content

Commit 3d39d1f

Browse files
committed
Ensure metrics are logged regardless of requests
Metrics are currently logged only at the end of each engine step. When there are no requests, no steps run, so no new logs or metrics are produced and the last recorded values keep being reported to Prometheus indefinitely. Also, for some reason, the server always reports one running request.
1 parent aee8ef6 commit 3d39d1f

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

vllm/engine/async_llm_engine.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,3 +506,9 @@ def from_engine_args(cls,
506506
max_log_len=engine_args.max_log_len,
507507
start_engine_loop=start_engine_loop)
508508
return engine
509+
510+
async def do_log_stats(self) -> None:
    """Ask the wrapped engine to log its stats right now.

    When ``self.engine_use_ray`` is truthy the engine's ``do_log_stats`` is
    invoked through ``.remote()`` and the result is awaited; otherwise the
    engine method is called directly and synchronously.
    """
    # Direct path first: nothing to await when the engine is called in-process.
    if not self.engine_use_ray:
        self.engine.do_log_stats()
        return

    await self.engine.do_log_stats.remote()

vllm/engine/llm_engine.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,9 @@ def step(self) -> List[RequestOutput]:
641641

642642
return self._process_model_outputs(output, scheduler_outputs)
643643

644+
def do_log_stats(self) -> None:
    """Force a stats-logging pass outside of ``step``.

    Delegates to ``_log_system_stats`` with ``prompt_run=False`` and ``0``
    as the second positional argument (presumably the number of batched
    tokens -- confirm against ``_log_system_stats``).
    """
    prompt_run = False
    second_arg = 0  # NOTE(review): meaning of this parameter is not visible here.
    self._log_system_stats(prompt_run, second_arg)
646+
644647
def _log_system_stats(
645648
self,
646649
prompt_run: bool,

vllm/entrypoints/openai/api_server.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import codecs
77
import json
88
import time
9+
from contextlib import asynccontextmanager
910
from http import HTTPStatus
1011
from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
1112

@@ -38,11 +39,28 @@
3839

3940
logger = init_logger(__name__)
4041
served_model = None
41-
app = fastapi.FastAPI()
42+
engine_args = None
4243
engine = None
4344
response_role = None
4445

4546

47+
@asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    """Application lifespan hook that keeps engine stats flowing while idle.

    On startup, unless ``engine_args.disable_log_stats`` is set, a background
    task is started that calls ``engine.do_log_stats()`` every 10 seconds so
    that stats are logged even when no requests are being processed. On
    shutdown the task is cancelled and awaited so it does not outlive the
    application.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        Nothing; control returns to FastAPI while the application is serving.
    """

    async def _force_log():
        # Periodically push a stats-logging pass through the engine.
        while True:
            await asyncio.sleep(10)
            await engine.do_log_stats()

    # Hold a strong reference to the task: the event loop only keeps weak
    # references, so a fire-and-forget task may be garbage-collected
    # before (or while) it runs.
    log_task = None
    if not engine_args.disable_log_stats:
        log_task = asyncio.create_task(_force_log())

    try:
        yield
    finally:
        if log_task is not None:
            log_task.cancel()
            try:
                await log_task
            except asyncio.CancelledError:
                # Expected: we just cancelled the task ourselves.
                pass
            except Exception:
                # The task had already died earlier; report it rather than
                # turning application shutdown into a crash.
                logger.exception("Periodic stats logging task failed")
59+
60+
61+
app = fastapi.FastAPI(lifespan=lifespan)
62+
63+
4664
def parse_args():
4765
parser = argparse.ArgumentParser(
4866
description="vLLM OpenAI-Compatible RESTful API server.")

0 commit comments

Comments
 (0)