
[Frontend] Add readiness and liveness endpoints to OpenAI API server #7078

Closed

Changes from 15 commits

19 changes: 19 additions & 0 deletions tests/entrypoints/openai/test_basic.py
@@ -59,3 +59,22 @@ async def test_log_metrics(client: openai.AsyncOpenAI):
    response = requests.get(base_url + "/metrics")

    assert response.status_code == HTTPStatus.OK


@pytest.mark.asyncio
async def test_get_readiness_ok(client: openai.AsyncOpenAI):
    """Test the technical route /readiness when the model is fully loaded"""
    base_url = str(client.base_url)[:-3].strip("/")

    response = requests.get(base_url + "/ready")

    assert response.status_code == HTTPStatus.OK

@pytest.mark.asyncio
async def test_get_readiness_ok(client: openai.AsyncOpenAI):
Member:

I guess you're going to update this test to check when the server is not ready?

Contributor Author:

yes exactly, I am working on it

mfournioux (Contributor Author), Aug 8, 2024:

Regarding readiness, I have worked on creating a proper unit test for when the server is not ready. I was thinking of testing that, when the model weights are not loaded or the KV cache is not set up, the readiness endpoint returns an error message.

But when I checked how the vLLM server is launched, I realized that the endpoints are not callable until the server is fully deployed and the model is loaded with the KV cache set up. So I don't see how I can test the case where the model weights are not loaded or the KV cache is not set up, because until those conditions are met the readiness endpoint is not callable.

So, do you have any other idea how to do this test?

Is it compulsory to add this test for the PR to be merged?
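
One illustrative possibility, outside the scope of this PR: exercise the readiness logic against a small stand-in app rather than the live server. DummyWorker and make_app below are hypothetical names, and the check simply mirrors the model_memory_usage > 0 condition used by the endpoint in this PR.

from http import HTTPStatus

from fastapi import FastAPI, Response
from fastapi.testclient import TestClient


class DummyWorker:
    """Stand-in for the driver worker; model_memory_usage mimics the real attribute."""

    def __init__(self, model_memory_usage: int):
        self.model_runner = self  # flatten the attribute chain for the sketch
        self.model_memory_usage = model_memory_usage


def make_app(worker: DummyWorker) -> FastAPI:
    app = FastAPI()

    @app.get("/ready")
    async def get_readiness() -> Response:
        # Same condition as the endpoint in this PR: loaded weights => ready.
        if worker.model_runner.model_memory_usage > 0:
            return Response(status_code=200)
        return Response(status_code=500)

    return app


def test_get_readiness_not_ready():
    client = TestClient(make_app(DummyWorker(model_memory_usage=0)))
    assert client.get("/ready").status_code == HTTPStatus.INTERNAL_SERVER_ERROR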

DarkLight1337 (Member), Aug 8, 2024:

Hmm, this would basically defeat the purpose of this PR in terms of fulfilling #6073. If the server cannot accept any requests until everything has been fully loaded, then there is essentially no difference between /ready and /health. Instead, we should enable the /health endpoint to respond before the vLLM engine has finished starting up.
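
For illustration, a minimal sketch (not vLLM's actual implementation) of the split being suggested here: the HTTP server starts immediately and answers /health, while /ready only returns 200 once a hypothetical engine_ready flag is set after engine initialization.

import asyncio

from fastapi import FastAPI, Response

# Illustrative sketch only: bring up the web server first, initialize the
# engine in the background, and gate /ready on that initialization finishing.
app = FastAPI()
engine_ready = asyncio.Event()  # assumed flag; the real server would track engine state


async def init_engine() -> None:
    # Placeholder for loading model weights and setting up the KV cache.
    await asyncio.sleep(30)
    engine_ready.set()


@app.on_event("startup")
async def start_engine_init() -> None:
    # Kick off engine initialization without blocking the HTTP server.
    asyncio.create_task(init_engine())


@app.get("/health")
async def health() -> Response:
    # Liveness: the web server process is up, even while the engine is loading.
    return Response(status_code=200)


@app.get("/ready")
async def ready() -> Response:
    # Readiness: only report 200 once the engine has finished initializing.
    return Response(status_code=200 if engine_ready.is_set() else 503)

With a split like this, a k8s liveness probe could target /health from the moment the pod starts, and a readiness probe could target /ready so that no traffic is routed before the engine is loaded.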

mfournioux (Contributor Author), Aug 9, 2024:

I understand your point. Furthermore, after checking the new 0.5.4 release, I noticed that several updates have been added to the RPC server to check whether it is ready. So I don't think the readiness endpoint implemented in this PR is still useful.

These are the next possible actions I propose:

  • Close this PR, for two reasons:
  1. The readiness endpoint implemented in this PR checks whether the model weights are loaded and the KV cache is set up. An audit of the code shows that the server cannot be up until the weights and KV cache are properly loaded. In addition, the 0.5.4 release adds updates that determine whether the RPC server is ready. So there is no point in checking these here, as the health endpoint will already cover it.
  2. As cited in [Feature]: Add readiness endpoint /ready and return /health earlier (vLLM on Kubernetes) #6073, for deployment on k8s there is a need to implement k8s probes (startup, readiness, liveness) to get an autonomous deployment that waits for the model to be loaded and then marks the pod as ready once the health endpoint returns 200. I think this is not directly related to the vLLM server; it is more about writing a proper Helm chart that configures it.
  • I can open a new PR that proposes example Helm charts for vLLM deployment on k8s, including k8s probes.

frittentheke (Contributor), Aug 10, 2024:

@mfournioux thanks for all your work on this feature.

I do have a deployment using a startup probe and liveness checks afterwards.

The main issue I have with the startup probe is that it is a workaround for applications with a potentially long startup that are unable to communicate their readiness. One never knows how much time a pod needs (downloading the model, weights, ...), and in the meantime liveness cannot be checked.

Switching to a liveness check that is available very early during startup would be nice, but that then requires a readiness indicator so that no traffic is sent until vLLM is ready.

I have not looked at the recent changes yet. But there really should be a way to bring up the webserver (endpoint) early and also to indicate when it's ready.

Additionally, I would love for some metrics to also be returned during the initialization phase, so that it can be observed.

As for the Helm chart idea, I am thrilled about an official chart, so people don't have to individually write their deployments and figure out how best to configure vLLM and its checks, storage, and caching. Good liveness and readiness checks are also something that could come with it. I still stand behind the proposal that vLLM should be a better K8s citizen and provide these endpoints as best as possible.

mfournioux (Contributor Author), Oct 9, 2024:

@frittentheke I have opened PR #9199 to share a Helm chart as an example of how to deploy vLLM on k8s, including probe configuration.

"""Test the technical route /readiness when the model is fully loaded"""
base_url = str(client.base_url)[:-3].strip("/")

response = requests.get(base_url + "/ready")

assert response.status_code == HTTPStatus.OK
16 changes: 16 additions & 0 deletions vllm/entrypoints/openai/api_server.py
@@ -90,6 +90,22 @@ async def health() -> Response:
    return Response(status_code=200)


@router.get(
    "/ready",
    name="readiness",
    tags=["technical"],
)
async def get_readiness() -> Response:
    """Readiness probe for k8s"""
    d_worker = openai_serving_chat.engine.engine.model_executor.driver_worker
    model_weights = d_worker.model_runner.model_memory_usage

    if model_weights > 0:
        return Response(status_code=200)
    else:
        return Response(status_code=500)


@router.post("/tokenize")
async def tokenize(request: TokenizeRequest):
    generator = await openai_serving_tokenization.create_tokenize(request)
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/protocol.py
@@ -719,4 +719,4 @@ class DetokenizeRequest(OpenAIBaseModel):


class DetokenizeResponse(OpenAIBaseModel):
    prompt: str
    prompt: str
DarkLight1337 (Member), Aug 5, 2024:

Please avoid deleting the last line here. (Since otherwise, the file remains unchanged)
