@@ -17,7 +17,7 @@
 from vllm.executor.executor_base import ExecutorAsyncBase
 from vllm.executor.gpu_executor import GPUExecutorAsync
 from vllm.executor.ray_utils import initialize_ray_cluster
-from vllm.inputs import PromptInputs
+from vllm.inputs import PromptType
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.layers.sampler import SamplerOutput
@@ -405,7 +405,7 @@ async def stop_remote_worker_execution_loop_async(self) -> None:
     async def add_request_async(
         self,
         request_id: str,
-        inputs: PromptInputs,
+        prompt: PromptType,
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
@@ -420,7 +420,7 @@ async def add_request_async(
             arrival_time = time.time()
 
         preprocessed_inputs = await self.input_preprocessor.preprocess_async(
-            inputs,
+            prompt,
             request_id=request_id,
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
@@ -777,7 +777,7 @@ async def run_engine_loop(engine_ref: ReferenceType):
     async def add_request(
         self,
         request_id: str,
-        inputs: PromptInputs,
+        prompt: PromptType,
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
@@ -797,7 +797,7 @@ async def add_request(
         stream = self._request_tracker.add_request(
             request_id,
             verbose=self.log_requests,
-            inputs=inputs,
+            prompt=prompt,
             params=params,
             arrival_time=arrival_time or time.time(),
             lora_request=lora_request,
@@ -808,7 +808,7 @@ async def add_request(
 
     async def generate(
         self,
-        inputs: PromptInputs,
+        prompt: PromptType,
         sampling_params: SamplingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -822,8 +822,7 @@ async def generate(
         from the LLMEngine to the caller.
 
         Args:
-            inputs: The inputs to the LLM. See
-                :class:`~vllm.inputs.PromptInputs`
+            prompt: The prompt to the LLM. See :class:`~vllm.inputs.PromptType`
                 for more details about the format of each input.
             sampling_params: The sampling parameters of the request.
             request_id: The unique id of the request.
@@ -881,7 +880,7 @@ async def generate(
         """
         async for output in await self.add_request(
             request_id,
-            inputs,
+            prompt,
             sampling_params,
             lora_request=lora_request,
             trace_headers=trace_headers,
@@ -891,7 +890,7 @@ async def generate(
 
     async def encode(
         self,
-        inputs: PromptInputs,
+        prompt: PromptType,
         pooling_params: PoolingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -904,8 +903,7 @@ async def encode(
         from the LLMEngine to the caller.
 
         Args:
-            inputs: The inputs to the LLM. See
-                :class:`~vllm.inputs.PromptInputs`
+            prompt: The prompt to the LLM. See :class:`~vllm.inputs.PromptType`
                 for more details about the format of each input.
             pooling_params: The pooling parameters of the request.
             request_id: The unique id of the request.
@@ -959,7 +957,7 @@ async def encode(
         """
         async for output in await self.add_request(
             request_id,
-            inputs,
+            prompt,
             pooling_params,
             lora_request=lora_request,
             trace_headers=trace_headers,
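For reference, a minimal sketch of driving the renamed parameter from client code, assuming the `AsyncLLMEngine` interface as it stands after this change; the model id and request id below are placeholders for illustration, not part of the diff:

```python
import asyncio

from vllm import SamplingParams
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine


async def main() -> None:
    # Placeholder model id; any model vLLM supports works here.
    engine = AsyncLLMEngine.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m"))
    params = SamplingParams(temperature=0.8, max_tokens=32)

    # `prompt` is the parameter renamed from `inputs` in this change;
    # generate() is an async generator that yields RequestOutputs
    # until the request finishes.
    final_output = None
    async for output in engine.generate(
            prompt="Hello, my name is",
            sampling_params=params,
            request_id="request-0"):
        final_output = output

    if final_output is not None:
        print(final_output.outputs[0].text)


asyncio.run(main())
```

`encode` follows the same shape with the renamed `prompt` as its first parameter, taking `PoolingParams` instead of `SamplingParams`.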