File tree: 1 file changed, +11 -2
lines changed Original file line number Diff line number Diff line change 2222 logger ,
2323 worker_id ,
2424)
25- from fastchat .utils import get_context_length
25+ from fastchat .utils import get_context_length , is_partial_stop
2626
2727
2828app = FastAPI ()
@@ -119,7 +119,12 @@ async def generate_stream(self, params):
119119 else :
120120 text_outputs = [output .text for output in request_output .outputs ]
121121 text_outputs = " " .join (text_outputs )
122- # Note: usage is not supported yet
122+
123+ partial_stop = any (is_partial_stop (text_outputs , i ) for i in stop )
124+ # prevent yielding partial stop sequence
125+ if partial_stop :
126+ continue
127+
123128 prompt_tokens = len (request_output .prompt_token_ids )
124129 completion_tokens = sum (
125130 len (output .token_ids ) for output in request_output .outputs
@@ -139,6 +144,10 @@ async def generate_stream(self, params):
139144 if len (request_output .outputs ) == 1
140145 else [output .finish_reason for output in request_output .outputs ],
141146 }
147+ # Emit twice here to ensure a 'finish_reason' with empty content in the OpenAI API response.
148+ # This aligns with the behavior of model_worker.
149+ if request_output .finished :
150+ yield (json .dumps (ret | {"finish_reason" : None }) + "\0 " ).encode ()
142151 yield (json .dumps (ret ) + "\0 " ).encode ()
143152
144153 async def generate (self , params ):
You can’t perform that action at this time.
0 commit comments