Skip to content

Commit f8f302f

Browse files
authored
Fix chunk handling when partial chunks are returned (#2485)
1 parent 2855bf9 commit f8f302f

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

fastchat/serve/openai_api_server.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -599,12 +599,14 @@ async def generate_completion_stream(payload: Dict[str, Any], worker_addr: str):
599 599
timeout=WORKER_API_TIMEOUT,
600 600
) as response:
601 601
# content = await response.aread()
602+
buffer = b""
602 603
async for raw_chunk in response.aiter_raw():
603-
for chunk in raw_chunk.split(delimiter):
604+
buffer += raw_chunk
605+
while (chunk_end := buffer.find(delimiter)) >= 0:
606+
chunk, buffer = buffer[:chunk_end], buffer[chunk_end + 1 :]
604 607
if not chunk:
605 608
continue
606-
data = json.loads(chunk.decode())
607-
yield data
609+
yield json.loads(chunk.decode())
608 610

609 611

610 612
async def generate_completion(payload: Dict[str, Any], worker_addr: str):

0 commit comments

Comments (0)