9 changes: 3 additions & 6 deletions fastchat/conversation.py
@@ -54,8 +54,6 @@ class Conversation:
     stop_str: Union[str, List[str]] = None
     # Stops generation if meeting any token in this list
     stop_token_ids: List[int] = None
-    # Tags to be used in the template
-    tags: Tuple[str] = None
 
     def get_prompt(self) -> str:
         """Get the prompt for generation."""
@@ -130,7 +128,7 @@ def get_prompt(self) -> str:
             else:
                 ret = "[INST] "
             for i, (role, message) in enumerate(self.messages):
-                tag = self.tags[i % 2]
+                tag = self.roles[i % 2]
                 if message:
                     if i == 0:
                         ret += message + " "
@@ -269,7 +267,6 @@ def copy(self):
             sep2=self.sep2,
             stop_str=self.stop_str,
             stop_token_ids=self.stop_token_ids,
-            tags=self.tags,
         )
 
     def dict(self):
@@ -850,7 +847,7 @@ def get_conv_template(name: str) -> Conversation:
     Conversation(
         name="mistral",
         system_template="",
-        tags=("[INST]", "[/INST]"),
+        roles=("[INST]", "[/INST]"),
         sep_style=SeparatorStyle.LLAMA2,
         sep=" ",
         sep2="</s>",
@@ -864,7 +861,7 @@ def get_conv_template(name: str) -> Conversation:
     Conversation(
         name="llama-2",
         system_template="[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n",
-        tags=("[INST]", "[/INST]"),
+        roles=("[INST]", "[/INST]"),
         sep_style=SeparatorStyle.LLAMA2,
         sep=" ",
         sep2=" </s><s>",
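With the separate `tags` field removed, the LLAMA2-style templates read their `[INST]`/`[/INST]` markers straight from `roles`. A minimal sketch of the prompt assembly this enables, assuming the two-turn alternation used by `get_prompt` (separator handling is elided, so this is a simplification, not the full method):

# Sketch of the llama-2/mistral prompt loop after this PR (simplified:
# the real loop also appends self.sep/self.sep2 between turns).
roles = ("[INST]", "[/INST]")
messages = [
    ("[INST]", "What is the capital of France?"),
    ("[/INST]", "The capital of France is Paris."),
]

ret = "[INST] "  # no system message in this sketch
for i, (role, message) in enumerate(messages):
    tag = roles[i % 2]  # previously self.tags[i % 2]
    if message:
        if i == 0:
            ret += message + " "
        else:
            ret += tag + " " + message
    else:
        ret += tag

print(ret)
# [INST] What is the capital of France? [/INST] The capital of France is Paris.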
4 changes: 1 addition & 3 deletions fastchat/serve/base_model_worker.py
@@ -2,7 +2,6 @@
 import threading
 import time
 from typing import List
-import uuid
 
 from fastapi import FastAPI, Request, BackgroundTasks
 from fastapi.responses import StreamingResponse, JSONResponse
@@ -13,7 +12,6 @@
 from fastchat.utils import pretty_print_semaphore, build_logger
 
 
-worker_id = str(uuid.uuid4())[:8]
 worker = None
 logger = None
 
@@ -56,7 +54,7 @@ def __init__(
         self.heart_beat_thread = None
 
         if logger is None:
-            logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
+            logger = build_logger("model_worker", f"model_worker_{self.worker_id}.log")
         if worker is None:
             worker = self
 
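The module-level `worker_id` and its `uuid` import are gone; the log file is now named after the id the worker instance itself carries, so the log name always matches the id the worker registers with. A hedged sketch of the resulting flow (the constructor is heavily abridged, and `log_name` stands in for the `build_logger` call):

import uuid

class BaseModelWorker:
    # Simplified: the real __init__ also takes controller/worker addresses,
    # model names, concurrency limits, etc.
    def __init__(self, worker_id: str):
        self.worker_id = worker_id
        # Before this PR the log name used a separate module-level uuid,
        # which could disagree with the id passed in here.
        self.log_name = f"model_worker_{self.worker_id}.log"

worker = BaseModelWorker(worker_id=str(uuid.uuid4())[:8])
print(worker.log_name)  # e.g. model_worker_1a2b3c4d.log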
2 changes: 1 addition & 1 deletion fastchat/serve/cli.py
@@ -104,7 +104,7 @@ def prompt_for_input(self, role) -> str:
         return prompt_input
 
     def prompt_for_output(self, role: str):
-        self._console.print(f"[bold]{role}:")
+        self._console.print(f"[bold]{role.replace('/', '|')}:")
 
     def stream_output(self, output_stream):
         """Stream output from a role."""
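This escape matters because the CLI prints through Rich's markup: with the templates above, a role can literally be `[/INST]`, and `[bold][/INST]:` makes Rich parse `[/INST]` as a mismatched closing tag. Replacing `/` with `|` leaves the bracketed text inert. A small repro sketch against the `rich` API the CLI already uses:

from rich.console import Console
from rich.errors import MarkupError

console = Console()
role = "[/INST]"  # a role string from the llama-2/mistral templates

try:
    console.print(f"[bold]{role}:")  # Rich treats [/INST] as a closing tag
except MarkupError as err:
    print("markup error:", err)

console.print(f"[bold]{role.replace('/', '|')}:")  # prints [|INST]: safely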
20 changes: 1 addition & 19 deletions fastchat/serve/model_worker.py
@@ -3,28 +3,12 @@
"""
import argparse
import base64
import dataclasses
import gc
import json
import os
from typing import List, Optional
import uuid


try:
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
LlamaTokenizer,
AutoModel,
)
except ImportError:
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
LLaMATokenizer,
AutoModel,
)
import torch
import torch.nn.functional as F
from transformers import set_seed
@@ -36,17 +20,15 @@
     add_model_args,
     get_generate_stream_function,
 )
-from fastchat.serve.base_model_worker import BaseModelWorker, app
-from fastchat.modules.gptq import GptqConfig
 from fastchat.modules.awq import AWQConfig
+from fastchat.modules.exllama import ExllamaConfig
 from fastchat.modules.gptq import GptqConfig
 from fastchat.serve.base_model_worker import BaseModelWorker, app
 from fastchat.utils import (
     build_logger,
     get_context_length,
     str_to_torch_dtype,
 )
-from fastchat.utils import build_logger, get_context_length
 
 
 worker_id = str(uuid.uuid4())[:8]
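The deleted try/except dates from early `transformers` pre-releases that spelled the class `LLaMATokenizer`; releases that ship Llama support expose `LlamaTokenizer` directly, so the fallback was dead code. If a guard were still wanted, a version check would be the modern form (a sketch under the assumption that the spelling landed with transformers 4.28, not part of this PR):

from packaging import version
import transformers

# Assumption: FastChat already requires a transformers recent enough
# to ship the LlamaTokenizer spelling (believed introduced in 4.28).
if version.parse(transformers.__version__) < version.parse("4.28.0"):
    raise ImportError("transformers>=4.28 is required for LlamaTokenizer")

from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer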
6 changes: 3 additions & 3 deletions fastchat/serve/vllm_worker.py
@@ -121,9 +121,9 @@ async def generate_stream(self, params):
"output_token_len": [
len(output.token_ids) for output in request_output.outputs
],
"finish_reason": [
output.finish_reason for output in request_output.outputs
],
"finish_reason": request_output.outputs[0].finish_reason
if len(request_output.outputs) == 1
else [output.finish_reason for output in request_output.outputs],
}
yield (json.dumps(ret) + "\0").encode()

Expand Down
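`finish_reason` now degrades to a plain string when the request produced a single completion and stays a list only for multiple outputs (`n > 1`), which is the shape OpenAI-style clients expect. A sketch of the two payload shapes, using a stand-in dataclass rather than vLLM's real `RequestOutput`:

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class CompletionOutput:  # stand-in for vllm.outputs.CompletionOutput
    text: str
    finish_reason: Optional[str]

def finish_reason_field(outputs: List[CompletionOutput]):
    # Mirrors the diff: scalar for a single output, list otherwise.
    if len(outputs) == 1:
        return outputs[0].finish_reason
    return [output.finish_reason for output in outputs]

print(finish_reason_field([CompletionOutput("Paris.", "stop")]))
# -> 'stop'
print(finish_reason_field(
    [CompletionOutput("a", "stop"), CompletionOutput("b", "length")]
))
# -> ['stop', 'length']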