Skip to content

Commit decceed

Browse files
authored
Add deepseek chat (#2760)
1 parent 686ab04 commit decceed

File tree

3 files changed

+51
-1
lines changed

3 files changed

+51
-1
lines changed

docs/model_support.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
- [OpenLemur/lemur-70b-chat-v1](https://huggingface.co/OpenLemur/lemur-70b-chat-v1)
5555
- [allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b)
5656
- [Microsoft/Orca-2-7b](https://huggingface.co/microsoft/Orca-2-7b)
57+
- [deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat)
58+
- [deepseek-ai/deepseek-coder-33b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct)
5759
- Any [EleutherAI](https://huggingface.co/EleutherAI) pythia model such as [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
5860
- Any [Peft](https://github.com/huggingface/peft) adapter trained on top of a
5961
model above. To activate, must have `peft` in the model path. Note: If

fastchat/conversation.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class SeparatorStyle(IntEnum):
2929
ROBIN = auto()
3030
FALCON_CHAT = auto()
3131
CHATGLM3 = auto()
32+
DEEPSEEK_CHAT = auto()
3233

3334

3435
@dataclasses.dataclass
@@ -224,6 +225,15 @@ def get_prompt(self) -> str:
224225
ret += role + ":"
225226

226227
return ret
228+
elif self.sep_style == SeparatorStyle.DEEPSEEK_CHAT:
229+
seps = [self.sep, self.sep2]
230+
ret = system_prompt
231+
for i, (role, message) in enumerate(self.messages):
232+
if message:
233+
ret += role + ": " + message + seps[i % 2]
234+
else:
235+
ret += role + ":"
236+
return ret
227237
else:
228238
raise ValueError(f"Invalid style: {self.sep_style}")
229239

@@ -530,7 +540,7 @@ def get_conv_template(name: str) -> Conversation:
530540
# Deepseek coder default template
531541
register_conv_template(
532542
Conversation(
533-
name="deepseek",
543+
name="deepseek-coder",
534544
system_template="You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.",
535545
roles=("### Instruction:", "### Response:"),
536546
sep="\n",
@@ -1265,6 +1275,20 @@ def get_conv_template(name: str) -> Conversation:
12651275
)
12661276
)
12671277

1278+
# Conversation template for the Deepseek chat models.
# reference: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/blob/main/tokenizer_config.json
register_conv_template(
    Conversation(
        name="deepseek-chat",
        # A bos token must be placed before the first message, so it is
        # supplied as the system message.
        system_message="<|begin▁of▁sentence|>",
        roles=("User", "Assistant"),
        sep_style=SeparatorStyle.DEEPSEEK_CHAT,
        # Separator emitted after even-indexed messages.
        sep="\n\n",
        # Separator emitted after odd-indexed messages; the same token is
        # also used as the stop string.
        sep2="<|end▁of▁sentence|>",
        stop_str="<|end▁of▁sentence|>",
    )
)
1291+
12681292
if __name__ == "__main__":
12691293
from fastchat.conversation import get_conv_template
12701294

fastchat/model/model_adapter.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1934,6 +1934,28 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
19341934
return get_conv_template("Yi-34b-chat")
19351935

19361936

1937+
class DeepseekCoderAdapter(BaseModelAdapter):
    """Model adapter for the deepseek-ai coder models."""

    def match(self, model_path: str):
        # Case-insensitive substring check on the model path.
        lowered_path = model_path.lower()
        return "deepseek-coder" in lowered_path

    def get_default_conv_template(self, model_path: str) -> Conversation:
        # The template is fixed; model_path is not consulted here.
        template_name = "deepseek-coder"
        return get_conv_template(template_name)
1945+
1946+
1947+
class DeepseekChatAdapter(BaseModelAdapter):
    """Model adapter for the deepseek-ai chat models."""

    # Note: this model requires tokenizer version >= 0.13.3 because its
    # tokenizer class is LlamaTokenizerFast.

    def match(self, model_path: str):
        # Both markers must appear somewhere in the lower-cased path.
        lowered_path = model_path.lower()
        if "deepseek-llm" not in lowered_path:
            return False
        return "chat" in lowered_path

    def get_default_conv_template(self, model_path: str) -> Conversation:
        # The template is fixed; model_path is not consulted here.
        template_name = "deepseek-chat"
        return get_conv_template(template_name)
1957+
1958+
19371959
# Note: the registration order matters.
19381960
# The one registered earlier has a higher matching priority.
19391961
register_model_adapter(PeftModelAdapter)
@@ -2005,6 +2027,8 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
20052027
register_model_adapter(PygmalionAdapter)
20062028
register_model_adapter(MicrosoftOrcaAdapter)
20072029
register_model_adapter(YiAdapter)
2030+
register_model_adapter(DeepseekCoderAdapter)
2031+
register_model_adapter(DeepseekChatAdapter)
20082032

20092033
# After all adapters, try the default base adapter.
20102034
register_model_adapter(BaseModelAdapter)

0 commit comments

Comments
 (0)