Commit df900ef

support Mistral-7b-v0.2 (modelscope#605)
1 parent 06ff0e8 commit df900ef

File tree

4 files changed: 58 additions, 0 deletions


docs/source/LLM/支持的模型和数据集.md (Supported Models and Datasets)

Lines changed: 1 addition & 0 deletions
@@ -141,6 +141,7 @@
 |openbuddy-deepseek-67b-chat|[OpenBuddy/openbuddy-deepseek-67b-v15.2](https://modelscope.cn/models/OpenBuddy/openbuddy-deepseek-67b-v15.2/summary)|q_proj, k_proj, v_proj|openbuddy|✔|✔||-|
 |openbuddy-mixtral-moe-7b-chat|[OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k](https://modelscope.cn/models/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/summary)|q_proj, k_proj, v_proj|openbuddy|✔|✔|transformers>=4.36|-|
 |mistral-7b|[AI-ModelScope/Mistral-7B-v0.1](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔|transformers>=4.34|-|
+|mistral-7b-v2|[AI-ModelScope/Mistral-7B-v0.2-hf](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.2-hf/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔|transformers>=4.34|-|
 |mistral-7b-instruct|[AI-ModelScope/Mistral-7B-Instruct-v0.1](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary)|q_proj, k_proj, v_proj|llama|✔|✔|transformers>=4.34|-|
 |mistral-7b-instruct-v2|[AI-ModelScope/Mistral-7B-Instruct-v0.2](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.2/summary)|q_proj, k_proj, v_proj|llama|✔|✔|transformers>=4.34|-|
 |mixtral-moe-7b|[AI-ModelScope/Mixtral-8x7B-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔|transformers>=4.36|-|

New file: inference script (LoRA checkpoint, mistral-7b-v2)

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
# Experimental environment: A100
# 16GB GPU memory
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_infer.py \
    --ckpt_dir "output/mistral-7b-v2/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --use_flash_attn true \
    --max_new_tokens 2048 \
    --temperature 0.5 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false \
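
For reference, roughly the same inference run can be driven from Python instead of the shell wrapper. This is a minimal sketch, assuming swift.llm exports InferArguments and infer_main at this revision; the checkpoint path is the same placeholder used in the script above.

# Minimal sketch: Python equivalent of the inference script
# (assumes swift.llm exports InferArguments and infer_main at this revision).
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import InferArguments, infer_main

infer_args = InferArguments(
    ckpt_dir='output/mistral-7b-v2/vx-xxx/checkpoint-xxx',  # placeholder, as in the script above
    load_dataset_config=True,
    use_flash_attn=True,
    max_new_tokens=2048,
    temperature=0.5,
    top_p=0.7,
    repetition_penalty=1.,
    do_sample=True,
    merge_lora=False)
result = infer_main(infer_args)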

New file: fine-tuning script (LoRA SFT, mistral-7b-v2)

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
# Experimental environment: A100
# 19GB GPU memory
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_sft.py \
    --model_id_or_path AI-ModelScope/Mistral-7B-v0.2-hf \
    --model_revision master \
    --sft_type lora \
    --tuner_backend swift \
    --template_type AUTO \
    --dtype AUTO \
    --output_dir output \
    --dataset dureader-robust-zh \
    --train_dataset_sample -1 \
    --num_train_epochs 1 \
    --max_length 2048 \
    --check_dataset_strategy warning \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules DEFAULT \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --use_flash_attn true \
    --save_only_model true \
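
The fine-tuning run can likewise be expressed through swift's Python API. This is a minimal sketch, assuming swift.llm exports SftArguments and sft_main at this revision; it passes the newly registered model_type instead of --model_id_or_path, which should resolve to the same AI-ModelScope/Mistral-7B-v0.2-hf checkpoint, and repeats only the key hyperparameters from the script above.

# Minimal sketch: Python equivalent of the fine-tuning script
# (assumes swift.llm exports SftArguments and sft_main at this revision).
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import SftArguments, sft_main

sft_args = SftArguments(
    model_type='mistral-7b-v2',   # resolves via the registration added in model.py below
    sft_type='lora',
    dataset=['dureader-robust-zh'],
    num_train_epochs=1,
    max_length=2048,
    lora_rank=8,
    lora_alpha=32,
    lora_dropout_p=0.05,
    learning_rate=1e-4,
    gradient_accumulation_steps=16,
    use_flash_attn=True,
    output_dir='output')
result = sft_main(sft_args)
best_ckpt = result['best_model_checkpoint']  # assumed return key holding the best checkpoint path
print(best_ckpt)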

swift/llm/utils/model.py

Lines changed: 9 additions & 0 deletions
@@ -191,6 +191,7 @@ class ModelType:
     openbuddy_mixtral_moe_7b_chat = 'openbuddy-mixtral-moe-7b-chat'
     # mistral
     mistral_7b = 'mistral-7b'
+    mistral_7b_v2 = 'mistral-7b-v2'
     mistral_7b_instruct = 'mistral-7b-instruct'
     mistral_7b_instruct_v2 = 'mistral-7b-instruct-v2'
     mixtral_moe_7b = 'mixtral-moe-7b'
@@ -1174,6 +1175,14 @@ def cross_entropy_forward(self, inputs: Tensor,
     requires=['transformers>=4.34'],
     support_flash_attn=True,
     support_vllm=True)
+@register_model(
+    ModelType.mistral_7b_v2,
+    'AI-ModelScope/Mistral-7B-v0.2-hf',
+    LoRATM.llama2,
+    TemplateType.default_generation_bos,
+    requires=['transformers>=4.34'],
+    support_flash_attn=True,
+    support_vllm=True)
 @register_model(
     ModelType.mixtral_moe_7b,
     'AI-ModelScope/Mixtral-8x7B-v0.1',
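
Once this registration is in place, the new model type should be resolvable with the same swift.llm helpers used for the existing Mistral entries. This is a minimal sketch, assuming get_default_template_type, get_model_tokenizer, get_template, and inference are exported by swift.llm at this revision.

# Minimal sketch: loading the newly registered model type
# (assumes these helpers are exported by swift.llm at this revision).
import torch
from swift.llm import (ModelType, get_default_template_type, get_model_tokenizer,
                       get_template, inference)

model_type = ModelType.mistral_7b_v2
template_type = get_default_template_type(model_type)  # 'default-generation-bos' per the registration above

model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16,
                                       model_kwargs={'device_map': 'auto'})
template = get_template(template_type, tokenizer)

query = 'Briefly introduce the Mistral-7B-v0.2 model.'
response, _ = inference(model, template, query)
print(response)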
