Commit 51cd071

Support internlm2 1.8b (modelscope#473)
1 parent 61630a9 commit 51cd071

File tree

9 files changed: +282 -259 lines

README.md
README_CN.md
ROADMAP.md
docs/source/LLM/支持的模型和数据集.md
scripts/utils/run_dataset_info.py
scripts/utils/run_model_info.py
scripts/utils/run_template.py
swift/llm/infer.py
swift/llm/utils/model.py

README.md

Lines changed: 1 addition & 1 deletion
@@ -234,7 +234,7 @@ app_ui_main(infer_args)
 - [yi](https://github.com/01-ai/Yi) series: yi-6b, yi-6b-200k, yi-6b-chat, yi-34b, yi-34b-200k, yi-34b-chat.
 - [internlm](https://github.com/InternLM/InternLM) series:
   - internlm-7b, internlm-7b-chat, internlm-7b-chat-8k, internlm-20b, internlm-20b-chat.
-  - internlm2-7b-base, internlm2-7b, internlm2-7b-sft-chat, internlm2-7b-chat, internlm2-20b-base, internlm2-20b, internlm2-20b-sft-chat, internlm2-20b-chat.
+  - internlm2-1_8b, internlm2-1_8b-sft-chat, internlm2-1_8b-chat, internlm2-7b-base, internlm2-7b, internlm2-7b-sft-chat, internlm2-7b-chat, internlm2-20b-base, internlm2-20b, internlm2-20b-sft-chat, internlm2-20b-chat.
 - [deepseek](https://github.com/deepseek-ai/deepseek-LLM) series: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, deepseek-moe-16b, deepseek-moe-16b-chat.
 - [gemma](https://github.com/google/gemma_pytorch) series: gemma-2b, gemma-2b-instruct, gemma-7b, gemma-7b-instruct.
 - [openbmb-minicpm](https://github.com/OpenBMB/mlc-MiniCPM) series: openbmb-minicpm-2b-sft-chat, openbmb-minicpm-2b-chat.
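
As a quick sanity check of the new README entries, here is a minimal Python sketch of running one of the internlm2-1_8b model types through swift's programmatic interface. `InferArguments` appears in this commit's swift/llm/infer.py hunk; `infer_main` is assumed to be the matching inference entry point exported by swift.llm and is not shown in this diff.

    # Minimal sketch (assumed usage): swift.llm is expected to export
    # InferArguments and infer_main; only InferArguments appears in this diff.
    from swift.llm import InferArguments, infer_main

    # Any of the model types added by this commit should work here.
    infer_args = InferArguments(model_type='internlm2-1_8b-chat')
    infer_main(infer_args)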

README_CN.md

Lines changed: 1 addition & 1 deletion
@@ -233,7 +233,7 @@ app_ui_main(infer_args)
 - [yi](https://github.com/01-ai/Yi) 系列: yi-6b, yi-6b-200k, yi-6b-chat, yi-34b, yi-34b-200k, yi-34b-chat.
 - [internlm](https://github.com/InternLM/InternLM) 系列:
   - internlm-7b, internlm-7b-chat, internlm-7b-chat-8k, internlm-20b, internlm-20b-chat.
-  - internlm2-7b-base, internlm2-7b, internlm2-7b-sft-chat, internlm2-7b-chat, internlm2-20b-base, internlm2-20b, internlm2-20b-sft-chat, internlm2-20b-chat.
+  - internlm2-1_8b, internlm2-1_8b-sft-chat, internlm2-1_8b-chat, internlm2-7b-base, internlm2-7b, internlm2-7b-sft-chat, internlm2-7b-chat, internlm2-20b-base, internlm2-20b, internlm2-20b-sft-chat, internlm2-20b-chat.
 - [deepseek](https://github.com/deepseek-ai/deepseek-LLM) 系列: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, deepseek-moe-16b, deepseek-moe-16b-chat.
 - [gemma](https://github.com/google/gemma_pytorch) 系列: gemma-2b, gemma-2b-instruct, gemma-7b, gemma-7b-instruct.
 - [openbmb-minicpm](https://github.com/OpenBMB/mlc-MiniCPM) 系列: openbmb-minicpm-2b-sft-chat, openbmb-minicpm-2b-chat.

ROADMAP.md

Lines changed: 0 additions & 3 deletions
@@ -50,9 +50,6 @@ The development of SWIFT V1.7 is between Feb/1/2024 and Feb/29/2024 ideally.
 - *Support More LLM Models*:
   - codefuse-ai/CodeFuse-DeepSeek-33B
   - codefuse-ai/CodeFuse-13B
-  - Shanghai_AI_Laboratory/internlm2-1_8b
-  - Shanghai_AI_Laboratory/internlm2-chat-1_8b
-  - Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft
   - 01ai/Yi-34B-Chat-4bits
   - 01ai/Yi-34B-Chat-8bits
   - 01ai/Yi-6B-Chat-4bits

docs/source/LLM/支持的模型和数据集.md

Lines changed: 238 additions & 235 deletions
Large diffs are not rendered by default.

scripts/utils/run_dataset_info.py

Lines changed: 8 additions & 9 deletions
@@ -1,6 +1,4 @@
 import os
-import re
-from typing import List
 
 from datasets import concatenate_datasets
 
@@ -15,7 +13,7 @@ def write_dataset_info() -> None:
     if os.path.exists(fpath):
         with open(fpath, 'r', encoding='utf-8') as f:
             text = f.read()
-        idx = text.find('| | Dataset Name |')
+        idx = text.find('| Dataset Name |')
         pre_text = text[:idx]
         text = text[idx:]
         text_list = [t for t in text.split('\n') if len(t.strip()) > 0]
@@ -25,18 +23,18 @@ def write_dataset_info() -> None:
     res_text_list = []
 
     res_text_list.append(
-        '| | Dataset Name | Dataset ID | Train Size | Val Size | Statistic (token) | Tags |'
+        '| Dataset Name | Dataset ID | Train Size | Val Size | Statistic (token) | Tags |'
     )
     res_text_list.append(
-        '| - | ------------ | ---------- | ---------- | -------- | ----------------- | ---- |'
+        '| ------------ | ---------- | ---------- | -------- | ----------------- | ---- |'
     )
     if len(text_list) >= 2:
         text_list = text_list[2:]
     else:
         text_list = []
 
     ignore_dataset = {
-        text.split('|', 3)[2].lstrip('🔥 '): text
+        text.split('|', 2)[1].lstrip('🔥 '): text
         for text in text_list
     }
     dataset_name_list = DatasetName.get_dataset_name_list()
@@ -53,7 +51,7 @@ def write_dataset_info() -> None:
         template_type = get_default_template_type(model_type)
         template = get_template(template_type, tokenizer)
         mapping[task_type] = template
-    for i, dataset_name in enumerate(dataset_name_list):
+    for dataset_name in dataset_name_list:
         dataset_info = DATASET_MAPPING[dataset_name]
         tags = dataset_info.get('tags', [])
         if 'audio' in tags:
@@ -64,7 +62,7 @@ def write_dataset_info() -> None:
             template = mapping['llm']
         if dataset_name in ignore_dataset:
             train_size, val_size, stat_str = ignore_dataset[
-                dataset_name].split('|')[4:7]
+                dataset_name].split('|')[3:6]
         else:
             train_dataset, val_dataset = get_dataset([dataset_name])
             train_size = len(train_dataset)
@@ -92,8 +90,9 @@ def write_dataset_info() -> None:
         if len(tags_str) == 0:
             tags_str = '-'
         res_text_list.append(
-            f"|{i+1}|{dataset_name}|[{dataset_info['dataset_id_or_path']}]({url})|{train_size}|"
+            f"|{dataset_name}|[{dataset_info['dataset_id_or_path']}]({url})|{train_size}|"
             f'{val_size}|{stat_str}|{tags_str}|')
+    print(f'数据集总数: {len(dataset_name_list)}')
     text = '\n'.join(res_text_list)
     text = pre_text + text + '\n'
     with open(fpath, 'w', encoding='utf-8') as f:
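
The slice changes above ([2] to [1] and [4:7] to [3:6]) follow from dropping the leading index column: each table row loses one cell, so every field shifts left by one position. A self-contained illustration with a made-up row in the same table format:

    # Hypothetical example row in the dataset-table format these scripts emit;
    # the dataset name, sizes and statistics below are made up for illustration.
    old_row = '|1|🔥 example-ds|[org/example-ds](https://example.org)|1000|100|123.4±56.7|chat|'
    new_row = '|🔥 example-ds|[org/example-ds](https://example.org)|1000|100|123.4±56.7|chat|'

    # Old layout: field 0 is the empty string before the first '|', field 1 is
    # the running index, so the name sat at position 2 and the sizes at 4:7.
    assert old_row.split('|', 3)[2].lstrip('🔥 ') == 'example-ds'
    assert old_row.split('|')[4:7] == ['1000', '100', '123.4±56.7']

    # New layout: with the index column removed, everything moves left by one.
    assert new_row.split('|', 2)[1].lstrip('🔥 ') == 'example-ds'
    assert new_row.split('|')[3:6] == ['1000', '100', '123.4±56.7']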

scripts/utils/run_model_info.py

Lines changed: 7 additions & 7 deletions
@@ -1,15 +1,14 @@
-import re
-from typing import Dict, List, Tuple
+from typing import List
 
 from swift.llm import MODEL_MAPPING, ModelType
 
 
 def get_model_info_table() -> List[str]:
     model_name_list = ModelType.get_model_name_list()
     result = (
-        '| | Model Type | Model ID | Default Lora Target Modules | Default Template |'
+        '| Model Type | Model ID | Default Lora Target Modules | Default Template |'
         ' Support Flash Attn | Support VLLM | Requires |\n'
-        '| - | --------- | -------- | --------------------------- | ---------------- |'
+        '| --------- | -------- | --------------------------- | ---------------- |'
         ' ------------------ | ------------ | -------- |\n')
     res: List[str] = []
     bool_mapping = {True: '✔', False: '✘'}
@@ -29,15 +28,16 @@ def get_model_info_table() -> List[str]:
         ]
         res.append(r)
     text = ''
-    for i, r in enumerate(res):
+    for r in res:
         url = f'https://modelscope.cn/models/{r[1]}/summary'
-        text += f'|{i+1}|{r[0]}|[{r[1]}]({url})|{r[2]}|{r[3]}|{r[4]}|{r[5]}|{r[6]}|\n'
+        text += f'|{r[0]}|[{r[1]}]({url})|{r[2]}|{r[3]}|{r[4]}|{r[5]}|{r[6]}|\n'
+    print(f'模型总数: {len(res)}')
     result += text
     #
     fpath = 'docs/source/LLM/支持的模型和数据集.md'
     with open(fpath, 'r') as f:
         text = f.read()
-    start_idx = text.find('| | Model Type |')
+    start_idx = text.find('| Model Type |')
     end_idx = text.find('## 数据集')
     output = text[:start_idx] + result + '\n\n' + text[end_idx:]
     with open(fpath, 'w') as f:
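
Both documentation tables are produced by these scripts, so after registering new models they are meant to be re-run to refresh docs/source/LLM/支持的模型和数据集.md. A plausible regeneration flow, assuming the scripts are executed from the repository root and run their table writers when invoked directly (this diff does not show their __main__ sections):

    # Assumed regeneration flow; the paths are the ones referenced in this commit.
    import subprocess

    for script in ('scripts/utils/run_model_info.py',
                   'scripts/utils/run_dataset_info.py'):
        # Each script rewrites its table in docs/source/LLM/支持的模型和数据集.md in place.
        subprocess.run(['python', script], check=True)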

scripts/utils/run_template.py

Lines changed: 0 additions & 2 deletions
@@ -1,5 +1,3 @@
-from typing import Dict, List, Tuple
-
 from swift.llm import TemplateType
 
 if __name__ == '__main__':

swift/llm/infer.py

Lines changed: 1 addition & 1 deletion
@@ -224,7 +224,7 @@ def llm_infer(args: InferArguments) -> None:
     if args.merge_lora:
         merge_lora(args, device_map='cpu')
     if args.infer_backend == 'vllm':
-        from swift.llm import prepare_vllm_engine_template, inference_stream_vllm, inference_vllm
+        from .utils import prepare_vllm_engine_template, inference_stream_vllm, inference_vllm
         llm_engine, template = prepare_vllm_engine_template(args)
     else:
         model, template = prepare_model_template(args)

swift/llm/utils/model.py

Lines changed: 26 additions & 0 deletions
@@ -116,6 +116,9 @@ class ModelType:
     internlm_20b = 'internlm-20b'
     internlm_20b_chat = 'internlm-20b-chat'
     # internlm2
+    internlm2_1_8b = 'internlm2-1_8b'
+    internlm2_1_8b_sft_chat = 'internlm2-1_8b-sft-chat'
+    internlm2_1_8b_chat = 'internlm2-1_8b-chat'
     internlm2_7b_base = 'internlm2-7b-base'
     internlm2_7b = 'internlm2-7b'
     internlm2_7b_sft_chat = 'internlm2-7b-sft-chat'
@@ -1291,6 +1294,29 @@ def get_model_tokenizer_qwen1half_intx(model_dir: str,
                                         load_model, **kwargs)
 
 
+@register_model(
+    ModelType.internlm2_1_8b,
+    'Shanghai_AI_Laboratory/internlm2-1_8b',
+    LoRATM.internlm2,
+    TemplateType.default_generation_bos,
+    support_flash_attn=True,
+    support_vllm=True)
+@register_model(
+    ModelType.internlm2_1_8b_sft_chat,
+    'Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft',
+    LoRATM.internlm2,
+    TemplateType.internlm2,
+    eos_token='<|im_end|>',
+    support_flash_attn=True,
+    support_vllm=True)
+@register_model(
+    ModelType.internlm2_1_8b_chat,
+    'Shanghai_AI_Laboratory/internlm2-chat-1_8b',
+    LoRATM.internlm2,
+    TemplateType.internlm2,
+    eos_token='<|im_end|>',
+    support_flash_attn=True,
+    support_vllm=True)
 @register_model(
     ModelType.internlm2_math_7b,
     'Shanghai_AI_Laboratory/internlm2-math-base-7b',
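
Once registered, the three new model types resolve through the usual swift.llm helpers. A minimal sketch: ModelType.internlm2_1_8b_chat and TemplateType.internlm2 come from this diff, get_default_template_type and get_template appear in the run_dataset_info.py hunk above, while get_model_tokenizer and inference are assumed to be the standard swift.llm loading and inference helpers.

    # Minimal sketch, assuming the standard swift.llm helper functions.
    from swift.llm import (ModelType, get_default_template_type,
                           get_model_tokenizer, get_template, inference)

    model_type = ModelType.internlm2_1_8b_chat
    # Resolves to Shanghai_AI_Laboratory/internlm2-chat-1_8b as registered above;
    # the chat variants stop generation on the '<|im_end|>' eos token.
    model, tokenizer = get_model_tokenizer(model_type)
    template = get_template(get_default_template_type(model_type), tokenizer)
    response, history = inference(model, template, 'Hello!')
    print(response)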
